From: Sapan Bhatia Date: Sun, 23 Dec 2007 09:12:48 +0000 (+0000) Subject: got the thing to boot X-Git-Tag: trellis-2.6.22-Jan-2009~76 X-Git-Url: http://git.onelab.eu/?p=linux-2.6.git;a=commitdiff_plain;h=b00ca60df2e5668c1207d1987628baca277e5b59 got the thing to boot --- diff --git a/trellis.patch b/trellis.patch new file mode 100644 index 000000000..dbfbe5fc4 --- /dev/null +++ b/trellis.patch @@ -0,0 +1,208007 @@ +diff -Nurb linux-2.6.22-570/.config.orig linux-2.6.22-591/.config.orig +--- linux-2.6.22-570/.config.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/.config.orig 2007-12-21 15:54:46.000000000 -0500 +@@ -0,0 +1,1693 @@ ++# ++# Automatically generated make config: don't edit ++# Linux kernel version: 2.6.22-prep ++# Fri Dec 21 15:54:46 2007 ++# ++CONFIG_X86_32=y ++CONFIG_GENERIC_TIME=y ++CONFIG_CLOCKSOURCE_WATCHDOG=y ++CONFIG_GENERIC_CLOCKEVENTS=y ++CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y ++CONFIG_LOCKDEP_SUPPORT=y ++CONFIG_STACKTRACE_SUPPORT=y ++CONFIG_SEMAPHORE_SLEEPERS=y ++CONFIG_X86=y ++CONFIG_MMU=y ++CONFIG_ZONE_DMA=y ++CONFIG_QUICKLIST=y ++CONFIG_GENERIC_ISA_DMA=y ++CONFIG_GENERIC_IOMAP=y ++CONFIG_GENERIC_BUG=y ++CONFIG_GENERIC_HWEIGHT=y ++CONFIG_ARCH_MAY_HAVE_PC_FDC=y ++CONFIG_DMI=y ++CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" ++ ++# ++# Code maturity level options ++# ++CONFIG_EXPERIMENTAL=y ++CONFIG_LOCK_KERNEL=y ++CONFIG_INIT_ENV_ARG_LIMIT=32 ++ ++# ++# General setup ++# ++CONFIG_LOCALVERSION="" ++CONFIG_LOCALVERSION_AUTO=y ++CONFIG_SWAP=y ++CONFIG_SYSVIPC=y ++CONFIG_SYSVIPC_SYSCTL=y ++CONFIG_POSIX_MQUEUE=y ++# CONFIG_BSD_PROCESS_ACCT is not set ++# CONFIG_TASKSTATS is not set ++# CONFIG_USER_NS is not set ++# CONFIG_AUDIT is not set ++CONFIG_IKCONFIG=y ++CONFIG_IKCONFIG_PROC=y ++CONFIG_LOG_BUF_SHIFT=18 ++CONFIG_OOM_PANIC=y ++# CONFIG_CONTAINER_DEBUG is not set ++# CONFIG_CPUSETS is not set ++CONFIG_SYSFS_DEPRECATED=y ++# CONFIG_CONTAINER_CPUACCT is not set ++# CONFIG_CONTAINER_NS is not set ++# CONFIG_RELAY is not set ++CONFIG_BLK_DEV_INITRD=y ++CONFIG_INITRAMFS_SOURCE="" ++CONFIG_CC_OPTIMIZE_FOR_SIZE=y ++CONFIG_SYSCTL=y ++# CONFIG_EMBEDDED is not set ++CONFIG_UID16=y ++CONFIG_SYSCTL_SYSCALL=y ++CONFIG_KALLSYMS=y ++CONFIG_KALLSYMS_ALL=y ++# CONFIG_KALLSYMS_EXTRA_PASS is not set ++CONFIG_HOTPLUG=y ++CONFIG_PRINTK=y ++CONFIG_BUG=y ++CONFIG_ELF_CORE=y ++CONFIG_BASE_FULL=y ++CONFIG_FUTEX=y ++CONFIG_ANON_INODES=y ++CONFIG_EPOLL=y ++CONFIG_SIGNALFD=y ++CONFIG_EVENTFD=y ++CONFIG_SHMEM=y ++CONFIG_VM_EVENT_COUNTERS=y ++CONFIG_SLAB=y ++# CONFIG_SLUB is not set ++# CONFIG_SLOB is not set ++CONFIG_PROC_SMAPS=y ++CONFIG_PROC_CLEAR_REFS=y ++CONFIG_PROC_PAGEMAP=y ++CONFIG_RT_MUTEXES=y ++# CONFIG_TINY_SHMEM is not set ++CONFIG_BASE_SMALL=0 ++CONFIG_PAGE_GROUP_BY_MOBILITY=y ++ ++# ++# Loadable module support ++# ++CONFIG_MODULES=y ++CONFIG_MODULE_UNLOAD=y ++CONFIG_MODULE_FORCE_UNLOAD=y ++# CONFIG_MODVERSIONS is not set ++# CONFIG_MODULE_SRCVERSION_ALL is not set ++# CONFIG_KMOD is not set ++CONFIG_STOP_MACHINE=y ++ ++# ++# Block layer ++# ++CONFIG_BLOCK=y ++CONFIG_LBD=y ++# CONFIG_BLK_DEV_IO_TRACE is not set ++# CONFIG_LSF is not set ++ ++# ++# IO Schedulers ++# ++CONFIG_IOSCHED_NOOP=y ++CONFIG_IOSCHED_AS=y ++CONFIG_IOSCHED_DEADLINE=y ++CONFIG_IOSCHED_CFQ=y ++CONFIG_DEFAULT_AS=y ++# CONFIG_DEFAULT_DEADLINE is not set ++# CONFIG_DEFAULT_CFQ is not set ++# CONFIG_DEFAULT_NOOP is not set ++CONFIG_DEFAULT_IOSCHED="anticipatory" ++ ++# ++# Processor type and features ++# ++# CONFIG_TICK_ONESHOT is not set ++# CONFIG_NO_HZ is not set ++# CONFIG_HIGH_RES_TIMERS is 
not set ++CONFIG_SMP=y ++# CONFIG_X86_PC is not set ++# CONFIG_X86_ELAN is not set ++# CONFIG_X86_VOYAGER is not set ++# CONFIG_X86_NUMAQ is not set ++# CONFIG_X86_SUMMIT is not set ++# CONFIG_X86_BIGSMP is not set ++# CONFIG_X86_VISWS is not set ++CONFIG_X86_GENERICARCH=y ++# CONFIG_X86_ES7000 is not set ++# CONFIG_PARAVIRT is not set ++CONFIG_X86_CYCLONE_TIMER=y ++# CONFIG_M386 is not set ++# CONFIG_M486 is not set ++# CONFIG_M586 is not set ++# CONFIG_M586TSC is not set ++# CONFIG_M586MMX is not set ++# CONFIG_M686 is not set ++# CONFIG_MPENTIUMII is not set ++CONFIG_MPENTIUMIII=y ++# CONFIG_MPENTIUMM is not set ++# CONFIG_MCORE2 is not set ++# CONFIG_MPENTIUM4 is not set ++# CONFIG_MK6 is not set ++# CONFIG_MK7 is not set ++# CONFIG_MK8 is not set ++# CONFIG_MCRUSOE is not set ++# CONFIG_MEFFICEON is not set ++# CONFIG_MWINCHIPC6 is not set ++# CONFIG_MWINCHIP2 is not set ++# CONFIG_MWINCHIP3D is not set ++# CONFIG_MGEODEGX1 is not set ++# CONFIG_MGEODE_LX is not set ++# CONFIG_MCYRIXIII is not set ++# CONFIG_MVIAC3_2 is not set ++# CONFIG_MVIAC7 is not set ++CONFIG_X86_GENERIC=y ++CONFIG_X86_CMPXCHG=y ++CONFIG_X86_L1_CACHE_SHIFT=7 ++CONFIG_X86_XADD=y ++CONFIG_RWSEM_XCHGADD_ALGORITHM=y ++# CONFIG_ARCH_HAS_ILOG2_U32 is not set ++# CONFIG_ARCH_HAS_ILOG2_U64 is not set ++CONFIG_GENERIC_CALIBRATE_DELAY=y ++CONFIG_X86_WP_WORKS_OK=y ++CONFIG_X86_INVLPG=y ++CONFIG_X86_BSWAP=y ++CONFIG_X86_POPAD_OK=y ++CONFIG_X86_GOOD_APIC=y ++CONFIG_X86_INTEL_USERCOPY=y ++CONFIG_X86_USE_PPRO_CHECKSUM=y ++CONFIG_X86_TSC=y ++CONFIG_X86_CMOV=y ++CONFIG_X86_MINIMUM_CPU_MODEL=4 ++CONFIG_HPET_TIMER=y ++CONFIG_HPET_EMULATE_RTC=y ++CONFIG_NR_CPUS=32 ++CONFIG_SCHED_SMT=y ++CONFIG_SCHED_MC=y ++# CONFIG_PREEMPT_NONE is not set ++CONFIG_PREEMPT_VOLUNTARY=y ++# CONFIG_PREEMPT is not set ++CONFIG_PREEMPT_BKL=y ++CONFIG_X86_LOCAL_APIC=y ++CONFIG_X86_IO_APIC=y ++CONFIG_X86_MCE=y ++CONFIG_X86_MCE_NONFATAL=y ++CONFIG_X86_MCE_P4THERMAL=y ++CONFIG_VM86=y ++# CONFIG_TOSHIBA is not set ++# CONFIG_I8K is not set ++# CONFIG_X86_REBOOTFIXUPS is not set ++CONFIG_MICROCODE=y ++CONFIG_MICROCODE_OLD_INTERFACE=y ++CONFIG_X86_MSR=y ++CONFIG_X86_CPUID=y ++ ++# ++# Firmware Drivers ++# ++# CONFIG_EDD is not set ++# CONFIG_DELL_RBU is not set ++# CONFIG_DCDBAS is not set ++# CONFIG_NOHIGHMEM is not set ++CONFIG_HIGHMEM4G=y ++# CONFIG_HIGHMEM64G is not set ++CONFIG_PAGE_OFFSET=0xC0000000 ++CONFIG_HIGHMEM=y ++CONFIG_ARCH_POPULATES_NODE_MAP=y ++CONFIG_SELECT_MEMORY_MODEL=y ++CONFIG_FLATMEM_MANUAL=y ++# CONFIG_DISCONTIGMEM_MANUAL is not set ++# CONFIG_SPARSEMEM_MANUAL is not set ++CONFIG_FLATMEM=y ++CONFIG_FLAT_NODE_MEM_MAP=y ++# CONFIG_SPARSEMEM_STATIC is not set ++CONFIG_SPLIT_PTLOCK_CPUS=4 ++CONFIG_RESOURCES_64BIT=y ++CONFIG_ZONE_DMA_FLAG=1 ++CONFIG_NR_QUICK=1 ++# CONFIG_HIGHPTE is not set ++# CONFIG_MATH_EMULATION is not set ++CONFIG_MTRR=y ++# CONFIG_EFI is not set ++# CONFIG_IRQBALANCE is not set ++CONFIG_SECCOMP=y ++# CONFIG_HZ_100 is not set ++CONFIG_HZ_250=y ++# CONFIG_HZ_300 is not set ++# CONFIG_HZ_1000 is not set ++CONFIG_HZ=250 ++CONFIG_KEXEC=y ++# CONFIG_CRASH_DUMP is not set ++CONFIG_PHYSICAL_START=0x100000 ++# CONFIG_RELOCATABLE is not set ++CONFIG_PHYSICAL_ALIGN=0x100000 ++# CONFIG_HOTPLUG_CPU is not set ++CONFIG_COMPAT_VDSO=y ++CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y ++ ++# ++# Power management options (ACPI, APM) ++# ++CONFIG_PM=y ++CONFIG_PM_LEGACY=y ++# CONFIG_PM_DEBUG is not set ++CONFIG_PM_SYSFS_DEPRECATED=y ++ ++# ++# ACPI (Advanced Configuration and Power Interface) Support ++# ++CONFIG_ACPI=y ++CONFIG_ACPI_PROCFS=y 
++CONFIG_ACPI_AC=y ++CONFIG_ACPI_BATTERY=y ++CONFIG_ACPI_BUTTON=y ++CONFIG_ACPI_FAN=y ++# CONFIG_ACPI_DOCK is not set ++CONFIG_ACPI_PROCESSOR=y ++CONFIG_ACPI_THERMAL=y ++# CONFIG_ACPI_ASUS is not set ++# CONFIG_ACPI_TOSHIBA is not set ++CONFIG_ACPI_BLACKLIST_YEAR=2001 ++CONFIG_ACPI_DEBUG=y ++# CONFIG_ACPI_DEBUG_FUNC_TRACE is not set ++CONFIG_ACPI_EC=y ++CONFIG_ACPI_POWER=y ++CONFIG_ACPI_SYSTEM=y ++CONFIG_X86_PM_TIMER=y ++# CONFIG_ACPI_CONTAINER is not set ++# CONFIG_ACPI_SBS is not set ++# CONFIG_APM is not set ++ ++# ++# CPU Frequency scaling ++# ++CONFIG_CPU_FREQ=y ++CONFIG_CPU_FREQ_TABLE=y ++CONFIG_CPU_FREQ_DEBUG=y ++CONFIG_CPU_FREQ_STAT=y ++# CONFIG_CPU_FREQ_STAT_DETAILS is not set ++CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y ++# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set ++CONFIG_CPU_FREQ_GOV_PERFORMANCE=y ++# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set ++CONFIG_CPU_FREQ_GOV_USERSPACE=y ++CONFIG_CPU_FREQ_GOV_ONDEMAND=y ++# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set ++ ++# ++# CPUFreq processor drivers ++# ++CONFIG_X86_ACPI_CPUFREQ=y ++# CONFIG_X86_POWERNOW_K6 is not set ++# CONFIG_X86_POWERNOW_K7 is not set ++CONFIG_X86_POWERNOW_K8=y ++CONFIG_X86_POWERNOW_K8_ACPI=y ++# CONFIG_X86_GX_SUSPMOD is not set ++# CONFIG_X86_SPEEDSTEP_CENTRINO is not set ++# CONFIG_X86_SPEEDSTEP_ICH is not set ++# CONFIG_X86_SPEEDSTEP_SMI is not set ++# CONFIG_X86_P4_CLOCKMOD is not set ++# CONFIG_X86_CPUFREQ_NFORCE2 is not set ++# CONFIG_X86_LONGRUN is not set ++# CONFIG_X86_LONGHAUL is not set ++# CONFIG_X86_E_POWERSAVER is not set ++ ++# ++# shared options ++# ++CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y ++# CONFIG_X86_SPEEDSTEP_LIB is not set ++ ++# ++# CPU idle PM support ++# ++# CONFIG_CPU_IDLE is not set ++ ++# ++# Bus options (PCI, PCMCIA, EISA, MCA, ISA) ++# ++CONFIG_PCI=y ++# CONFIG_PCI_GOBIOS is not set ++# CONFIG_PCI_GOMMCONFIG is not set ++# CONFIG_PCI_GODIRECT is not set ++CONFIG_PCI_GOANY=y ++CONFIG_PCI_BIOS=y ++CONFIG_PCI_DIRECT=y ++CONFIG_PCI_MMCONFIG=y ++# CONFIG_PCIEPORTBUS is not set ++CONFIG_ARCH_SUPPORTS_MSI=y ++CONFIG_PCI_MSI=y ++# CONFIG_PCI_DEBUG is not set ++# CONFIG_HT_IRQ is not set ++CONFIG_ISA_DMA_API=y ++# CONFIG_ISA is not set ++# CONFIG_MCA is not set ++# CONFIG_SCx200 is not set ++ ++# ++# PCCARD (PCMCIA/CardBus) support ++# ++# CONFIG_PCCARD is not set ++# CONFIG_HOTPLUG_PCI is not set ++ ++# ++# Executable file formats ++# ++CONFIG_BINFMT_ELF=y ++# CONFIG_BINFMT_AOUT is not set ++# CONFIG_BINFMT_MISC is not set ++ ++# ++# Networking ++# ++CONFIG_NET=y ++ ++# ++# Networking options ++# ++# CONFIG_NET_NS is not set ++CONFIG_PACKET=y ++# CONFIG_PACKET_MMAP is not set ++CONFIG_UNIX=y ++CONFIG_XFRM=y ++# CONFIG_XFRM_USER is not set ++# CONFIG_XFRM_SUB_POLICY is not set ++# CONFIG_XFRM_MIGRATE is not set ++# CONFIG_NET_KEY is not set ++CONFIG_INET=y ++CONFIG_IP_MULTICAST=y ++# CONFIG_IP_ADVANCED_ROUTER is not set ++CONFIG_IP_FIB_HASH=y ++CONFIG_IP_PNP=y ++CONFIG_IP_PNP_DHCP=y ++# CONFIG_IP_PNP_BOOTP is not set ++# CONFIG_IP_PNP_RARP is not set ++# CONFIG_NET_IPIP is not set ++# CONFIG_NET_IPGRE is not set ++# CONFIG_IP_MROUTE is not set ++# CONFIG_ARPD is not set ++# CONFIG_SYN_COOKIES is not set ++# CONFIG_INET_AH is not set ++# CONFIG_INET_ESP is not set ++# CONFIG_INET_IPCOMP is not set ++# CONFIG_INET_XFRM_TUNNEL is not set ++CONFIG_INET_TUNNEL=y ++CONFIG_INET_XFRM_MODE_TRANSPORT=y ++CONFIG_INET_XFRM_MODE_TUNNEL=y ++# CONFIG_INET_XFRM_MODE_BEET is not set ++CONFIG_INET_DIAG=y ++CONFIG_INET_TCP_DIAG=y ++# CONFIG_TCP_CONG_ADVANCED is not set ++CONFIG_TCP_CONG_CUBIC=y 
++CONFIG_DEFAULT_TCP_CONG="cubic" ++# CONFIG_TCP_MD5SIG is not set ++# CONFIG_IP_VS is not set ++# CONFIG_ICMP_IPOD is not set ++CONFIG_IPV6=y ++# CONFIG_IPV6_PRIVACY is not set ++# CONFIG_IPV6_ROUTER_PREF is not set ++# CONFIG_IPV6_OPTIMISTIC_DAD is not set ++# CONFIG_INET6_AH is not set ++# CONFIG_INET6_ESP is not set ++# CONFIG_INET6_IPCOMP is not set ++# CONFIG_IPV6_MIP6 is not set ++# CONFIG_INET6_XFRM_TUNNEL is not set ++# CONFIG_INET6_TUNNEL is not set ++CONFIG_INET6_XFRM_MODE_TRANSPORT=y ++CONFIG_INET6_XFRM_MODE_TUNNEL=y ++# CONFIG_INET6_XFRM_MODE_BEET is not set ++# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set ++CONFIG_IPV6_SIT=y ++# CONFIG_IPV6_TUNNEL is not set ++# CONFIG_IPV6_MULTIPLE_TABLES is not set ++# CONFIG_NETWORK_SECMARK is not set ++CONFIG_NETFILTER=y ++# CONFIG_NETFILTER_DEBUG is not set ++ ++# ++# Core Netfilter Configuration ++# ++# CONFIG_NETFILTER_NETLINK is not set ++CONFIG_NF_CONNTRACK_ENABLED=m ++CONFIG_NF_CONNTRACK=m ++# CONFIG_NF_CT_ACCT is not set ++# CONFIG_NF_CONNTRACK_MARK is not set ++# CONFIG_NF_CONNTRACK_EVENTS is not set ++# CONFIG_NF_CT_PROTO_SCTP is not set ++# CONFIG_NF_CONNTRACK_AMANDA is not set ++# CONFIG_NF_CONNTRACK_FTP is not set ++# CONFIG_NF_CONNTRACK_H323 is not set ++# CONFIG_NF_CONNTRACK_IRC is not set ++# CONFIG_NF_CONNTRACK_NETBIOS_NS is not set ++# CONFIG_NF_CONNTRACK_PPTP is not set ++# CONFIG_NF_CONNTRACK_SANE is not set ++# CONFIG_NF_CONNTRACK_SIP is not set ++# CONFIG_NF_CONNTRACK_TFTP is not set ++CONFIG_NETFILTER_XTABLES=m ++# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set ++# CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set ++# CONFIG_NETFILTER_XT_TARGET_DSCP is not set ++# CONFIG_NETFILTER_XT_TARGET_MARK is not set ++# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set ++# CONFIG_NETFILTER_XT_TARGET_NFLOG is not set ++# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set ++# CONFIG_NETFILTER_XT_TARGET_SETXID is not set ++# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set ++# CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set ++# CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set ++# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set ++# CONFIG_NETFILTER_XT_MATCH_DCCP is not set ++# CONFIG_NETFILTER_XT_MATCH_DSCP is not set ++# CONFIG_NETFILTER_XT_MATCH_ESP is not set ++# CONFIG_NETFILTER_XT_MATCH_HELPER is not set ++# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set ++# CONFIG_NETFILTER_XT_MATCH_LIMIT is not set ++# CONFIG_NETFILTER_XT_MATCH_MAC is not set ++# CONFIG_NETFILTER_XT_MATCH_MARK is not set ++# CONFIG_NETFILTER_XT_MATCH_POLICY is not set ++# CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set ++# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set ++# CONFIG_NETFILTER_XT_MATCH_QUOTA is not set ++# CONFIG_NETFILTER_XT_MATCH_REALM is not set ++# CONFIG_NETFILTER_XT_MATCH_SCTP is not set ++# CONFIG_NETFILTER_XT_MATCH_STATE is not set ++# CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set ++# CONFIG_NETFILTER_XT_MATCH_STRING is not set ++# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set ++# CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set ++ ++# ++# IP: Netfilter Configuration ++# ++CONFIG_NF_CONNTRACK_IPV4=m ++CONFIG_NF_CONNTRACK_PROC_COMPAT=y ++# CONFIG_IP_NF_QUEUE is not set ++CONFIG_IP_NF_IPTABLES=m ++# CONFIG_IP_NF_MATCH_IPRANGE is not set ++# CONFIG_IP_NF_MATCH_TOS is not set ++# CONFIG_IP_NF_MATCH_RECENT is not set ++# CONFIG_IP_NF_MATCH_ECN is not set ++# CONFIG_IP_NF_MATCH_AH is not set ++# CONFIG_IP_NF_MATCH_TTL is not set ++# CONFIG_IP_NF_MATCH_OWNER is not set ++# CONFIG_IP_NF_MATCH_ADDRTYPE is not set ++CONFIG_IP_NF_FILTER=m ++# CONFIG_IP_NF_TARGET_REJECT 
is not set ++# CONFIG_IP_NF_TARGET_LOG is not set ++# CONFIG_IP_NF_TARGET_ULOG is not set ++CONFIG_NF_NAT=m ++CONFIG_NF_NAT_NEEDED=y ++# CONFIG_IP_NF_TARGET_MASQUERADE is not set ++# CONFIG_IP_NF_TARGET_REDIRECT is not set ++# CONFIG_IP_NF_TARGET_NETMAP is not set ++# CONFIG_IP_NF_TARGET_SAME is not set ++# CONFIG_NF_NAT_SNMP_BASIC is not set ++# CONFIG_NF_NAT_FTP is not set ++# CONFIG_NF_NAT_IRC is not set ++# CONFIG_NF_NAT_TFTP is not set ++# CONFIG_NF_NAT_AMANDA is not set ++# CONFIG_NF_NAT_PPTP is not set ++# CONFIG_NF_NAT_H323 is not set ++# CONFIG_NF_NAT_SIP is not set ++CONFIG_IP_NF_MANGLE=m ++# CONFIG_IP_NF_TARGET_TOS is not set ++# CONFIG_IP_NF_TARGET_ECN is not set ++# CONFIG_IP_NF_TARGET_TTL is not set ++# CONFIG_IP_NF_TARGET_CLUSTERIP is not set ++# CONFIG_IP_NF_RAW is not set ++# CONFIG_IP_NF_ARPTABLES is not set ++# CONFIG_IP_NF_SET is not set ++ ++# ++# IPv6: Netfilter Configuration (EXPERIMENTAL) ++# ++# CONFIG_NF_CONNTRACK_IPV6 is not set ++# CONFIG_IP6_NF_QUEUE is not set ++# CONFIG_IP6_NF_IPTABLES is not set ++# CONFIG_IP_DCCP is not set ++# CONFIG_IP_SCTP is not set ++# CONFIG_TIPC is not set ++# CONFIG_ATM is not set ++# CONFIG_BRIDGE is not set ++# CONFIG_VLAN_8021Q is not set ++# CONFIG_DECNET is not set ++# CONFIG_LLC2 is not set ++# CONFIG_IPX is not set ++# CONFIG_ATALK is not set ++# CONFIG_X25 is not set ++# CONFIG_LAPB is not set ++# CONFIG_ECONET is not set ++# CONFIG_WAN_ROUTER is not set ++ ++# ++# QoS and/or fair queueing ++# ++# CONFIG_NET_SCHED is not set ++ ++# ++# Network testing ++# ++CONFIG_NET_PKTGEN=m ++# CONFIG_NET_TCPPROBE is not set ++# CONFIG_HAMRADIO is not set ++# CONFIG_IRDA is not set ++# CONFIG_BT is not set ++# CONFIG_AF_RXRPC is not set ++ ++# ++# Wireless ++# ++# CONFIG_CFG80211 is not set ++# CONFIG_WIRELESS_EXT is not set ++# CONFIG_MAC80211 is not set ++# CONFIG_IEEE80211 is not set ++# CONFIG_RFKILL is not set ++ ++# ++# Device Drivers ++# ++ ++# ++# Generic Driver Options ++# ++CONFIG_STANDALONE=y ++CONFIG_PREVENT_FIRMWARE_BUILD=y ++CONFIG_FW_LOADER=y ++# CONFIG_DEBUG_DRIVER is not set ++# CONFIG_DEBUG_DEVRES is not set ++# CONFIG_SYS_HYPERVISOR is not set ++ ++# ++# Connector - unified userspace <-> kernelspace linker ++# ++CONFIG_CONNECTOR=m ++# CONFIG_MTD is not set ++ ++# ++# Parallel port support ++# ++# CONFIG_PARPORT is not set ++ ++# ++# Plug and Play support ++# ++CONFIG_PNP=y ++# CONFIG_PNP_DEBUG is not set ++ ++# ++# Protocols ++# ++CONFIG_PNPACPI=y ++ ++# ++# Block devices ++# ++CONFIG_BLK_DEV_FD=y ++# CONFIG_BLK_CPQ_DA is not set ++# CONFIG_BLK_CPQ_CISS_DA is not set ++# CONFIG_BLK_DEV_DAC960 is not set ++# CONFIG_BLK_DEV_UMEM is not set ++# CONFIG_BLK_DEV_COW_COMMON is not set ++CONFIG_BLK_DEV_LOOP=y ++# CONFIG_BLK_DEV_CRYPTOLOOP is not set ++# CONFIG_BLK_DEV_NBD is not set ++# CONFIG_BLK_DEV_SX8 is not set ++# CONFIG_BLK_DEV_UB is not set ++CONFIG_BLK_DEV_RAM=y ++CONFIG_BLK_DEV_RAM_COUNT=16 ++CONFIG_BLK_DEV_RAM_SIZE=4096 ++CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 ++# CONFIG_CDROM_PKTCDVD is not set ++# CONFIG_ATA_OVER_ETH is not set ++ ++# ++# Misc devices ++# ++# CONFIG_IBM_ASM is not set ++# CONFIG_PHANTOM is not set ++# CONFIG_SGI_IOC4 is not set ++# CONFIG_TIFM_CORE is not set ++# CONFIG_SONY_LAPTOP is not set ++# CONFIG_THINKPAD_ACPI is not set ++CONFIG_IDE=y ++CONFIG_BLK_DEV_IDE=y ++ ++# ++# Please see Documentation/ide.txt for help/info on IDE drives ++# ++# CONFIG_BLK_DEV_IDE_SATA is not set ++# CONFIG_BLK_DEV_HD_IDE is not set ++CONFIG_BLK_DEV_IDEDISK=y ++CONFIG_IDEDISK_MULTI_MODE=y ++CONFIG_BLK_DEV_IDECD=y ++# 
CONFIG_BLK_DEV_IDETAPE is not set ++# CONFIG_BLK_DEV_IDEFLOPPY is not set ++# CONFIG_BLK_DEV_IDESCSI is not set ++# CONFIG_BLK_DEV_IDEACPI is not set ++# CONFIG_IDE_TASK_IOCTL is not set ++CONFIG_IDE_PROC_FS=y ++ ++# ++# IDE chipset support/bugfixes ++# ++CONFIG_IDE_GENERIC=y ++# CONFIG_BLK_DEV_CMD640 is not set ++# CONFIG_BLK_DEV_IDEPNP is not set ++CONFIG_BLK_DEV_IDEPCI=y ++# CONFIG_IDEPCI_SHARE_IRQ is not set ++CONFIG_IDEPCI_PCIBUS_ORDER=y ++# CONFIG_BLK_DEV_OFFBOARD is not set ++# CONFIG_BLK_DEV_GENERIC is not set ++# CONFIG_BLK_DEV_OPTI621 is not set ++# CONFIG_BLK_DEV_RZ1000 is not set ++CONFIG_BLK_DEV_IDEDMA_PCI=y ++# CONFIG_BLK_DEV_IDEDMA_FORCED is not set ++# CONFIG_IDEDMA_ONLYDISK is not set ++# CONFIG_BLK_DEV_AEC62XX is not set ++# CONFIG_BLK_DEV_ALI15X3 is not set ++CONFIG_BLK_DEV_AMD74XX=y ++# CONFIG_BLK_DEV_ATIIXP is not set ++# CONFIG_BLK_DEV_CMD64X is not set ++# CONFIG_BLK_DEV_TRIFLEX is not set ++# CONFIG_BLK_DEV_CY82C693 is not set ++# CONFIG_BLK_DEV_CS5520 is not set ++# CONFIG_BLK_DEV_CS5530 is not set ++# CONFIG_BLK_DEV_CS5535 is not set ++# CONFIG_BLK_DEV_HPT34X is not set ++# CONFIG_BLK_DEV_HPT366 is not set ++# CONFIG_BLK_DEV_JMICRON is not set ++# CONFIG_BLK_DEV_SC1200 is not set ++CONFIG_BLK_DEV_PIIX=y ++# CONFIG_BLK_DEV_IT8213 is not set ++# CONFIG_BLK_DEV_IT821X is not set ++# CONFIG_BLK_DEV_NS87415 is not set ++# CONFIG_BLK_DEV_PDC202XX_OLD is not set ++# CONFIG_BLK_DEV_PDC202XX_NEW is not set ++# CONFIG_BLK_DEV_SVWKS is not set ++# CONFIG_BLK_DEV_SIIMAGE is not set ++# CONFIG_BLK_DEV_SIS5513 is not set ++# CONFIG_BLK_DEV_SLC90E66 is not set ++# CONFIG_BLK_DEV_TRM290 is not set ++# CONFIG_BLK_DEV_VIA82CXXX is not set ++# CONFIG_BLK_DEV_TC86C001 is not set ++# CONFIG_IDE_ARM is not set ++CONFIG_BLK_DEV_IDEDMA=y ++# CONFIG_IDEDMA_IVB is not set ++# CONFIG_BLK_DEV_HD is not set ++ ++# ++# SCSI device support ++# ++# CONFIG_RAID_ATTRS is not set ++CONFIG_SCSI=y ++# CONFIG_SCSI_TGT is not set ++CONFIG_SCSI_NETLINK=y ++# CONFIG_SCSI_PROC_FS is not set ++ ++# ++# SCSI support type (disk, tape, CD-ROM) ++# ++CONFIG_BLK_DEV_SD=y ++# CONFIG_CHR_DEV_ST is not set ++# CONFIG_CHR_DEV_OSST is not set ++CONFIG_BLK_DEV_SR=y ++# CONFIG_BLK_DEV_SR_VENDOR is not set ++CONFIG_CHR_DEV_SG=y ++# CONFIG_CHR_DEV_SCH is not set ++ ++# ++# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs ++# ++# CONFIG_SCSI_MULTI_LUN is not set ++# CONFIG_SCSI_CONSTANTS is not set ++# CONFIG_SCSI_LOGGING is not set ++# CONFIG_SCSI_SCAN_ASYNC is not set ++CONFIG_SCSI_WAIT_SCAN=m ++ ++# ++# SCSI Transports ++# ++CONFIG_SCSI_SPI_ATTRS=y ++CONFIG_SCSI_FC_ATTRS=y ++# CONFIG_SCSI_ISCSI_ATTRS is not set ++# CONFIG_SCSI_SAS_ATTRS is not set ++# CONFIG_SCSI_SAS_LIBSAS is not set ++ ++# ++# SCSI low-level drivers ++# ++# CONFIG_ISCSI_TCP is not set ++CONFIG_BLK_DEV_3W_XXXX_RAID=y ++# CONFIG_SCSI_3W_9XXX is not set ++# CONFIG_SCSI_ACARD is not set ++# CONFIG_SCSI_AACRAID is not set ++CONFIG_SCSI_AIC7XXX=y ++CONFIG_AIC7XXX_CMDS_PER_DEVICE=32 ++CONFIG_AIC7XXX_RESET_DELAY_MS=5000 ++CONFIG_AIC7XXX_DEBUG_ENABLE=y ++CONFIG_AIC7XXX_DEBUG_MASK=0 ++CONFIG_AIC7XXX_REG_PRETTY_PRINT=y ++# CONFIG_SCSI_AIC7XXX_OLD is not set ++CONFIG_SCSI_AIC79XX=y ++CONFIG_AIC79XX_CMDS_PER_DEVICE=32 ++CONFIG_AIC79XX_RESET_DELAY_MS=4000 ++# CONFIG_AIC79XX_DEBUG_ENABLE is not set ++CONFIG_AIC79XX_DEBUG_MASK=0 ++# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set ++# CONFIG_SCSI_AIC94XX is not set ++# CONFIG_SCSI_DPT_I2O is not set ++# CONFIG_SCSI_ADVANSYS is not set ++# CONFIG_SCSI_ARCMSR is not set ++# CONFIG_MEGARAID_NEWGEN is not set ++# CONFIG_MEGARAID_LEGACY is not set ++# CONFIG_MEGARAID_SAS is not set ++# CONFIG_SCSI_HPTIOP is not set ++# CONFIG_SCSI_BUSLOGIC is not set ++# CONFIG_SCSI_DMX3191D is not set ++# CONFIG_SCSI_EATA is not set ++# CONFIG_SCSI_FUTURE_DOMAIN is not set ++# CONFIG_SCSI_GDTH is not set ++# CONFIG_SCSI_IPS is not set ++# CONFIG_SCSI_INITIO is not set ++# CONFIG_SCSI_INIA100 is not set ++# CONFIG_SCSI_STEX is not set ++# CONFIG_SCSI_SYM53C8XX_2 is not set ++# CONFIG_SCSI_IPR is not set ++# CONFIG_SCSI_QLOGIC_1280 is not set ++# CONFIG_SCSI_QLA_FC is not set ++# CONFIG_SCSI_QLA_ISCSI is not set ++# CONFIG_SCSI_LPFC is not set ++# CONFIG_SCSI_DC395x is not set ++# CONFIG_SCSI_DC390T is not set ++# CONFIG_SCSI_NSP32 is not set ++# CONFIG_SCSI_DEBUG is not set ++# CONFIG_SCSI_SRP is not set ++CONFIG_ATA=y ++# CONFIG_ATA_NONSTANDARD is not set ++CONFIG_ATA_ACPI=y ++CONFIG_SATA_AHCI=y ++CONFIG_SATA_SVW=y ++CONFIG_ATA_PIIX=y ++# CONFIG_SATA_MV is not set ++CONFIG_SATA_NV=y ++# CONFIG_PDC_ADMA is not set ++# CONFIG_SATA_QSTOR is not set ++# CONFIG_SATA_PROMISE is not set ++# CONFIG_SATA_SX4 is not set ++CONFIG_SATA_SIL=y ++# CONFIG_SATA_SIL24 is not set ++# CONFIG_SATA_SIS is not set ++# CONFIG_SATA_ULI is not set ++CONFIG_SATA_VIA=y ++# CONFIG_SATA_VITESSE is not set ++# CONFIG_SATA_INIC162X is not set ++# CONFIG_PATA_ALI is not set ++# CONFIG_PATA_AMD is not set ++# CONFIG_PATA_ARTOP is not set ++# CONFIG_PATA_ATIIXP is not set ++# CONFIG_PATA_CMD640_PCI is not set ++# CONFIG_PATA_CMD64X is not set ++# CONFIG_PATA_CS5520 is not set ++# CONFIG_PATA_CS5530 is not set ++# CONFIG_PATA_CS5535 is not set ++# CONFIG_PATA_CYPRESS is not set ++# CONFIG_PATA_EFAR is not set ++# CONFIG_ATA_GENERIC is not set ++# CONFIG_PATA_HPT366 is not set ++# CONFIG_PATA_HPT37X is not set ++# CONFIG_PATA_HPT3X2N is not set ++# CONFIG_PATA_HPT3X3 is not set ++# CONFIG_PATA_IT821X is not set ++# CONFIG_PATA_IT8213 is not set ++# CONFIG_PATA_JMICRON is not set ++# CONFIG_PATA_TRIFLEX is not set ++# CONFIG_PATA_MARVELL is not set ++# CONFIG_PATA_MPIIX is not set ++# CONFIG_PATA_OLDPIIX is not set ++# CONFIG_PATA_NETCELL is not set ++# CONFIG_PATA_NS87410 is not set ++# CONFIG_PATA_OPTI is not set ++# CONFIG_PATA_OPTIDMA is not set ++# CONFIG_PATA_PDC_OLD is not set ++# CONFIG_PATA_RADISYS is not set ++# 
CONFIG_PATA_RZ1000 is not set ++# CONFIG_PATA_SC1200 is not set ++# CONFIG_PATA_SERVERWORKS is not set ++# CONFIG_PATA_PDC2027X is not set ++# CONFIG_PATA_SIL680 is not set ++# CONFIG_PATA_SIS is not set ++# CONFIG_PATA_VIA is not set ++# CONFIG_PATA_WINBOND is not set ++ ++# ++# Multi-device support (RAID and LVM) ++# ++CONFIG_MD=y ++# CONFIG_BLK_DEV_MD is not set ++CONFIG_BLK_DEV_DM=y ++# CONFIG_DM_DEBUG is not set ++# CONFIG_DM_CRYPT is not set ++# CONFIG_DM_SNAPSHOT is not set ++# CONFIG_DM_MIRROR is not set ++# CONFIG_DM_ZERO is not set ++# CONFIG_DM_MULTIPATH is not set ++# CONFIG_DM_DELAY is not set ++# CONFIG_DM_NETLINK is not set ++ ++# ++# Fusion MPT device support ++# ++CONFIG_FUSION=y ++CONFIG_FUSION_SPI=y ++# CONFIG_FUSION_FC is not set ++# CONFIG_FUSION_SAS is not set ++CONFIG_FUSION_MAX_SGE=128 ++# CONFIG_FUSION_CTL is not set ++ ++# ++# IEEE 1394 (FireWire) support ++# ++# CONFIG_FIREWIRE is not set ++CONFIG_IEEE1394=y ++ ++# ++# Subsystem Options ++# ++# CONFIG_IEEE1394_VERBOSEDEBUG is not set ++ ++# ++# Controllers ++# ++ ++# ++# Texas Instruments PCILynx requires I2C ++# ++CONFIG_IEEE1394_OHCI1394=y ++ ++# ++# Protocols ++# ++# CONFIG_IEEE1394_VIDEO1394 is not set ++# CONFIG_IEEE1394_SBP2 is not set ++# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set ++# CONFIG_IEEE1394_ETH1394 is not set ++# CONFIG_IEEE1394_DV1394 is not set ++CONFIG_IEEE1394_RAWIO=y ++ ++# ++# I2O device support ++# ++# CONFIG_I2O is not set ++# CONFIG_MACINTOSH_DRIVERS is not set ++ ++# ++# Network device support ++# ++CONFIG_NETDEVICES=y ++# CONFIG_DUMMY is not set ++# CONFIG_BONDING is not set ++# CONFIG_EQUALIZER is not set ++CONFIG_TUN=m ++# CONFIG_ETUN is not set ++# CONFIG_NET_SB1000 is not set ++# CONFIG_ARCNET is not set ++# CONFIG_PHYLIB is not set ++ ++# ++# Ethernet (10 or 100Mbit) ++# ++CONFIG_NET_ETHERNET=y ++CONFIG_MII=y ++# CONFIG_HAPPYMEAL is not set ++# CONFIG_SUNGEM is not set ++# CONFIG_CASSINI is not set ++# CONFIG_NET_VENDOR_3COM is not set ++ ++# ++# Tulip family network device support ++# ++CONFIG_NET_TULIP=y ++# CONFIG_DE2104X is not set ++CONFIG_TULIP=y ++# CONFIG_TULIP_MWI is not set ++# CONFIG_TULIP_MMIO is not set ++# CONFIG_TULIP_NAPI is not set ++# CONFIG_DE4X5 is not set ++# CONFIG_WINBOND_840 is not set ++# CONFIG_DM9102 is not set ++# CONFIG_ULI526X is not set ++# CONFIG_HP100 is not set ++CONFIG_NET_PCI=y ++# CONFIG_PCNET32 is not set ++# CONFIG_AMD8111_ETH is not set ++# CONFIG_ADAPTEC_STARFIRE is not set ++CONFIG_B44=y ++CONFIG_FORCEDETH=y ++# CONFIG_FORCEDETH_NAPI is not set ++# CONFIG_DGRS is not set ++# CONFIG_EEPRO100 is not set ++CONFIG_E100=y ++# CONFIG_FEALNX is not set ++# CONFIG_NATSEMI is not set ++# CONFIG_NE2K_PCI is not set ++CONFIG_8139CP=y ++CONFIG_8139TOO=y ++# CONFIG_8139TOO_PIO is not set ++# CONFIG_8139TOO_TUNE_TWISTER is not set ++# CONFIG_8139TOO_8129 is not set ++# CONFIG_8139_OLD_RX_RESET is not set ++# CONFIG_SIS900 is not set ++# CONFIG_EPIC100 is not set ++# CONFIG_SUNDANCE is not set ++# CONFIG_TLAN is not set ++# CONFIG_VIA_RHINE is not set ++# CONFIG_SC92031 is not set ++CONFIG_NETDEV_1000=y ++# CONFIG_ACENIC is not set ++# CONFIG_DL2K is not set ++CONFIG_E1000=y ++# CONFIG_E1000_NAPI is not set ++# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set ++# CONFIG_E1000E is not set ++# CONFIG_NS83820 is not set ++# CONFIG_HAMACHI is not set ++# CONFIG_YELLOWFIN is not set ++CONFIG_R8169=y ++# CONFIG_R8169_NAPI is not set ++# CONFIG_SIS190 is not set ++# CONFIG_SKGE is not set ++CONFIG_SKY2=y ++# CONFIG_SK98LIN is not set ++# CONFIG_VIA_VELOCITY is 
not set ++CONFIG_TIGON3=y ++CONFIG_BNX2=y ++# CONFIG_QLA3XXX is not set ++# CONFIG_ATL1 is not set ++CONFIG_NETDEV_10000=y ++# CONFIG_CHELSIO_T1 is not set ++# CONFIG_CHELSIO_T3 is not set ++# CONFIG_IXGB is not set ++# CONFIG_S2IO is not set ++# CONFIG_MYRI10GE is not set ++# CONFIG_NETXEN_NIC is not set ++# CONFIG_MLX4_CORE is not set ++# CONFIG_TR is not set ++ ++# ++# Wireless LAN ++# ++# CONFIG_WLAN_PRE80211 is not set ++# CONFIG_WLAN_80211 is not set ++ ++# ++# USB Network Adapters ++# ++# CONFIG_USB_CATC is not set ++# CONFIG_USB_KAWETH is not set ++# CONFIG_USB_PEGASUS is not set ++# CONFIG_USB_RTL8150 is not set ++# CONFIG_USB_USBNET_MII is not set ++# CONFIG_USB_USBNET is not set ++# CONFIG_WAN is not set ++# CONFIG_FDDI is not set ++# CONFIG_HIPPI is not set ++CONFIG_PPP=m ++# CONFIG_PPP_MULTILINK is not set ++# CONFIG_PPP_FILTER is not set ++# CONFIG_PPP_ASYNC is not set ++# CONFIG_PPP_SYNC_TTY is not set ++# CONFIG_PPP_DEFLATE is not set ++# CONFIG_PPP_BSDCOMP is not set ++# CONFIG_PPP_MPPE is not set ++# CONFIG_PPPOE is not set ++# CONFIG_SLIP is not set ++CONFIG_SLHC=m ++# CONFIG_NET_FC is not set ++# CONFIG_SHAPER is not set ++CONFIG_NETCONSOLE=y ++CONFIG_NETPOLL=y ++# CONFIG_NETPOLL_TRAP is not set ++CONFIG_NET_POLL_CONTROLLER=y ++ ++# ++# ISDN subsystem ++# ++# CONFIG_ISDN is not set ++ ++# ++# Telephony Support ++# ++# CONFIG_PHONE is not set ++ ++# ++# Input device support ++# ++CONFIG_INPUT=y ++# CONFIG_INPUT_FF_MEMLESS is not set ++# CONFIG_INPUT_POLLDEV is not set ++ ++# ++# Userland interfaces ++# ++CONFIG_INPUT_MOUSEDEV=y ++CONFIG_INPUT_MOUSEDEV_PSAUX=y ++CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 ++CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 ++# CONFIG_INPUT_JOYDEV is not set ++# CONFIG_INPUT_TSDEV is not set ++CONFIG_INPUT_EVDEV=y ++# CONFIG_INPUT_EVBUG is not set ++ ++# ++# Input Device Drivers ++# ++CONFIG_INPUT_KEYBOARD=y ++CONFIG_KEYBOARD_ATKBD=y ++# CONFIG_KEYBOARD_SUNKBD is not set ++# CONFIG_KEYBOARD_LKKBD is not set ++# CONFIG_KEYBOARD_XTKBD is not set ++# CONFIG_KEYBOARD_NEWTON is not set ++# CONFIG_KEYBOARD_STOWAWAY is not set ++CONFIG_INPUT_MOUSE=y ++CONFIG_MOUSE_PS2=y ++CONFIG_MOUSE_PS2_ALPS=y ++CONFIG_MOUSE_PS2_LOGIPS2PP=y ++CONFIG_MOUSE_PS2_SYNAPTICS=y ++CONFIG_MOUSE_PS2_LIFEBOOK=y ++CONFIG_MOUSE_PS2_TRACKPOINT=y ++# CONFIG_MOUSE_PS2_TOUCHKIT is not set ++# CONFIG_MOUSE_SERIAL is not set ++# CONFIG_MOUSE_APPLETOUCH is not set ++# CONFIG_MOUSE_VSXXXAA is not set ++# CONFIG_INPUT_JOYSTICK is not set ++# CONFIG_INPUT_TABLET is not set ++# CONFIG_INPUT_TOUCHSCREEN is not set ++# CONFIG_INPUT_MISC is not set ++ ++# ++# Hardware I/O ports ++# ++CONFIG_SERIO=y ++CONFIG_SERIO_I8042=y ++# CONFIG_SERIO_SERPORT is not set ++# CONFIG_SERIO_CT82C710 is not set ++# CONFIG_SERIO_PCIPS2 is not set ++CONFIG_SERIO_LIBPS2=y ++# CONFIG_SERIO_RAW is not set ++# CONFIG_GAMEPORT is not set ++ ++# ++# Character devices ++# ++CONFIG_VT=y ++CONFIG_VT_CONSOLE=y ++CONFIG_HW_CONSOLE=y ++# CONFIG_VT_HW_CONSOLE_BINDING is not set ++# CONFIG_SERIAL_NONSTANDARD is not set ++ ++# ++# Serial drivers ++# ++CONFIG_SERIAL_8250=y ++CONFIG_SERIAL_8250_CONSOLE=y ++CONFIG_SERIAL_8250_PCI=y ++CONFIG_SERIAL_8250_PNP=y ++CONFIG_SERIAL_8250_NR_UARTS=4 ++CONFIG_SERIAL_8250_RUNTIME_UARTS=4 ++# CONFIG_SERIAL_8250_EXTENDED is not set ++ ++# ++# Non-8250 serial port support ++# ++CONFIG_SERIAL_CORE=y ++CONFIG_SERIAL_CORE_CONSOLE=y ++# CONFIG_SERIAL_JSM is not set ++CONFIG_UNIX98_PTYS=y ++CONFIG_LEGACY_PTYS=y ++CONFIG_LEGACY_PTY_COUNT=256 ++ ++# ++# IPMI ++# ++# CONFIG_IPMI_HANDLER is not set ++# CONFIG_WATCHDOG is 
not set ++CONFIG_HW_RANDOM=y ++CONFIG_HW_RANDOM_INTEL=y ++CONFIG_HW_RANDOM_AMD=y ++CONFIG_HW_RANDOM_GEODE=y ++CONFIG_HW_RANDOM_VIA=y ++# CONFIG_NVRAM is not set ++CONFIG_RTC=y ++# CONFIG_R3964 is not set ++# CONFIG_APPLICOM is not set ++# CONFIG_SONYPI is not set ++# CONFIG_AGP is not set ++# CONFIG_DRM is not set ++# CONFIG_MWAVE is not set ++# CONFIG_PC8736x_GPIO is not set ++# CONFIG_NSC_GPIO is not set ++# CONFIG_CS5535_GPIO is not set ++CONFIG_RAW_DRIVER=y ++CONFIG_MAX_RAW_DEVS=256 ++CONFIG_HPET=y ++# CONFIG_HPET_RTC_IRQ is not set ++CONFIG_HPET_MMAP=y ++CONFIG_HANGCHECK_TIMER=y ++ ++# ++# TPM devices ++# ++# CONFIG_TCG_TPM is not set ++# CONFIG_TELCLOCK is not set ++CONFIG_DEVPORT=y ++# CONFIG_I2C is not set ++ ++# ++# SPI support ++# ++# CONFIG_SPI is not set ++# CONFIG_SPI_MASTER is not set ++ ++# ++# Dallas's 1-wire bus ++# ++# CONFIG_W1 is not set ++# CONFIG_HWMON is not set ++ ++# ++# Multifunction device drivers ++# ++# CONFIG_MFD_SM501 is not set ++ ++# ++# Multimedia devices ++# ++# CONFIG_VIDEO_DEV is not set ++# CONFIG_DVB_CORE is not set ++CONFIG_DAB=y ++# CONFIG_USB_DABUSB is not set ++ ++# ++# Graphics support ++# ++# CONFIG_BACKLIGHT_LCD_SUPPORT is not set ++ ++# ++# Display device support ++# ++# CONFIG_DISPLAY_SUPPORT is not set ++# CONFIG_VGASTATE is not set ++CONFIG_VIDEO_OUTPUT_CONTROL=m ++# CONFIG_FB is not set ++ ++# ++# Console display driver support ++# ++CONFIG_VGA_CONSOLE=y ++CONFIG_VGACON_SOFT_SCROLLBACK=y ++CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128 ++CONFIG_VIDEO_SELECT=y ++CONFIG_DUMMY_CONSOLE=y ++ ++# ++# Sound ++# ++CONFIG_SOUND=y ++ ++# ++# Advanced Linux Sound Architecture ++# ++# CONFIG_SND is not set ++ ++# ++# Open Sound System ++# ++CONFIG_SOUND_PRIME=y ++# CONFIG_OSS_OBSOLETE is not set ++# CONFIG_SOUND_TRIDENT is not set ++# CONFIG_SOUND_MSNDCLAS is not set ++# CONFIG_SOUND_MSNDPIN is not set ++# CONFIG_SOUND_OSS is not set ++ ++# ++# HID Devices ++# ++CONFIG_HID=y ++# CONFIG_HID_DEBUG is not set ++ ++# ++# USB Input Devices ++# ++CONFIG_USB_HID=y ++# CONFIG_USB_HIDINPUT_POWERBOOK is not set ++# CONFIG_HID_FF is not set ++# CONFIG_USB_HIDDEV is not set ++ ++# ++# USB support ++# ++CONFIG_USB_ARCH_HAS_HCD=y ++CONFIG_USB_ARCH_HAS_OHCI=y ++CONFIG_USB_ARCH_HAS_EHCI=y ++CONFIG_USB=y ++# CONFIG_USB_DEBUG is not set ++ ++# ++# Miscellaneous USB options ++# ++CONFIG_USB_DEVICEFS=y ++CONFIG_USB_DEVICE_CLASS=y ++# CONFIG_USB_DYNAMIC_MINORS is not set ++# CONFIG_USB_SUSPEND is not set ++# CONFIG_USB_OTG is not set ++ ++# ++# USB Host Controller Drivers ++# ++CONFIG_USB_EHCI_HCD=y ++# CONFIG_USB_EHCI_SPLIT_ISO is not set ++# CONFIG_USB_EHCI_ROOT_HUB_TT is not set ++# CONFIG_USB_EHCI_TT_NEWSCHED is not set ++# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set ++# CONFIG_USB_ISP116X_HCD is not set ++CONFIG_USB_OHCI_HCD=y ++# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set ++# CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set ++CONFIG_USB_OHCI_LITTLE_ENDIAN=y ++CONFIG_USB_UHCI_HCD=y ++# CONFIG_USB_SL811_HCD is not set ++ ++# ++# USB Device Class drivers ++# ++# CONFIG_USB_ACM is not set ++CONFIG_USB_PRINTER=y ++ ++# ++# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' ++# ++ ++# ++# may also be needed; see USB_STORAGE Help for more information ++# ++CONFIG_USB_STORAGE=y ++# CONFIG_USB_STORAGE_DEBUG is not set ++# CONFIG_USB_STORAGE_DATAFAB is not set ++# CONFIG_USB_STORAGE_FREECOM is not set ++# CONFIG_USB_STORAGE_ISD200 is not set ++# CONFIG_USB_STORAGE_DPCM is not set ++# CONFIG_USB_STORAGE_USBAT is not set ++# CONFIG_USB_STORAGE_SDDR09 is not set ++# 
CONFIG_USB_STORAGE_SDDR55 is not set ++# CONFIG_USB_STORAGE_JUMPSHOT is not set ++# CONFIG_USB_STORAGE_ALAUDA is not set ++# CONFIG_USB_STORAGE_KARMA is not set ++# CONFIG_USB_LIBUSUAL is not set ++ ++# ++# USB Imaging devices ++# ++# CONFIG_USB_MDC800 is not set ++# CONFIG_USB_MICROTEK is not set ++CONFIG_USB_MON=y ++ ++# ++# USB port drivers ++# ++ ++# ++# USB Serial Converter support ++# ++# CONFIG_USB_SERIAL is not set ++ ++# ++# USB Miscellaneous drivers ++# ++# CONFIG_USB_EMI62 is not set ++# CONFIG_USB_EMI26 is not set ++# CONFIG_USB_ADUTUX is not set ++# CONFIG_USB_AUERSWALD is not set ++# CONFIG_USB_RIO500 is not set ++# CONFIG_USB_LEGOTOWER is not set ++# CONFIG_USB_LCD is not set ++# CONFIG_USB_BERRY_CHARGE is not set ++# CONFIG_USB_LED is not set ++# CONFIG_USB_CYPRESS_CY7C63 is not set ++# CONFIG_USB_CYTHERM is not set ++# CONFIG_USB_PHIDGET is not set ++# CONFIG_USB_IDMOUSE is not set ++# CONFIG_USB_FTDI_ELAN is not set ++# CONFIG_USB_APPLEDISPLAY is not set ++# CONFIG_USB_SISUSBVGA is not set ++# CONFIG_USB_LD is not set ++# CONFIG_USB_TRANCEVIBRATOR is not set ++# CONFIG_USB_IOWARRIOR is not set ++# CONFIG_USB_TEST is not set ++ ++# ++# USB DSL modem support ++# ++ ++# ++# USB Gadget Support ++# ++# CONFIG_USB_GADGET is not set ++# CONFIG_MMC is not set ++ ++# ++# LED devices ++# ++# CONFIG_NEW_LEDS is not set ++ ++# ++# LED drivers ++# ++ ++# ++# LED Triggers ++# ++ ++# ++# InfiniBand support ++# ++# CONFIG_INFINIBAND is not set ++ ++# ++# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) ++# ++# CONFIG_EDAC is not set ++ ++# ++# Real Time Clock ++# ++# CONFIG_RTC_CLASS is not set ++ ++# ++# DMA Engine support ++# ++# CONFIG_DMA_ENGINE is not set ++ ++# ++# DMA Clients ++# ++ ++# ++# DMA Devices ++# ++ ++# ++# Virtualization ++# ++# CONFIG_KVM is not set ++ ++# ++# File systems ++# ++CONFIG_EXT2_FS=y ++CONFIG_EXT2_FS_XATTR=y ++CONFIG_EXT2_FS_POSIX_ACL=y ++# CONFIG_EXT2_FS_SECURITY is not set ++# CONFIG_EXT2_FS_XIP is not set ++CONFIG_EXT3_FS=y ++CONFIG_EXT3_FS_XATTR=y ++CONFIG_EXT3_FS_POSIX_ACL=y ++# CONFIG_EXT3_FS_SECURITY is not set ++# CONFIG_EXT4DEV_FS is not set ++CONFIG_JBD=y ++# CONFIG_JBD_DEBUG is not set ++CONFIG_FS_MBCACHE=y ++CONFIG_REISERFS_FS=y ++# CONFIG_REISERFS_CHECK is not set ++# CONFIG_REISERFS_PROC_INFO is not set ++CONFIG_REISERFS_FS_XATTR=y ++CONFIG_REISERFS_FS_POSIX_ACL=y ++# CONFIG_REISERFS_FS_SECURITY is not set ++# CONFIG_JFS_FS is not set ++CONFIG_FS_POSIX_ACL=y ++# CONFIG_XFS_FS is not set ++# CONFIG_GFS2_FS is not set ++# CONFIG_OCFS2_FS is not set ++# CONFIG_MINIX_FS is not set ++# CONFIG_ROMFS_FS is not set ++CONFIG_INOTIFY=y ++CONFIG_INOTIFY_USER=y ++# CONFIG_QUOTA is not set ++CONFIG_DNOTIFY=y ++# CONFIG_AUTOFS_FS is not set ++CONFIG_AUTOFS4_FS=y ++CONFIG_FUSE_FS=m ++CONFIG_GENERIC_ACL=y ++ ++# ++# CD-ROM/DVD Filesystems ++# ++CONFIG_ISO9660_FS=y ++# CONFIG_JOLIET is not set ++# CONFIG_ZISOFS is not set ++CONFIG_UDF_FS=m ++CONFIG_UDF_NLS=y ++ ++# ++# DOS/FAT/NT Filesystems ++# ++CONFIG_FAT_FS=y ++CONFIG_MSDOS_FS=y ++CONFIG_VFAT_FS=y ++CONFIG_FAT_DEFAULT_CODEPAGE=437 ++CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" ++CONFIG_NTFS_FS=m ++# CONFIG_NTFS_DEBUG is not set ++CONFIG_NTFS_RW=y ++ ++# ++# Pseudo filesystems ++# ++CONFIG_PROC_FS=y ++CONFIG_PROC_KCORE=y ++CONFIG_PROC_SYSCTL=y ++CONFIG_SYSFS=y ++CONFIG_TMPFS=y ++CONFIG_TMPFS_POSIX_ACL=y ++CONFIG_HUGETLBFS=y ++CONFIG_HUGETLB_PAGE=y ++CONFIG_RAMFS=y ++# CONFIG_CONFIGFS_FS is not set ++ ++# ++# Layered filesystems ++# ++# CONFIG_UNION_FS is not set ++ ++# ++# Miscellaneous filesystems ++# 
++# CONFIG_ADFS_FS is not set ++# CONFIG_AFFS_FS is not set ++# CONFIG_HFS_FS is not set ++# CONFIG_HFSPLUS_FS is not set ++# CONFIG_BEFS_FS is not set ++# CONFIG_BFS_FS is not set ++# CONFIG_EFS_FS is not set ++# CONFIG_CRAMFS is not set ++# CONFIG_VXFS_FS is not set ++# CONFIG_HPFS_FS is not set ++# CONFIG_QNX4FS_FS is not set ++# CONFIG_SYSV_FS is not set ++# CONFIG_UFS_FS is not set ++ ++# ++# Network File Systems ++# ++CONFIG_NFS_FS=y ++CONFIG_NFS_V3=y ++# CONFIG_NFS_V3_ACL is not set ++# CONFIG_NFS_V4 is not set ++# CONFIG_NFS_DIRECTIO is not set ++CONFIG_NFSD=y ++CONFIG_NFSD_V3=y ++# CONFIG_NFSD_V3_ACL is not set ++# CONFIG_NFSD_V4 is not set ++CONFIG_NFSD_TCP=y ++CONFIG_ROOT_NFS=y ++CONFIG_LOCKD=y ++CONFIG_LOCKD_V4=y ++CONFIG_EXPORTFS=y ++CONFIG_NFS_COMMON=y ++CONFIG_SUNRPC=y ++# CONFIG_SUNRPC_BIND34 is not set ++# CONFIG_RPCSEC_GSS_KRB5 is not set ++# CONFIG_RPCSEC_GSS_SPKM3 is not set ++CONFIG_SMB_FS=m ++# CONFIG_SMB_NLS_DEFAULT is not set ++# CONFIG_CIFS is not set ++# CONFIG_NCP_FS is not set ++# CONFIG_CODA_FS is not set ++# CONFIG_AFS_FS is not set ++# CONFIG_9P_FS is not set ++ ++# ++# Partition Types ++# ++# CONFIG_PARTITION_ADVANCED is not set ++CONFIG_MSDOS_PARTITION=y ++ ++# ++# Native Language Support ++# ++CONFIG_NLS=y ++CONFIG_NLS_DEFAULT="iso8859-1" ++CONFIG_NLS_CODEPAGE_437=y ++# CONFIG_NLS_CODEPAGE_737 is not set ++# CONFIG_NLS_CODEPAGE_775 is not set ++CONFIG_NLS_CODEPAGE_850=y ++CONFIG_NLS_CODEPAGE_852=y ++# CONFIG_NLS_CODEPAGE_855 is not set ++# CONFIG_NLS_CODEPAGE_857 is not set ++# CONFIG_NLS_CODEPAGE_860 is not set ++# CONFIG_NLS_CODEPAGE_861 is not set ++# CONFIG_NLS_CODEPAGE_862 is not set ++# CONFIG_NLS_CODEPAGE_863 is not set ++# CONFIG_NLS_CODEPAGE_864 is not set ++# CONFIG_NLS_CODEPAGE_865 is not set ++# CONFIG_NLS_CODEPAGE_866 is not set ++# CONFIG_NLS_CODEPAGE_869 is not set ++# CONFIG_NLS_CODEPAGE_936 is not set ++# CONFIG_NLS_CODEPAGE_950 is not set ++# CONFIG_NLS_CODEPAGE_932 is not set ++# CONFIG_NLS_CODEPAGE_949 is not set ++# CONFIG_NLS_CODEPAGE_874 is not set ++# CONFIG_NLS_ISO8859_8 is not set ++# CONFIG_NLS_CODEPAGE_1250 is not set ++# CONFIG_NLS_CODEPAGE_1251 is not set ++CONFIG_NLS_ASCII=y ++CONFIG_NLS_ISO8859_1=y ++CONFIG_NLS_ISO8859_2=y ++# CONFIG_NLS_ISO8859_3 is not set ++# CONFIG_NLS_ISO8859_4 is not set ++# CONFIG_NLS_ISO8859_5 is not set ++# CONFIG_NLS_ISO8859_6 is not set ++# CONFIG_NLS_ISO8859_7 is not set ++# CONFIG_NLS_ISO8859_9 is not set ++# CONFIG_NLS_ISO8859_13 is not set ++# CONFIG_NLS_ISO8859_14 is not set ++CONFIG_NLS_ISO8859_15=y ++# CONFIG_NLS_KOI8_R is not set ++# CONFIG_NLS_KOI8_U is not set ++CONFIG_NLS_UTF8=y ++ ++# ++# Distributed Lock Manager ++# ++# CONFIG_DLM is not set ++ ++# ++# Instrumentation Support ++# ++CONFIG_PROFILING=y ++CONFIG_OPROFILE=y ++CONFIG_KPROBES=y ++ ++# ++# Kernel hacking ++# ++CONFIG_TRACE_IRQFLAGS_SUPPORT=y ++CONFIG_PRINTK_TIME=y ++# CONFIG_ENABLE_MUST_CHECK is not set ++CONFIG_MAGIC_SYSRQ=y ++CONFIG_UNUSED_SYMBOLS=y ++# CONFIG_DEBUG_FS is not set ++# CONFIG_HEADERS_CHECK is not set ++CONFIG_DEBUG_KERNEL=y ++# CONFIG_DEBUG_SHIRQ is not set ++CONFIG_DETECT_SOFTLOCKUP=y ++# CONFIG_SCHEDSTATS is not set ++# CONFIG_TIMER_STATS is not set ++# CONFIG_DEBUG_SLAB is not set ++# CONFIG_DEBUG_RT_MUTEXES is not set ++# CONFIG_RT_MUTEX_TESTER is not set ++# CONFIG_DEBUG_SPINLOCK is not set ++# CONFIG_DEBUG_MUTEXES is not set ++# CONFIG_DEBUG_LOCK_ALLOC is not set ++# CONFIG_PROVE_LOCKING is not set ++# CONFIG_DEBUG_SPINLOCK_SLEEP is not set ++# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set ++# 
CONFIG_DEBUG_KOBJECT is not set ++# CONFIG_DEBUG_HIGHMEM is not set ++CONFIG_DEBUG_BUGVERBOSE=y ++CONFIG_DEBUG_INFO=y ++# CONFIG_DEBUG_VM is not set ++# CONFIG_DEBUG_LIST is not set ++# CONFIG_FRAME_POINTER is not set ++# CONFIG_UNWIND_INFO is not set ++# CONFIG_FORCED_INLINING is not set ++# CONFIG_RCU_TORTURE_TEST is not set ++# CONFIG_LKDTM is not set ++# CONFIG_FAULT_INJECTION is not set ++# CONFIG_WANT_EXTRA_DEBUG_INFORMATION is not set ++# CONFIG_KGDB is not set ++CONFIG_EARLY_PRINTK=y ++CONFIG_DEBUG_STACKOVERFLOW=y ++# CONFIG_DEBUG_STACK_USAGE is not set ++# CONFIG_DEBUG_RODATA is not set ++# CONFIG_4KSTACKS is not set ++CONFIG_X86_FIND_SMP_CONFIG=y ++CONFIG_X86_MPPARSE=y ++CONFIG_DOUBLEFAULT=y ++ ++# ++# Linux VServer ++# ++CONFIG_VSERVER_FILESHARING=y ++CONFIG_VSERVER_AUTO_LBACK=y ++CONFIG_VSERVER_AUTO_SINGLE=y ++CONFIG_VSERVER_COWBL=y ++# CONFIG_VSERVER_VTIME is not set ++# CONFIG_VSERVER_DEVICE is not set ++CONFIG_VSERVER_PROC_SECURE=y ++CONFIG_VSERVER_HARDCPU=y ++CONFIG_VSERVER_IDLETIME=y ++# CONFIG_VSERVER_IDLELIMIT is not set ++# CONFIG_TAGGING_NONE is not set ++# CONFIG_TAGGING_UID16 is not set ++# CONFIG_TAGGING_GID16 is not set ++CONFIG_TAGGING_ID24=y ++# CONFIG_TAGGING_INTERN is not set ++# CONFIG_TAG_NFSD is not set ++# CONFIG_VSERVER_PRIVACY is not set ++CONFIG_VSERVER_CONTEXTS=256 ++CONFIG_VSERVER_WARN=y ++# CONFIG_VSERVER_DEBUG is not set ++CONFIG_VSERVER=y ++ ++# ++# Security options ++# ++# CONFIG_KEYS is not set ++# CONFIG_SECURITY is not set ++ ++# ++# Cryptographic options ++# ++# CONFIG_CRYPTO is not set ++ ++# ++# Library routines ++# ++CONFIG_BITREVERSE=y ++CONFIG_CRC_CCITT=y ++CONFIG_CRC16=y ++# CONFIG_CRC_ITU_T is not set ++CONFIG_CRC32=y ++CONFIG_LIBCRC32C=y ++CONFIG_ZLIB_INFLATE=y ++CONFIG_PLIST=y ++CONFIG_HAS_IOMEM=y ++CONFIG_HAS_IOPORT=y ++CONFIG_HAS_DMA=y ++CONFIG_GENERIC_HARDIRQS=y ++CONFIG_GENERIC_IRQ_PROBE=y ++CONFIG_GENERIC_PENDING_IRQ=y ++CONFIG_X86_SMP=y ++CONFIG_X86_HT=y ++CONFIG_X86_BIOS_REBOOT=y ++CONFIG_X86_TRAMPOLINE=y ++CONFIG_KTIME_SCALAR=y +diff -Nurb linux-2.6.22-570/Documentation/DocBook/Makefile linux-2.6.22-591/Documentation/DocBook/Makefile +--- linux-2.6.22-570/Documentation/DocBook/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/DocBook/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -11,7 +11,7 @@ + procfs-guide.xml writing_usb_driver.xml \ + kernel-api.xml filesystems.xml lsm.xml usb.xml \ + gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ +- genericirq.xml ++ genericirq.xml kgdb.xml + + ### + # The build process is as follows (targets): +diff -Nurb linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl +--- linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/Documentation/DocBook/kgdb.tmpl 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,250 @@ ++ ++ ++ ++ ++ ++ KGDB Internals ++ ++ ++ ++ Tom ++ Rini ++ ++
++      <email>trini@kernel.crashing.org</email>
++     </address>
++    </affiliation>
++   </author>
++   <author>
++    <firstname>Amit S.</firstname>
++    <surname>Kale</surname>
++    <affiliation>
++     <address>
++      <email>amitkale@linsyssoft.com</email>
++     </address>
++    </affiliation>
++   </author>
++  </authorgroup>
++ ++ ++ 2004-2005 ++ MontaVista Software, Inc. ++ ++ ++ 2004 ++ Amit S. Kale ++ ++ ++ ++ ++ This file is licensed under the terms of the GNU General Public License ++ version 2. This program is licensed "as is" without any warranty of any ++ kind, whether express or implied. ++ ++ ++ ++
++ ++ ++ ++ Introduction ++ ++ kgdb is a source level debugger for linux kernel. It is used along ++ with gdb to debug a linux kernel. Kernel developers can debug a kernel ++ similar to application programs with the use of kgdb. It makes it ++ possible to place breakpoints in kernel code, step through the code ++ and observe variables. ++ ++ ++ Two machines are required for using kgdb. One of these machines is a ++ development machine and the other is a test machine. The machines are ++ typically connected through a serial line, a null-modem cable which ++ connects their serial ports. It is also possible however, to use an ++ ethernet connection between the machines. The kernel to be debugged ++ runs on the test machine. gdb runs on the development machine. The ++ serial line or ethernet connection is used by gdb to communicate to ++ the kernel being debugged. ++ ++ ++ ++ Compiling a kernel ++ ++ To enable CONFIG_KGDB, look under the "Kernel debugging" ++ and then select "KGDB: kernel debugging with remote gdb". ++ ++ ++ The first choice for I/O is CONFIG_KGDB_ONLY_MODULES. ++ This means that you will only be able to use KGDB after loading a ++ kernel module that defines how you want to be able to talk with ++ KGDB. There are two other choices (more on some architectures) that ++ can be enabled as modules later, if not picked here. ++ ++ The first of these is CONFIG_KGDB_8250_NOMODULE. ++ This has sub-options such as CONFIG_KGDB_SIMPLE_SERIAL ++ which toggles choosing the serial port by ttyS number or by specifying ++ a port and IRQ number. ++ ++ ++ The second of these choices on most systems for I/O is ++ CONFIG_KGDBOE. This requires that the machine to be ++ debugged has an ethernet card which supports the netpoll API, such as ++ the cards supported by CONFIG_E100. There are no ++ sub-options for this, but a kernel command line option is required. ++ ++ ++ ++ Booting the kernel ++ ++ The Kernel command line option kgdbwait makes kgdb ++ wait for gdb connection during booting of a kernel. If the ++ CONFIG_KGDB_8250 driver is used (or if applicable, ++ another serial driver) this breakpoint will happen very early on, before ++ console output. If you wish to change serial port information and you ++ have enabled both CONFIG_KGDB_8250 and ++ CONFIG_KGDB_SIMPLE_SERIAL then you must pass the option ++ kgdb8250=<io or mmio>,<address>,<baud ++ rate>,<irq> before kgdbwait. ++ The values io or mmio refer to ++ if the address being passed next needs to be memory mapped ++ (mmio) or not. The address must ++ be passed in hex and is the hardware address and will be remapped if ++ passed as mmio. The value ++ baud rate and irq are base-10. ++ The supported values for baud rate are ++ 9600, 19200, ++ 38400, 57600, and ++ 115200. ++ ++ ++ To have KGDB stop the kernel and wait, with the compiled values for the ++ serial driver, pass in: kgdbwait. ++ ++ ++ To specify the values of the SH SCI(F) serial port at boot: ++ kgdbsci=0,115200. ++ ++ ++ To specify the values of the serial port at boot: ++ kgdb8250=io,3f8,115200,3. ++ On IA64 this could also be: ++ kgdb8250=mmio,0xff5e0000,115200,74 ++ And to have KGDB also stop the kernel and wait for GDB to connect, pass in ++ kgdbwait after this arguement. 
++ ++ ++ To configure the CONFIG_KGDBOE driver, pass in ++ kgdboe=[src-port]@<src-ip>/[dev],[tgt-port]@<tgt-ip>/[tgt-macaddr] ++ where: ++ ++ src-port (optional): source for UDP packets (defaults to 6443) ++ src-ip: source IP to use (interface address) ++ dev (optional): network interface (eth0) ++ tgt-port (optional): port GDB will use (defaults to 6442) ++ tgt-ip: IP address GDB will be connecting from ++ tgt-macaddr (optional): ethernet MAC address for logging agent (default is broadcast) ++ ++ ++ ++ The CONFIG_KGDBOE driver can be reconfigured at run ++ time, if CONFIG_SYSFS and ++ CONFIG_MODULES by echo'ing a new config string to ++ /sys/module/kgdboe/parameter/kgdboe. The ++ driver can be unconfigured with the special string ++ not_configured. ++ ++ ++ ++ Connecting gdb ++ ++ If you have used any of the methods to have KGDB stop and create ++ an initial breakpoint described in the previous chapter, kgdb prints ++ the message "Waiting for connection from remote gdb..." on the console ++ and waits for connection from gdb. At this point you connect gdb to kgdb. ++ ++ ++ Example (serial): ++ ++ ++ % gdb ./vmlinux ++ (gdb) set remotebaud 115200 ++ (gdb) target remote /dev/ttyS0 ++ ++ ++ Example (ethernet): ++ ++ ++ % gdb ./vmlinux ++ (gdb) target remote udp:192.168.2.2:6443 ++ ++ ++ Once connected, you can debug a kernel the way you would debug an ++ application program. ++ ++ ++ ++ Architecture specific notes ++ ++ SuperH: The NMI switch found on some boards can be used to trigger an ++ initial breakpoint. Subsequent triggers do nothing. If console ++ is enabled on the SCI(F) serial port, and that is the port being used ++ for KGDB, then you must trigger a breakpoint via sysrq, NMI, or ++ some other method prior to connecting, or echo a control-c to the ++ serial port. Also, to use the SCI(F) port for KGDB, the ++ CONFIG_SERIAL_SH_SCI driver must be enabled. ++ ++ ++ ++ The common backend (required) ++ ++ There are a few flags which must be set on every architecture in ++ their <asm/kgdb.h> file. These are: ++ ++ ++ ++ NUMREGBYTES: The size in bytes of all of the registers, so ++ that we can ensure they will all fit into a packet. ++ ++ ++ BUFMAX: The size in bytes of the buffer GDB will read into. ++ This must be larger than NUMREGBYTES. ++ ++ ++ CACHE_FLUSH_IS_SAFE: Set to one if it always safe to call ++ flush_cache_range or flush_icache_range. On some architectures, ++ these functions may not be safe to call on SMP since we keep other ++ CPUs in a holding pattern. ++ ++ ++ ++ ++ ++ There are also the following functions for the common backend, ++ found in kernel/kgdb.c that must be supplied by the ++ architecture-specific backend. No weak version of these is provided. ++ ++!Iinclude/linux/kgdb.h ++ ++ ++ The common backend (optional) ++ ++ These functions are part of the common backend, found in kernel/kgdb.c ++ and are optionally implemented. Some functions (with _hw_ in the name) ++ end up being required on arches which use hardware breakpoints. ++ ++!Ikernel/kgdb.c ++ ++ ++ Driver-Specific Functions ++ ++ Some of the I/O drivers have additional functions that can be ++ called, that are specific to the driver. Calls from other places ++ to these functions must be wrapped in #ifdefs for the driver in ++ question. ++ ++!Idrivers/serial/8250_kgdb.c ++ ++
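[Editor's illustration, not part of the patch: a minimal sketch of the run-time
kgdboe reconfiguration described in the template above. It assumes the usual
/sys/module/<name>/parameters sysfs layout and that kgdboe is built as a module
whose parameter is also named "kgdboe"; all addresses, the interface, and the
port numbers are placeholders.]

    # on the test machine, configure kgdboe at load time
    # (empty optional fields fall back to the defaults listed above)
    modprobe kgdboe kgdboe=@192.168.2.3/,@192.168.2.2/
    # point it at a different debugging host without reloading
    echo @192.168.2.3/,6442@192.168.2.5/ > /sys/module/kgdboe/parameters/kgdboe
    # disable it again with the special string
    echo not_configured > /sys/module/kgdboe/parameters/kgdboe

[Only the source IP (interface address) and target IP are required; per the
parameter description above, src-port, dev, tgt-port, and tgt-macaddr default
to 6443, eth0, 6442, and broadcast respectively.]
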
+diff -Nurb linux-2.6.22-570/Documentation/accounting/getdelays.c linux-2.6.22-591/Documentation/accounting/getdelays.c +--- linux-2.6.22-570/Documentation/accounting/getdelays.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/accounting/getdelays.c 2007-12-21 15:36:11.000000000 -0500 +@@ -49,6 +49,7 @@ + int dbg; + int print_delays; + int print_io_accounting; ++int print_task_context_switch_counts; + __u64 stime, utime; + + #define PRINTF(fmt, arg...) { \ +@@ -195,7 +196,7 @@ + "IO %15s%15s\n" + " %15llu%15llu\n" + "MEM %15s%15s\n" +- " %15llu%15llu\n\n", ++ " %15llu%15llu\n" + "count", "real total", "virtual total", "delay total", + t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total, + t->cpu_delay_total, +@@ -204,6 +205,14 @@ + "count", "delay total", t->swapin_count, t->swapin_delay_total); + } + ++void task_context_switch_counts(struct taskstats *t) ++{ ++ printf("\n\nTask %15s%15s\n" ++ " %15lu%15lu\n", ++ "voluntary", "nonvoluntary", ++ t->nvcsw, t->nivcsw); ++} ++ + void print_ioacct(struct taskstats *t) + { + printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n", +@@ -235,7 +244,7 @@ + struct msgtemplate msg; + + while (1) { +- c = getopt(argc, argv, "diw:r:m:t:p:vl"); ++ c = getopt(argc, argv, "qdiw:r:m:t:p:vl"); + if (c < 0) + break; + +@@ -248,6 +257,10 @@ + printf("printing IO accounting\n"); + print_io_accounting = 1; + break; ++ case 'q': ++ printf("printing task/process context switch rates\n"); ++ print_task_context_switch_counts = 1; ++ break; + case 'w': + logfile = strdup(optarg); + printf("write to file %s\n", logfile); +@@ -389,6 +402,8 @@ + print_delayacct((struct taskstats *) NLA_DATA(na)); + if (print_io_accounting) + print_ioacct((struct taskstats *) NLA_DATA(na)); ++ if (print_task_context_switch_counts) ++ task_context_switch_counts((struct taskstats *) NLA_DATA(na)); + if (fd) { + if (write(fd, NLA_DATA(na), na->nla_len) < 0) { + err(1,"write error\n"); +diff -Nurb linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt +--- linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/accounting/taskstats-struct.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -22,6 +22,8 @@ + /* Extended accounting fields end */ + Their values are collected if CONFIG_TASK_XACCT is set. + ++4) Per-task and per-thread context switch count statistics ++ + Future extension should add fields to the end of the taskstats struct, and + should not change the relative position of each field within the struct. + +@@ -158,4 +160,8 @@ + + /* Extended accounting fields end */ + ++4) Per-task and per-thread statistics ++ __u64 nvcsw; /* Context voluntary switch counter */ ++ __u64 nivcsw; /* Context involuntary switch counter */ ++ + } +diff -Nurb linux-2.6.22-570/Documentation/cachetlb.txt linux-2.6.22-591/Documentation/cachetlb.txt +--- linux-2.6.22-570/Documentation/cachetlb.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/cachetlb.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -253,7 +253,7 @@ + + The first of these two routines is invoked after map_vm_area() + has installed the page table entries. The second is invoked +- before unmap_vm_area() deletes the page table entries. ++ before unmap_kernel_range() deletes the page table entries. + + There exists another whole class of cpu cache issues which currently + require a whole different set of interfaces to handle properly. 
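[Editor's illustration, not part of the patch: the getdelays.c hunk above wires
the new per-task context-switch counters to a -q switch. A hypothetical
invocation, assuming a kernel with taskstats support and root privileges for
the genetlink socket; pid 1234 is a placeholder for any task of interest.]

    # build the sample tool shipped in Documentation/accounting
    # (compiler flags/include paths may vary by distribution)
    gcc -o getdelays getdelays.c
    # query the voluntary/nonvoluntary switch counts for one task
    ./getdelays -q -p 1234

[Per the code above, -q prints "printing task/process context switch rates"
and then the nvcsw/nivcsw values from the taskstats reply via the new
task_context_switch_counts() helper.]
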
+diff -Nurb linux-2.6.22-570/Documentation/containers.txt linux-2.6.22-591/Documentation/containers.txt +--- linux-2.6.22-570/Documentation/containers.txt 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/Documentation/containers.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,543 @@ ++ CONTAINERS ++ ------- ++ ++Written by Paul Menage based on Documentation/cpusets.txt ++ ++Original copyright statements from cpusets.txt: ++Portions Copyright (C) 2004 BULL SA. ++Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. ++Modified by Paul Jackson ++Modified by Christoph Lameter ++ ++CONTENTS: ++========= ++ ++1. Containers ++ 1.1 What are containers ? ++ 1.2 Why are containers needed ? ++ 1.3 How are containers implemented ? ++ 1.4 What does notify_on_release do ? ++ 1.5 How do I use containers ? ++2. Usage Examples and Syntax ++ 2.1 Basic Usage ++ 2.2 Attaching processes ++3. Kernel API ++ 3.1 Overview ++ 3.2 Synchronization ++ 3.3 Subsystem API ++4. Questions ++ ++1. Containers ++========== ++ ++1.1 What are containers ? ++---------------------- ++ ++Containers provide a mechanism for aggregating/partitioning sets of ++tasks, and all their future children, into hierarchical groups with ++specialized behaviour. ++ ++Definitions: ++ ++A *container* associates a set of tasks with a set of parameters for one ++or more subsystems. ++ ++A *subsystem* is a module that makes use of the task grouping ++facilities provided by containers to treat groups of tasks in ++particular ways. A subsystem is typically a "resource controller" that ++schedules a resource or applies per-container limits, but it may be ++anything that wants to act on a group of processes, e.g. a ++virtualization subsystem. ++ ++A *hierarchy* is a set of containers arranged in a tree, such that ++every task in the system is in exactly one of the containers in the ++hierarchy, and a set of subsystems; each subsystem has system-specific ++state attached to each container in the hierarchy. Each hierarchy has ++an instance of the container virtual filesystem associated with it. ++ ++At any one time there may be multiple active hierachies of task ++containers. Each hierarchy is a partition of all tasks in the system. ++ ++User level code may create and destroy containers by name in an ++instance of the container virtual file system, specify and query to ++which container a task is assigned, and list the task pids assigned to ++a container. Those creations and assignments only affect the hierarchy ++associated with that instance of the container file system. ++ ++On their own, the only use for containers is for simple job ++tracking. The intention is that other subsystems hook into the generic ++container support to provide new attributes for containers, such as ++accounting/limiting the resources which processes in a container can ++access. For example, cpusets (see Documentation/cpusets.txt) allows ++you to associate a set of CPUs and a set of memory nodes with the ++tasks in each container. ++ ++1.2 Why are containers needed ? ++---------------------------- ++ ++There are multiple efforts to provide process aggregations in the ++Linux kernel, mainly for resource tracking purposes. Such efforts ++include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server ++namespaces. These all require the basic notion of a ++grouping/partitioning of processes, with newly forked processes ending ++in the same group (container) as their parent process. 
++
++The kernel container patch provides the minimum essential kernel
++mechanisms required to efficiently implement such groups. It has
++minimal impact on the system fast paths, and provides hooks for
++specific subsystems such as cpusets to provide additional behaviour as
++desired.
++
++Multiple hierarchy support is provided to allow for situations where
++the division of tasks into containers is distinctly different for
++different subsystems - having parallel hierarchies allows each
++hierarchy to be a natural division of tasks, without having to handle
++complex combinations of tasks that would be present if several
++unrelated subsystems needed to be forced into the same tree of
++containers.
++
++At one extreme, each resource controller or subsystem could be in a
++separate hierarchy; at the other extreme, all subsystems
++would be attached to the same hierarchy.
++
++As an example of a scenario (originally proposed by vatsa@in.ibm.com)
++that can benefit from multiple hierarchies, consider a large
++university server with various users - students, professors, system
++tasks etc. The resource planning for this server could be along the
++following lines:
++
++ CPU : Top cpuset
++ / \
++ CPUSet1 CPUSet2
++ | |
++ (Profs) (Students)
++
++ In addition (system tasks) are attached to topcpuset (so
++ that they can run anywhere) with a limit of 20%
++
++ Memory : Professors (50%), students (30%), system (20%)
++
++ Disk : Prof (50%), students (30%), system (20%)
++
++ Network : WWW browsing (20%), Network File System (60%), others (20%)
++ / \
++ Prof (15%) students (5%)
++
++Browsers like firefox/lynx go into the WWW network class, while (k)nfsd goes
++into the NFS network class.
++
++At the same time firefox/lynx will share an appropriate CPU/Memory class
++depending on who launched it (prof/student).
++
++With the ability to classify tasks differently for different resources
++(by putting those resource subsystems in different hierarchies),
++the admin can easily set up a script which receives exec notifications
++and, depending on who is launching the browser, can
++
++ # echo browser_pid > /mnt///tasks
++
++With only a single hierarchy, he would now potentially have to create
++a separate container for every browser launched and associate it with
++the appropriate network and other resource classes. This may lead to a
++proliferation of such containers.
++
++Also, let's say that the administrator would like to give enhanced network
++access temporarily to a student's browser (since it is night and the user
++wants to do online gaming :) OR give one of the student's simulation
++apps enhanced CPU power.
++
++With the ability to write pids directly to resource classes, it's just a
++matter of:
++
++ # echo pid > /mnt/network//tasks
++ (after some time)
++ # echo pid > /mnt/network//tasks
++
++Without this ability, he would have to split the container into
++multiple separate ones and then associate the new containers with the
++new resource classes.
++
++
++
++1.3 How are containers implemented ?
++---------------------------------
++
++Containers extend the kernel as follows:
++
++ - Each task in the system has a reference-counted pointer to a
++ css_group.
++
++ - A css_group contains a set of reference-counted pointers to
++ container_subsys_state objects, one for each container subsystem
++ registered in the system.
There is no direct link from a task to ++ the container of which it's a member in each hierarchy, but this ++ can be determined by following pointers through the ++ container_subsys_state objects. This is because accessing the ++ subsystem state is something that's expected to happen frequently ++ and in performance-critical code, whereas operations that require a ++ task's actual container assignments (in particular, moving between ++ containers) are less common. A linked list runs through the cg_list ++ field of each task_struct using the css_group, anchored at ++ css_group->tasks. ++ ++ - A container hierarchy filesystem can be mounted for browsing and ++ manipulation from user space. ++ ++ - You can list all the tasks (by pid) attached to any container. ++ ++The implementation of containers requires a few, simple hooks ++into the rest of the kernel, none in performance critical paths: ++ ++ - in init/main.c, to initialize the root containers and initial ++ css_group at system boot. ++ ++ - in fork and exit, to attach and detach a task from its css_group. ++ ++In addition a new file system, of type "container" may be mounted, to ++enable browsing and modifying the containers presently known to the ++kernel. When mounting a container hierarchy, you may specify a ++comma-separated list of subsystems to mount as the filesystem mount ++options. By default, mounting the container filesystem attempts to ++mount a hierarchy containing all registered subsystems. ++ ++If an active hierarchy with exactly the same set of subsystems already ++exists, it will be reused for the new mount. If no existing hierarchy ++matches, and any of the requested subsystems are in use in an existing ++hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy ++is activated, associated with the requested subsystems. ++ ++It's not currently possible to bind a new subsystem to an active ++container hierarchy, or to unbind a subsystem from an active container ++hierarchy. This may be possible in future, but is fraught with nasty ++error-recovery issues. ++ ++When a container filesystem is unmounted, if there are any ++subcontainers created below the top-level container, that hierarchy ++will remain active even though unmounted; if there are no ++subcontainers then the hierarchy will be deactivated. ++ ++No new system calls are added for containers - all support for ++querying and modifying containers is via this container file system. ++ ++Each task under /proc has an added file named 'container' displaying, ++for each active hierarchy, the subsystem names and the container name ++as the path relative to the root of the container file system. ++ ++Each container is represented by a directory in the container file system ++containing the following files describing that container: ++ ++ - tasks: list of tasks (by pid) attached to that container ++ - notify_on_release flag: run /sbin/container_release_agent on exit? ++ ++Other subsystems such as cpusets may add additional files in each ++container dir ++ ++New containers are created using the mkdir system call or shell ++command. The properties of a container, such as its flags, are ++modified by writing to the appropriate file in that containers ++directory, as listed above. ++ ++The named hierarchical structure of nested containers allows partitioning ++a large system into nested, dynamically changeable, "soft-partitions". 
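++
++As an illustration of the mkdir/write interface described above, the same
++steps can be driven from a small C program (the /dev/container mount point
++and the container name are assumptions for the example; error handling is
++omitted):
++
++ #include <fcntl.h>
++ #include <stdio.h>
++ #include <string.h>
++ #include <sys/stat.h>
++ #include <sys/types.h>
++ #include <unistd.h>
++
++ int main(void)
++ {
++  char buf[16];
++  int fd;
++
++  /* create a new container in an already-mounted hierarchy */
++  mkdir("/dev/container/my_container", 0755);
++
++  /* attach ourselves by writing our pid to its 'tasks' file */
++  fd = open("/dev/container/my_container/tasks", O_WRONLY);
++  snprintf(buf, sizeof(buf), "%d", (int)getpid());
++  write(fd, buf, strlen(buf));
++  close(fd);
++  return 0;
++ }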
++ ++The attachment of each task, automatically inherited at fork by any ++children of that task, to a container allows organizing the work load ++on a system into related sets of tasks. A task may be re-attached to ++any other container, if allowed by the permissions on the necessary ++container file system directories. ++ ++When a task is moved from one container to another, it gets a new ++css_group pointer - if there's an already existing css_group with the ++desired collection of containers then that group is reused, else a new ++css_group is allocated. Note that the current implementation uses a ++linear search to locate an appropriate existing css_group, so isn't ++very efficient. A future version will use a hash table for better ++performance. ++ ++To allow access from a container to the css_groups (and hence tasks) ++that comprise it, a set of cg_container_link objects form a lattice; ++each cg_container_link is linked into a list of cg_container_links for ++a single container on its cont_link_list field, and a list of ++cg_container_links for a single css_group on its cg_link_list. ++ ++Thus the set of tasks in a container can be listed by iterating over ++each css_group that references the container, and sub-iterating over ++each css_group's task set. ++ ++The use of a Linux virtual file system (vfs) to represent the ++container hierarchy provides for a familiar permission and name space ++for containers, with a minimum of additional kernel code. ++ ++1.4 What does notify_on_release do ? ++------------------------------------ ++ ++*** notify_on_release is disabled in the current patch set. It will be ++*** reactivated in a future patch in a less-intrusive manner ++ ++If the notify_on_release flag is enabled (1) in a container, then ++whenever the last task in the container leaves (exits or attaches to ++some other container) and the last child container of that container ++is removed, then the kernel runs the command specified by the contents ++of the "release_agent" file in that hierarchy's root directory, ++supplying the pathname (relative to the mount point of the container ++file system) of the abandoned container. This enables automatic ++removal of abandoned containers. The default value of ++notify_on_release in the root container at system boot is disabled ++(0). The default value of other containers at creation is the current ++value of their parents notify_on_release setting. The default value of ++a container hierarchy's release_agent path is empty. ++ ++1.5 How do I use containers ? ++-------------------------- ++ ++To start a new job that is to be contained within a container, using ++the "cpuset" container subsystem, the steps are something like: ++ ++ 1) mkdir /dev/container ++ 2) mount -t container -ocpuset cpuset /dev/container ++ 3) Create the new container by doing mkdir's and write's (or echo's) in ++ the /dev/container virtual file system. ++ 4) Start a task that will be the "founding father" of the new job. ++ 5) Attach that task to the new container by writing its pid to the ++ /dev/container tasks file for that container. ++ 6) fork, exec or clone the job tasks from this founding father task. 
++
++For example, the following sequence of commands will set up a container
++named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
++and then start a subshell 'sh' in that container:
++
++ mount -t container cpuset -ocpuset /dev/container
++ cd /dev/container
++ mkdir Charlie
++ cd Charlie
++ /bin/echo $$ > tasks
++ sh
++ # The subshell 'sh' is now running in container Charlie
++ # The next line should display '/Charlie'
++ cat /proc/self/container
++
++2. Usage Examples and Syntax
++============================
++
++2.1 Basic Usage
++---------------
++
++Creating, modifying, and using containers can be done through the container
++virtual filesystem.
++
++To mount a container hierarchy with all available subsystems, type:
++# mount -t container xxx /dev/container
++
++The "xxx" is not interpreted by the container code, but will appear in
++/proc/mounts, so it may be any useful identifying string that you like.
++
++To mount a container hierarchy with just the cpuset and numtasks
++subsystems, type:
++# mount -t container -o cpuset,numtasks hier1 /dev/container
++
++To change the set of subsystems bound to a mounted hierarchy, just
++remount with different options:
++
++# mount -o remount,cpuset,ns /dev/container
++
++Note that changing the set of subsystems is currently only supported
++when the hierarchy consists of a single (root) container. Supporting
++the ability to arbitrarily bind/unbind subsystems from an existing
++container hierarchy is intended to be implemented in the future.
++
++Then under /dev/container you can find a tree that corresponds to the
++tree of the containers in the system. For instance, /dev/container
++is the container that holds the whole system.
++
++If you want to create a new container under /dev/container:
++# cd /dev/container
++# mkdir my_container
++
++Now you want to do something with this container.
++# cd my_container
++
++In this directory you can find several files:
++# ls
++notify_on_release release_agent tasks
++(plus whatever files are added by the attached subsystems)
++
++Now attach your shell to this container:
++# /bin/echo $$ > tasks
++
++You can also create containers inside your container by using mkdir in this
++directory.
++# mkdir my_sub_cs
++
++To remove a container, just use rmdir:
++# rmdir my_sub_cs
++
++This will fail if the container is in use (has containers inside, or
++has processes attached, or is held alive by another subsystem-specific
++reference).
++
++2.2 Attaching processes
++-----------------------
++
++# /bin/echo PID > tasks
++
++Note that it is PID, not PIDs. You can only attach ONE task at a time.
++If you have several tasks to attach, you have to do it one after another:
++
++# /bin/echo PID1 > tasks
++# /bin/echo PID2 > tasks
++ ...
++# /bin/echo PIDn > tasks
++
++3. Kernel API
++=============
++
++3.1 Overview
++------------
++
++Each kernel subsystem that wants to hook into the generic container
++system needs to create a container_subsys object. This contains
++various methods, which are callbacks from the container system, along
++with a subsystem id which will be assigned by the container system.
++
++Other fields in the container_subsys object include:
++
++- subsys_id: a unique array index for the subsystem, indicating which
++ entry in container->subsys[] this subsystem should be
++ managing. Initialized by container_register_subsys(); prior to this
++ it should be initialized to -1
++
++- hierarchy: an index indicating which hierarchy, if any, this
++ subsystem is currently attached to.
If this is -1, then the ++ subsystem is not attached to any hierarchy, and all tasks should be ++ considered to be members of the subsystem's top_container. It should ++ be initialized to -1. ++ ++- name: should be initialized to a unique subsystem name prior to ++ calling container_register_subsystem. Should be no longer than ++ MAX_CONTAINER_TYPE_NAMELEN ++ ++Each container object created by the system has an array of pointers, ++indexed by subsystem id; this pointer is entirely managed by the ++subsystem; the generic container code will never touch this pointer. ++ ++3.2 Synchronization ++------------------- ++ ++There is a global mutex, container_mutex, used by the container ++system. This should be taken by anything that wants to modify a ++container. It may also be taken to prevent containers from being ++modified, but more specific locks may be more appropriate in that ++situation. ++ ++See kernel/container.c for more details. ++ ++Subsystems can take/release the container_mutex via the functions ++container_lock()/container_unlock(), and can ++take/release the callback_mutex via the functions ++container_lock()/container_unlock(). ++ ++Accessing a task's container pointer may be done in the following ways: ++- while holding container_mutex ++- while holding the task's alloc_lock (via task_lock()) ++- inside an rcu_read_lock() section via rcu_dereference() ++ ++3.3 Subsystem API ++-------------------------- ++ ++Each subsystem should: ++ ++- add an entry in linux/container_subsys.h ++- define a container_subsys object called _subsys ++ ++Each subsystem may export the following methods. The only mandatory ++methods are create/destroy. Any others that are null are presumed to ++be successful no-ops. ++ ++int create(struct container *cont) ++LL=container_mutex ++ ++Called to create a subsystem state object for a container. The ++subsystem should set its subsystem pointer for the passed container, ++returning 0 on success or a negative error code. On success, the ++subsystem pointer should point to a structure of type ++container_subsys_state (typically embedded in a larger ++subsystem-specific object), which will be initialized by the container ++system. Note that this will be called at initialization to create the ++root subsystem state for this subsystem; this case can be identified ++by the passed container object having a NULL parent (since it's the ++root of the hierarchy) and may be an appropriate place for ++initialization code. ++ ++void destroy(struct container *cont) ++LL=container_mutex ++ ++The container system is about to destroy the passed container; the ++subsystem should do any necessary cleanup ++ ++int can_attach(struct container_subsys *ss, struct container *cont, ++ struct task_struct *task) ++LL=container_mutex ++ ++Called prior to moving a task into a container; if the subsystem ++returns an error, this will abort the attach operation. If a NULL ++task is passed, then a successful result indicates that *any* ++unspecified task can be moved into the container. Note that this isn't ++called on a fork. If this method returns 0 (success) then this should ++remain valid while the caller holds container_mutex. ++ ++void attach(struct container_subsys *ss, struct container *cont, ++ struct container *old_cont, struct task_struct *task) ++LL=container_mutex ++ ++ ++Called after the task has been attached to the container, to allow any ++post-attachment activity that requires memory allocations or blocking. 
++
++void fork(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex, maybe read_lock(tasklist_lock)
++
++Called when a task is forked into a container. Also called during
++registration for all existing tasks.
++
++void exit(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex
++
++Called during task exit.
++
++int populate(struct container_subsys *ss, struct container *cont)
++LL=none
++
++Called after creation of a container to allow a subsystem to populate
++the container directory with file entries. The subsystem should make
++calls to container_add_file() with objects of type cftype (see
++include/linux/container.h for details). Note that although this
++method can return an error code, the error code is currently not
++always handled well.
++
++void post_clone(struct container_subsys *ss, struct container *cont)
++
++Called at the end of container_clone() to do any parameter
++initialization which might be required before a task could attach. For
++example in cpusets, no task may attach before 'cpus' and 'mems' are set
++up.
++
++void bind(struct container_subsys *ss, struct container *root)
++LL=callback_mutex
++
++Called when a container subsystem is rebound to a different hierarchy
++and root container. Currently this will only involve movement between
++the default hierarchy (which never has sub-containers) and a hierarchy
++that is being created/destroyed (and hence has no sub-containers).
++
++4. Questions
++============
++
++Q: What's up with this '/bin/echo' ?
++A: bash's builtin 'echo' command does not check calls to write() for
++ errors. If you use it in the container file system, you won't be
++ able to tell whether a command succeeded or failed.
++
++Q: When I attach processes, only the first of the line gets really attached!
++A: We can only return one error code per call to write(). So you should
++ put only ONE pid.
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/core.txt linux-2.6.22-591/Documentation/cpuidle/core.txt
+--- linux-2.6.22-570/Documentation/cpuidle/core.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/core.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,17 @@
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle
++
++General Information:
++
++Various CPUs today support multiple idle levels that are differentiated
++by varying exit latencies and power consumption during idle.
++cpuidle is a generic in-kernel infrastructure that separates
++idle policy (governor) from idle mechanism (driver) and provides a
++standardized framework to support independent development of
++governors and drivers.
++
++cpuidle resides under drivers/cpuidle.
++
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/driver.txt linux-2.6.22-591/Documentation/cpuidle/driver.txt
+--- linux-2.6.22-570/Documentation/cpuidle/driver.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/cpuidle/driver.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,24 @@
++
++
++ Supporting multiple CPU idle levels in kernel
++
++ cpuidle drivers
++
++
++
++
++A cpuidle driver supports capability detection for a particular system. The
++init and exit routines will be called for each online CPU, with a percpu
++cpuidle_driver object, and the driver should fill in the cpuidle_states inside
++cpuidle_driver depending on the CPU's capabilities.
++
++A driver can handle dynamic state changes (like battery<->AC) by calling
++the force_redetect interface.
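++
++As a minimal registration sketch, using the interfaces listed below (the
++'foo' names are hypothetical, and the member names of struct cpuidle_driver
++are assumed from the init/exit and cpuidle_states description above rather
++than taken from a header):
++
++ static int foo_idle_init(struct cpuidle_device *dev)
++ {
++  /* detect this CPU's idle levels and fill in the
++   * corresponding cpuidle_states entries (assumed layout) */
++  return 0;
++ }
++
++ static struct cpuidle_driver foo_idle_driver = {
++  .name = "foo_idle",   /* assumed field name */
++  .init = foo_idle_init,  /* per-CPU capability detection */
++ };
++
++ /* registration, via the interface below: */
++ /* ret = cpuidle_register_driver(&foo_idle_driver); */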
++ ++It is possible to have more than one driver registered at the same time and ++user can switch between drivers using /sysfs interface. ++ ++Interfaces: ++int cpuidle_register_driver(struct cpuidle_driver *drv); ++void cpuidle_unregister_driver(struct cpuidle_driver *drv); ++int cpuidle_force_redetect(struct cpuidle_device *dev); +diff -Nurb linux-2.6.22-570/Documentation/cpuidle/governor.txt linux-2.6.22-591/Documentation/cpuidle/governor.txt +--- linux-2.6.22-570/Documentation/cpuidle/governor.txt 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/Documentation/cpuidle/governor.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,24 @@ ++ ++ ++ ++ Supporting multiple CPU idle levels in kernel ++ ++ cpuidle governors ++ ++ ++ ++ ++cpuidle governor is policy routine that decides what idle state to enter at ++any given time. cpuidle core uses different callbacks to governor while ++handling idle entry. ++* select_state callback where governor can determine next idle state to enter ++* prepare_idle callback is called before entering an idle state ++* scan callback is called after a driver forces redetection of the states ++ ++More than one governor can be registered at the same time and ++user can switch between drivers using /sysfs interface. ++ ++Interfaces: ++int cpuidle_register_governor(struct cpuidle_governor *gov); ++void cpuidle_unregister_governor(struct cpuidle_governor *gov); ++ +diff -Nurb linux-2.6.22-570/Documentation/cpuidle/sysfs.txt linux-2.6.22-591/Documentation/cpuidle/sysfs.txt +--- linux-2.6.22-570/Documentation/cpuidle/sysfs.txt 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/Documentation/cpuidle/sysfs.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,27 @@ ++ ++ ++ Supporting multiple CPU idle levels in kernel ++ ++ cpuidle sysfs ++ ++System global cpuidle information are under ++/sys/devices/system/cpu/cpuidle ++ ++The current interfaces in this directory has self-explanatory names: ++* available_drivers ++* available_governors ++* current_driver ++* current_governor ++ ++Per logical CPU specific cpuidle information are under ++/sys/devices/system/cpu/cpuX/cpuidle ++for each online cpu X ++ ++Under this percpu directory, there is a directory for each idle state supported ++by the driver, which in turn has ++* latency ++* power ++* time ++* usage ++ ++ +diff -Nurb linux-2.6.22-570/Documentation/cpusets.txt linux-2.6.22-591/Documentation/cpusets.txt +--- linux-2.6.22-570/Documentation/cpusets.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/cpusets.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -7,6 +7,7 @@ + Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. + Modified by Paul Jackson + Modified by Christoph Lameter ++Modified by Paul Menage + + CONTENTS: + ========= +@@ -16,10 +17,9 @@ + 1.2 Why are cpusets needed ? + 1.3 How are cpusets implemented ? + 1.4 What are exclusive cpusets ? +- 1.5 What does notify_on_release do ? +- 1.6 What is memory_pressure ? +- 1.7 What is memory spread ? +- 1.8 How do I use cpusets ? ++ 1.5 What is memory_pressure ? ++ 1.6 What is memory spread ? ++ 1.7 How do I use cpusets ? + 2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Adding/removing cpus +@@ -43,18 +43,19 @@ + hooks, beyond what is already present, required to manage dynamic + job placement on large systems. + +-Each task has a pointer to a cpuset. Multiple tasks may reference +-the same cpuset. 
Requests by a task, using the sched_setaffinity(2) +-system call to include CPUs in its CPU affinity mask, and using the +-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes +-in its memory policy, are both filtered through that tasks cpuset, +-filtering out any CPUs or Memory Nodes not in that cpuset. The +-scheduler will not schedule a task on a CPU that is not allowed in +-its cpus_allowed vector, and the kernel page allocator will not +-allocate a page on a node that is not allowed in the requesting tasks +-mems_allowed vector. ++Cpusets use the generic container subsystem described in ++Documentation/container.txt. + +-User level code may create and destroy cpusets by name in the cpuset ++Requests by a task, using the sched_setaffinity(2) system call to ++include CPUs in its CPU affinity mask, and using the mbind(2) and ++set_mempolicy(2) system calls to include Memory Nodes in its memory ++policy, are both filtered through that tasks cpuset, filtering out any ++CPUs or Memory Nodes not in that cpuset. The scheduler will not ++schedule a task on a CPU that is not allowed in its cpus_allowed ++vector, and the kernel page allocator will not allocate a page on a ++node that is not allowed in the requesting tasks mems_allowed vector. ++ ++User level code may create and destroy cpusets by name in the container + virtual file system, manage the attributes and permissions of these + cpusets and which CPUs and Memory Nodes are assigned to each cpuset, + specify and query to which cpuset a task is assigned, and list the +@@ -86,9 +87,6 @@ + and a database), or + * NUMA systems running large HPC applications with demanding + performance characteristics. +- * Also cpu_exclusive cpusets are useful for servers running orthogonal +- workloads such as RT applications requiring low latency and HPC +- applications that are throughput sensitive + + These subsets, or "soft partitions" must be able to be dynamically + adjusted, as the job mix changes, without impacting other concurrently +@@ -117,7 +115,7 @@ + - Cpusets are sets of allowed CPUs and Memory Nodes, known to the + kernel. + - Each task in the system is attached to a cpuset, via a pointer +- in the task structure to a reference counted cpuset structure. ++ in the task structure to a reference counted container structure. + - Calls to sched_setaffinity are filtered to just those CPUs + allowed in that tasks cpuset. + - Calls to mbind and set_mempolicy are filtered to just +@@ -131,8 +129,6 @@ + - A cpuset may be marked exclusive, which ensures that no other + cpuset (except direct ancestors and descendents) may contain + any overlapping CPUs or Memory Nodes. +- Also a cpu_exclusive cpuset would be associated with a sched +- domain. + - You can list all the tasks (by pid) attached to any cpuset. + + The implementation of cpusets requires a few, simple hooks +@@ -144,23 +140,15 @@ + allowed in that tasks cpuset. + - in sched.c migrate_all_tasks(), to keep migrating tasks within + the CPUs allowed by their cpuset, if possible. +- - in sched.c, a new API partition_sched_domains for handling +- sched domain changes associated with cpu_exclusive cpusets +- and related changes in both sched.c and arch/ia64/kernel/domain.c + - in the mbind and set_mempolicy system calls, to mask the requested + Memory Nodes by what's allowed in that tasks cpuset. + - in page_alloc.c, to restrict memory to allowed nodes. + - in vmscan.c, to restrict page recovery to the current cpuset. 
+ +-In addition a new file system, of type "cpuset" may be mounted, +-typically at /dev/cpuset, to enable browsing and modifying the cpusets +-presently known to the kernel. No new system calls are added for +-cpusets - all support for querying and modifying cpusets is via +-this cpuset file system. +- +-Each task under /proc has an added file named 'cpuset', displaying +-the cpuset name, as the path relative to the root of the cpuset file +-system. ++You should mount the "container" filesystem type in order to enable ++browsing and modifying the cpusets presently known to the kernel. No ++new system calls are added for cpusets - all support for querying and ++modifying cpusets is via this cpuset file system. + + The /proc//status file for each task has two added lines, + displaying the tasks cpus_allowed (on which CPUs it may be scheduled) +@@ -170,16 +158,15 @@ + Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff + Mems_allowed: ffffffff,ffffffff + +-Each cpuset is represented by a directory in the cpuset file system +-containing the following files describing that cpuset: ++Each cpuset is represented by a directory in the container file system ++containing (on top of the standard container files) the following ++files describing that cpuset: + + - cpus: list of CPUs in that cpuset + - mems: list of Memory Nodes in that cpuset + - memory_migrate flag: if set, move pages to cpusets nodes + - cpu_exclusive flag: is cpu placement exclusive? + - mem_exclusive flag: is memory placement exclusive? +- - tasks: list of tasks (by pid) attached to that cpuset +- - notify_on_release flag: run /sbin/cpuset_release_agent on exit? + - memory_pressure: measure of how much paging pressure in cpuset + + In addition, the root cpuset only has the following file: +@@ -231,15 +218,6 @@ + a direct ancestor or descendent, may share any of the same CPUs or + Memory Nodes. + +-A cpuset that is cpu_exclusive has a scheduler (sched) domain +-associated with it. The sched domain consists of all CPUs in the +-current cpuset that are not part of any exclusive child cpusets. +-This ensures that the scheduler load balancing code only balances +-against the CPUs that are in the sched domain as defined above and +-not all of the CPUs in the system. This removes any overhead due to +-load balancing code trying to pull tasks outside of the cpu_exclusive +-cpuset only to be prevented by the tasks' cpus_allowed mask. +- + A cpuset that is mem_exclusive restricts kernel allocations for + page, buffer and other data commonly shared by the kernel across + multiple users. All cpusets, whether mem_exclusive or not, restrict +@@ -253,21 +231,7 @@ + outside even a mem_exclusive cpuset. + + +-1.5 What does notify_on_release do ? +------------------------------------- +- +-If the notify_on_release flag is enabled (1) in a cpuset, then whenever +-the last task in the cpuset leaves (exits or attaches to some other +-cpuset) and the last child cpuset of that cpuset is removed, then +-the kernel runs the command /sbin/cpuset_release_agent, supplying the +-pathname (relative to the mount point of the cpuset file system) of the +-abandoned cpuset. This enables automatic removal of abandoned cpusets. +-The default value of notify_on_release in the root cpuset at system +-boot is disabled (0). The default value of other cpusets at creation +-is the current value of their parents notify_on_release setting. +- +- +-1.6 What is memory_pressure ? ++1.5 What is memory_pressure ? 
+ ----------------------------- + The memory_pressure of a cpuset provides a simple per-cpuset metric + of the rate that the tasks in a cpuset are attempting to free up in +@@ -324,7 +288,7 @@ + times 1000. + + +-1.7 What is memory spread ? ++1.6 What is memory spread ? + --------------------------- + There are two boolean flag files per cpuset that control where the + kernel allocates pages for the file system buffers and related in +@@ -395,7 +359,7 @@ + can become very uneven. + + +-1.8 How do I use cpusets ? ++1.7 How do I use cpusets ? + -------------------------- + + In order to minimize the impact of cpusets on critical kernel +@@ -485,7 +449,7 @@ + To start a new job that is to be contained within a cpuset, the steps are: + + 1) mkdir /dev/cpuset +- 2) mount -t cpuset none /dev/cpuset ++ 2) mount -t container -ocpuset cpuset /dev/cpuset + 3) Create the new cpuset by doing mkdir's and write's (or echo's) in + the /dev/cpuset virtual file system. + 4) Start a task that will be the "founding father" of the new job. +@@ -497,7 +461,7 @@ + named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, + and then start a subshell 'sh' in that cpuset: + +- mount -t cpuset none /dev/cpuset ++ mount -t container -ocpuset cpuset /dev/cpuset + cd /dev/cpuset + mkdir Charlie + cd Charlie +@@ -529,7 +493,7 @@ + virtual filesystem. + + To mount it, type: +-# mount -t cpuset none /dev/cpuset ++# mount -t container -o cpuset cpuset /dev/cpuset + + Then under /dev/cpuset you can find a tree that corresponds to the + tree of the cpusets in the system. For instance, /dev/cpuset +@@ -572,6 +536,18 @@ + This will fail if the cpuset is in use (has cpusets inside, or has + processes attached). + ++Note that for legacy reasons, the "cpuset" filesystem exists as a ++wrapper around the container filesystem. ++ ++The command ++ ++mount -t cpuset X /dev/cpuset ++ ++is equivalent to ++ ++mount -t container -ocpuset X /dev/cpuset ++echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent ++ + 2.2 Adding/removing cpus + ------------------------ + +diff -Nurb linux-2.6.22-570/Documentation/feature-removal-schedule.txt linux-2.6.22-591/Documentation/feature-removal-schedule.txt +--- linux-2.6.22-570/Documentation/feature-removal-schedule.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/feature-removal-schedule.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -162,6 +162,33 @@ + + --------------------------- + ++What: filemap_nopage, filemap_populate ++When: April 2007 ++Why: These legacy interfaces no longer have any callers in the kernel and ++ any functionality provided can be provided with filemap_fault. The ++ removal schedule is short because they are a big maintainence burden ++ and have some bugs. ++Who: Nick Piggin ++ ++--------------------------- ++ ++What: vm_ops.populate, install_page ++When: April 2007 ++Why: These legacy interfaces no longer have any callers in the kernel and ++ any functionality provided can be provided with vm_ops.fault. ++Who: Nick Piggin ++ ++--------------------------- ++ ++What: vm_ops.nopage ++When: February 2008, provided in-kernel callers have been converted ++Why: This interface is replaced by vm_ops.fault, but it has been around ++ forever, is used by a lot of drivers, and doesn't cost much to ++ maintain. 
++Who: Nick Piggin ++ ++--------------------------- ++ + What: Interrupt only SA_* flags + When: September 2007 + Why: The interrupt related SA_* flags are replaced by IRQF_* to move them +@@ -280,25 +307,6 @@ + + --------------------------- + +-What: Multipath cached routing support in ipv4 +-When: in 2.6.23 +-Why: Code was merged, then submitter immediately disappeared leaving +- us with no maintainer and lots of bugs. The code should not have +- been merged in the first place, and many aspects of it's +- implementation are blocking more critical core networking +- development. It's marked EXPERIMENTAL and no distribution +- enables it because it cause obscure crashes due to unfixable bugs +- (interfaces don't return errors so memory allocation can't be +- handled, calling contexts of these interfaces make handling +- errors impossible too because they get called after we've +- totally commited to creating a route object, for example). +- This problem has existed for years and no forward progress +- has ever been made, and nobody steps up to try and salvage +- this code, so we're going to finally just get rid of it. +-Who: David S. Miller +- +---------------------------- +- + What: read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer) + When: December 2007 + Why: These functions are a leftover from 2.4 times. They have several +diff -Nurb linux-2.6.22-570/Documentation/filesystems/00-INDEX linux-2.6.22-591/Documentation/filesystems/00-INDEX +--- linux-2.6.22-570/Documentation/filesystems/00-INDEX 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/filesystems/00-INDEX 2007-12-21 15:36:11.000000000 -0500 +@@ -84,6 +84,8 @@ + - info and mount options for the UDF filesystem. + ufs.txt + - info on the ufs filesystem. ++unionfs/ ++ - info on the unionfs filesystem + vfat.txt + - info on using the VFAT filesystem used in Windows NT and Windows 95 + vfs.txt +diff -Nurb linux-2.6.22-570/Documentation/filesystems/Locking linux-2.6.22-591/Documentation/filesystems/Locking +--- linux-2.6.22-570/Documentation/filesystems/Locking 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/filesystems/Locking 2007-12-21 15:36:11.000000000 -0500 +@@ -510,12 +510,14 @@ + prototypes: + void (*open)(struct vm_area_struct*); + void (*close)(struct vm_area_struct*); ++ struct page *(*fault)(struct vm_area_struct*, struct fault_data *); + struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *); + + locking rules: + BKL mmap_sem + open: no yes + close: no yes ++fault: no yes + nopage: no yes + + ================================================================================ +diff -Nurb linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt +--- linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/Documentation/filesystems/configfs/configfs.txt 2007-12-21 15:36:11.000000000 -0500 +@@ -238,6 +238,8 @@ + struct config_group *(*make_group)(struct config_group *group, + const char *name); + int (*commit_item)(struct config_item *item); ++ void (*disconnect_notify)(struct config_group *group, ++ struct config_item *item); + void (*drop_item)(struct config_group *group, + struct config_item *item); + }; +@@ -268,6 +270,16 @@ + for the item to actually disappear from the subsystem's usage. But it + is gone from configfs. + ++When drop_item() is called, the item's linkage has already been torn ++down. 
It no longer has a reference on its parent and has no place in
++the item hierarchy. If a client needs to do some cleanup before this
++teardown happens, the subsystem can implement the
++ct_group_ops->disconnect_notify() method. The method is called after
++configfs has removed the item from the filesystem view but before the
++item is removed from its parent group. Like drop_item(),
++disconnect_notify() is void and cannot fail. Client subsystems should
++not drop any references here, as they still must do it in drop_item().
++
+ A config_group cannot be removed while it still has child items. This
+ is implemented in the configfs rmdir(2) code. ->drop_item() will not be
+ called, as the item has not been dropped. rmdir(2) will fail, as the
+@@ -386,6 +398,33 @@
+ rmdir(2). They also are not considered when rmdir(2) on the parent
+ group is checking for children.
+
++[Dependent Subsystems]
++
++Sometimes other drivers depend on particular configfs items. For
++example, ocfs2 mounts depend on a heartbeat region item. If that
++region item is removed with rmdir(2), the ocfs2 mount must BUG or go
++readonly. Not happy.
++
++configfs provides two additional API calls: configfs_depend_item() and
++configfs_undepend_item(). A client driver can call
++configfs_depend_item() on an existing item to tell configfs that it is
++depended on. configfs will then return -EBUSY from rmdir(2) for that
++item. When the item is no longer depended on, the client driver calls
++configfs_undepend_item() on it.
++
++These APIs cannot be called underneath any configfs callbacks, as
++they will conflict. They can block and allocate. A client driver
++probably shouldn't call them of its own gumption. Rather, it should
++provide an API that external subsystems call.
++
++How does this work? Imagine the ocfs2 mount process. When it mounts,
++it asks for a heartbeat region item. This is done via a call into the
++heartbeat code. Inside the heartbeat code, the region item is looked
++up. Here, the heartbeat code calls configfs_depend_item(). If it
++succeeds, then heartbeat knows the region is safe to give to ocfs2.
++If it fails, it was being torn down anyway, and heartbeat can gracefully
++pass up an error.
++
+ [Committable Items]
+
+ NOTE: Committable items are currently unimplemented.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/00-INDEX 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,10 @@
++00-INDEX
++ - this file.
++concepts.txt
++ - A brief introduction to concepts.
++issues.txt
++ - A summary of known issues with unionfs.
++rename.txt
++ - Information regarding rename operations.
++usage.txt
++ - Usage information and examples.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/concepts.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,75 @@
++Unionfs 2.0 CONCEPTS:
++=====================
++
++This file describes the concepts needed by a namespace unification file
++system.
++
++Branch Priority:
++================
++
++Each branch is assigned a unique priority - starting from 0 (highest
++priority).
No two branches can have the same priority.
++
++
++Branch Mode:
++============
++
++Each branch is assigned a mode - read-write or read-only. This allows
++directories on media mounted read-write to be used in a read-only manner.
++
++
++Whiteouts:
++==========
++
++A whiteout removes a file name from the namespace. Whiteouts are needed when
++one attempts to remove a file on a read-only branch.
++
++Suppose we have a two-branch union, where branch 0 is read-write and branch
++1 is read-only, and a file 'foo' on branch 1:
++
++./b0/
++./b1/
++./b1/foo
++
++The unified view would simply be:
++
++./union/
++./union/foo
++
++Since 'foo' is stored on a read-only branch, it cannot be removed. A
++whiteout is used to remove the name 'foo' from the unified namespace. Again,
++since branch 1 is read-only, the whiteout cannot be created there. So, we
++try on a higher priority (lower numerically) branch and create the whiteout
++there.
++
++./b0/
++./b0/.wh.foo
++./b1/
++./b1/foo
++
++Later, when Unionfs traverses branches (due to lookup or readdir), it
++eliminates 'foo' from the namespace (as well as the whiteout itself).
++
++
++Duplicate Elimination:
++======================
++
++It is possible for files on different branches to have the same name.
++Unionfs then has to select which instance of the file to show to the user.
++Given the fact that each branch has a priority associated with it, the
++simplest solution is to take the instance from the highest priority
++(numerically lowest value) and "hide" the others.
++
++
++Copyup:
++=======
++
++When a change is made to the contents of a file's data or meta-data, they
++have to be stored somewhere. The best way is to create a copy of the
++original file on a branch that is writable, and then redirect the writes
++through to this copy. The copy must be made on a higher priority branch so
++that lookup and readdir return this newer "version" of the file rather than
++the original (see duplicate elimination).
++
++
++For more information, see .
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/issues.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,39 @@
++KNOWN Unionfs 2.0 ISSUES:
++=========================
++
++1. The NFS server returns -EACCES for read-only exports, instead of -EROFS.
++ This means we can't reliably detect a read-only NFS export.
++
++2. Modifying a Unionfs branch directly, while the union is mounted, is
++ currently unsupported, because it could cause a cache incoherency between
++ the union layer and the lower file systems (for that reason, Unionfs
++ currently prohibits using branches which overlap with each other, even
++ partially). We have tested Unionfs under such conditions, and fixed any
++ bugs we found (Unionfs comes with an extensive regression test suite).
++ However, it may still be possible that changes made to lower branches
++ directly could cause cache incoherency which, in the worst case, may cause
++ an oops.
++
++ Unionfs 2.0 has a temporary workaround for this. You can force Unionfs
++ to increase the superblock generation number, and hence purge all cached
++ Unionfs objects, which would then be re-fetched from the lower branches.
++ This should ensure cache consistency.
To increase the generation number,
++ execute the command:
++
++ mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++ Note that the older way of incrementing the generation number using an
++ ioctl is no longer supported in Unionfs 2.0. Ioctls in general are not
++ encouraged. Plus, an ioctl is a per-file concept, whereas the generation
++ number is a per-file-system concept. Worse, such an ioctl requires an
++ open file, which then has to be invalidated by the very nature of the
++ generation number increase (read: the old generation increase ioctl was
++ pretty racy).
++
++3. Unionfs should not use lookup_one_len() on the underlying f/s as it
++ confuses NFS. Currently, unionfs_lookup() passes lookup intents to the
++ lower file system; this eliminates part of the problem. The remaining
++ calls to lookup_one_len may need to be changed to pass an intent.
++
++
++For more information, see .
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/rename.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,31 @@
++Rename is a complex beast. The following table shows which rename(2) operations
++should succeed and which should fail.
++
++o: success
++E: error (either unionfs or vfs)
++X: EXDEV
++
++none = file does not exist
++file = file is a file
++dir = file is an empty directory
++child= file is a non-empty directory
++wh = file is a directory containing only whiteouts; this makes it logically
++ empty
++
++       none  file  dir  child  wh
++file   o     o     E    E      E
++dir    o     E     o    E      o
++child  X     E     X    E      X
++wh     o     E     o    E      o
++
++
++Renaming directories:
++=====================
++
++Whenever an empty (either physically or logically) directory is being renamed,
++the following sequence of events should take place:
++
++1) Remove whiteouts from both source and destination directory
++2) Rename source to destination
++3) Make destination opaque to prevent anything under it from showing up
++
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/filesystems/unionfs/usage.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,90 @@
++Unionfs is a stackable unification file system, which can appear to merge
++the contents of several directories (branches), while keeping their physical
++content separate. Unionfs is useful for unified source tree management,
++merged contents of split CD-ROMs, merged separate software package
++directories, data grids, and more. Unionfs allows any mix of read-only and
++read-write branches, as well as insertion and deletion of branches anywhere
++in the fan-out. To maintain Unix semantics, Unionfs handles elimination of
++duplicates, partial-error conditions, and more.
++
++# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
++
++The available branch-option for the mount command is:
++
++ dirs=branch[=ro|=rw][:...]
++
++specifies a colon-separated list of the directories that compose the union.
++Directories that come earlier in the list have a higher precedence than
++those which come later. Additionally, read-only or read-write permissions of
++the branch can be specified by appending =ro or =rw (default) to each
++directory.
++
++Syntax:
++
++ dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
++
++Example:
++
++ dirs=/writable_branch=rw:/read-only_branch=ro
++
++
++DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
++======================================
++
++You can remount a union and change its overall mode, or reconfigure the
++branches, as follows.
++
++To downgrade a union from read-write to read-only:
++
++# mount -t unionfs -o remount,ro none MOUNTPOINT
++
++To upgrade a union from read-only to read-write:
++
++# mount -t unionfs -o remount,rw none MOUNTPOINT
++
++To delete a branch /foo, regardless of where it is in the current union:
++
++# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
++
++To insert (add) a branch /foo before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
++
++To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
++
++To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
++new highest-priority branch), you can use the above syntax, or use a
++shorthand version as follows:
++
++# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch):
++
++# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch), in
++read-only mode:
++
++# mount -t unionfs -o remount,add=:/foo:ro none MOUNTPOINT
++
++Finally, to change the mode of one existing branch, say /foo, from read-only
++to read-write, and change /bar from read-write to read-only:
++
++# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
++
++
++CACHE CONSISTENCY
++=================
++
++If you modify any file on any of the lower branches directly, while a
++Unionfs 2.0 file system is mounted above any of those branches, you should
++tell Unionfs to purge its caches and re-fetch the objects. To do that, you
++have to increment the generation number of the superblock using the
++following command:
++
++# mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++
++For more information, see .
+diff -Nurb linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c
+--- linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/firmware_class/firmware_sample_firmware_class.c 2007-12-21 15:36:11.000000000 -0500
+@@ -78,6 +78,7 @@
+ firmware_loading_show, firmware_loading_store);
+
+ static ssize_t firmware_data_read(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct class_device *class_dev = to_class_dev(kobj);
+@@ -88,6 +89,7 @@
+ return count;
+ }
+ static ssize_t firmware_data_write(struct kobject *kobj,
++ struct bin_attribute *bin_attr,
+ char *buffer, loff_t offset, size_t count)
+ {
+ struct class_device *class_dev = to_class_dev(kobj);
+diff -Nurb linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt
+--- linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/power/freezing-of-tasks.txt 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,160 @@
++Freezing of tasks
++ (C) 2007 Rafael J. Wysocki , GPL
++
++I. What is the freezing of tasks?
++ ++The freezing of tasks is a mechanism by which user space processes and some ++kernel threads are controlled during hibernation or system-wide suspend (on some ++architectures). ++ ++II. How does it work? ++ ++There are four per-task flags used for that, PF_NOFREEZE, PF_FROZEN, TIF_FREEZE ++and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have ++PF_NOFREEZE unset (all user space processes and some kernel threads) are ++regarded as 'freezable' and treated in a special way before the system enters a ++suspend state as well as before a hibernation image is created (in what follows ++we only consider hibernation, but the description also applies to suspend). ++ ++Namely, as the first step of the hibernation procedure the function ++freeze_processes() (defined in kernel/power/process.c) is called. It executes ++try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and ++sends a fake signal to each of them. A task that receives such a signal and has ++TIF_FREEZE set, should react to it by calling the refrigerator() function ++(defined in kernel/power/process.c), which sets the task's PF_FROZEN flag, ++changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is ++cleared for it. Then, we say that the task is 'frozen' and therefore the set of ++functions handling this mechanism is called 'the freezer' (these functions are ++defined in kernel/power/process.c and include/linux/freezer.h). User space ++processes are generally frozen before kernel threads. ++ ++It is not recommended to call refrigerator() directly. Instead, it is ++recommended to use the try_to_freeze() function (defined in ++include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the ++task enter refrigerator() if the flag is set. ++ ++For user space processes try_to_freeze() is called automatically from the ++signal-handling code, but the freezable kernel threads need to call it ++explicitly in suitable places. The code to do this may look like the following: ++ ++ do { ++ hub_events(); ++ wait_event_interruptible(khubd_wait, ++ !list_empty(&hub_event_list)); ++ try_to_freeze(); ++ } while (!signal_pending(current)); ++ ++(from drivers/usb/core/hub.c::hub_thread()). ++ ++If a freezable kernel thread fails to call try_to_freeze() after the freezer has ++set TIF_FREEZE for it, the freezing of tasks will fail and the entire ++hibernation operation will be cancelled. For this reason, freezable kernel ++threads must call try_to_freeze() somewhere. ++ ++After the system memory state has been restored from a hibernation image and ++devices have been reinitialized, the function thaw_processes() is called in ++order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that ++have been frozen leave refrigerator() and continue running. ++ ++III. Which kernel threads are freezable? ++ ++Kernel threads are not freezable by default. However, a kernel thread may clear ++PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE ++directly is strongly discouraged). From this point it is regarded as freezable ++and must call try_to_freeze() in a suitable place. ++ ++IV. Why do we do that? ++ ++Generally speaking, there is a couple of reasons to use the freezing of tasks: ++ ++1. The principal reason is to prevent filesystems from being damaged after ++hibernation. 
At the moment we have no simple means of checkpointing
++filesystems, so if there are any modifications made to filesystem data and/or
++metadata on disks, we cannot bring them back to the state from before the
++modifications. At the same time each hibernation image contains some
++filesystem-related information that must be consistent with the state of the
++on-disk data and metadata after the system memory state has been restored from
++the image (otherwise the filesystems will be damaged in a nasty way, usually
++making them almost impossible to repair). We therefore freeze tasks that might
++cause the on-disk filesystems' data and metadata to be modified after the
++hibernation image has been created and before the system is finally powered off.
++The majority of these are user space processes, but if any of the kernel threads
++may cause something like this to happen, they have to be freezable.
++
++2. The second reason is to prevent user space processes and some kernel threads
++from interfering with the suspending and resuming of devices. A user space
++process running on a second CPU while we are suspending devices may, for
++example, be troublesome, and without the freezing of tasks we would need some
++safeguards against race conditions that might occur in such a case.
++
++Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
++of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
++
++"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
++
++Linus: In many ways, 'at all'.
++
++I _do_ realize the IO request queue issues, and that we cannot actually do
++s2ram with some devices in the middle of a DMA. So we want to be able to
++avoid *that*, there's no question about that. And I suspect that stopping
++user threads and then waiting for a sync is practically one of the easier
++ways to do so.
++
++So in practice, the 'at all' may become a 'why freeze kernel threads?' and
++freezing user threads I don't find really objectionable."
++
++Still, there are kernel threads that may want to be freezable. For example, if
++a kernel thread that belongs to a device driver accesses the device directly,
++it in principle needs to know when the device is suspended, so that it doesn't
++try to access it at that time. However, if the kernel thread is freezable, it
++will be frozen before the driver's .suspend() callback is executed and it will
++be thawed after the driver's .resume() callback has run, so it won't be
++accessing the device while it's suspended.
++
++3. Another reason for freezing tasks is to prevent user space processes from
++realizing that a hibernation (or suspend) operation takes place. Ideally, user
++space processes should not notice that such a system-wide operation has occurred
++and should continue running without any problems after the restore (or resume
++from suspend). Unfortunately, in the most general case this is quite difficult
++to achieve without the freezing of tasks. Consider, for example, a process
++that depends on all CPUs being online while it's running. Since we need to
++disable nonboot CPUs during the hibernation, if this process is not frozen, it
++may notice that the number of CPUs has changed and may start to work incorrectly
++because of that.
++
++V. Are there any problems related to the freezing of tasks?
++
++Yes, there are.
++
++First of all, the freezing of kernel threads may be tricky if they depend on one
++another.
++For example, if kernel thread A waits for a completion (in the
++TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
++and B is frozen in the meantime, then A will be blocked until B is thawed,
++which may be undesirable.  That's why kernel threads are not freezable by
++default.
++
++Second, there are the following two problems related to the freezing of user
++space processes:
++1. Putting processes into an uninterruptible sleep distorts the load average.
++2. Now that we have FUSE, plus the framework for doing device drivers in
++userspace, it gets even more complicated because some userspace processes are
++now doing the sorts of things that kernel threads do
++(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
++
++Problem 1 seems to be fixable, although it hasn't been fixed so far.  The
++other one is more serious, but it seems that we can work around it by using
++hibernation (and suspend) notifiers (in that case, though, we won't be able to
++prevent user space processes from noticing that hibernation is taking place).
++
++There are also problems that the freezing of tasks tends to expose, although
++they are not directly related to it.  For example, if request_firmware() is
++called from a device driver's .resume() routine, it will time out and
++eventually fail, because the user space process that should respond to the
++request is frozen at this point.  So, seemingly, the failure is due to the
++freezing of tasks.  Suppose, however, that the firmware file is located on a
++filesystem accessible only through another device that hasn't been resumed
++yet.  In that case, request_firmware() will fail regardless of whether or not
++the freezing of tasks is used.  Consequently, the problem is not really
++related to the freezing of tasks, since it generally exists anyway.  [The
++solution to this particular problem is to keep the firmware in memory after
++it's loaded for the first time and upload it from memory to the device
++whenever necessary.]
+diff -Nurb linux-2.6.22-570/Documentation/power/kernel_threads.txt linux-2.6.22-591/Documentation/power/kernel_threads.txt
+--- linux-2.6.22-570/Documentation/power/kernel_threads.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/power/kernel_threads.txt	1969-12-31 19:00:00.000000000 -0500
+@@ -1,40 +0,0 @@
+-KERNEL THREADS
+-
+-
+-Freezer
+-
+-Upon entering a suspended state the system will freeze all
+-tasks. This is done by delivering pseudosignals. This affects
+-kernel threads, too. To successfully freeze a kernel thread
+-the thread has to check for the pseudosignal and enter the
+-refrigerator. Code to do this looks like this:
+-
+-	do {
+-		hub_events();
+-		wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list));
+-		try_to_freeze();
+-	} while (!signal_pending(current));
+-
+-from drivers/usb/core/hub.c::hub_thread()
+-
+-
+-The Unfreezable
+-
+-Some kernel threads however, must not be frozen. The kernel must
+-be able to finish pending IO operations and later on be able to
+-write the memory image to disk. Kernel threads needed to do IO
+-must stay awake. Such threads must mark themselves unfreezable
+-like this:
+-
+-	/*
+-	 * This thread doesn't need any user-level access,
+-	 * so get rid of all our resources.
+-	 */
+-	daemonize("usb-storage");
+-
+-	current->flags |= PF_NOFREEZE;
+-
+-from drivers/usb/storage/usb.c::usb_stor_control_thread()
+-
+-Such drivers are themselves responsible for staying quiet during
+-the actual snapshotting.
+diff -Nurb linux-2.6.22-570/Documentation/power/swsusp.txt linux-2.6.22-591/Documentation/power/swsusp.txt
+--- linux-2.6.22-570/Documentation/power/swsusp.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/power/swsusp.txt	2007-12-21 15:36:11.000000000 -0500
+@@ -140,21 +140,11 @@
+ website, and not to the Linux Kernel Mailing List.  We are working
+ toward merging suspend2 into the mainline kernel.
+ 
+-Q: A kernel thread must voluntarily freeze itself (call 'refrigerator').
+-I found some kernel threads that don't do it, and they don't freeze
+-so the system can't sleep.  Is this a known behavior?
+-
+-A: All such kernel threads need to be fixed, one by one.  Select the
+-place where the thread is safe to be frozen (no kernel semaphores
+-should be held at that point and it must be safe to sleep there), and
+-add:
+-
+-	try_to_freeze();
+-
+-If the thread is needed for writing the image to storage, you should
+-instead set the PF_NOFREEZE process flag when creating the thread (and
+-be very careful).
++Q: What is the freezing of tasks and why are we using it?
+ 
++A: The freezing of tasks is a mechanism by which user space processes and some
++kernel threads are controlled during hibernation or system-wide suspend (on
++some architectures).  See freezing-of-tasks.txt for details.
+ 
+ Q: What is the difference between "platform" and "shutdown"?
+ 
+diff -Nurb linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt
+--- linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/scsi/scsi_fc_transport.txt	2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,450 @@
++                             SCSI FC Transport
++                 =============================================
++
++Date:  4/12/2007
++Kernel Revisions for features:
++  rports : <>
++  vports : 2.6.22 (? TBD)
++
++
++Introduction
++============
++This file documents the features and components of the SCSI FC Transport.
++It also documents the API between the transport and FC LLDDs.
++The FC transport can be found at:
++  drivers/scsi/scsi_transport_fc.c
++  include/scsi/scsi_transport_fc.h
++  include/scsi/scsi_netlink_fc.h
++
++This file is found at Documentation/scsi/scsi_fc_transport.txt
++
++
++FC Remote Ports (rports)
++========================================================================
++<< To Be Supplied >>
++
++
++FC Virtual Ports (vports)
++========================================================================
++
++Overview:
++-------------------------------
++
++  New FC standards have defined mechanisms which allow a single physical
++  port to appear as multiple communication ports.  Using the N_Port Id
++  Virtualization (NPIV) mechanism, a point-to-point connection to a Fabric
++  can be assigned more than 1 N_Port_ID.  Each N_Port_ID appears as a
++  separate port to other endpoints on the fabric, even though it shares one
++  physical link to the switch for communication.  Each N_Port_ID can have a
++  unique view of the fabric based on fabric zoning and array lun-masking
++  (just like a normal non-NPIV adapter).  Using the Virtual Fabric (VF)
++  mechanism, adding a fabric header to each frame allows the port to
++  interact with the Fabric Port to join multiple fabrics.
++  The port will obtain an N_Port_ID on each fabric it joins.  Each fabric
++  will have its own unique view of endpoints and configuration parameters.
++  NPIV may be used together with VF so that the port can obtain multiple
++  N_Port_IDs on each virtual fabric.
++
++  The FC transport now recognizes a new object - a vport.  A vport is
++  an entity that has a world-wide unique World Wide Port Name (wwpn) and
++  World Wide Node Name (wwnn).  The transport also allows the FC4 roles
++  to be specified for the vport, with FCP_Initiator being the primary role
++  expected.  Once instantiated by one of the above methods, it will have a
++  distinct N_Port_ID and view of fabric endpoints and storage entities.
++  The fc_host associated with the physical adapter will export the ability
++  to create vports.  The transport will create the vport object within the
++  Linux device tree, and instruct the fc_host's driver to instantiate the
++  virtual port.  Typically, the driver will create a new scsi_host instance
++  on the vport, resulting in a unique namespace for the vport.
++  Thus, whether an FC port is based on a physical port or on a virtual
++  port, each will appear as a unique scsi_host with its own target and
++  lun space.
++
++  Note: At this time, the transport is written to create only NPIV-based
++    vports.  However, consideration was given to VF-based vports and it
++    should be a minor change to add support if needed.  The remaining
++    discussion will concentrate on NPIV.
++
++  Note: World Wide Name assignment (and uniqueness guarantees) are left
++    up to an administrative entity controlling the vport.  For example,
++    if vports are to be associated with virtual machines, a XEN mgmt
++    utility would be responsible for creating wwpn/wwnn's for the vport,
++    using its own naming authority and OUI.  (Note: it already does this
++    for virtual MAC addresses.)
++
++
++Device Trees and Vport Objects:
++-------------------------------
++
++  Today, the device tree typically contains the scsi_host object,
++  with rports and scsi target objects underneath it.  Currently the FC
++  transport creates the vport object and places it under the scsi_host
++  object corresponding to the physical adapter.  The LLDD will allocate
++  a new scsi_host for the vport and link its object under the vport.
++  The remainder of the tree under the vport's scsi_host is the same
++  as the non-NPIV case.  The transport is currently written to easily
++  allow the parent of the vport to be something other than the scsi_host.
++  This could be used in the future to link the object onto a vm-specific
++  device tree.  If the vport's parent is not the physical port's scsi_host,
++  a symbolic link to the vport object will be placed in the physical
++  port's scsi_host.
++
++  Here's what to expect in the device tree:
++   The typical Physical Port's Scsi_Host:
++     /sys/devices/.../host17/
++   and it has the typical descendant tree:
++     /sys/devices/.../host17/rport-17:0-0/target17:0:0/17:0:0:0:
++   and then the vport is created on the Physical Port:
++     /sys/devices/.../host17/vport-17:0-0
++   and the vport's Scsi_Host is then created:
++     /sys/devices/.../host17/vport-17:0-0/host18
++   and then the rest of the tree progresses, such as:
++     /sys/devices/.../host17/vport-17:0-0/host18/rport-18:0-0/target18:0:0/18:0:0:0:
++
++  Here's what to expect in the sysfs tree:
++   scsi_hosts:
++     /sys/class/scsi_host/host17                physical port's scsi_host
++     /sys/class/scsi_host/host18                vport's scsi_host
++   fc_hosts:
++     /sys/class/fc_host/host17                  physical port's fc_host
++     /sys/class/fc_host/host18                  vport's fc_host
++   fc_vports:
++     /sys/class/fc_vports/vport-17:0-0          the vport's fc_vport
++   fc_rports:
++     /sys/class/fc_remote_ports/rport-17:0-0    rport on the physical port
++     /sys/class/fc_remote_ports/rport-18:0-0    rport on the vport
++
++
++Vport Attributes:
++-------------------------------
++
++  The new fc_vport class object has the following attributes:
++
++     node_name:                                             Read_Only
++       The WWNN of the vport
++
++     port_name:                                             Read_Only
++       The WWPN of the vport
++
++     roles:                                                 Read_Only
++       Indicates the FC4 roles enabled on the vport.
++
++     symbolic_name:                                         Read_Write
++       A string, appended to the driver's symbolic port name string, which
++       is registered with the switch to identify the vport.  For example,
++       a hypervisor could set this string to "Xen Domain 2 VM 5 Vport 2",
++       and this set of identifiers can be seen on switch management screens
++       to identify the port.
++
++     vport_delete:                                          Write_Only
++       When written with a "1", will tear down the vport.
++
++     vport_disable:                                         Write_Only
++       When written with a "1", will transition the vport to a disabled
++       state.  The vport will still be instantiated with the Linux kernel,
++       but it will not be active on the FC link.
++       When written with a "0", will enable the vport.
++
++     vport_last_state:                                      Read_Only
++       Indicates the previous state of the vport.  See the section below on
++       "Vport States".
++
++     vport_state:                                           Read_Only
++       Indicates the state of the vport.  See the section below on
++       "Vport States".
++
++     vport_type:                                            Read_Only
++       Reflects the FC mechanism used to create the virtual port.
++       Only NPIV is supported currently.
++
++
++  For the fc_host class object, the following attributes are added for vports:
++
++     max_npiv_vports:                                       Read_Only
++       Indicates the maximum number of NPIV-based vports that the
++       driver/adapter can support on the fc_host.
++
++     npiv_vports_inuse:                                     Read_Only
++       Indicates how many NPIV-based vports have been instantiated on the
++       fc_host.
++
++     vport_create:                                          Write_Only
++       A "simple" create interface to instantiate a vport on an fc_host.
++       A "<WWPN>:<WWNN>" string is written to the attribute.  The transport
++       then instantiates the vport object and calls the LLDD to create the
++       vport with the role of FCP_Initiator.  Each WWN is specified as 16
++       hex characters and may *not* contain any prefixes (e.g. 0x, x, etc).
++
++     vport_delete:                                          Write_Only
++       A "simple" delete interface to tear down a vport.  A "<WWPN>:<WWNN>"
++       string is written to the attribute.  The transport will locate the
++       vport on the fc_host with the same WWNs and tear it down.  Each WWN
++       is specified as 16 hex characters and may *not* contain any prefixes
++       (e.g. 0x, x, etc).
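++
++  For example, user space could instantiate a vport through the vport_create
++  attribute described above roughly as follows (a sketch only; "host17" and
++  the WWN values are made-up placeholders):
++
++	#include <fcntl.h>
++	#include <unistd.h>
++
++	int main(void)
++	{
++		/* "<WWPN>:<WWNN>", 16 hex characters each, no 0x prefix */
++		const char wwns[] = "2101001b32a9f0e5:2001001b32a9f0e5";
++		int fd = open("/sys/class/fc_host/host17/vport_create", O_WRONLY);
++
++		if (fd < 0)
++			return 1;
++		if (write(fd, wwns, sizeof(wwns) - 1) < 0) {
++			close(fd);
++			return 1;
++		}
++		close(fd);
++		return 0;
++	}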
++
++
++Vport States:
++-------------------------------
++
++  Vport instantiation consists of two parts:
++    - Creation with the kernel and LLDD.  This means all transport and
++      driver data structures are built up, and device objects created.
++      This is equivalent to a driver "attach" on an adapter, which is
++      independent of the adapter's link state.
++    - Instantiation of the vport on the FC link via ELS traffic, etc.
++      This is equivalent to a "link up" and successful link initialization.
++  Further information can be found in the interfaces section below for
++  Vport Creation.
++
++  Once a vport has been instantiated with the kernel/LLDD, a vport state
++  can be reported via the sysfs attribute.  The following states exist:
++
++    FC_VPORT_UNKNOWN            - Unknown
++      A temporary state, typically set only while the vport is being
++      instantiated with the kernel and LLDD.
++
++    FC_VPORT_ACTIVE             - Active
++      The vport has been successfully created on the FC link.
++      It is fully functional.
++
++    FC_VPORT_DISABLED           - Disabled
++      The vport is instantiated, but "disabled".  The vport is not
++      instantiated on the FC link.  This is equivalent to a physical port
++      with the link "down".
++
++    FC_VPORT_LINKDOWN           - Linkdown
++      The vport is not operational as the physical link is not operational.
++
++    FC_VPORT_INITIALIZING       - Initializing
++      The vport is in the process of instantiating on the FC link.
++      The LLDD will set this state just prior to starting the ELS traffic
++      to create the vport.  This state will persist until the vport is
++      successfully created (state becomes FC_VPORT_ACTIVE) or it fails
++      (state is one of the values below).  As this state is transitory,
++      it will not be preserved in the "vport_last_state".
++
++    FC_VPORT_NO_FABRIC_SUPP     - No Fabric Support
++      The vport is not operational.  One of the following conditions was
++      encountered:
++      - The FC topology is not Point-to-Point
++      - The FC port is not connected to an F_Port
++      - The F_Port has indicated that NPIV is not supported.
++
++    FC_VPORT_NO_FABRIC_RSCS     - No Fabric Resources
++      The vport is not operational.  The Fabric failed FDISC with a status
++      indicating that it does not have sufficient resources to complete
++      the operation.
++
++    FC_VPORT_FABRIC_LOGOUT      - Fabric Logout
++      The vport is not operational.  The Fabric has LOGO'd the N_Port_ID
++      associated with the vport.
++
++    FC_VPORT_FABRIC_REJ_WWN     - Fabric Rejected WWN
++      The vport is not operational.  The Fabric failed FDISC with a status
++      indicating that the WWNs are not valid.
++
++    FC_VPORT_FAILED             - VPort Failed
++      The vport is not operational.  This is a catchall for all other
++      error conditions.
++
++
++  The following state table indicates the different state transitions:
++
++   State              Event                            New State
++   --------------------------------------------------------------------
++    n/a               Initialization                   Unknown
++   Unknown:           Link Down                        Linkdown
++                      Link Up & Loop                   No Fabric Support
++                      Link Up & no Fabric              No Fabric Support
++                      Link Up & FLOGI response         No Fabric Support
++                        indicates no NPIV support
++                      Link Up & FDISC being sent       Initializing
++                      Disable request                  Disable
++   Linkdown:          Link Up                          Unknown
++   Initializing:      FDISC ACC                        Active
++                      FDISC LS_RJT w/ no resources     No Fabric Resources
++                      FDISC LS_RJT w/ invalid          Fabric Rejected WWN
++                        pname or invalid nport_id
++                      FDISC LS_RJT failed for          Vport Failed
++                        other reasons
++                      Link Down                        Linkdown
++                      Disable request                  Disable
++   Disable:           Enable request                   Unknown
++   Active:            LOGO received from fabric        Fabric Logout
++                      Link Down                        Linkdown
++                      Disable request                  Disable
++   Fabric Logout:     Link still up                    Unknown
++
++   The following 4 error states all have the same transitions:
++   No Fabric Support:
++   No Fabric Resources:
++   Fabric Rejected WWN:
++   Vport Failed:
++                      Disable request                  Disable
++                      Link goes down                   Linkdown
++
++
++Transport <-> LLDD Interfaces:
++-------------------------------
++
++Vport support by LLDD:
++
++  The LLDD indicates support for vports by supplying a vport_create()
++  function in the transport template.  The presence of this function will
++  cause the creation of the new attributes on the fc_host.  As part of
++  the physical port completing its initialization relative to the
++  transport, it should set the max_npiv_vports attribute to indicate the
++  maximum number of vports the driver and/or adapter supports.
++
++
++Vport Creation:
++
++  The LLDD vport_create() syntax is:
++
++      int vport_create(struct fc_vport *vport, bool disable)
++
++  where:
++      vport:    Is the newly allocated vport object
++      disable:  If "true", the vport is to be created in a disabled state.
++                If "false", the vport is to be enabled upon creation.
++
++  When a request is made to create a new vport (via sgio/netlink, or the
++  vport_create fc_host attribute), the transport will validate that the LLDD
++  can support another vport (e.g. max_npiv_vports > npiv_vports_inuse).
++  If not, the create request will be failed.  If space remains, the transport
++  will increment the vport count, create the vport object, and then call the
++  LLDD's vport_create() function with the newly allocated vport object.
++
++  As mentioned above, vport creation is divided into two parts:
++    - Creation with the kernel and LLDD.  This means all transport and
++      driver data structures are built up, and device objects created.
++      This is equivalent to a driver "attach" on an adapter, which is
++      independent of the adapter's link state.
++    - Instantiation of the vport on the FC link via ELS traffic, etc.
++      This is equivalent to a "link up" and successful link initialization.
++
++  The LLDD's vport_create() function will not synchronously wait for both
++  parts to be fully completed before returning.  It must validate that the
++  infrastructure exists to support NPIV, and complete the first part of
++  vport creation (data structure build up) before returning.  We do not
++  hinge vport_create() on the link-side operation mainly because:
++    - The link may be down.  It is not a failure if it is.  It simply
++      means the vport is in an inoperable state until the link comes up.
++      This is consistent with the link bouncing post vport creation.
++    - The vport may be created in a disabled state.
++    - This is consistent with a model where the vport equates to an
++      FC adapter.  The vport_create is synonymous with driver attachment
++      to the adapter, which is independent of link state.
++
++  Note: special error codes have been defined to delineate infrastructure
++    failure cases for quicker resolution.
++
++  The expected behavior for the LLDD's vport_create() function is
++  (a sketch of such a function follows the implementers notes below):
++    - Validate Infrastructure:
++      - If the driver or adapter cannot support another vport, whether
++        due to improper firmware, (a lie about) max_npiv, or a lack of
++        some other resource - return VPCERR_UNSUPPORTED.
++      - If the driver validates the WWNs against those already active on
++        the adapter and detects an overlap - return VPCERR_BAD_WWN.
++      - If the driver detects the topology is loop, non-fabric, or the
++        FLOGI did not support NPIV - return VPCERR_NO_FABRIC_SUPP.
++    - Allocate data structures.  If errors are encountered, such as out
++      of memory conditions, return the respective negative Exxx error code.
++    - If the role is FCP Initiator, the LLDD is to:
++      - Call scsi_host_alloc() to allocate a scsi_host for the vport.
++      - Call scsi_add_host(new_shost, &vport->dev) to start the scsi_host
++        and bind it as a child of the vport device.
++      - Initialize the fc_host attribute values.
++    - Kick off further vport state transitions based on the disable flag
++      and link state - and return success (zero).
++
++  LLDD Implementers Notes:
++    - It is suggested that there be different fc_function_templates for
++      the physical port and the virtual port.  The physical port's template
++      would have the vport_create, vport_delete, and vport_disable functions,
++      while the vports would not.
++    - It is suggested that there be different scsi_host_templates
++      for the physical port and virtual port.  Likely, there are driver
++      attributes, embedded into the scsi_host_template, that are applicable
++      for the physical port only (link speed, topology setting, etc).  This
++      ensures that the attributes are applicable to the respective scsi_host.
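++
++  To illustrate the expected behavior above, a skeletal vport_create() might
++  be structured as follows.  This is only a sketch: the lld_* helpers and
++  the lld_vport structure are hypothetical stand-ins for driver-specific
++  code, while the scsi_*() calls and VPCERR_* codes are the real interfaces:
++
++	static int lld_vport_create(struct fc_vport *vport, bool disable)
++	{
++		struct Scsi_Host *shost;
++
++		/* 1) validate infrastructure */
++		if (!lld_can_support_another_vport())
++			return VPCERR_UNSUPPORTED;
++		if (lld_wwn_overlap(vport->port_name, vport->node_name))
++			return VPCERR_BAD_WWN;
++		if (!lld_fabric_supports_npiv())
++			return VPCERR_NO_FABRIC_SUPP;
++
++		/* 2) allocate data structures (FCP Initiator role) */
++		shost = scsi_host_alloc(&lld_vport_sht, sizeof(struct lld_vport));
++		if (!shost)
++			return -ENOMEM;
++		if (scsi_add_host(shost, &vport->dev)) {
++			scsi_host_put(shost);
++			return -EIO;
++		}
++		lld_init_fc_host_attrs(shost);
++
++		/* 3) kick off further state transitions based on the
++		 * disable flag and the current link state */
++		if (!disable && lld_link_is_up())
++			lld_start_fdisc(vport);
++		return 0;
++	}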
++
++
++Vport Disable/Enable:
++
++  The LLDD vport_disable() syntax is:
++
++      int vport_disable(struct fc_vport *vport, bool disable)
++
++  where:
++      vport:    Is the vport to be enabled or disabled
++      disable:  If "true", the vport is to be disabled.
++                If "false", the vport is to be enabled.
++
++  When a request is made to change the disabled state on a vport, the
++  transport will validate the request against the existing vport state.
++  If the request is to disable and the vport is already disabled, the
++  request will fail.  Similarly, if the request is to enable, and the
++  vport is not in a disabled state, the request will fail.  If the request
++  is valid for the vport state, the transport will call the LLDD to
++  change the vport's state.
++
++  Within the LLDD, if a vport is disabled, it remains instantiated with
++  the kernel and LLDD, but it is not active or visible on the FC link in
++  any way (see Vport Creation and the two-part instantiation discussion).
++  The vport will remain in this state until it is deleted or re-enabled.
++  When enabling a vport, the LLDD reinstantiates the vport on the FC
++  link - essentially restarting the LLDD state machine (see Vport States
++  above).
++
++
++Vport Deletion:
++
++  The LLDD vport_delete() syntax is:
++
++      int vport_delete(struct fc_vport *vport)
++
++  where:
++      vport:    Is the vport to delete
++
++  When a request is made to delete a vport (via sgio/netlink, or via the
++  fc_host or fc_vport vport_delete attributes), the transport will call
++  the LLDD to terminate the vport on the FC link, and tear down all other
++  data structures and references.  If the LLDD completes successfully,
++  the transport will tear down the vport objects and complete the vport
++  removal.  If the LLDD delete request fails, the vport object will remain,
++  but will be in an indeterminate state.
++
++  Within the LLDD, the normal code paths for a scsi_host teardown should
++  be followed.  E.g. if the vport has an FCP Initiator role, the LLDD
++  will call fc_remove_host() for the vport's scsi_host, followed by
++  scsi_remove_host() and scsi_host_put() for the vport's scsi_host.
++
++
++Other:
++  fc_host port_type attribute:
++    There is a new fc_host port_type value - FC_PORTTYPE_NPIV.  This value
++    must be set on all vport-based fc_hosts.  Normally, on a physical port,
++    the port_type attribute would be set to NPORT, NLPORT, etc based on the
++    topology type and existence of the fabric.  As this is not applicable to
++    a vport, it makes more sense to report the FC mechanism used to create
++    the vport.
++
++  Driver unload:
++    FC drivers are required to call fc_remove_host() prior to calling
++    scsi_remove_host().  This allows the fc_host to tear down all remote
++    ports prior to the scsi_host being torn down.  The fc_remove_host()
++    call was updated to remove all vports for the fc_host as well.
++
++
++Credits
++=======
++The following people have contributed to this document:
++
++
++
++
++
++
++James Smart
++james.smart@emulex.com
++
+diff -Nurb linux-2.6.22-570/Documentation/sysctl/kernel.txt linux-2.6.22-591/Documentation/sysctl/kernel.txt
+--- linux-2.6.22-570/Documentation/sysctl/kernel.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/Documentation/sysctl/kernel.txt	2007-12-21 15:36:11.000000000 -0500
+@@ -29,6 +29,7 @@
+ - java-interpreter            [ binfmt_java, obsolete ]
+ - kstack_depth_to_print       [ X86 only ]
+ - l2cr                        [ PPC only ]
++- mmap_min_addr
+ - modprobe                    ==> Documentation/kmod.txt
+ - msgmax
+ - msgmnb
+@@ -178,6 +179,19 @@
+ 
+ ==============================================================
+ 
++mmap_min_addr
++
++This file indicates the amount of address space which a user process will be
++restricted from mmapping.  Since kernel null dereference bugs could
++accidentally operate based on the information in the first couple of pages of
++memory, userspace processes should not be allowed to write to them.  By
++default this value is set to 0 and no protections will be enforced by the
++security module.  Setting this value to something like 64k will allow the
++vast majority of applications to work correctly and provide defense in depth
++against future potential kernel bugs.
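++
++The effect can be illustrated with a small test program (a sketch; with
++mmap_min_addr set to e.g. 65536, mapping page zero fails with EPERM):
++
++	#include <stdio.h>
++	#include <sys/mman.h>
++
++	int main(void)
++	{
++		void *p = mmap((void *)0, 4096, PROT_READ | PROT_WRITE,
++			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
++
++		if (p == MAP_FAILED)
++			perror("mmap at address 0");	/* EPERM when protected */
++		else
++			printf("mapped page 0 at %p\n", p);
++		return 0;
++	}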
++
++==============================================================
++
+ osrelease, ostype & version:
+ 
+ # cat osrelease
+diff -Nurb linux-2.6.22-570/Documentation/sysfs-rules.txt linux-2.6.22-591/Documentation/sysfs-rules.txt
+--- linux-2.6.22-570/Documentation/sysfs-rules.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/Documentation/sysfs-rules.txt	2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,166 @@
++Rules on how to access information in the Linux kernel sysfs
++
++Sysfs, as exported by the kernel, exposes internal kernel implementation
++details and depends on internal kernel structures and layout.  It is agreed
++upon by the kernel developers that the Linux kernel does not provide a stable
++internal API.  As sysfs is a direct export of kernel internal structures, the
++sysfs interface cannot provide a stable interface either; it may always
++change along with internal kernel changes.
++
++To minimize the risk that a new kernel release breaks users of sysfs, which
++are in most cases low-level userspace applications, users of sysfs must
++follow some rules and access this filesystem in as abstract a way as
++possible.  The current udev and HAL programs already implement this and
++users are encouraged to plug, if possible, into the abstractions these
++programs provide instead of accessing sysfs directly.
++
++But if you really do want or need to access sysfs directly, please follow
++the following rules and then your programs should work with future
++versions of the sysfs interface.
++
++- Do not use libsysfs
++  It makes assumptions about sysfs which are not true.  Its API does not
++  offer any abstraction; it exposes all the kernel driver-core
++  implementation details in its own API.  Therefore it is not better than
++  reading directories and opening the files yourself.
++  Also, it is not actively maintained, in the sense of reflecting the
++  current kernel development.  The goal of providing a stable interface
++  to sysfs has failed; it causes more problems than it solves.  It
++  violates many of the rules in this document.
++
++- sysfs is always at /sys
++  Parsing /proc/mounts is a waste of time.  Other mount points are a
++  system configuration bug you should not try to solve.  For test cases,
++  possibly support a SYSFS_PATH environment variable to override the
++  application's behavior, but never try to search for sysfs.  Never try
++  to mount it, if you are not an early boot script.
++
++- devices are only "devices"
++  There is no such thing as class-, bus-, or physical devices,
++  interfaces, and such that you can rely on in userspace.  Everything is
++  just simply a "device".  Class-, bus-, physical, ... types are just
++  kernel implementation details, which should not be expected by
++  applications that look for devices in sysfs.
++
++  The properties of a device are:
++    o devpath (/devices/pci0000:00/0000:00:1d.1/usb2/2-2/2-2:1.0)
++      - identical to the DEVPATH value in the event sent from the kernel
++        at device creation and removal
++      - the unique key to the device at that point in time
++      - the kernel's path to the device directory without the leading
++        /sys, and always starting with a slash
++      - all elements of a devpath must be real directories.  Symlinks
++        pointing to /sys/devices must always be resolved to their real
++        target, and the target path must be used to access the device.
++        That way the devpath to the device matches the devpath of the
++        kernel used at event time.
++      - using or exposing symlink values as elements in a devpath string
++        is a bug in the application
++
++    o kernel name (sda, tty, 0000:00:1f.2, ...)
++      - a directory name, identical to the last element of the devpath
++      - applications need to handle spaces and characters like '!' in
++        the name
++
++    o subsystem (block, tty, pci, ...)
++      - simple string, never a path or a link
++      - retrieved by reading the "subsystem"-link and using only the
++        last element of the target path
++
++    o driver (tg3, ata_piix, uhci_hcd)
++      - a simple string, which may contain spaces, never a path or a
++        link
++      - it is retrieved by reading the "driver"-link and using only the
++        last element of the target path
++      - devices which do not have a "driver"-link just do not have a
++        driver; copying the driver value in a child device context is a
++        bug in the application
++
++    o attributes
++      - the files in the device directory or files below subdirectories
++        of the same device directory
++      - accessing attributes reached by a symlink pointing to another device,
++        like the "device"-link, is a bug in the application
++
++  Everything else is just a kernel driver-core implementation detail
++  that should not be assumed to be stable across kernel releases.
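++
++  As an illustration of the rules above, a program could determine a
++  device's subsystem like this (a sketch only; error handling is trimmed
++  and the buffer sizes are arbitrary):
++
++	#include <libgen.h>
++	#include <stdio.h>
++	#include <unistd.h>
++
++	/* argv[1]: a resolved device directory below /sys/devices */
++	int main(int argc, char **argv)
++	{
++		char link[512], target[512];
++		ssize_t len;
++
++		if (argc != 2)
++			return 1;
++		snprintf(link, sizeof(link), "%s/subsystem", argv[1]);
++		len = readlink(link, target, sizeof(target) - 1);
++		if (len < 0)
++			return 1;	/* no subsystem link, no subsystem */
++		target[len] = '\0';
++		/* use only the last element of the target path */
++		printf("subsystem: %s\n", basename(target));
++		return 0;
++	}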
The "device"-link is a workaround ++ for the old layout, where class-devices are not created in ++ /sys/devices/ like the bus-devices. If the link-resolving of a ++ device-directory does not end in /sys/devices/, you can use the ++ "device"-link to find the parent devices in /sys/devices/. That is the ++ single valid use of the "device"-link, it must never appear in any ++ path as an element. Assuming the existence of the "device"-link for ++ a device in /sys/devices/ is a bug in the application. ++ Accessing /sys/class/net/eth0/device is a bug in the application. ++ ++ Never depend on the class-specific links back to the /sys/class ++ directory. These links are also a workaround for the design mistake ++ that class-devices are not created in /sys/devices. If a device ++ directory does not contain directories for child devices, these links ++ may be used to find the child devices in /sys/class. That is the single ++ valid use of these links, they must never appear in any path as an ++ element. Assuming the existence of these links for devices which are ++ real child device directories in the /sys/devices tree, is a bug in ++ the application. ++ ++ It is planned to remove all these links when when all class-device ++ directories live in /sys/devices. ++ ++- Position of devices along device chain can change. ++ Never depend on a specific parent device position in the devpath, ++ or the chain of parent devices. The kernel is free to insert devices into ++ the chain. You must always request the parent device you are looking for ++ by its subsystem value. You need to walk up the chain until you find ++ the device that matches the expected subsystem. Depending on a specific ++ position of a parent device, or exposing relative paths, using "../" to ++ access the chain of parents, is a bug in the application. ++ +diff -Nurb linux-2.6.22-570/MAINTAINERS linux-2.6.22-591/MAINTAINERS +--- linux-2.6.22-570/MAINTAINERS 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/MAINTAINERS 2007-12-21 15:36:11.000000000 -0500 +@@ -232,15 +232,15 @@ + S: Supported + + ACPI BATTERY DRIVERS +-P: Vladimir P. Lebedev +-M: vladimir.p.lebedev@intel.com ++P: Alexey Starikovskiy ++M: astarikovskiy@suse.de + L: linux-acpi@vger.kernel.org + W: http://acpi.sourceforge.net/ + S: Supported + + ACPI EC DRIVER + P: Alexey Starikovskiy +-M: alexey.y.starikovskiy@linux.intel.com ++M: astarikovskiy@suse.de + L: linux-acpi@vger.kernel.org + W: http://acpi.sourceforge.net/ + S: Supported +@@ -2127,6 +2127,15 @@ + L: kexec@lists.infradead.org + S: Maintained + ++KGDB ++P: Jason Wessel ++M: jason.wessel@windriver.com ++P: Amit S. Kale ++M: amitkale@linsyssoft.com ++W: http://sourceforge.net/projects/kgdb ++L: kgdb-bugreport@lists.sourceforge.net ++S: Maintained ++ + KPROBES + P: Prasanna S Panchamukhi + M: prasanna@in.ibm.com +@@ -3593,6 +3602,15 @@ + W: http://www.kernel.dk + S: Maintained + ++UNIONFS ++P: Erez Zadok ++M: ezk@cs.sunysb.edu ++P: Josef "Jeff" Sipek ++M: jsipek@cs.sunysb.edu ++L: unionfs@filesystems.org ++W: http://unionfs.filesystems.org ++S: Maintained ++ + USB ACM DRIVER + P: Oliver Neukum + M: oliver@neukum.name +diff -Nurb linux-2.6.22-570/Makefile linux-2.6.22-591/Makefile +--- linux-2.6.22-570/Makefile 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/Makefile 2007-12-21 15:36:16.000000000 -0500 +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 22 +-EXTRAVERSION = .14-vs2.3.0.29 ++EXTRAVERSION = -prep + NAME = Holy Dancing Manatees, Batman! 
+ + # *DOCUMENTATION* +@@ -496,6 +496,11 @@ + CFLAGS += -fomit-frame-pointer + endif + ++ifdef CONFIG_UNWIND_INFO ++CFLAGS += -fasynchronous-unwind-tables ++LDFLAGS_vmlinux += --eh-frame-hdr ++endif ++ + ifdef CONFIG_DEBUG_INFO + CFLAGS += -g + endif +diff -Nurb linux-2.6.22-570/Makefile.orig linux-2.6.22-591/Makefile.orig +--- linux-2.6.22-570/Makefile.orig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/Makefile.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1493 +0,0 @@ +-VERSION = 2 +-PATCHLEVEL = 6 +-SUBLEVEL = 22 +-EXTRAVERSION = .14 +-NAME = Holy Dancing Manatees, Batman! +- +-# *DOCUMENTATION* +-# To see a list of typical targets execute "make help" +-# More info can be located in ./README +-# Comments in this file are targeted only to the developer, do not +-# expect to learn how to build the kernel reading this file. +- +-# Do not: +-# o use make's built-in rules and variables +-# (this increases performance and avoid hard-to-debug behavour); +-# o print "Entering directory ..."; +-MAKEFLAGS += -rR --no-print-directory +- +-# We are using a recursive build, so we need to do a little thinking +-# to get the ordering right. +-# +-# Most importantly: sub-Makefiles should only ever modify files in +-# their own directory. If in some directory we have a dependency on +-# a file in another dir (which doesn't happen often, but it's often +-# unavoidable when linking the built-in.o targets which finally +-# turn into vmlinux), we will call a sub make in that other dir, and +-# after that we are sure that everything which is in that other dir +-# is now up to date. +-# +-# The only cases where we need to modify files which have global +-# effects are thus separated out and done before the recursive +-# descending is started. They are now explicitly listed as the +-# prepare rule. +- +-# To put more focus on warnings, be less verbose as default +-# Use 'make V=1' to see the full commands +- +-ifdef V +- ifeq ("$(origin V)", "command line") +- KBUILD_VERBOSE = $(V) +- endif +-endif +-ifndef KBUILD_VERBOSE +- KBUILD_VERBOSE = 0 +-endif +- +-# Call a source code checker (by default, "sparse") as part of the +-# C compilation. +-# +-# Use 'make C=1' to enable checking of only re-compiled files. +-# Use 'make C=2' to enable checking of *all* source files, regardless +-# of whether they are re-compiled or not. +-# +-# See the file "Documentation/sparse.txt" for more details, including +-# where to get the "sparse" utility. +- +-ifdef C +- ifeq ("$(origin C)", "command line") +- KBUILD_CHECKSRC = $(C) +- endif +-endif +-ifndef KBUILD_CHECKSRC +- KBUILD_CHECKSRC = 0 +-endif +- +-# Use make M=dir to specify directory of external module to build +-# Old syntax make ... SUBDIRS=$PWD is still supported +-# Setting the environment variable KBUILD_EXTMOD take precedence +-ifdef SUBDIRS +- KBUILD_EXTMOD ?= $(SUBDIRS) +-endif +-ifdef M +- ifeq ("$(origin M)", "command line") +- KBUILD_EXTMOD := $(M) +- endif +-endif +- +- +-# kbuild supports saving output files in a separate directory. +-# To locate output files in a separate directory two syntaxes are supported. +-# In both cases the working directory must be the root of the kernel src. +-# 1) O= +-# Use "make O=dir/to/store/output/files/" +-# +-# 2) Set KBUILD_OUTPUT +-# Set the environment variable KBUILD_OUTPUT to point to the directory +-# where the output files shall be placed. +-# export KBUILD_OUTPUT=dir/to/store/output/files/ +-# make +-# +-# The O= assignment takes precedence over the KBUILD_OUTPUT environment +-# variable. 
+- +- +-# KBUILD_SRC is set on invocation of make in OBJ directory +-# KBUILD_SRC is not intended to be used by the regular user (for now) +-ifeq ($(KBUILD_SRC),) +- +-# OK, Make called in directory where kernel src resides +-# Do we want to locate output files in a separate directory? +-ifdef O +- ifeq ("$(origin O)", "command line") +- KBUILD_OUTPUT := $(O) +- endif +-endif +- +-# That's our default target when none is given on the command line +-PHONY := _all +-_all: +- +-ifneq ($(KBUILD_OUTPUT),) +-# Invoke a second make in the output directory, passing relevant variables +-# check that the output directory actually exists +-saved-output := $(KBUILD_OUTPUT) +-KBUILD_OUTPUT := $(shell cd $(KBUILD_OUTPUT) && /bin/pwd) +-$(if $(KBUILD_OUTPUT),, \ +- $(error output directory "$(saved-output)" does not exist)) +- +-PHONY += $(MAKECMDGOALS) +- +-$(filter-out _all,$(MAKECMDGOALS)) _all: +- $(if $(KBUILD_VERBOSE:1=),@)$(MAKE) -C $(KBUILD_OUTPUT) \ +- KBUILD_SRC=$(CURDIR) \ +- KBUILD_EXTMOD="$(KBUILD_EXTMOD)" -f $(CURDIR)/Makefile $@ +- +-# Leave processing to above invocation of make +-skip-makefile := 1 +-endif # ifneq ($(KBUILD_OUTPUT),) +-endif # ifeq ($(KBUILD_SRC),) +- +-# We process the rest of the Makefile if this is the final invocation of make +-ifeq ($(skip-makefile),) +- +-# If building an external module we do not care about the all: rule +-# but instead _all depend on modules +-PHONY += all +-ifeq ($(KBUILD_EXTMOD),) +-_all: all +-else +-_all: modules +-endif +- +-srctree := $(if $(KBUILD_SRC),$(KBUILD_SRC),$(CURDIR)) +-TOPDIR := $(srctree) +-# FIXME - TOPDIR is obsolete, use srctree/objtree +-objtree := $(CURDIR) +-src := $(srctree) +-obj := $(objtree) +- +-VPATH := $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD)) +- +-export srctree objtree VPATH TOPDIR +- +- +-# SUBARCH tells the usermode build what the underlying arch is. That is set +-# first, and if a usermode build is happening, the "ARCH=um" on the command +-# line overrides the setting of ARCH below. If a native build is happening, +-# then ARCH is assigned, getting whatever value it gets normally, and +-# SUBARCH is subsequently ignored. +- +-SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ +- -e s/arm.*/arm/ -e s/sa110/arm/ \ +- -e s/s390x/s390/ -e s/parisc64/parisc/ \ +- -e s/ppc.*/powerpc/ -e s/mips.*/mips/ ) +- +-# Cross compiling and selecting different set of gcc/bin-utils +-# --------------------------------------------------------------------------- +-# +-# When performing cross compilation for other architectures ARCH shall be set +-# to the target architecture. (See arch/* for the possibilities). +-# ARCH can be set during invocation of make: +-# make ARCH=ia64 +-# Another way is to have ARCH set in the environment. +-# The default ARCH is the host where make is executed. +- +-# CROSS_COMPILE specify the prefix used for all executables used +-# during compilation. Only gcc and related bin-utils executables +-# are prefixed with $(CROSS_COMPILE). +-# CROSS_COMPILE can be set on the command line +-# make CROSS_COMPILE=ia64-linux- +-# Alternatively CROSS_COMPILE can be set in the environment. 
+-# Default value for CROSS_COMPILE is not to prefix executables +-# Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile +- +-ARCH ?= $(SUBARCH) +-CROSS_COMPILE ?= +- +-# Architecture as present in compile.h +-UTS_MACHINE := $(ARCH) +- +-KCONFIG_CONFIG ?= .config +- +-# SHELL used by kbuild +-CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ +- else if [ -x /bin/bash ]; then echo /bin/bash; \ +- else echo sh; fi ; fi) +- +-HOSTCC = gcc +-HOSTCXX = g++ +-HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +-HOSTCXXFLAGS = -O2 +- +-# Decide whether to build built-in, modular, or both. +-# Normally, just do built-in. +- +-KBUILD_MODULES := +-KBUILD_BUILTIN := 1 +- +-# If we have only "make modules", don't compile built-in objects. +-# When we're building modules with modversions, we need to consider +-# the built-in objects during the descend as well, in order to +-# make sure the checksums are up to date before we record them. +- +-ifeq ($(MAKECMDGOALS),modules) +- KBUILD_BUILTIN := $(if $(CONFIG_MODVERSIONS),1) +-endif +- +-# If we have "make modules", compile modules +-# in addition to whatever we do anyway. +-# Just "make" or "make all" shall build modules as well +- +-ifneq ($(filter all _all modules,$(MAKECMDGOALS)),) +- KBUILD_MODULES := 1 +-endif +- +-ifeq ($(MAKECMDGOALS),) +- KBUILD_MODULES := 1 +-endif +- +-export KBUILD_MODULES KBUILD_BUILTIN +-export KBUILD_CHECKSRC KBUILD_SRC KBUILD_EXTMOD +- +-# Beautify output +-# --------------------------------------------------------------------------- +-# +-# Normally, we echo the whole command before executing it. By making +-# that echo $($(quiet)$(cmd)), we now have the possibility to set +-# $(quiet) to choose other forms of output instead, e.g. +-# +-# quiet_cmd_cc_o_c = Compiling $(RELDIR)/$@ +-# cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +-# +-# If $(quiet) is empty, the whole command will be printed. +-# If it is set to "quiet_", only the short version will be printed. +-# If it is set to "silent_", nothing will be printed at all, since +-# the variable $(silent_cmd_cc_o_c) doesn't exist. +-# +-# A simple variant is to prefix commands with $(Q) - that's useful +-# for commands that shall be hidden in non-verbose mode. +-# +-# $(Q)ln $@ :< +-# +-# If KBUILD_VERBOSE equals 0 then the above command will be hidden. +-# If KBUILD_VERBOSE equals 1 then the above command is displayed. +- +-ifeq ($(KBUILD_VERBOSE),1) +- quiet = +- Q = +-else +- quiet=quiet_ +- Q = @ +-endif +- +-# If the user is running make -s (silent mode), suppress echoing of +-# commands +- +-ifneq ($(findstring s,$(MAKEFLAGS)),) +- quiet=silent_ +-endif +- +-export quiet Q KBUILD_VERBOSE +- +- +-# Look for make include files relative to root of kernel src +-MAKEFLAGS += --include-dir=$(srctree) +- +-# We need some generic definitions. +-include $(srctree)/scripts/Kbuild.include +- +-# Make variables (CC, etc...) 
+- +-AS = $(CROSS_COMPILE)as +-LD = $(CROSS_COMPILE)ld +-CC = $(CROSS_COMPILE)gcc +-CPP = $(CC) -E +-AR = $(CROSS_COMPILE)ar +-NM = $(CROSS_COMPILE)nm +-STRIP = $(CROSS_COMPILE)strip +-OBJCOPY = $(CROSS_COMPILE)objcopy +-OBJDUMP = $(CROSS_COMPILE)objdump +-AWK = awk +-GENKSYMS = scripts/genksyms/genksyms +-DEPMOD = /sbin/depmod +-KALLSYMS = scripts/kallsyms +-PERL = perl +-CHECK = sparse +- +-CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise $(CF) +-MODFLAGS = -DMODULE +-CFLAGS_MODULE = $(MODFLAGS) +-AFLAGS_MODULE = $(MODFLAGS) +-LDFLAGS_MODULE = -r +-CFLAGS_KERNEL = +-AFLAGS_KERNEL = +- +- +-# Use LINUXINCLUDE when you must reference the include/ directory. +-# Needed to be compatible with the O= option +-LINUXINCLUDE := -Iinclude \ +- $(if $(KBUILD_SRC),-Iinclude2 -I$(srctree)/include) \ +- -include include/linux/autoconf.h +- +-CPPFLAGS := -D__KERNEL__ $(LINUXINCLUDE) +- +-CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ +- -fno-strict-aliasing -fno-common +-AFLAGS := -D__ASSEMBLY__ +- +-# Read KERNELRELEASE from include/config/kernel.release (if it exists) +-KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) +-KERNELVERSION = $(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) +- +-export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION +-export ARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC +-export CPP AR NM STRIP OBJCOPY OBJDUMP MAKE AWK GENKSYMS PERL UTS_MACHINE +-export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS +- +-export CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS +-export CFLAGS CFLAGS_KERNEL CFLAGS_MODULE +-export AFLAGS AFLAGS_KERNEL AFLAGS_MODULE +- +-# When compiling out-of-tree modules, put MODVERDIR in the module +-# tree rather than in the kernel tree. The kernel tree might +-# even be read-only. +-export MODVERDIR := $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_versions +- +-# Files to ignore in find ... statements +- +-RCS_FIND_IGNORE := \( -name SCCS -o -name BitKeeper -o -name .svn -o -name CVS -o -name .pc -o -name .hg -o -name .git \) -prune -o +-export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn --exclude CVS --exclude .pc --exclude .hg --exclude .git +- +-# =========================================================================== +-# Rules shared between *config targets and build targets +- +-# Basic helpers built in scripts/ +-PHONY += scripts_basic +-scripts_basic: +- $(Q)$(MAKE) $(build)=scripts/basic +- +-# To avoid any implicit rule to kick in, define an empty command. +-scripts/basic/%: scripts_basic ; +- +-PHONY += outputmakefile +-# outputmakefile generates a Makefile in the output directory, if using a +-# separate output directory. This allows convenient use of make in the +-# output directory. +-outputmakefile: +-ifneq ($(KBUILD_SRC),) +- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile \ +- $(srctree) $(objtree) $(VERSION) $(PATCHLEVEL) +-endif +- +-# To make sure we do not include .config for any of the *config targets +-# catch them early, and hand them over to scripts/kconfig/Makefile +-# It is allowed to specify more targets when calling make, including +-# mixing *config targets and build targets. +-# For example 'make oldconfig all'. +-# Detect when mixed targets is specified, and make a second invocation +-# of make so .config is not included in this case either (for *config). 
+- +-no-dot-config-targets := clean mrproper distclean \ +- cscope TAGS tags help %docs check% \ +- include/linux/version.h headers_% \ +- kernelrelease kernelversion +- +-config-targets := 0 +-mixed-targets := 0 +-dot-config := 1 +- +-ifneq ($(filter $(no-dot-config-targets), $(MAKECMDGOALS)),) +- ifeq ($(filter-out $(no-dot-config-targets), $(MAKECMDGOALS)),) +- dot-config := 0 +- endif +-endif +- +-ifeq ($(KBUILD_EXTMOD),) +- ifneq ($(filter config %config,$(MAKECMDGOALS)),) +- config-targets := 1 +- ifneq ($(filter-out config %config,$(MAKECMDGOALS)),) +- mixed-targets := 1 +- endif +- endif +-endif +- +-ifeq ($(mixed-targets),1) +-# =========================================================================== +-# We're called with mixed targets (*config and build targets). +-# Handle them one by one. +- +-%:: FORCE +- $(Q)$(MAKE) -C $(srctree) KBUILD_SRC= $@ +- +-else +-ifeq ($(config-targets),1) +-# =========================================================================== +-# *config targets only - make sure prerequisites are updated, and descend +-# in scripts/kconfig to make the *config target +- +-# Read arch specific Makefile to set KBUILD_DEFCONFIG as needed. +-# KBUILD_DEFCONFIG may point out an alternative default configuration +-# used for 'make defconfig' +-include $(srctree)/arch/$(ARCH)/Makefile +-export KBUILD_DEFCONFIG +- +-config %config: scripts_basic outputmakefile FORCE +- $(Q)mkdir -p include/linux include/config +- $(Q)$(MAKE) $(build)=scripts/kconfig $@ +- +-else +-# =========================================================================== +-# Build targets only - this includes vmlinux, arch specific targets, clean +-# targets and others. In general all targets except *config targets. +- +-ifeq ($(KBUILD_EXTMOD),) +-# Additional helpers built in scripts/ +-# Carefully list dependencies so we do not try to build scripts twice +-# in parallel +-PHONY += scripts +-scripts: scripts_basic include/config/auto.conf +- $(Q)$(MAKE) $(build)=$(@) +- +-# Objects we will link into vmlinux / subdirs we need to visit +-init-y := init/ +-drivers-y := drivers/ sound/ +-net-y := net/ +-libs-y := lib/ +-core-y := usr/ +-endif # KBUILD_EXTMOD +- +-ifeq ($(dot-config),1) +-# Read in config +--include include/config/auto.conf +- +-ifeq ($(KBUILD_EXTMOD),) +-# Read in dependencies to all Kconfig* files, make sure to run +-# oldconfig if changes are detected. +--include include/config/auto.conf.cmd +- +-# To avoid any implicit rule to kick in, define an empty command +-$(KCONFIG_CONFIG) include/config/auto.conf.cmd: ; +- +-# If .config is newer than include/config/auto.conf, someone tinkered +-# with it and forgot to run make oldconfig. +-# if auto.conf.cmd is missing then we are probably in a cleaned tree so +-# we execute the config step to be sure to catch updated Kconfig files +-include/config/auto.conf: $(KCONFIG_CONFIG) include/config/auto.conf.cmd +- $(Q)$(MAKE) -f $(srctree)/Makefile silentoldconfig +-else +-# external modules needs include/linux/autoconf.h and include/config/auto.conf +-# but do not care if they are up-to-date. 
Use auto.conf to trigger the test +-PHONY += include/config/auto.conf +- +-include/config/auto.conf: +- $(Q)test -e include/linux/autoconf.h -a -e $@ || ( \ +- echo; \ +- echo " ERROR: Kernel configuration is invalid."; \ +- echo " include/linux/autoconf.h or $@ are missing."; \ +- echo " Run 'make oldconfig && make prepare' on kernel src to fix it."; \ +- echo; \ +- /bin/false) +- +-endif # KBUILD_EXTMOD +- +-else +-# Dummy target needed, because used as prerequisite +-include/config/auto.conf: ; +-endif # $(dot-config) +- +-# The all: target is the default when no target is given on the +-# command line. +-# This allow a user to issue only 'make' to build a kernel including modules +-# Defaults vmlinux but it is usually overridden in the arch makefile +-all: vmlinux +- +-ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE +-CFLAGS += -Os +-else +-CFLAGS += -O2 +-endif +- +-include $(srctree)/arch/$(ARCH)/Makefile +- +-ifdef CONFIG_FRAME_POINTER +-CFLAGS += -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,) +-else +-CFLAGS += -fomit-frame-pointer +-endif +- +-ifdef CONFIG_DEBUG_INFO +-CFLAGS += -g +-endif +- +-# Force gcc to behave correct even for buggy distributions +-CFLAGS += $(call cc-option, -fno-stack-protector) +- +-# arch Makefile may override CC so keep this after arch Makefile is included +-NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) +-CHECKFLAGS += $(NOSTDINC_FLAGS) +- +-# warn about C99 declaration after statement +-CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) +- +-# disable pointer signed / unsigned warnings in gcc 4.0 +-CFLAGS += $(call cc-option,-Wno-pointer-sign,) +- +-# Default kernel image to build when no specific target is given. +-# KBUILD_IMAGE may be overruled on the command line or +-# set in the environment +-# Also any assignments in arch/$(ARCH)/Makefile take precedence over +-# this default value +-export KBUILD_IMAGE ?= vmlinux +- +-# +-# INSTALL_PATH specifies where to place the updated kernel and system map +-# images. Default is /boot, but you can set it to other values +-export INSTALL_PATH ?= /boot +- +-# +-# INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory +-# relocations required by build roots. This is not defined in the +-# makefile but the argument can be passed to make if needed. +-# +- +-MODLIB = $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) +-export MODLIB +- +-# +-# INSTALL_MOD_STRIP, if defined, will cause modules to be +-# stripped after they are installed. If INSTALL_MOD_STRIP is '1', then +-# the default option --strip-debug will be used. Otherwise, +-# INSTALL_MOD_STRIP will used as the options to the strip command. 
+- +-ifdef INSTALL_MOD_STRIP +-ifeq ($(INSTALL_MOD_STRIP),1) +-mod_strip_cmd = $(STRIP) --strip-debug +-else +-mod_strip_cmd = $(STRIP) $(INSTALL_MOD_STRIP) +-endif # INSTALL_MOD_STRIP=1 +-else +-mod_strip_cmd = true +-endif # INSTALL_MOD_STRIP +-export mod_strip_cmd +- +- +-ifeq ($(KBUILD_EXTMOD),) +-core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/ +- +-vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ +- $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ +- $(net-y) $(net-m) $(libs-y) $(libs-m))) +- +-vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \ +- $(init-n) $(init-) \ +- $(core-n) $(core-) $(drivers-n) $(drivers-) \ +- $(net-n) $(net-) $(libs-n) $(libs-)))) +- +-init-y := $(patsubst %/, %/built-in.o, $(init-y)) +-core-y := $(patsubst %/, %/built-in.o, $(core-y)) +-drivers-y := $(patsubst %/, %/built-in.o, $(drivers-y)) +-net-y := $(patsubst %/, %/built-in.o, $(net-y)) +-libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) +-libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) +-libs-y := $(libs-y1) $(libs-y2) +- +-# Build vmlinux +-# --------------------------------------------------------------------------- +-# vmlinux is built from the objects selected by $(vmlinux-init) and +-# $(vmlinux-main). Most are built-in.o files from top-level directories +-# in the kernel tree, others are specified in arch/$(ARCH)/Makefile. +-# Ordering when linking is important, and $(vmlinux-init) must be first. +-# +-# vmlinux +-# ^ +-# | +-# +-< $(vmlinux-init) +-# | +--< init/version.o + more +-# | +-# +--< $(vmlinux-main) +-# | +--< driver/built-in.o mm/built-in.o + more +-# | +-# +-< kallsyms.o (see description in CONFIG_KALLSYMS section) +-# +-# vmlinux version (uname -v) cannot be updated during normal +-# descending-into-subdirs phase since we do not yet know if we need to +-# update vmlinux. +-# Therefore this step is delayed until just before final link of vmlinux - +-# except in the kallsyms case where it is done just before adding the +-# symbols to the kernel. +-# +-# System.map is generated to document addresses of all kernel symbols +- +-vmlinux-init := $(head-y) $(init-y) +-vmlinux-main := $(core-y) $(libs-y) $(drivers-y) $(net-y) +-vmlinux-all := $(vmlinux-init) $(vmlinux-main) +-vmlinux-lds := arch/$(ARCH)/kernel/vmlinux.lds +-export KBUILD_VMLINUX_OBJS := $(vmlinux-all) +- +-# Rule to link vmlinux - also used during CONFIG_KALLSYMS +-# May be overridden by arch/$(ARCH)/Makefile +-quiet_cmd_vmlinux__ ?= LD $@ +- cmd_vmlinux__ ?= $(LD) $(LDFLAGS) $(LDFLAGS_vmlinux) -o $@ \ +- -T $(vmlinux-lds) $(vmlinux-init) \ +- --start-group $(vmlinux-main) --end-group \ +- $(filter-out $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) FORCE ,$^) +- +-# Generate new vmlinux version +-quiet_cmd_vmlinux_version = GEN .version +- cmd_vmlinux_version = set -e; \ +- if [ ! 
-r .version ]; then \
+- rm -f .version; \
+- echo 1 >.version; \
+- else \
+- mv .version .old_version; \
+- expr 0$$(cat .old_version) + 1 >.version; \
+- fi; \
+- $(MAKE) $(build)=init
+-
+-# Generate System.map
+-quiet_cmd_sysmap = SYSMAP
+- cmd_sysmap = $(CONFIG_SHELL) $(srctree)/scripts/mksysmap
+-
+-# Link of vmlinux
+-# If CONFIG_KALLSYMS is set .version is already updated
+-# Generate System.map and verify that the content is consistent
+-# Use + in front of the vmlinux_version rule to silence the warning with make -j2
+-# First command is ':' to allow us to use + in front of the rule
+-define rule_vmlinux__
+- :
+- $(if $(CONFIG_KALLSYMS),,+$(call cmd,vmlinux_version))
+-
+- $(call cmd,vmlinux__)
+- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
+-
+- $(Q)$(if $($(quiet)cmd_sysmap), \
+- echo ' $($(quiet)cmd_sysmap) System.map' &&) \
+- $(cmd_sysmap) $@ System.map; \
+- if [ $$? -ne 0 ]; then \
+- rm -f $@; \
+- /bin/false; \
+- fi;
+- $(verify_kallsyms)
+-endef
+-
+-
+-ifdef CONFIG_KALLSYMS
+-# Generate section listing all symbols and add it into vmlinux $(kallsyms.o)
+-# It's a three-stage process:
+-# o .tmp_vmlinux1 has all symbols and sections, but __kallsyms is
+-#   empty
+-#   Running kallsyms on that gives us .tmp_kallsyms1.o with
+-#   the right size - vmlinux version (uname -v) is updated during this step
+-# o .tmp_vmlinux2 now has a __kallsyms section of the right size,
+-#   but due to the added section, some addresses have shifted.
+-#   From here, we generate a correct .tmp_kallsyms2.o
+-# o The correct .tmp_kallsyms2.o is linked into the final vmlinux.
+-# o Verify that the System.map from vmlinux matches the map from
+-#   .tmp_vmlinux2, just in case we did not generate kallsyms correctly.
+-# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using
+-#   .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a
+-#   temporary bypass to allow the kernel to be built while the
+-#   maintainers work out what went wrong with kallsyms.
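The rules below implement those stages. Stripped of the kbuild plumbing, the pipeline they drive looks roughly like this (a sketch: the '...' stands for the usual $(vmlinux-init)/$(vmlinux-main) object lists, and --all-symbols is only passed when CONFIG_KALLSYMS_ALL is set):

    # stage 1: link with an empty __kallsyms, then generate a right-sized table
    ld -o .tmp_vmlinux1 -T vmlinux.lds ...
    nm -n .tmp_vmlinux1 | scripts/kallsyms --all-symbols > .tmp_kallsyms1.S
    as -o .tmp_kallsyms1.o .tmp_kallsyms1.S

    # stage 2: relink with the table present; addresses shift, so regenerate it
    ld -o .tmp_vmlinux2 -T vmlinux.lds ... .tmp_kallsyms1.o
    nm -n .tmp_vmlinux2 | scripts/kallsyms --all-symbols > .tmp_kallsyms2.S
    as -o .tmp_kallsyms2.o .tmp_kallsyms2.S

    # final: link vmlinux with the now-stable symbol table
    ld -o vmlinux -T vmlinux.lds ... .tmp_kallsyms2.o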
+-
+-ifdef CONFIG_KALLSYMS_EXTRA_PASS
+-last_kallsyms := 3
+-else
+-last_kallsyms := 2
+-endif
+-
+-kallsyms.o := .tmp_kallsyms$(last_kallsyms).o
+-
+-define verify_kallsyms
+- $(Q)$(if $($(quiet)cmd_sysmap), \
+- echo ' $($(quiet)cmd_sysmap) .tmp_System.map' &&) \
+- $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map
+- $(Q)cmp -s System.map .tmp_System.map || \
+- (echo Inconsistent kallsyms data; \
+- echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \
+- rm .tmp_kallsyms* ; /bin/false )
+-endef
+-
+-# Update vmlinux version before link
+-# Use + in front of this rule to silence the warning about make -j1
+-# First command is ':' to allow us to use + in front of this rule
+-cmd_ksym_ld = $(cmd_vmlinux__)
+-define rule_ksym_ld
+- :
+- +$(call cmd,vmlinux_version)
+- $(call cmd,vmlinux__)
+- $(Q)echo 'cmd_$@ := $(cmd_vmlinux__)' > $(@D)/.$(@F).cmd
+-endef
+-
+-# Generate .S file with all kernel symbols
+-quiet_cmd_kallsyms = KSYM $@
+- cmd_kallsyms = $(NM) -n $< | $(KALLSYMS) \
+- $(if $(CONFIG_KALLSYMS_ALL),--all-symbols) > $@
+-
+-.tmp_kallsyms1.o .tmp_kallsyms2.o .tmp_kallsyms3.o: %.o: %.S scripts FORCE
+- $(call if_changed_dep,as_o_S)
+-
+-.tmp_kallsyms%.S: .tmp_vmlinux% $(KALLSYMS)
+- $(call cmd,kallsyms)
+-
+-# .tmp_vmlinux1 must be complete except kallsyms, so update vmlinux version
+-.tmp_vmlinux1: $(vmlinux-lds) $(vmlinux-all) FORCE
+- $(call if_changed_rule,ksym_ld)
+-
+-.tmp_vmlinux2: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms1.o FORCE
+- $(call if_changed,vmlinux__)
+-
+-.tmp_vmlinux3: $(vmlinux-lds) $(vmlinux-all) .tmp_kallsyms2.o FORCE
+- $(call if_changed,vmlinux__)
+-
+-# Needs to visit scripts/ before $(KALLSYMS) can be used.
+-$(KALLSYMS): scripts ;
+-
+-# Generate some data for debugging strange kallsyms problems
+-debug_kallsyms: .tmp_map$(last_kallsyms)
+-
+-.tmp_map%: .tmp_vmlinux% FORCE
+- ($(OBJDUMP) -h $< | $(AWK) '/^ +[0-9]/{print $$4 " 0 " $$2}'; $(NM) $<) | sort > $@
+-
+-.tmp_map3: .tmp_map2
+-
+-.tmp_map2: .tmp_map1
+-
+-endif # ifdef CONFIG_KALLSYMS
+-
+-# vmlinux image - including updated kernel symbols
+-vmlinux: $(vmlinux-lds) $(vmlinux-init) $(vmlinux-main) $(kallsyms.o) FORCE
+-ifdef CONFIG_HEADERS_CHECK
+- $(Q)$(MAKE) -f $(srctree)/Makefile headers_check
+-endif
+- $(call if_changed_rule,vmlinux__)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost $@
+- $(Q)rm -f .old_version
+-
+-# The actual objects are generated when descending,
+-# make sure no implicit rule kicks in
+-$(sort $(vmlinux-init) $(vmlinux-main)) $(vmlinux-lds): $(vmlinux-dirs) ;
+-
+-# Handle descending into subdirectories listed in $(vmlinux-dirs)
+-# Preset locale variables to speed up the build process. Limit locale
+-# tweaks to this spot to avoid wrong language settings when running
+-# make menuconfig etc.
+-# Error messages still appear in the original language
+-
+-PHONY += $(vmlinux-dirs)
+-$(vmlinux-dirs): prepare scripts
+- $(Q)$(MAKE) $(build)=$@
+-
+-# Build the kernel release string
+-#
+-# The KERNELRELEASE value built here is stored in the file
+-# include/config/kernel.release, and is used when executing several
+-# make targets, such as "make install" or "make modules_install."
+-#
+-# The eventual kernel release string consists of the following fields,
+-# shown in a hierarchical format to show how smaller parts are concatenated
+-# to form the larger and final value, with values coming from places like
+-# the Makefile, kernel config options, make command line options and/or
+-# SCM tag information.
+-
+-#
+-# $(KERNELVERSION)
+-#   $(VERSION)                  eg, 2
+-#   $(PATCHLEVEL)               eg, 6
+-#   $(SUBLEVEL)                 eg, 18
+-#   $(EXTRAVERSION)             eg, -rc6
+-# $(localver-full)
+-#   $(localver)
+-#     localversion*             (files without backups, containing '~')
+-#     $(CONFIG_LOCALVERSION)    (from kernel config setting)
+-#   $(localver-auto)            (only if CONFIG_LOCALVERSION_AUTO is set)
+-#     ./scripts/setlocalversion (SCM tag, if one exists)
+-#     $(LOCALVERSION)           (from make command line if provided)
+-#
+-# Note how the final $(localver-auto) string is included *only* if the
+-# kernel config option CONFIG_LOCALVERSION_AUTO is selected. Also, at the
+-# moment, only git is supported but other SCMs can edit the script
+-# scripts/setlocalversion and add the appropriate checks as needed.
+-# (A worked example of the full assembly follows the prepare targets below.)
+-
+-pattern = ".*/localversion[^~]*"
+-string = $(shell cat /dev/null \
+- `find $(objtree) $(srctree) -maxdepth 1 -regex $(pattern) | sort -u`)
+-
+-localver = $(subst $(space),, $(string) \
+- $(patsubst "%",%,$(CONFIG_LOCALVERSION)))
+-
+-# If CONFIG_LOCALVERSION_AUTO is set scripts/setlocalversion is called
+-# and if the SCM is known, a tag from the SCM is appended.
+-# The appended tag is determined by the SCM used.
+-#
+-# Currently, only git is supported.
+-# Other SCMs can edit scripts/setlocalversion and add the appropriate
+-# checks as needed.
+-ifdef CONFIG_LOCALVERSION_AUTO
+- _localver-auto = $(shell $(CONFIG_SHELL) \
+- $(srctree)/scripts/setlocalversion $(srctree))
+- localver-auto = $(LOCALVERSION)$(_localver-auto)
+-endif
+-
+-localver-full = $(localver)$(localver-auto)
+-
+-# Store (new) KERNELRELEASE string in include/config/kernel.release
+-kernelrelease = $(KERNELVERSION)$(localver-full)
+-include/config/kernel.release: include/config/auto.conf FORCE
+- $(Q)rm -f $@
+- $(Q)echo $(kernelrelease) > $@
+-
+-
+-# Things we need to do before we recursively start building the kernel
+-# or the modules are listed in "prepare".
+-# A multi level approach is used. prepareN is processed before prepareN-1.
+-# archprepare is used in arch Makefiles; when it is processed, the asm
+-# symlink, version.h and scripts_basic are processed / created.
+-
+-# Listed in dependency order
+-PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3
+-
+-# prepare3 is used to check if we are building in a separate output directory,
+-# and if so do:
+-# 1) Check that make has not been executed in the kernel src $(srctree)
+-# 2) Create the include2 directory, used for the second asm symlink
+-prepare3: include/config/kernel.release
+-ifneq ($(KBUILD_SRC),)
+- @echo ' Using $(srctree) as source for kernel'
+- $(Q)if [ -f $(srctree)/.config -o -d $(srctree)/include/config ]; then \
+- echo " $(srctree) is not clean, please run 'make mrproper'";\
+- echo " in the '$(srctree)' directory.";\
+- /bin/false; \
+- fi;
+- $(Q)if [ ! -d include2 ]; then mkdir -p include2; fi;
+- $(Q)ln -fsn $(srctree)/include/asm-$(ARCH) include2/asm
+-endif
+-
+-# prepare2 creates a makefile if using a separate output directory
+-prepare2: prepare3 outputmakefile
+-
+-prepare1: prepare2 include/linux/version.h include/linux/utsrelease.h \
+- include/asm include/config/auto.conf
+-ifneq ($(KBUILD_MODULES),)
+- $(Q)mkdir -p $(MODVERDIR)
+- $(Q)rm -f $(MODVERDIR)/*
+-endif
+-
+-archprepare: prepare1 scripts_basic
+-
+-prepare0: archprepare FORCE
+- $(Q)$(MAKE) $(build)=.
+- $(Q)$(MAKE) $(build)=. missing-syscalls
+-
+-# All the preparing..
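As promised above, a worked example of the release-string assembly (every value here is hypothetical):

    # KERNELVERSION = 2.6.22    (VERSION=2, PATCHLEVEL=6, SUBLEVEL=22, empty EXTRAVERSION)
    # a localversion file in $(objtree) contains:  -board
    # CONFIG_LOCALVERSION="-trellis"
    # CONFIG_LOCALVERSION_AUTO=y and scripts/setlocalversion prints:  -g0123456
    # make is invoked with LOCALVERSION=-test
    #
    # localver      = -board-trellis
    # localver-auto = -test-g0123456
    # KERNELRELEASE = 2.6.22-board-trellis-test-g0123456
    cat include/config/kernel.release    # after 'make prepare'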
+-prepare: prepare0 +- +-# Leave this as default for preprocessing vmlinux.lds.S, which is now +-# done in arch/$(ARCH)/kernel/Makefile +- +-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) +- +-# FIXME: The asm symlink changes when $(ARCH) changes. That's +-# hard to detect, but I suppose "make mrproper" is a good idea +-# before switching between archs anyway. +- +-include/asm: +- @echo ' SYMLINK $@ -> include/asm-$(ARCH)' +- $(Q)if [ ! -d include ]; then mkdir -p include; fi; +- @ln -fsn asm-$(ARCH) $@ +- +-# Generate some files +-# --------------------------------------------------------------------------- +- +-# KERNELRELEASE can change from a few different places, meaning version.h +-# needs to be updated, so this check is forced on all builds +- +-uts_len := 64 +-define filechk_utsrelease.h +- if [ `echo -n "$(KERNELRELEASE)" | wc -c ` -gt $(uts_len) ]; then \ +- echo '"$(KERNELRELEASE)" exceeds $(uts_len) characters' >&2; \ +- exit 1; \ +- fi; \ +- (echo \#define UTS_RELEASE \"$(KERNELRELEASE)\";) +-endef +- +-define filechk_version.h +- (echo \#define LINUX_VERSION_CODE $(shell \ +- expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ +- echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))';) +-endef +- +-include/linux/version.h: $(srctree)/Makefile FORCE +- $(call filechk,version.h) +- +-include/linux/utsrelease.h: include/config/kernel.release FORCE +- $(call filechk,utsrelease.h) +- +-# --------------------------------------------------------------------------- +- +-PHONY += depend dep +-depend dep: +- @echo '*** Warning: make $@ is unnecessary now.' +- +-# --------------------------------------------------------------------------- +-# Kernel headers +-INSTALL_HDR_PATH=$(objtree)/usr +-export INSTALL_HDR_PATH +- +-HDRARCHES=$(filter-out generic,$(patsubst $(srctree)/include/asm-%/Kbuild,%,$(wildcard $(srctree)/include/asm-*/Kbuild))) +- +-PHONY += headers_install_all +-headers_install_all: include/linux/version.h scripts_basic FORCE +- $(Q)$(MAKE) $(build)=scripts scripts/unifdef +- $(Q)for arch in $(HDRARCHES); do \ +- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch ;\ +- done +- +-PHONY += headers_install +-headers_install: include/linux/version.h scripts_basic FORCE +- @if [ ! 
-r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ +- echo '*** Error: Headers not exportable for this architecture ($(ARCH))'; \ +- exit 1 ; fi +- $(Q)$(MAKE) $(build)=scripts scripts/unifdef +- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include +- +-PHONY += headers_check_all +-headers_check_all: headers_install_all +- $(Q)for arch in $(HDRARCHES); do \ +- $(MAKE) ARCH=$$arch -f $(srctree)/scripts/Makefile.headersinst obj=include BIASMDIR=-bi-$$arch HDRCHECK=1 ;\ +- done +- +-PHONY += headers_check +-headers_check: headers_install +- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.headersinst obj=include HDRCHECK=1 +- +-# --------------------------------------------------------------------------- +-# Modules +- +-ifdef CONFIG_MODULES +- +-# By default, build modules as well +- +-all: modules +- +-# Build modules +- +-PHONY += modules +-modules: $(vmlinux-dirs) $(if $(KBUILD_BUILTIN),vmlinux) +- @echo ' Building modules, stage 2.'; +- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost +- +- +-# Target to prepare building external modules +-PHONY += modules_prepare +-modules_prepare: prepare scripts +- +-# Target to install modules +-PHONY += modules_install +-modules_install: _modinst_ _modinst_post +- +-PHONY += _modinst_ +-_modinst_: +- @if [ -z "`$(DEPMOD) -V 2>/dev/null | grep module-init-tools`" ]; then \ +- echo "Warning: you may need to install module-init-tools"; \ +- echo "See http://www.codemonkey.org.uk/docs/post-halloween-2.6.txt";\ +- sleep 1; \ +- fi +- @rm -rf $(MODLIB)/kernel +- @rm -f $(MODLIB)/source +- @mkdir -p $(MODLIB)/kernel +- @ln -s $(srctree) $(MODLIB)/source +- @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \ +- rm -f $(MODLIB)/build ; \ +- ln -s $(objtree) $(MODLIB)/build ; \ +- fi +- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst +- +-# If System.map exists, run depmod. This deliberately does not have a +-# dependency on System.map since that would run the dependency tree on +-# vmlinux. This depmod is only for convenience to give the initial +-# boot a modules.dep even before / is mounted read-write. However the +-# boot script depmod is the master version. +-ifeq "$(strip $(INSTALL_MOD_PATH))" "" +-depmod_opts := +-else +-depmod_opts := -b $(INSTALL_MOD_PATH) -r +-endif +-PHONY += _modinst_post +-_modinst_post: _modinst_ +- if [ -r System.map -a -x $(DEPMOD) ]; then $(DEPMOD) -ae -F System.map $(depmod_opts) $(KERNELRELEASE); fi +- +-else # CONFIG_MODULES +- +-# Modules not configured +-# --------------------------------------------------------------------------- +- +-modules modules_install: FORCE +- @echo +- @echo "The present kernel configuration has modules disabled." +- @echo "Type 'make config' and enable loadable module support." +- @echo "Then build a kernel with module support enabled." +- @echo +- @exit 1 +- +-endif # CONFIG_MODULES +- +-### +-# Cleaning is done on three levels. 
+-# make clean Delete most generated files +-# Leave enough to build external modules +-# make mrproper Delete the current configuration, and all generated files +-# make distclean Remove editor backup files, patch leftover files and the like +- +-# Directories & files removed with 'make clean' +-CLEAN_DIRS += $(MODVERDIR) +-CLEAN_FILES += vmlinux System.map \ +- .tmp_kallsyms* .tmp_version .tmp_vmlinux* .tmp_System.map +- +-# Directories & files removed with 'make mrproper' +-MRPROPER_DIRS += include/config include2 usr/include +-MRPROPER_FILES += .config .config.old include/asm .version .old_version \ +- include/linux/autoconf.h include/linux/version.h \ +- include/linux/utsrelease.h \ +- Module.symvers tags TAGS cscope* +- +-# clean - Delete most, but leave enough to build external modules +-# +-clean: rm-dirs := $(CLEAN_DIRS) +-clean: rm-files := $(CLEAN_FILES) +-clean-dirs := $(addprefix _clean_,$(srctree) $(vmlinux-alldirs)) +- +-PHONY += $(clean-dirs) clean archclean +-$(clean-dirs): +- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@) +- +-clean: archclean $(clean-dirs) +- $(call cmd,rmdirs) +- $(call cmd,rmfiles) +- @find . $(RCS_FIND_IGNORE) \ +- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ +- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ +- -o -name '*.symtypes' \) \ +- -type f -print | xargs rm -f +- +-# mrproper - Delete all generated files, including .config +-# +-mrproper: rm-dirs := $(wildcard $(MRPROPER_DIRS)) +-mrproper: rm-files := $(wildcard $(MRPROPER_FILES)) +-mrproper-dirs := $(addprefix _mrproper_,Documentation/DocBook scripts) +- +-PHONY += $(mrproper-dirs) mrproper archmrproper +-$(mrproper-dirs): +- $(Q)$(MAKE) $(clean)=$(patsubst _mrproper_%,%,$@) +- +-mrproper: clean archmrproper $(mrproper-dirs) +- $(call cmd,rmdirs) +- $(call cmd,rmfiles) +- +-# distclean +-# +-PHONY += distclean +- +-distclean: mrproper +- @find $(srctree) $(RCS_FIND_IGNORE) \ +- \( -name '*.orig' -o -name '*.rej' -o -name '*~' \ +- -o -name '*.bak' -o -name '#*#' -o -name '.*.orig' \ +- -o -name '.*.rej' -o -size 0 \ +- -o -name '*%' -o -name '.*.cmd' -o -name 'core' \) \ +- -type f -print | xargs rm -f +- +- +-# Packaging of the kernel to various formats +-# --------------------------------------------------------------------------- +-# rpm target kept for backward compatibility +-package-dir := $(srctree)/scripts/package +- +-%pkg: include/config/kernel.release FORCE +- $(Q)$(MAKE) $(build)=$(package-dir) $@ +-rpm: include/config/kernel.release FORCE +- $(Q)$(MAKE) $(build)=$(package-dir) $@ +- +- +-# Brief documentation of the typical targets used +-# --------------------------------------------------------------------------- +- +-boards := $(wildcard $(srctree)/arch/$(ARCH)/configs/*_defconfig) +-boards := $(notdir $(boards)) +- +-help: +- @echo 'Cleaning targets:' +- @echo ' clean - Remove most generated files but keep the config and' +- @echo ' enough build support to build external modules' +- @echo ' mrproper - Remove all generated files + config + various backup files' +- @echo ' distclean - mrproper + remove editor backup and patch files' +- @echo '' +- @echo 'Configuration targets:' +- @$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help +- @echo '' +- @echo 'Other generic targets:' +- @echo ' all - Build all targets marked with [*]' +- @echo '* vmlinux - Build the bare kernel' +- @echo '* modules - Build all modules' +- @echo ' modules_install - Install all modules to INSTALL_MOD_PATH (default: /)' +- @echo ' dir/ - Build all files in dir and below' +- @echo ' 
dir/file.[ois] - Build specified target only' +- @echo ' dir/file.ko - Build module including final link' +- @echo ' rpm - Build a kernel as an RPM package' +- @echo ' tags/TAGS - Generate tags file for editors' +- @echo ' cscope - Generate cscope index' +- @echo ' kernelrelease - Output the release version string' +- @echo ' kernelversion - Output the version stored in Makefile' +- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ +- echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ +- echo ' (default: $(INSTALL_HDR_PATH))'; \ +- fi +- @echo '' +- @echo 'Static analysers' +- @echo ' checkstack - Generate a list of stack hogs' +- @echo ' namespacecheck - Name space analysis on compiled kernel' +- @if [ -r $(srctree)/include/asm-$(ARCH)/Kbuild ]; then \ +- echo ' headers_check - Sanity check on exported headers'; \ +- fi +- @echo '' +- @echo 'Kernel packaging:' +- @$(MAKE) $(build)=$(package-dir) help +- @echo '' +- @echo 'Documentation targets:' +- @$(MAKE) -f $(srctree)/Documentation/DocBook/Makefile dochelp +- @echo '' +- @echo 'Architecture specific targets ($(ARCH)):' +- @$(if $(archhelp),$(archhelp),\ +- echo ' No architecture specific help defined for $(ARCH)') +- @echo '' +- @$(if $(boards), \ +- $(foreach b, $(boards), \ +- printf " %-24s - Build for %s\\n" $(b) $(subst _defconfig,,$(b));) \ +- echo '') +- +- @echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build' +- @echo ' make V=2 [targets] 2 => give reason for rebuild of target' +- @echo ' make O=dir [targets] Locate all output files in "dir", including .config' +- @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' +- @echo ' make C=2 [targets] Force check of all c source with $$CHECK' +- @echo '' +- @echo 'Execute "make" or "make all" to build all targets marked with [*] ' +- @echo 'For further info see the ./README file' +- +- +-# Documentation targets +-# --------------------------------------------------------------------------- +-%docs: scripts_basic FORCE +- $(Q)$(MAKE) $(build)=Documentation/DocBook $@ +- +-else # KBUILD_EXTMOD +- +-### +-# External module support. +-# When building external modules the kernel used as basis is considered +-# read-only, and no consistency checks are made and the make +-# system is not used on the basis kernel. If updates are required +-# in the basis kernel ordinary make commands (without M=...) must +-# be used. +-# +-# The following are the only valid targets when building external +-# modules. 
+-# make M=dir clean Delete all automatically generated files
+-# make M=dir modules Make all modules in specified dir
+-# make M=dir Same as 'make M=dir modules'
+-# make M=dir modules_install
+-# Install the modules built in the module directory
+-# Assumes install directory is already created
+-
+-# We are always building modules
+-KBUILD_MODULES := 1
+-PHONY += crmodverdir
+-crmodverdir:
+- $(Q)mkdir -p $(MODVERDIR)
+- $(Q)rm -f $(MODVERDIR)/*
+-
+-PHONY += $(objtree)/Module.symvers
+-$(objtree)/Module.symvers:
+- @test -e $(objtree)/Module.symvers || ( \
+- echo; \
+- echo " WARNING: Symbol version dump $(objtree)/Module.symvers"; \
+- echo " is missing; modules will have no dependencies and modversions."; \
+- echo )
+-
+-module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD))
+-PHONY += $(module-dirs) modules
+-$(module-dirs): crmodverdir $(objtree)/Module.symvers
+- $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@)
+-
+-modules: $(module-dirs)
+- @echo ' Building modules, stage 2.';
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-PHONY += modules_install
+-modules_install: _emodinst_ _emodinst_post
+-
+-install-dir := $(if $(INSTALL_MOD_DIR),$(INSTALL_MOD_DIR),extra)
+-PHONY += _emodinst_
+-_emodinst_:
+- $(Q)mkdir -p $(MODLIB)/$(install-dir)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
+-
+-# Run depmod only if we have System.map and depmod is executable
+-quiet_cmd_depmod = DEPMOD $(KERNELRELEASE)
+- cmd_depmod = if [ -r System.map -a -x $(DEPMOD) ]; then \
+- $(DEPMOD) -ae -F System.map \
+- $(if $(strip $(INSTALL_MOD_PATH)), \
+- -b $(INSTALL_MOD_PATH) -r) \
+- $(KERNELRELEASE); \
+- fi
+-
+-PHONY += _emodinst_post
+-_emodinst_post: _emodinst_
+- $(call cmd,depmod)
+-
+-clean-dirs := $(addprefix _clean_,$(KBUILD_EXTMOD))
+-
+-PHONY += $(clean-dirs) clean
+-$(clean-dirs):
+- $(Q)$(MAKE) $(clean)=$(patsubst _clean_%,%,$@)
+-
+-clean: rm-dirs := $(MODVERDIR)
+-clean: $(clean-dirs)
+- $(call cmd,rmdirs)
+- @find $(KBUILD_EXTMOD) $(RCS_FIND_IGNORE) \
+- \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
+- -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \) \
+- -type f -print | xargs rm -f
+-
+-help:
+- @echo ' Building external modules.'
+- @echo ' Syntax: make -C path/to/kernel/src M=$$PWD target'
+- @echo ''
+- @echo ' modules - default target, build the module(s)'
+- @echo ' modules_install - install the module'
+- @echo ' clean - remove generated files in module directory only'
+- @echo ''
+-
+-# Dummies...
+-PHONY += prepare scripts
+-prepare: ;
+-scripts: ;
+-endif # KBUILD_EXTMOD
+-
+-# Generate tags for editors
+-# ---------------------------------------------------------------------------
+-
+-#We want __srctree to totally vanish out when KBUILD_OUTPUT is not set
+-#(which is the most common case IMHO) to avoid unneeded clutter in the big tags file.
+-#Adding $(srctree) adds about 20M on i386 to the size of the output file!
+-
+-ifeq ($(src),$(obj))
+-__srctree =
+-else
+-__srctree = $(srctree)/
+-endif
+-
+-ifeq ($(ALLSOURCE_ARCHS),)
+-ifeq ($(ARCH),um)
+-ALLINCLUDE_ARCHS := $(ARCH) $(SUBARCH)
+-else
+-ALLINCLUDE_ARCHS := $(ARCH)
+-endif
+-else
+-#Allow user to specify only ALLSOURCE_ARCHS on the command line, keeping existing behaviour.
+-ALLINCLUDE_ARCHS := $(ALLSOURCE_ARCHS) +-endif +- +-ALLSOURCE_ARCHS := $(ARCH) +- +-define find-sources +- ( for ARCH in $(ALLSOURCE_ARCHS) ; do \ +- find $(__srctree)arch/$${ARCH} $(RCS_FIND_IGNORE) \ +- -name $1 -print; \ +- done ; \ +- find $(__srctree)security/selinux/include $(RCS_FIND_IGNORE) \ +- -name $1 -print; \ +- find $(__srctree)include $(RCS_FIND_IGNORE) \ +- \( -name config -o -name 'asm-*' \) -prune \ +- -o -name $1 -print; \ +- for ARCH in $(ALLINCLUDE_ARCHS) ; do \ +- find $(__srctree)include/asm-$${ARCH} $(RCS_FIND_IGNORE) \ +- -name $1 -print; \ +- done ; \ +- find $(__srctree)include/asm-generic $(RCS_FIND_IGNORE) \ +- -name $1 -print; \ +- find $(__srctree) $(RCS_FIND_IGNORE) \ +- \( -name include -o -name arch \) -prune -o \ +- -name $1 -print; \ +- ) +-endef +- +-define all-sources +- $(call find-sources,'*.[chS]') +-endef +-define all-kconfigs +- $(call find-sources,'Kconfig*') +-endef +-define all-defconfigs +- $(call find-sources,'defconfig') +-endef +- +-define xtags +- if $1 --version 2>&1 | grep -iq exuberant; then \ +- $(all-sources) | xargs $1 -a \ +- -I __initdata,__exitdata,__acquires,__releases \ +- -I EXPORT_SYMBOL,EXPORT_SYMBOL_GPL \ +- --extra=+f --c-kinds=+px \ +- --regex-asm='/ENTRY\(([^)]*)\).*/\1/'; \ +- $(all-kconfigs) | xargs $1 -a \ +- --langdef=kconfig \ +- --language-force=kconfig \ +- --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \ +- $(all-defconfigs) | xargs -r $1 -a \ +- --langdef=dotconfig \ +- --language-force=dotconfig \ +- --regex-dotconfig='/^#?[[:blank:]]*(CONFIG_[[:alnum:]_]+)/\1/'; \ +- elif $1 --version 2>&1 | grep -iq emacs; then \ +- $(all-sources) | xargs $1 -a; \ +- $(all-kconfigs) | xargs $1 -a \ +- --regex='/^[ \t]*config[ \t]+\([a-zA-Z0-9_]+\)/\1/'; \ +- $(all-defconfigs) | xargs -r $1 -a \ +- --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \ +- else \ +- $(all-sources) | xargs $1 -a; \ +- fi +-endef +- +-quiet_cmd_cscope-file = FILELST cscope.files +- cmd_cscope-file = (echo \-k; echo \-q; $(all-sources)) > cscope.files +- +-quiet_cmd_cscope = MAKE cscope.out +- cmd_cscope = cscope -b +- +-cscope: FORCE +- $(call cmd,cscope-file) +- $(call cmd,cscope) +- +-quiet_cmd_TAGS = MAKE $@ +-define cmd_TAGS +- rm -f $@; \ +- $(call xtags,etags) +-endef +- +-TAGS: FORCE +- $(call cmd,TAGS) +- +-quiet_cmd_tags = MAKE $@ +-define cmd_tags +- rm -f $@; \ +- $(call xtags,ctags) +-endef +- +-tags: FORCE +- $(call cmd,tags) +- +- +-# Scripts to check various things for consistency +-# --------------------------------------------------------------------------- +- +-includecheck: +- find * $(RCS_FIND_IGNORE) \ +- -name '*.[hcS]' -type f -print | sort \ +- | xargs $(PERL) -w scripts/checkincludes.pl +- +-versioncheck: +- find * $(RCS_FIND_IGNORE) \ +- -name '*.[hcS]' -type f -print | sort \ +- | xargs $(PERL) -w scripts/checkversion.pl +- +-namespacecheck: +- $(PERL) $(srctree)/scripts/namespace.pl +- +-endif #ifeq ($(config-targets),1) +-endif #ifeq ($(mixed-targets),1) +- +-PHONY += checkstack kernelrelease kernelversion +- +-# UML needs a little special treatment here. It wants to use the host +-# toolchain, so needs $(SUBARCH) passed to checkstack.pl. Everyone +-# else wants $(ARCH), including people doing cross-builds, which means +-# that $(SUBARCH) doesn't work here. +-ifeq ($(ARCH), um) +-CHECKSTACK_ARCH := $(SUBARCH) +-else +-CHECKSTACK_ARCH := $(ARCH) +-endif +-checkstack: +- $(OBJDUMP) -d vmlinux $$(find . 
-name '*.ko') | \
+- $(PERL) $(src)/scripts/checkstack.pl $(CHECKSTACK_ARCH)
+-
+-kernelrelease:
+- $(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \
+- $(error kernelrelease not valid - run 'make prepare' to update it))
+-kernelversion:
+- @echo $(KERNELVERSION)
+-
+-# Single targets
+-# ---------------------------------------------------------------------------
+-# Single targets are compatible with:
+-# - build with mixed source and output
+-# - build with separate output dir 'make O=...'
+-# - external modules
+-#
+-# target-dir => where to store outputfile
+-# build-dir => directory in kernel source tree to use
+-
+-ifeq ($(KBUILD_EXTMOD),)
+- build-dir = $(patsubst %/,%,$(dir $@))
+- target-dir = $(dir $@)
+-else
+- zap-slash=$(filter-out .,$(patsubst %/,%,$(dir $@)))
+- build-dir = $(KBUILD_EXTMOD)$(if $(zap-slash),/$(zap-slash))
+- target-dir = $(if $(KBUILD_EXTMOD),$(dir $<),$(dir $@))
+-endif
+-
+-%.s: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.i: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.o: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.lst: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.s: %.S prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.o: %.S prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-%.symtypes: %.c prepare scripts FORCE
+- $(Q)$(MAKE) $(build)=$(build-dir) $(target-dir)$(notdir $@)
+-
+-# Modules
+-/ %/: prepare scripts FORCE
+- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
+- $(build)=$(build-dir)
+-%.ko: prepare scripts FORCE
+- $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \
+- $(build)=$(build-dir) $(@:.ko=.o)
+- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
+-
+-# FIXME Should go into a make.lib or something
+-# ===========================================================================
+-
+-quiet_cmd_rmdirs = $(if $(wildcard $(rm-dirs)),CLEAN $(wildcard $(rm-dirs)))
+- cmd_rmdirs = rm -rf $(rm-dirs)
+-
+-quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)))
+- cmd_rmfiles = rm -f $(rm-files)
+-
+-
+-a_flags = -Wp,-MD,$(depfile) $(AFLAGS) $(AFLAGS_KERNEL) \
+- $(NOSTDINC_FLAGS) $(CPPFLAGS) \
+- $(modkern_aflags) $(EXTRA_AFLAGS) $(AFLAGS_$(basetarget).o)
+-
+-quiet_cmd_as_o_S = AS $@
+-cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $<
+-
+-# read all saved command lines
+-
+-targets := $(wildcard $(sort $(targets)))
+-cmd_files := $(wildcard .*.cmd $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+-
+-ifneq ($(cmd_files),)
+- $(cmd_files): ; # Do not try to update included dependency files
+- include $(cmd_files)
+-endif
+-
+-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
+-# Usage:
+-# $(Q)$(MAKE) $(clean)=dir
+-clean := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.clean obj
+-
+-endif # skip-makefile
+-
+-PHONY += FORCE
+-FORCE:
+-
+-# Cancel implicit rules on top Makefile, `-rR' will apply to sub-makes.
+-Makefile: ;
+-
+-# Declare the contents of the .PHONY variable as phony. We keep that
+-# information in a variable so we can use it in if_changed and friends.
+-.PHONY: $(PHONY)
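That closes the top-level Makefile portion of the patch. As a usage sketch, the single-target and module rules above are what make the following everyday invocations work (all paths are examples only):

    make kernel/sched.o                   # build one object file
    make drivers/net/                     # build everything below a directory
    make fs/ext2/ext2.ko                  # build a single module, including the final link
    make -C /path/to/kernel/src M=$PWD    # build an external module tree against this source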
+diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-591/arch/arm/Kconfig
+--- linux-2.6.22-570/arch/arm/Kconfig 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/arm/Kconfig 2007-12-21 15:36:11.000000000 -0500
+@@ -1034,6 +1034,8 @@
+
+ source "drivers/rtc/Kconfig"
+
++source "drivers/dma/Kconfig"
++
+ endmenu
+
+ source "fs/Kconfig"
+diff -Nurb linux-2.6.22-570/arch/arm/boot/.gitignore.rej linux-2.6.22-591/arch/arm/boot/.gitignore.rej
+--- linux-2.6.22-570/arch/arm/boot/.gitignore.rej 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/boot/.gitignore.rej 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,10 @@
++***************
++*** 1,2 ****
++ Image
++ zImage
++--- 1,5 ----
++ Image
++ zImage
+++ xipImage
+++ bootpImage
+++ uImage
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/Makefile linux-2.6.22-591/arch/arm/kernel/Makefile
+--- linux-2.6.22-570/arch/arm/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -20,6 +20,7 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+ obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+
+ obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o
+ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,32 @@
++/*
++ * arch/arm/kernel/kgdb-jmp.S
++ *
++ * Trivial setjmp and longjmp procedures to support bus error recovery
++ * which may occur during kgdb memory read/write operations.
++ *
++ * Author: MontaVista Software, Inc.
++ * source@mvista.com
++ *
++ * 2002-2005 (c) MontaVista Software, Inc. This file is licensed under the
++ * terms of the GNU General Public License version 2. This program is licensed
++ * "as is" without any warranty of any kind, whether express or implied.
++ */
++#include
++
++ENTRY (kgdb_fault_setjmp)
++ /* Save registers */
++ stmia r0, {r0-r14}
++ str lr,[r0, #60]
++ mrs r1,cpsr
++ str r1,[r0,#64]
++ ldr r1,[r0,#4]
++ mov r0, #0
++ mov pc,lr
++
++ENTRY (kgdb_fault_longjmp)
++ /* Restore registers */
++ mov r1,#1
++ str r1,[r0]
++ ldr r1,[r0, #64]
++ msr spsr,r1
++ ldmia r0,{r0-pc}^
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb.c linux-2.6.22-591/arch/arm/kernel/kgdb.c
+--- linux-2.6.22-570/arch/arm/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,202 @@
++/*
++ * arch/arm/kernel/kgdb.c
++ *
++ * ARM KGDB support
++ *
++ * Copyright (c) 2002-2004 MontaVista Software, Inc
++ *
++ * Authors: George Davis
++ * Deepak Saxena
++ */
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++/* Make a local copy of the registers passed into the handler (bletch) */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
++{
++ int regno;
++
++ /* Initialize all to zero (??)
*/ ++ for (regno = 0; regno < GDB_MAX_REGS; regno++) ++ gdb_regs[regno] = 0; ++ ++ gdb_regs[_R0] = kernel_regs->ARM_r0; ++ gdb_regs[_R1] = kernel_regs->ARM_r1; ++ gdb_regs[_R2] = kernel_regs->ARM_r2; ++ gdb_regs[_R3] = kernel_regs->ARM_r3; ++ gdb_regs[_R4] = kernel_regs->ARM_r4; ++ gdb_regs[_R5] = kernel_regs->ARM_r5; ++ gdb_regs[_R6] = kernel_regs->ARM_r6; ++ gdb_regs[_R7] = kernel_regs->ARM_r7; ++ gdb_regs[_R8] = kernel_regs->ARM_r8; ++ gdb_regs[_R9] = kernel_regs->ARM_r9; ++ gdb_regs[_R10] = kernel_regs->ARM_r10; ++ gdb_regs[_FP] = kernel_regs->ARM_fp; ++ gdb_regs[_IP] = kernel_regs->ARM_ip; ++ gdb_regs[_SP] = kernel_regs->ARM_sp; ++ gdb_regs[_LR] = kernel_regs->ARM_lr; ++ gdb_regs[_PC] = kernel_regs->ARM_pc; ++ gdb_regs[_CPSR] = kernel_regs->ARM_cpsr; ++} ++ ++/* Copy local gdb registers back to kgdb regs, for later copy to kernel */ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs) ++{ ++ kernel_regs->ARM_r0 = gdb_regs[_R0]; ++ kernel_regs->ARM_r1 = gdb_regs[_R1]; ++ kernel_regs->ARM_r2 = gdb_regs[_R2]; ++ kernel_regs->ARM_r3 = gdb_regs[_R3]; ++ kernel_regs->ARM_r4 = gdb_regs[_R4]; ++ kernel_regs->ARM_r5 = gdb_regs[_R5]; ++ kernel_regs->ARM_r6 = gdb_regs[_R6]; ++ kernel_regs->ARM_r7 = gdb_regs[_R7]; ++ kernel_regs->ARM_r8 = gdb_regs[_R8]; ++ kernel_regs->ARM_r9 = gdb_regs[_R9]; ++ kernel_regs->ARM_r10 = gdb_regs[_R10]; ++ kernel_regs->ARM_fp = gdb_regs[_FP]; ++ kernel_regs->ARM_ip = gdb_regs[_IP]; ++ kernel_regs->ARM_sp = gdb_regs[_SP]; ++ kernel_regs->ARM_lr = gdb_regs[_LR]; ++ kernel_regs->ARM_pc = gdb_regs[_PC]; ++ kernel_regs->ARM_cpsr = gdb_regs[_CPSR]; ++} ++ ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, ++ struct task_struct *task) ++{ ++ int regno; ++ struct pt_regs *thread_regs; ++ ++ /* Just making sure... */ ++ if (task == NULL) ++ return; ++ ++ /* Initialize to zero */ ++ for (regno = 0; regno < GDB_MAX_REGS; regno++) ++ gdb_regs[regno] = 0; ++ ++ /* Otherwise, we have only some registers from switch_to() */ ++ thread_regs = task_pt_regs(task); ++ gdb_regs[_R0] = thread_regs->ARM_r0; /* Not really valid? */ ++ gdb_regs[_R1] = thread_regs->ARM_r1; /* " " */ ++ gdb_regs[_R2] = thread_regs->ARM_r2; /* " " */ ++ gdb_regs[_R3] = thread_regs->ARM_r3; /* " " */ ++ gdb_regs[_R4] = thread_regs->ARM_r4; ++ gdb_regs[_R5] = thread_regs->ARM_r5; ++ gdb_regs[_R6] = thread_regs->ARM_r6; ++ gdb_regs[_R7] = thread_regs->ARM_r7; ++ gdb_regs[_R8] = thread_regs->ARM_r8; ++ gdb_regs[_R9] = thread_regs->ARM_r9; ++ gdb_regs[_R10] = thread_regs->ARM_r10; ++ gdb_regs[_FP] = thread_regs->ARM_fp; ++ gdb_regs[_IP] = thread_regs->ARM_ip; ++ gdb_regs[_SP] = thread_regs->ARM_sp; ++ gdb_regs[_LR] = thread_regs->ARM_lr; ++ gdb_regs[_PC] = thread_regs->ARM_pc; ++ gdb_regs[_CPSR] = thread_regs->ARM_cpsr; ++} ++ ++static int compiled_break; ++ ++int kgdb_arch_handle_exception(int exception_vector, int signo, ++ int err_code, char *remcom_in_buffer, ++ char *remcom_out_buffer, ++ struct pt_regs *linux_regs) ++{ ++ long addr; ++ char *ptr; ++ ++ switch (remcom_in_buffer[0]) { ++ case 'D': ++ case 'k': ++ case 'c': ++ kgdb_contthread = NULL; ++ ++ /* ++ * Try to read optional parameter, pc unchanged if no parm. ++ * If this was a compiled breakpoint, we need to move ++ * to the next instruction or we will just breakpoint ++ * over and over again. 
++ */
++ ptr = &remcom_in_buffer[1];
++ if (kgdb_hex2long(&ptr, &addr)) {
++ linux_regs->ARM_pc = addr;
++ } else if (compiled_break == 1) {
++ linux_regs->ARM_pc += 4;
++ }
++
++ compiled_break = 0;
++
++ return 0;
++ }
++
++ return -1;
++}
++
++static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++ kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++ return 0;
++}
++
++static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++ compiled_break = 1;
++ kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++ return 0;
++}
++
++static struct undef_hook kgdb_brkpt_hook = {
++ .instr_mask = 0xffffffff,
++ .instr_val = KGDB_BREAKINST,
++ .fn = kgdb_brk_fn
++};
++
++static struct undef_hook kgdb_compiled_brkpt_hook = {
++ .instr_mask = 0xffffffff,
++ .instr_val = KGDB_COMPILED_BREAK,
++ .fn = kgdb_compiled_brk_fn
++};
++
++/*
++ * Register our undef instruction hooks with ARM undef core.
++ * We register a hook specifically looking for the KGDB break instruction
++ * and we handle the normal undef case within the do_undefinstr
++ * handler.
++ */
++int kgdb_arch_init(void)
++{
++ register_undef_hook(&kgdb_brkpt_hook);
++ register_undef_hook(&kgdb_compiled_brkpt_hook);
++
++ return 0;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifndef __ARMEB__
++ .gdb_bpt_instr = {0xfe, 0xde, 0xff, 0xe7}
++#else
++ .gdb_bpt_instr = {0xe7, 0xff, 0xde, 0xfe}
++#endif
++};
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/setup.c linux-2.6.22-591/arch/arm/kernel/setup.c
+--- linux-2.6.22-570/arch/arm/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -832,6 +832,11 @@
+ conswitchp = &dummy_con;
+ #endif
+ #endif
++
++#if defined(CONFIG_KGDB)
++ extern void __init early_trap_init(void);
++ early_trap_init();
++#endif
+ }
+
+
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/traps.c linux-2.6.22-591/arch/arm/kernel/traps.c
+--- linux-2.6.22-570/arch/arm/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/arm/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500
+@@ -301,6 +301,7 @@
+ unsigned int instr;
+ struct undef_hook *hook;
+ siginfo_t info;
++ mm_segment_t fs;
+ void __user *pc;
+ unsigned long flags;
+
+@@ -311,6 +312,8 @@
+ */
+ regs->ARM_pc -= correction;
+
++ fs = get_fs();
++ set_fs(KERNEL_DS);
+ pc = (void __user *)instruction_pointer(regs);
+
+ if (processor_mode(regs) == SVC_MODE) {
+@@ -320,6 +323,7 @@
+ } else {
+ get_user(instr, (u32 __user *)pc);
+ }
++ set_fs(fs);
+
+ spin_lock_irqsave(&undef_lock, flags);
+ list_for_each_entry(hook, &undef_hook, node) {
+@@ -707,6 +711,13 @@
+
+ void __init trap_init(void)
+ {
++#if defined(CONFIG_KGDB)
++ return;
++}
++
++void __init early_trap_init(void)
++{
++#endif
+ unsigned long vectors = CONFIG_VECTORS_BASE;
+ extern char __stubs_start[], __stubs_end[];
+ extern char __vectors_start[], __vectors_end[];
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c
+--- linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/arm/mach-iop13xx/setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #define IOP13XX_UART_XTAL 33334000
+ #define IOP13XX_SETUP_DEBUG 0
+@@ -236,19 +237,143 @@
+ }
+ #endif
+
++/* ADMA Channels */
++static struct resource iop13xx_adma_0_resources[] = {
++ [0] = {
++ .start = IOP13XX_ADMA_PHYS_BASE(0),
++ .end = IOP13XX_ADMA_UPPER_PA(0),
++ .flags =
IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_IOP13XX_ADMA0_EOT, ++ .end = IRQ_IOP13XX_ADMA0_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_IOP13XX_ADMA0_EOC, ++ .end = IRQ_IOP13XX_ADMA0_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_IOP13XX_ADMA0_ERR, ++ .end = IRQ_IOP13XX_ADMA0_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++static struct resource iop13xx_adma_1_resources[] = { ++ [0] = { ++ .start = IOP13XX_ADMA_PHYS_BASE(1), ++ .end = IOP13XX_ADMA_UPPER_PA(1), ++ .flags = IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_IOP13XX_ADMA1_EOT, ++ .end = IRQ_IOP13XX_ADMA1_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_IOP13XX_ADMA1_EOC, ++ .end = IRQ_IOP13XX_ADMA1_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_IOP13XX_ADMA1_ERR, ++ .end = IRQ_IOP13XX_ADMA1_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++static struct resource iop13xx_adma_2_resources[] = { ++ [0] = { ++ .start = IOP13XX_ADMA_PHYS_BASE(2), ++ .end = IOP13XX_ADMA_UPPER_PA(2), ++ .flags = IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_IOP13XX_ADMA2_EOT, ++ .end = IRQ_IOP13XX_ADMA2_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_IOP13XX_ADMA2_EOC, ++ .end = IRQ_IOP13XX_ADMA2_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_IOP13XX_ADMA2_ERR, ++ .end = IRQ_IOP13XX_ADMA2_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK; ++static struct iop_adma_platform_data iop13xx_adma_0_data = { ++ .hw_id = 0, ++ .pool_size = PAGE_SIZE, ++}; ++ ++static struct iop_adma_platform_data iop13xx_adma_1_data = { ++ .hw_id = 1, ++ .pool_size = PAGE_SIZE, ++}; ++ ++static struct iop_adma_platform_data iop13xx_adma_2_data = { ++ .hw_id = 2, ++ .pool_size = PAGE_SIZE, ++}; ++ ++/* The ids are fixed up later in iop13xx_platform_init */ ++static struct platform_device iop13xx_adma_0_channel = { ++ .name = "iop-adma", ++ .id = 0, ++ .num_resources = 4, ++ .resource = iop13xx_adma_0_resources, ++ .dev = { ++ .dma_mask = &iop13xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = (void *) &iop13xx_adma_0_data, ++ }, ++}; ++ ++static struct platform_device iop13xx_adma_1_channel = { ++ .name = "iop-adma", ++ .id = 0, ++ .num_resources = 4, ++ .resource = iop13xx_adma_1_resources, ++ .dev = { ++ .dma_mask = &iop13xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = (void *) &iop13xx_adma_1_data, ++ }, ++}; ++ ++static struct platform_device iop13xx_adma_2_channel = { ++ .name = "iop-adma", ++ .id = 0, ++ .num_resources = 4, ++ .resource = iop13xx_adma_2_resources, ++ .dev = { ++ .dma_mask = &iop13xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = (void *) &iop13xx_adma_2_data, ++ }, ++}; ++ + void __init iop13xx_map_io(void) + { + /* Initialize the Static Page Table maps */ + iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc)); + } + +-static int init_uart = 0; +-static int init_i2c = 0; ++static int init_uart; ++static int init_i2c; ++static int init_adma; + + void __init iop13xx_platform_init(void) + { + int i; +- u32 uart_idx, i2c_idx, plat_idx; ++ u32 uart_idx, i2c_idx, adma_idx, plat_idx; + struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES]; + + /* set the bases so we can read the device id */ +@@ -294,6 +419,12 @@ + } + } + ++ if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) { ++ init_adma |= IOP13XX_INIT_ADMA_0; ++ init_adma |= IOP13XX_INIT_ADMA_1; ++ init_adma |= IOP13XX_INIT_ADMA_2; ++ } ++ + plat_idx = 0; + 
uart_idx = 0; + i2c_idx = 0; +@@ -332,6 +463,56 @@ + } + } + ++ /* initialize adma channel ids and capabilities */ ++ adma_idx = 0; ++ for (i = 0; i < IQ81340_NUM_ADMA; i++) { ++ struct iop_adma_platform_data *plat_data; ++ if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG) ++ printk(KERN_INFO ++ "Adding adma%d to platform device list\n", i); ++ switch (init_adma & (1 << i)) { ++ case IOP13XX_INIT_ADMA_0: ++ iop13xx_adma_0_channel.id = adma_idx++; ++ iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel; ++ plat_data = &iop13xx_adma_0_data; ++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); ++ dma_cap_set(DMA_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); ++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); ++ break; ++ case IOP13XX_INIT_ADMA_1: ++ iop13xx_adma_1_channel.id = adma_idx++; ++ iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel; ++ plat_data = &iop13xx_adma_1_data; ++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); ++ dma_cap_set(DMA_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); ++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); ++ break; ++ case IOP13XX_INIT_ADMA_2: ++ iop13xx_adma_2_channel.id = adma_idx++; ++ iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel; ++ plat_data = &iop13xx_adma_2_data; ++ dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); ++ dma_cap_set(DMA_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMSET, plat_data->cap_mask); ++ dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); ++ dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); ++ dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); ++ dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); ++ dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); ++ break; ++ } ++ } ++ + #ifdef CONFIG_MTD_PHYSMAP + iq8134x_flash_resource.end = iq8134x_flash_resource.start + + iq8134x_probe_flash_size() - 1; +@@ -399,5 +580,35 @@ + return 1; + } + ++static int __init iop13xx_init_adma_setup(char *str) ++{ ++ if (str) { ++ while (*str != '\0') { ++ switch (*str) { ++ case '0': ++ init_adma |= IOP13XX_INIT_ADMA_0; ++ break; ++ case '1': ++ init_adma |= IOP13XX_INIT_ADMA_1; ++ break; ++ case '2': ++ init_adma |= IOP13XX_INIT_ADMA_2; ++ break; ++ case ',': ++ case '=': ++ break; ++ default: ++ PRINTK("\"iop13xx_init_adma\" malformed" ++ " at character: \'%c\'", *str); ++ *(str + 1) = '\0'; ++ init_adma = IOP13XX_INIT_ADMA_DEFAULT; ++ } ++ str++; ++ } ++ } ++ return 1; ++} ++ ++__setup("iop13xx_init_adma", iop13xx_init_adma_setup); + __setup("iop13xx_init_uart", iop13xx_init_uart_setup); + __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup); +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c +--- linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop32x/glantank.c 2007-12-21 15:36:11.000000000 -0500 +@@ -180,6 +180,8 @@ + platform_device_register(&iop3xx_i2c1_device); + platform_device_register(&glantank_flash_device); + platform_device_register(&glantank_serial_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ 
platform_device_register(&iop3xx_dma_1_channel); + + pm_power_off = glantank_power_off; + } +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c +--- linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop32x/iq31244.c 2007-12-21 15:36:11.000000000 -0500 +@@ -298,9 +298,14 @@ + platform_device_register(&iop3xx_i2c1_device); + platform_device_register(&iq31244_flash_device); + platform_device_register(&iq31244_serial_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ platform_device_register(&iop3xx_dma_1_channel); + + if (is_ep80219()) + pm_power_off = ep80219_power_off; ++ ++ if (!is_80219()) ++ platform_device_register(&iop3xx_aau_channel); + } + + static int __init force_ep80219_setup(char *str) +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c +--- linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop32x/iq80321.c 2007-12-21 15:36:11.000000000 -0500 +@@ -181,6 +181,9 @@ + platform_device_register(&iop3xx_i2c1_device); + platform_device_register(&iq80321_flash_device); + platform_device_register(&iq80321_serial_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ platform_device_register(&iop3xx_dma_1_channel); ++ platform_device_register(&iop3xx_aau_channel); + } + + MACHINE_START(IQ80321, "Intel IQ80321") +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c +--- linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop32x/n2100.c 2007-12-21 15:36:11.000000000 -0500 +@@ -245,6 +245,8 @@ + platform_device_register(&iop3xx_i2c0_device); + platform_device_register(&n2100_flash_device); + platform_device_register(&n2100_serial_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ platform_device_register(&iop3xx_dma_1_channel); + + pm_power_off = n2100_power_off; + +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c +--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80331.c 2007-12-21 15:36:11.000000000 -0500 +@@ -136,6 +136,9 @@ + platform_device_register(&iop33x_uart0_device); + platform_device_register(&iop33x_uart1_device); + platform_device_register(&iq80331_flash_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ platform_device_register(&iop3xx_dma_1_channel); ++ platform_device_register(&iop3xx_aau_channel); + } + + MACHINE_START(IQ80331, "Intel IQ80331") +diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c +--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-iop33x/iq80332.c 2007-12-21 15:36:11.000000000 -0500 +@@ -136,6 +136,9 @@ + platform_device_register(&iop33x_uart0_device); + platform_device_register(&iop33x_uart1_device); + platform_device_register(&iq80332_flash_device); ++ platform_device_register(&iop3xx_dma_0_channel); ++ platform_device_register(&iop3xx_dma_1_channel); ++ platform_device_register(&iop3xx_aau_channel); + } + + MACHINE_START(IQ80332, "Intel IQ80332") +diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/core.c linux-2.6.22-591/arch/arm/mach-ixp2000/core.c +--- 
linux-2.6.22-570/arch/arm/mach-ixp2000/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-ixp2000/core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -184,6 +185,9 @@ + void __init ixp2000_uart_init(void) + { + platform_device_register(&ixp2000_serial_device); ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(0, &ixp2000_serial_port); ++#endif + } + + +diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c +--- linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-ixp2000/ixdp2x01.c 2007-12-21 15:36:11.000000000 -0500 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -413,6 +414,11 @@ + platform_add_devices(ixdp2x01_devices, ARRAY_SIZE(ixdp2x01_devices)); + ixp2000_uart_init(); + ixdp2x01_uart_init(); ++ ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(0, ixdp2x01_serial_port1); ++ kgdb8250_add_platform_port(1, ixdp2x01_serial_port1); ++#endif + } + + +diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c +--- linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-ixp4xx/coyote-setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -96,6 +96,10 @@ + } + + platform_add_devices(coyote_devices, ARRAY_SIZE(coyote_devices)); ++ ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(0, &coyote_uart_data); ++#endif + } + + #ifdef CONFIG_ARCH_ADI_COYOTE +diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c +--- linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-ixp4xx/ixdp425-setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -76,7 +76,8 @@ + .mapbase = IXP4XX_UART1_BASE_PHYS, + .membase = (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET, + .irq = IRQ_IXP4XX_UART1, +- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, ++ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | ++ UPF_SHARE_IRQ, + .iotype = UPIO_MEM, + .regshift = 2, + .uartclk = IXP4XX_UART_XTAL, +@@ -85,7 +86,8 @@ + .mapbase = IXP4XX_UART2_BASE_PHYS, + .membase = (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET, + .irq = IRQ_IXP4XX_UART2, +- .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, ++ .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST | ++ UPF_SHARE_IRQ, + .iotype = UPIO_MEM, + .regshift = 2, + .uartclk = IXP4XX_UART_XTAL, +@@ -123,12 +125,22 @@ + platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices)); + } + ++static void __init ixdp425_map_io(void) ++{ ++ ixp4xx_map_io(); ++ ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(0, &ixdp425_uart_data[0]); ++ kgdb8250_add_platform_port(1, &ixdp425_uart_data[1]); ++#endif ++} ++ + #ifdef CONFIG_ARCH_IXDP425 + MACHINE_START(IXDP425, "Intel IXDP425 Development Platform") + /* Maintainer: MontaVista Software, Inc. */ + .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS, + .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc, +- .map_io = ixp4xx_map_io, ++ .map_io = ixdp425_map_io, + .init_irq = ixp4xx_init_irq, + .timer = &ixp4xx_timer, + .boot_params = 0x0100, +@@ -141,7 +153,7 @@ + /* Maintainer: MontaVista Software, Inc. 
*/ + .phys_io = IXP4XX_PERIPHERAL_BASE_PHYS, + .io_pg_offst = ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc, +- .map_io = ixp4xx_map_io, ++ .map_io = ixdp425_map_io, + .init_irq = ixp4xx_init_irq, + .timer = &ixp4xx_timer, + .boot_params = 0x0100, +diff -Nurb linux-2.6.22-570/arch/arm/mach-omap1/serial.c linux-2.6.22-591/arch/arm/mach-omap1/serial.c +--- linux-2.6.22-570/arch/arm/mach-omap1/serial.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-omap1/serial.c 2007-12-21 15:36:11.000000000 -0500 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -199,6 +200,9 @@ + break; + } + omap_serial_reset(&serial_platform_data[i]); ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(i, &serial_platform_data[i]); ++#endif + } + } + +diff -Nurb linux-2.6.22-570/arch/arm/mach-pnx4008/core.c linux-2.6.22-591/arch/arm/mach-pnx4008/core.c +--- linux-2.6.22-570/arch/arm/mach-pnx4008/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-pnx4008/core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -224,6 +224,10 @@ + spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info)); + /* Switch on the UART clocks */ + pnx4008_uart_init(); ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(0, &platform_serial_ports[0]); ++ kgdb8250_add_platform_port(1, &platform_serial_ports[1]); ++#endif + } + + static struct map_desc pnx4008_io_desc[] __initdata = { +diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/Makefile linux-2.6.22-591/arch/arm/mach-pxa/Makefile +--- linux-2.6.22-570/arch/arm/mach-pxa/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-pxa/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -31,6 +31,7 @@ + # Misc features + obj-$(CONFIG_PM) += pm.o sleep.o + obj-$(CONFIG_PXA_SSP) += ssp.o ++obj-$(CONFIG_KGDB_PXA_SERIAL) += kgdb-serial.o + + ifeq ($(CONFIG_PXA27x),y) + obj-$(CONFIG_PM) += standby.o +diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c +--- linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/arm/mach-pxa/kgdb-serial.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,97 @@ ++/* ++ * linux/arch/arm/mach-pxa/kgdb-serial.c ++ * ++ * Provides low level kgdb serial support hooks for PXA2xx boards ++ * ++ * Author: Nicolas Pitre ++ * Copyright: (C) 2002-2005 MontaVista Software Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#if defined(CONFIG_KGDB_PXA_FFUART) ++ ++#define UART FFUART ++#define CKEN_UART CKEN6_FFUART ++#define GPIO_RX_MD GPIO34_FFRXD_MD ++#define GPIO_TX_MD GPIO39_FFTXD_MD ++ ++#elif defined(CONFIG_KGDB_PXA_BTUART) ++ ++#define UART BTUART ++#define CKEN_UART CKEN7_BTUART ++#define GPIO_RX_MD GPIO42_BTRXD_MD ++#define GPIO_TX_MD GPIO43_BTTXD_MD ++ ++#elif defined(CONFIG_KGDB_PXA_STUART) ++ ++#define UART STUART ++#define CKEN_UART CKEN5_STUART ++#define GPIO_RX_MD GPIO46_STRXD_MD ++#define GPIO_TX_MD GPIO47_STTXD_MD ++ ++#endif ++ ++#define UART_BAUDRATE (CONFIG_KGDB_BAUDRATE) ++ ++static volatile unsigned long *port = (unsigned long *)&UART; ++ ++static int kgdb_serial_init(void) ++{ ++ pxa_set_cken(CKEN_UART, 1); ++ pxa_gpio_mode(GPIO_RX_MD); ++ pxa_gpio_mode(GPIO_TX_MD); ++ ++ port[UART_IER] = 0; ++ port[UART_LCR] = LCR_DLAB; ++ port[UART_DLL] = ((921600 / UART_BAUDRATE) & 0xff); ++ port[UART_DLM] = ((921600 / UART_BAUDRATE) >> 8); ++ port[UART_LCR] = LCR_WLS1 | LCR_WLS0; ++ port[UART_MCR] = 0; ++ port[UART_IER] = IER_UUE; ++ port[UART_FCR] = FCR_ITL_16; ++ ++ return 0; ++} ++ ++static void kgdb_serial_putchar(u8 c) ++{ ++ if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE) ++ kgdb_serial_init(); ++ while (!(port[UART_LSR] & LSR_TDRQ)) ++ cpu_relax(); ++ port[UART_TX] = c; ++} ++ ++static void kgdb_serial_flush(void) ++{ ++ if ((CKEN & CKEN_UART) && (port[UART_IER] & IER_UUE)) ++ while (!(port[UART_LSR] & LSR_TEMT)) ++ cpu_relax(); ++} ++ ++static int kgdb_serial_getchar(void) ++{ ++ unsigned char c; ++ if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE) ++ kgdb_serial_init(); ++ while (!(port[UART_LSR] & UART_LSR_DR)) ++ cpu_relax(); ++ c = port[UART_RX]; ++ return c; ++} ++ ++struct kgdb_io kgdb_io_ops = { ++ .init = kgdb_serial_init, ++ .write_char = kgdb_serial_putchar, ++ .flush = kgdb_serial_flush, ++ .read_char = kgdb_serial_getchar, ++}; +diff -Nurb linux-2.6.22-570/arch/arm/mach-versatile/core.c linux-2.6.22-591/arch/arm/mach-versatile/core.c +--- linux-2.6.22-570/arch/arm/mach-versatile/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mach-versatile/core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -184,6 +184,14 @@ + .type = MT_DEVICE + }, + #endif ++#ifdef CONFIG_KGDB_AMBA_PL011 ++ { ++ .virtual = IO_ADDRESS(CONFIG_KGDB_AMBA_BASE), ++ .pfn = __phys_to_pfn(CONFIG_KGDB_AMBA_BASE), ++ .length = SZ_4K, ++ .type = MT_DEVICE ++ }, ++#endif + #ifdef CONFIG_PCI + { + .virtual = IO_ADDRESS(VERSATILE_PCI_CORE_BASE), +diff -Nurb linux-2.6.22-570/arch/arm/mm/extable.c linux-2.6.22-591/arch/arm/mm/extable.c +--- linux-2.6.22-570/arch/arm/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 +@@ -2,6 +2,7 @@ + * linux/arch/arm/mm/extable.c + */ + #include ++#include + #include + + int fixup_exception(struct pt_regs *regs) +@@ -11,6 +12,12 @@ + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) + regs->ARM_pc = fixup->fixup; ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) ++ /* Restore our previous state. */ ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Not reached. 
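++ * kgdb_fault_longjmp() unwinds straight back into the matching
++ * kgdb_fault_setjmp() call in the KGDB core, which then appears to
++ * return 1 instead of 0. A sketch of the intended calling pattern,
++ * illustrative only (probe_kernel_byte is a made-up helper name):
++ *
++ * kgdb_may_fault = 1;
++ * if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) == 0)
++ * val = probe_kernel_byte(addr); -- may fault and longjmp back here
++ * else
++ * err = -EFAULT; -- the fault was caught and the access aborted
++ * kgdb_may_fault = 0;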
*/ ++#endif + + return fixup != NULL; + } +diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/Makefile linux-2.6.22-591/arch/arm/plat-iop/Makefile +--- linux-2.6.22-570/arch/arm/plat-iop/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/arm/plat-iop/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -12,6 +12,7 @@ + obj-$(CONFIG_ARCH_IOP32X) += time.o + obj-$(CONFIG_ARCH_IOP32X) += io.o + obj-$(CONFIG_ARCH_IOP32X) += cp6.o ++obj-$(CONFIG_ARCH_IOP32X) += adma.o + + # IOP33X + obj-$(CONFIG_ARCH_IOP33X) += gpio.o +@@ -21,6 +22,7 @@ + obj-$(CONFIG_ARCH_IOP33X) += time.o + obj-$(CONFIG_ARCH_IOP33X) += io.o + obj-$(CONFIG_ARCH_IOP33X) += cp6.o ++obj-$(CONFIG_ARCH_IOP33X) += adma.o + + # IOP13XX + obj-$(CONFIG_ARCH_IOP13XX) += cp6.o +diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/adma.c linux-2.6.22-591/arch/arm/plat-iop/adma.c +--- linux-2.6.22-570/arch/arm/plat-iop/adma.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/arm/plat-iop/adma.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,209 @@ ++/* ++ * platform device definitions for the iop3xx dma/xor engines ++ * Copyright © 2006, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_ARCH_IOP32X ++#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT ++#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC ++#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR ++ ++#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT ++#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC ++#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR ++ ++#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT ++#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC ++#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR ++#endif ++#ifdef CONFIG_ARCH_IOP33X ++#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT ++#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC ++#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR ++ ++#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT ++#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC ++#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR ++ ++#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT ++#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC ++#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR ++#endif ++/* AAU and DMA Channels */ ++static struct resource iop3xx_dma_0_resources[] = { ++ [0] = { ++ .start = IOP3XX_DMA_PHYS_BASE(0), ++ .end = IOP3XX_DMA_UPPER_PA(0), ++ .flags = IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_DMA0_EOT, ++ .end = IRQ_DMA0_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_DMA0_EOC, ++ .end = IRQ_DMA0_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_DMA0_ERR, ++ .end = IRQ_DMA0_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++static struct resource iop3xx_dma_1_resources[] = { ++ [0] = { ++ .start = IOP3XX_DMA_PHYS_BASE(1), ++ .end = IOP3XX_DMA_UPPER_PA(1), ++ .flags = IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_DMA1_EOT, ++ .end = IRQ_DMA1_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_DMA1_EOC, ++ .end = IRQ_DMA1_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_DMA1_ERR, ++ .end = IRQ_DMA1_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++ ++static struct resource iop3xx_aau_resources[] = { ++ [0] = { ++ .start = IOP3XX_AAU_PHYS_BASE, ++ .end = IOP3XX_AAU_UPPER_PA, ++ .flags = IORESOURCE_MEM, ++ }, ++ [1] = { ++ .start = IRQ_AA_EOT, ++ .end = IRQ_AA_EOT, ++ .flags = IORESOURCE_IRQ ++ }, ++ [2] = { ++ .start = IRQ_AA_EOC, ++ .end = IRQ_AA_EOC, ++ .flags = IORESOURCE_IRQ ++ }, ++ [3] = { ++ .start = IRQ_AA_ERR, ++ .end = IRQ_AA_ERR, ++ .flags = IORESOURCE_IRQ ++ } ++}; ++ ++static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK; ++ ++static struct iop_adma_platform_data iop3xx_dma_0_data = { ++ .hw_id = DMA0_ID, ++ .pool_size = PAGE_SIZE, ++}; ++ ++static struct iop_adma_platform_data iop3xx_dma_1_data = { ++ .hw_id = DMA1_ID, ++ .pool_size = PAGE_SIZE, ++}; ++ ++static struct iop_adma_platform_data iop3xx_aau_data = { ++ .hw_id = AAU_ID, ++ .pool_size = 3 * PAGE_SIZE, ++}; ++ ++struct platform_device iop3xx_dma_0_channel = { ++ .name = "iop-adma", ++ .id = 0, ++ .num_resources = 4, ++ .resource = iop3xx_dma_0_resources, ++ .dev = { ++ .dma_mask = &iop3xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = (void *) &iop3xx_dma_0_data, ++ }, ++}; ++ ++struct platform_device iop3xx_dma_1_channel = { ++ .name = "iop-adma", ++ .id = 1, ++ .num_resources = 4, ++ .resource = iop3xx_dma_1_resources, ++ .dev = { ++ .dma_mask = &iop3xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = (void *) &iop3xx_dma_1_data, ++ }, ++}; ++ ++struct platform_device iop3xx_aau_channel = { ++ .name = "iop-adma", ++ .id = 2, ++ .num_resources = 4, ++ .resource = iop3xx_aau_resources, ++ .dev = { ++ .dma_mask = &iop3xx_adma_dmamask, ++ .coherent_dma_mask = DMA_64BIT_MASK, ++ .platform_data = 
(void *) &iop3xx_aau_data, ++ }, ++}; ++ ++static int __init iop3xx_adma_cap_init(void) ++{ ++ #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ ++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); ++ #else ++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); ++ dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); ++ #endif ++ ++ #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ ++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); ++ #else ++ dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); ++ dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); ++ #endif ++ ++ #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */ ++ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); ++ dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); ++ #else ++ dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); ++ dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); ++ dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); ++ dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); ++ #endif ++ ++ return 0; ++} ++ ++arch_initcall(iop3xx_adma_cap_init); +diff -Nurb linux-2.6.22-570/arch/i386/Kconfig linux-2.6.22-591/arch/i386/Kconfig +--- linux-2.6.22-570/arch/i386/Kconfig 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -1053,6 +1053,8 @@ + + source "arch/i386/kernel/cpu/cpufreq/Kconfig" + ++source "drivers/cpuidle/Kconfig" ++ + endmenu + + menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" +diff -Nurb linux-2.6.22-570/arch/i386/kernel/Makefile linux-2.6.22-591/arch/i386/kernel/Makefile +--- linux-2.6.22-570/arch/i386/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -39,6 +39,7 @@ + obj-$(CONFIG_EARLY_PRINTK) += early_printk.o + obj-$(CONFIG_HPET_TIMER) += hpet.o + obj-$(CONFIG_K8_NB) += k8.o ++obj-$(CONFIG_STACK_UNWIND) += unwind.o + + obj-$(CONFIG_VMI) += vmi.o vmiclock.o + obj-$(CONFIG_PARAVIRT) += paravirt.o +diff -Nurb linux-2.6.22-570/arch/i386/kernel/acpi/boot.c linux-2.6.22-591/arch/i386/kernel/acpi/boot.c +--- linux-2.6.22-570/arch/i386/kernel/acpi/boot.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/acpi/boot.c 2007-12-21 15:36:11.000000000 -0500 +@@ -950,14 +950,6 @@ + }, + { + .callback = force_acpi_ht, +- .ident = "DELL GX240", +- .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"), +- DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"), +- }, +- }, +- { +- .callback = force_acpi_ht, + .ident = "HP VISUALIZE NT Workstation", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), +diff -Nurb linux-2.6.22-570/arch/i386/kernel/apm.c linux-2.6.22-591/arch/i386/kernel/apm.c +--- linux-2.6.22-570/arch/i386/kernel/apm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/apm.c 2007-12-21 15:36:11.000000000 -0500 +@@ -222,6 +222,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2311,7 +2312,6 @@ + remove_proc_entry("apm", NULL); + return err; + } +- kapmd_task->flags |= PF_NOFREEZE; + wake_up_process(kapmd_task); + + if (num_online_cpus() > 1 && !smp ) { +diff -Nurb 
linux-2.6.22-570/arch/i386/kernel/io_apic.c linux-2.6.22-591/arch/i386/kernel/io_apic.c +--- linux-2.6.22-570/arch/i386/kernel/io_apic.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/io_apic.c 2007-12-21 15:36:11.000000000 -0500 +@@ -667,6 +667,7 @@ + set_pending_irq(i, cpumask_of_cpu(0)); + } + ++ set_freezable(); + for ( ; ; ) { + time_remaining = schedule_timeout_interruptible(time_remaining); + try_to_freeze(); +diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S +--- linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,74 @@ ++/* ++ * arch/i386/kernel/kgdb-jmp.S ++ * ++ * Save and restore system registers so that within a limited frame we ++ * may have a fault and "jump back" to a known safe location. ++ * ++ * Author: George Anzinger ++ * ++ * Cribbed from glibc, which carries the following: ++ * Copyright (C) 1996, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. ++ * Copyright (C) 2005 by MontaVista Software. ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of ++ * any kind, whether express or implied. ++ */ ++ ++#include ++ ++#define PCOFF 0 ++#define LINKAGE 4 /* just the return address */ ++#define PTR_SIZE 4 ++#define PARMS LINKAGE /* no space for saved regs */ ++#define JMPBUF PARMS ++#define VAL JMPBUF+PTR_SIZE ++ ++#define JB_BX 0 ++#define JB_SI 1 ++#define JB_DI 2 ++#define JB_BP 3 ++#define JB_SP 4 ++#define JB_PC 5 ++ ++/* This must be called prior to kgdb_fault_longjmp and ++ * kgdb_fault_longjmp must not be called outside of the context of the ++ * last call to kgdb_fault_setjmp. ++ * kgdb_fault_setjmp(int *jmp_buf[6]) ++ */ ++ENTRY(kgdb_fault_setjmp) ++ movl JMPBUF(%esp), %eax ++ ++ /* Save registers. */ ++ movl %ebx, (JB_BX*4)(%eax) ++ movl %esi, (JB_SI*4)(%eax) ++ movl %edi, (JB_DI*4)(%eax) ++ /* Save SP as it will be after we return. */ ++ leal JMPBUF(%esp), %ecx ++ movl %ecx, (JB_SP*4)(%eax) ++ movl PCOFF(%esp), %ecx /* Save PC we are returning to now. */ ++ movl %ecx, (JB_PC*4)(%eax) ++ movl %ebp, (JB_BP*4)(%eax) /* Save caller's frame pointer. */ ++ ++ /* Restore state so we can now try the access. */ ++ movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */ ++ /* Save the return address now. */ ++ movl (JB_PC*4)(%ecx), %edx ++ /* Restore registers. */ ++ movl $0, %eax ++ movl (JB_SP*4)(%ecx), %esp ++ jmp *%edx /* Jump to saved PC. */ ++ ++/* kgdb_fault_longjmp(int *jmp_buf[6]) */ ++ENTRY(kgdb_fault_longjmp) ++ movl JMPBUF(%esp), %ecx /* User's jmp_buf in %ecx. */ ++ /* Save the return address now. */ ++ movl (JB_PC*4)(%ecx), %edx ++ /* Restore registers. */ ++ movl (JB_BX*4)(%ecx), %ebx ++ movl (JB_SI*4)(%ecx), %esi ++ movl (JB_DI*4)(%ecx), %edi ++ movl (JB_BP*4)(%ecx), %ebp ++ movl $1, %eax ++ movl (JB_SP*4)(%ecx), %esp ++ jmp *%edx /* Jump to saved PC. 
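++ (At this point %eax holds 1, so control resurfaces at the original kgdb_fault_setjmp() call site, which now appears to return 1 and lets the caller abort the guarded access.)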
*/
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb.c linux-2.6.22-591/arch/i386/kernel/kgdb.c
+--- linux-2.6.22-570/arch/i386/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,388 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2007 Wind River Systems, Inc.
++ */
++/*
++ * Contributor: Lake Stevens Instrument Division$
++ * Written by: Glenn Engel $
++ * Updated by: Amit Kale
++ * Updated by: Tom Rini
++ * Updated by: Jason Wessel
++ * Modified for 386 by Jim Kingdon, Cygnus Support.
++ * Original kgdb, compatibility with 2.1.xx kernel by
++ * David Grothe
++ * Additional support from Tigran Aivazian
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include /* for linux pt_regs struct */
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "mach_ipi.h"
++
++/* Put the error code here just in case the user cares. */
++int gdb_i386errcode;
++/* Likewise, the vector number here (since GDB only gets the signal
++ number through the usual means, and that's not very specific). */
++int gdb_i386vector = -1;
++
++extern atomic_t cpu_doing_single_step;
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++ gdb_regs[_EAX] = regs->eax;
++ gdb_regs[_EBX] = regs->ebx;
++ gdb_regs[_ECX] = regs->ecx;
++ gdb_regs[_EDX] = regs->edx;
++ gdb_regs[_ESI] = regs->esi;
++ gdb_regs[_EDI] = regs->edi;
++ gdb_regs[_EBP] = regs->ebp;
++ gdb_regs[_DS] = regs->xds;
++ gdb_regs[_ES] = regs->xes;
++ gdb_regs[_PS] = regs->eflags;
++ gdb_regs[_CS] = regs->xcs;
++ gdb_regs[_PC] = regs->eip;
++ gdb_regs[_ESP] = (int)(&regs->esp);
++ gdb_regs[_SS] = __KERNEL_DS;
++ gdb_regs[_FS] = 0xFFFF;
++ gdb_regs[_GS] = 0xFFFF;
++}
++
++/*
++ * Extracts ebp, esp and eip values understandable by gdb from the values
++ * saved by switch_to.
++ * thread.esp points to ebp. flags and ebp are pushed in switch_to hence esp
++ * prior to entering switch_to is 8 greater than the value that is saved.
++ * If switch_to changes, change following code appropriately.
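++ * Concretely, the code below reports ebp = *(unsigned long *)p->thread.esp,
++ * eip = p->thread.eip and esp = p->thread.esp; by the rule above, the esp
++ * that was live before entering switch_to would be p->thread.esp + 8.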
++ */ ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) ++{ ++ gdb_regs[_EAX] = 0; ++ gdb_regs[_EBX] = 0; ++ gdb_regs[_ECX] = 0; ++ gdb_regs[_EDX] = 0; ++ gdb_regs[_ESI] = 0; ++ gdb_regs[_EDI] = 0; ++ gdb_regs[_EBP] = *(unsigned long *)p->thread.esp; ++ gdb_regs[_DS] = __KERNEL_DS; ++ gdb_regs[_ES] = __KERNEL_DS; ++ gdb_regs[_PS] = 0; ++ gdb_regs[_CS] = __KERNEL_CS; ++ gdb_regs[_PC] = p->thread.eip; ++ gdb_regs[_ESP] = p->thread.esp; ++ gdb_regs[_SS] = __KERNEL_DS; ++ gdb_regs[_FS] = 0xFFFF; ++ gdb_regs[_GS] = 0xFFFF; ++} ++ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ regs->eax = gdb_regs[_EAX]; ++ regs->ebx = gdb_regs[_EBX]; ++ regs->ecx = gdb_regs[_ECX]; ++ regs->edx = gdb_regs[_EDX]; ++ regs->esi = gdb_regs[_ESI]; ++ regs->edi = gdb_regs[_EDI]; ++ regs->ebp = gdb_regs[_EBP]; ++ regs->xds = gdb_regs[_DS]; ++ regs->xes = gdb_regs[_ES]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->xcs = gdb_regs[_CS]; ++ regs->eip = gdb_regs[_PC]; ++} ++ ++static struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned addr; ++} breakinfo[4] = { ++ { .enabled = 0 }, ++ { .enabled = 0 }, ++ { .enabled = 0 }, ++ { .enabled = 0 }, ++}; ++ ++static void kgdb_correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned long dr7; ++ ++ get_debugreg(dr7, 7); ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ set_debugreg(breakinfo[breakno].addr, 0); ++ break; ++ ++ case 1: ++ set_debugreg(breakinfo[breakno].addr, 1); ++ break; ++ ++ case 2: ++ set_debugreg(breakinfo[breakno].addr, 2); ++ break; ++ ++ case 3: ++ set_debugreg(breakinfo[breakno].addr, 3); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) ++ set_debugreg(dr7, 7); ++} ++ ++static int kgdb_remove_hw_break(unsigned long addr, int len, ++ enum kgdb_bptype bptype) ++{ ++ int i, idx = -1; ++ for (i = 0; i < 4; i++) { ++ if (breakinfo[i].addr == addr && breakinfo[i].enabled) { ++ idx = i; ++ break; ++ } ++ } ++ if (idx == -1) ++ return -1; ++ ++ breakinfo[idx].enabled = 0; ++ return 0; ++} ++ ++static void kgdb_remove_all_hw_break(void) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) { ++ memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); ++ } ++} ++ ++static int kgdb_set_hw_break(unsigned long addr, int len, ++ enum kgdb_bptype bptype) ++{ ++ int i, idx = -1; ++ for (i = 0; i < 4; i++) { ++ if (!breakinfo[i].enabled) { ++ idx = i; ++ break; ++ } ++ } ++ if (idx == -1) ++ return -1; ++ if (bptype == bp_hardware_breakpoint) { ++ breakinfo[idx].type = 0; ++ breakinfo[idx].len = 0; ++ } else if (bptype == bp_write_watchpoint) { ++ breakinfo[idx].type = 1; ++ if (len == 1 || len == 2 || len == 4) ++ breakinfo[idx].len = len - 1; ++ else ++ return -1; ++ } else if (bptype == bp_access_watchpoint) { ++ breakinfo[idx].type = 3; ++ if (len == 1 || len == 2 || len == 4) ++ breakinfo[idx].len = len - 1; ++ else ++ return -1; ++ } else ++ return -1; ++ breakinfo[idx].enabled = 1; ++ breakinfo[idx].addr = addr; ++ return 0; ++} ++ ++void kgdb_disable_hw_debug(struct pt_regs *regs) ++{ ++ /* 
Disable hardware debugging while we are in kgdb */ ++ set_debugreg(0, 7); ++} ++ ++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code) ++{ ++ /* Master processor is completely in the debugger */ ++ gdb_i386vector = e_vector; ++ gdb_i386errcode = err_code; ++} ++ ++#ifdef CONFIG_SMP ++void kgdb_roundup_cpus(unsigned long flags) ++{ ++ send_IPI_allbutself(APIC_DM_NMI); ++} ++#endif ++ ++int kgdb_arch_handle_exception(int e_vector, int signo, ++ int err_code, char *remcom_in_buffer, ++ char *remcom_out_buffer, ++ struct pt_regs *linux_regs) ++{ ++ long addr; ++ char *ptr; ++ int newPC, dr6; ++ ++ switch (remcom_in_buffer[0]) { ++ case 'c': ++ case 's': ++ /* try to read optional parameter, pc unchanged if no parm */ ++ ptr = &remcom_in_buffer[1]; ++ if (kgdb_hex2long(&ptr, &addr)) ++ linux_regs->eip = addr; ++ newPC = linux_regs->eip; ++ ++ /* clear the trace bit */ ++ linux_regs->eflags &= ~TF_MASK; ++ atomic_set(&cpu_doing_single_step, -1); ++ ++ /* set the trace bit if we're stepping */ ++ if (remcom_in_buffer[0] == 's') { ++ linux_regs->eflags |= TF_MASK; ++ debugger_step = 1; ++ atomic_set(&cpu_doing_single_step,raw_smp_processor_id()); ++ } ++ ++ get_debugreg(dr6, 6); ++ if (!(dr6 & 0x4000)) { ++ long breakno; ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno) && ++ breakinfo[breakno].type == 0) { ++ /* Set restore flag */ ++ linux_regs->eflags |= X86_EFLAGS_RF; ++ break; ++ } ++ } ++ } ++ set_debugreg(0, 6); ++ kgdb_correct_hw_break(); ++ ++ return (0); ++ } /* switch */ ++ /* this means that we do not want to exit from the handler */ ++ return -1; ++} ++ ++/* Register KGDB with the i386die_chain so that we hook into all of the right ++ * spots. */ ++static int kgdb_notify(struct notifier_block *self, unsigned long cmd, ++ void *ptr) ++{ ++ struct die_args *args = ptr; ++ struct pt_regs *regs = args->regs; ++ ++ /* Bad memory access? */ ++ if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active) ++ && kgdb_may_fault) { ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ return NOTIFY_STOP; ++ } else if (cmd == DIE_PAGE_FAULT) ++ /* A normal page fault, ignore. */ ++ return NOTIFY_DONE; ++ else if ((cmd == DIE_NMI || cmd == DIE_NMI_IPI || ++ cmd == DIE_NMIWATCHDOG) && atomic_read(&debugger_active)) { ++ /* CPU roundup */ ++ kgdb_nmihook(raw_smp_processor_id(), regs); ++ return NOTIFY_STOP; ++ } else if (cmd == DIE_DEBUG ++ && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id() ++ && user_mode(regs)) { ++ /* single step exception from kernel space to user space so ++ * eat the exception and continue the process ++ */ ++ printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n"); ++ kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs); ++ return NOTIFY_STOP; ++ } else if (cmd == DIE_NMI_IPI || cmd == DIE_NMI || user_mode(regs) || ++ (cmd == DIE_DEBUG && atomic_read(&debugger_active))) ++ /* Normal watchdog event or userspace debugging, or spurious ++ * debug exception, ignore. */ ++ return NOTIFY_DONE; ++ ++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); ++ ++ return NOTIFY_STOP; ++} ++ ++static struct notifier_block kgdb_notifier = { ++ .notifier_call = kgdb_notify, ++}; ++ ++int kgdb_arch_init(void) ++{ ++ register_die_notifier(&kgdb_notifier); ++ return 0; ++} ++ ++/* ++ * Skip an int3 exception when it occurs after a breakpoint has been ++ * removed. Backtrack eip by 1 since the int3 would have caused it to ++ * increment by 1. 
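++ * (int3 is the one-byte 0xcc opcode -- the same byte KGDB plants via
++ * arch_kgdb_ops.gdb_bpt_instr below -- so after the trap eip points one
++ * byte past the now-removed breakpoint.)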
++ */ ++ ++int kgdb_skipexception(int exception, struct pt_regs *regs) ++{ ++ if (exception == 3 && kgdb_isremovedbreak(regs->eip - 1)) { ++ regs->eip -= 1; ++ return 1; ++ } ++ return 0; ++} ++ ++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) ++{ ++ if (exception == 3) { ++ return instruction_pointer(regs) - 1; ++ } ++ return instruction_pointer(regs); ++} ++ ++struct kgdb_arch arch_kgdb_ops = { ++ .gdb_bpt_instr = {0xcc}, ++ .flags = KGDB_HW_BREAKPOINT, ++ .set_hw_breakpoint = kgdb_set_hw_break, ++ .remove_hw_breakpoint = kgdb_remove_hw_break, ++ .remove_all_hw_break = kgdb_remove_all_hw_break, ++ .correct_hw_break = kgdb_correct_hw_break, ++}; +diff -Nurb linux-2.6.22-570/arch/i386/kernel/process.c linux-2.6.22-591/arch/i386/kernel/process.c +--- linux-2.6.22-570/arch/i386/kernel/process.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/process.c 2007-12-21 15:36:11.000000000 -0500 +@@ -179,13 +179,13 @@ + + /* endless idle loop with no priority at all */ + while (1) { +- tick_nohz_stop_sched_tick(); + while (!need_resched()) { + void (*idle)(void); + + if (__get_cpu_var(cpu_idle_state)) + __get_cpu_var(cpu_idle_state) = 0; + ++ tick_nohz_stop_sched_tick(); + check_pgt_cache(); + rmb(); + idle = pm_idle; +diff -Nurb linux-2.6.22-570/arch/i386/kernel/setup.c linux-2.6.22-591/arch/i386/kernel/setup.c +--- linux-2.6.22-570/arch/i386/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -124,6 +124,7 @@ + #endif + + extern void early_cpu_init(void); ++extern void early_trap_init(void); + extern int root_mountflags; + + unsigned long saved_videomode; +@@ -514,6 +515,7 @@ + memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); + pre_setup_arch_hook(); + early_cpu_init(); ++ early_trap_init(); + + /* + * FIXME: This isn't an official loader_type right +diff -Nurb linux-2.6.22-570/arch/i386/kernel/signal.c linux-2.6.22-591/arch/i386/kernel/signal.c +--- linux-2.6.22-570/arch/i386/kernel/signal.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/i386/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 +@@ -199,6 +199,13 @@ + return eax; + + badframe: ++ if (show_unhandled_signals && printk_ratelimit()) ++ printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx" ++ " esp:%lx oeax:%lx\n", ++ current->pid > 1 ? 
KERN_INFO : KERN_EMERG, ++ current->comm, current->pid, frame, regs->eip, ++ regs->esp, regs->orig_eax); ++ + force_sig(SIGSEGV, current); + return 0; + } +diff -Nurb linux-2.6.22-570/arch/i386/kernel/syscall_table.S linux-2.6.22-591/arch/i386/kernel/syscall_table.S +--- linux-2.6.22-570/arch/i386/kernel/syscall_table.S 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/syscall_table.S 2007-12-21 15:36:11.000000000 -0500 +@@ -323,3 +323,6 @@ + .long sys_signalfd + .long sys_timerfd + .long sys_eventfd ++ .long sys_revokeat ++ .long sys_frevoke /* 325 */ ++ .long sys_fallocate +diff -Nurb linux-2.6.22-570/arch/i386/kernel/traps.c linux-2.6.22-591/arch/i386/kernel/traps.c +--- linux-2.6.22-570/arch/i386/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/i386/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 +@@ -97,6 +97,11 @@ + + int kstack_depth_to_print = 24; + static unsigned int code_bytes = 64; ++#ifdef CONFIG_STACK_UNWIND ++static int call_trace = 1; ++#else ++#define call_trace (-1) ++#endif + + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) + { +@@ -136,6 +141,34 @@ + return ebp; + } + ++struct ops_and_data { ++ struct stacktrace_ops *ops; ++ void *data; ++}; ++ ++static asmlinkage int ++dump_trace_unwind(struct unwind_frame_info *info, void *data) ++{ ++ struct ops_and_data *oad = (struct ops_and_data *)data; ++ int n = 0; ++ unsigned long sp = UNW_SP(info); ++ ++ if (arch_unw_user_mode(info)) ++ return -1; ++ while (unwind(info) == 0 && UNW_PC(info)) { ++ n++; ++ oad->ops->address(oad->data, UNW_PC(info)); ++ if (arch_unw_user_mode(info)) ++ break; ++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) ++ && sp > UNW_SP(info)) ++ break; ++ sp = UNW_SP(info); ++ touch_nmi_watchdog(); ++ } ++ return n; ++} ++ + #define MSG(msg) ops->warning(data, msg) + + void dump_trace(struct task_struct *task, struct pt_regs *regs, +@@ -147,6 +180,41 @@ + if (!task) + task = current; + ++ if (call_trace >= 0) { ++ int unw_ret = 0; ++ struct unwind_frame_info info; ++ struct ops_and_data oad = { .ops = ops, .data = data }; ++ ++ if (regs) { ++ if (unwind_init_frame_info(&info, task, regs) == 0) ++ unw_ret = dump_trace_unwind(&info, &oad); ++ } else if (task == current) ++ unw_ret = unwind_init_running(&info, dump_trace_unwind, ++ &oad); ++ else { ++ if (unwind_init_blocked(&info, task) == 0) ++ unw_ret = dump_trace_unwind(&info, &oad); ++ } ++ if (unw_ret > 0) { ++ if (call_trace == 1 && !arch_unw_user_mode(&info)) { ++ ops->warning_symbol(data, ++ "DWARF2 unwinder stuck at %s", ++ UNW_PC(&info)); ++ if (UNW_SP(&info) >= PAGE_OFFSET) { ++ MSG("Leftover inexact backtrace:"); ++ stack = (void *)UNW_SP(&info); ++ if (!stack) ++ return; ++ ebp = UNW_FP(&info); ++ } else ++ MSG("Full inexact backtrace again:"); ++ } else if (call_trace >= 1) ++ return; ++ else ++ MSG("Full inexact backtrace again:"); ++ } else ++ MSG("Inexact backtrace:"); ++ } + if (!stack) { + unsigned long dummy; + stack = &dummy; +@@ -614,6 +682,13 @@ + + current->thread.error_code = error_code; + current->thread.trap_no = 13; ++ if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && ++ printk_ratelimit()) ++ printk(KERN_INFO ++ "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", ++ current->comm, current->pid, ++ regs->eip, regs->esp, error_code); ++ + force_sig(SIGSEGV, current); + return; + +@@ -854,6 +929,7 @@ + */ + clear_dr7: + set_debugreg(0, 7); ++ notify_die(DIE_DEBUG, "debug2", regs, condition, error_code, SIGTRAP); + 
return;
+
+ debug_vm86:
+@@ -1118,6 +1194,12 @@
+ _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
+ }
+
++/* Some traps need to be set early. */
++void __init early_trap_init(void) {
++ set_intr_gate(1,&debug);
++ set_system_intr_gate(3, &int3); /* int3 can be called from all */
++ set_intr_gate(14,&page_fault);
++}
+
+ void __init trap_init(void)
+ {
+@@ -1134,10 +1216,8 @@
+ #endif
+
+ set_trap_gate(0,&divide_error);
+- set_intr_gate(1,&debug);
+ set_intr_gate(2,&nmi);
+- set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
+- set_system_gate(4,&overflow);
++ set_system_gate(4,&overflow); /* int4/5 can be called from all */
+ set_trap_gate(5,&bounds);
+ set_trap_gate(6,&invalid_op);
+ set_trap_gate(7,&device_not_available);
+@@ -1147,7 +1227,6 @@
+ set_trap_gate(11,&segment_not_present);
+ set_trap_gate(12,&stack_segment);
+ set_trap_gate(13,&general_protection);
+- set_intr_gate(14,&page_fault);
+ set_trap_gate(15,&spurious_interrupt_bug);
+ set_trap_gate(16,&coprocessor_error);
+ set_trap_gate(17,&alignment_check);
+@@ -1204,3 +1283,19 @@
+ return 1;
+ }
+ __setup("code_bytes=", code_bytes_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++ if (strcmp(s, "old") == 0)
++ call_trace = -1;
++ else if (strcmp(s, "both") == 0)
++ call_trace = 0;
++ else if (strcmp(s, "newfallback") == 0)
++ call_trace = 1;
++ else if (strcmp(s, "new") == 0)
++ call_trace = 2;
++ return 1;
++}
++__setup("call_trace=", call_trace_setup);
++#endif
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/unwind.S linux-2.6.22-591/arch/i386/kernel/unwind.S
+--- linux-2.6.22-570/arch/i386/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/i386/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,36 @@
++/* Assembler support code for dwarf2 unwinder */
++#include
++#include
++#include
++#include
++#include
++
++ENTRY(arch_unwind_init_running)
++ CFI_STARTPROC
++ movl 4(%esp), %edx
++ movl (%esp), %ecx
++ leal 4(%esp), %eax
++ movl %ebx, PT_EBX(%edx)
++ xorl %ebx, %ebx
++ movl %ebx, PT_ECX(%edx)
++ movl %ebx, PT_EDX(%edx)
++ movl %esi, PT_ESI(%edx)
++ movl %edi, PT_EDI(%edx)
++ movl %ebp, PT_EBP(%edx)
++ movl %ebx, PT_EAX(%edx)
++ movl $__USER_DS, PT_DS(%edx)
++ movl $__USER_DS, PT_ES(%edx)
++ movl $0, PT_FS(%edx)
++ movl %ebx, PT_ORIG_EAX(%edx)
++ movl %ecx, PT_EIP(%edx)
++ movl 12(%esp), %ecx
++ movl $__KERNEL_CS, PT_CS(%edx)
++ movl %ebx, PT_EFLAGS(%edx)
++ movl %eax, PT_OLDESP(%edx)
++ movl 8(%esp), %eax
++ movl %ecx, 8(%esp)
++ movl PT_EBX(%edx), %ebx
++ movl $__KERNEL_DS, PT_OLDSS(%edx)
++ jmpl *%eax
++ CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
+diff -Nurb linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c
+--- linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/i386/mach-voyager/voyager_thread.c 2007-12-21 15:36:11.000000000 -0500
+@@ -52,7 +52,7 @@
+ NULL,
+ };
+
+- if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
++ if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
+ printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
+ string, ret);
+ }
+diff -Nurb linux-2.6.22-570/arch/i386/mm/fault.c linux-2.6.22-591/arch/i386/mm/fault.c
+--- linux-2.6.22-570/arch/i386/mm/fault.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/arch/i386/mm/fault.c 2007-12-21 15:36:11.000000000 -0500
+@@ -284,6 +284,8 @@
+ return 0;
+ }
+
++int show_unhandled_signals
= 1; ++ + /* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate +@@ -471,6 +473,14 @@ + if (is_prefetch(regs, address, error_code)) + return; + ++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && ++ printk_ratelimit()) { ++ printk("%s%s[%d]: segfault at %08lx eip %08lx " ++ "esp %08lx error %lx\n", ++ tsk->pid > 1 ? KERN_INFO : KERN_EMERG, ++ tsk->comm, tsk->pid, address, regs->eip, ++ regs->esp, error_code); ++ } + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); +@@ -508,6 +518,10 @@ + if (is_prefetch(regs, address, error_code)) + return; + ++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, ++ error_code, 14, SIGSEGV) == NOTIFY_STOP) ++ return; ++ + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. +diff -Nurb linux-2.6.22-570/arch/ia64/hp/sim/simeth.c linux-2.6.22-591/arch/ia64/hp/sim/simeth.c +--- linux-2.6.22-570/arch/ia64/hp/sim/simeth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ia64/hp/sim/simeth.c 2007-12-21 15:36:14.000000000 -0500 +@@ -300,6 +300,9 @@ + return NOTIFY_DONE; + } + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE; + + /* +diff -Nurb linux-2.6.22-570/arch/ia64/kernel/Makefile linux-2.6.22-591/arch/ia64/kernel/Makefile +--- linux-2.6.22-570/arch/ia64/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ia64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -35,6 +35,7 @@ + obj-$(CONFIG_PCI_MSI) += msi_ia64.o + mca_recovery-y += mca_drv.o mca_drv_asm.o + obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o ++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o + + obj-$(CONFIG_IA64_ESI) += esi.o + ifneq ($(CONFIG_IA64_ESI),) +diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S +--- linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/ia64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,238 @@ ++/* setjmp() and longjmp() assembler support for kdb on ia64. ++ ++ This code was copied from glibc CVS as of 2001-06-27 and modified where ++ necessary to fit the kernel. ++ Keith Owens 2001-06-27 ++ */ ++ ++/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. ++ Contributed by David Mosberger-Tang . ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Library General Public License as ++ published by the Free Software Foundation; either version 2 of the ++ License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Library General Public License for more details. ++ ++ You should have received a copy of the GNU Library General Public ++ License along with the GNU C Library; see the file COPYING.LIB. If ++ not, write to the Free Software Foundation, Inc., ++ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
++*/ ++ ++#include ++GLOBAL_ENTRY(kgdb_fault_setjmp) ++ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) ++ alloc loc1=ar.pfs,2,2,2,0 ++ mov r16=ar.unat ++ ;; ++ mov r17=ar.fpsr ++ mov r2=in0 ++ add r3=8,in0 ++ ;; ++.mem.offset 0,0; ++ st8.spill.nta [r2]=sp,16 // r12 (sp) ++.mem.offset 8,0; ++ st8.spill.nta [r3]=gp,16 // r1 (gp) ++ ;; ++ st8.nta [r2]=r16,16 // save caller's unat ++ st8.nta [r3]=r17,16 // save fpsr ++ add r8=0xa0,in0 ++ ;; ++.mem.offset 160,0; ++ st8.spill.nta [r2]=r4,16 // r4 ++.mem.offset 168,0; ++ st8.spill.nta [r3]=r5,16 // r5 ++ add r9=0xb0,in0 ++ ;; ++ stf.spill.nta [r8]=f2,32 ++ stf.spill.nta [r9]=f3,32 ++ mov loc0=rp ++ .body ++ ;; ++ stf.spill.nta [r8]=f4,32 ++ stf.spill.nta [r9]=f5,32 ++ mov r17=b1 ++ ;; ++ stf.spill.nta [r8]=f16,32 ++ stf.spill.nta [r9]=f17,32 ++ mov r18=b2 ++ ;; ++ stf.spill.nta [r8]=f18,32 ++ stf.spill.nta [r9]=f19,32 ++ mov r19=b3 ++ ;; ++ stf.spill.nta [r8]=f20,32 ++ stf.spill.nta [r9]=f21,32 ++ mov r20=b4 ++ ;; ++ stf.spill.nta [r8]=f22,32 ++ stf.spill.nta [r9]=f23,32 ++ mov r21=b5 ++ ;; ++ stf.spill.nta [r8]=f24,32 ++ stf.spill.nta [r9]=f25,32 ++ mov r22=ar.lc ++ ;; ++ stf.spill.nta [r8]=f26,32 ++ stf.spill.nta [r9]=f27,32 ++ mov r24=pr ++ ;; ++ stf.spill.nta [r8]=f28,32 ++ stf.spill.nta [r9]=f29,32 ++ ;; ++ stf.spill.nta [r8]=f30 ++ stf.spill.nta [r9]=f31 ++ ++.mem.offset 0,0; ++ st8.spill.nta [r2]=r6,16 // r6 ++.mem.offset 8,0; ++ st8.spill.nta [r3]=r7,16 // r7 ++ ;; ++ mov r23=ar.bsp ++ mov r25=ar.unat ++ st8.nta [r2]=loc0,16 // b0 ++ st8.nta [r3]=r17,16 // b1 ++ ;; ++ st8.nta [r2]=r18,16 // b2 ++ st8.nta [r3]=r19,16 // b3 ++ ;; ++ st8.nta [r2]=r20,16 // b4 ++ st8.nta [r3]=r21,16 // b5 ++ ;; ++ st8.nta [r2]=loc1,16 // ar.pfs ++ st8.nta [r3]=r22,16 // ar.lc ++ ;; ++ st8.nta [r2]=r24,16 // pr ++ st8.nta [r3]=r23,16 // ar.bsp ++ ;; ++ st8.nta [r2]=r25 // ar.unat ++ st8.nta [r3]=in0 // &__jmp_buf ++ mov r8=0 ++ mov rp=loc0 ++ mov ar.pfs=loc1 ++ br.ret.sptk.few rp ++END(kdba_setjmp) ++#define pPos p6 /* is rotate count positive? */ ++#define pNeg p7 /* is rotate count negative? 
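++ (the rotate count realigns the saved ar.unat NaT-collection bits, which are indexed by spill address, after the jmp_buf has moved relative to where kgdb_fault_setjmp originally stored it)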
*/ ++GLOBAL_ENTRY(kgdb_fault_longjmp) ++ alloc r8=ar.pfs,2,1,0,0 ++ mov r27=ar.rsc ++ add r2=0x98,in0 // r2 <- &jmpbuf.orig_jmp_buf_addr ++ ;; ++ ld8 r8=[r2],-16 // r8 <- orig_jmp_buf_addr ++ mov r10=ar.bsp ++ and r11=~0x3,r27 // clear ar.rsc.mode ++ ;; ++ flushrs // flush dirty regs to backing store (must be first in insn grp) ++ ld8 r23=[r2],8 // r23 <- jmpbuf.ar_bsp ++ sub r8=r8,in0 // r8 <- &orig_jmpbuf - &jmpbuf ++ ;; ++ ld8 r25=[r2] // r25 <- jmpbuf.ar_unat ++ extr.u r8=r8,3,6 // r8 <- (&orig_jmpbuf - &jmpbuf)/8 & 0x3f ++ ;; ++ cmp.lt pNeg,pPos=r8,r0 ++ mov r2=in0 ++ ;; ++(pPos) mov r16=r8 ++(pNeg) add r16=64,r8 ++(pPos) sub r17=64,r8 ++(pNeg) sub r17=r0,r8 ++ ;; ++ mov ar.rsc=r11 // put RSE in enforced lazy mode ++ shr.u r8=r25,r16 ++ add r3=8,in0 // r3 <- &jmpbuf.r1 ++ shl r9=r25,r17 ++ ;; ++ or r25=r8,r9 ++ ;; ++ mov r26=ar.rnat ++ mov ar.unat=r25 // setup ar.unat (NaT bits for r1, r4-r7, and r12) ++ ;; ++ ld8.fill.nta sp=[r2],16 // r12 (sp) ++ ld8.fill.nta gp=[r3],16 // r1 (gp) ++ dep r11=-1,r23,3,6 // r11 <- ia64_rse_rnat_addr(jmpbuf.ar_bsp) ++ ;; ++ ld8.nta r16=[r2],16 // caller's unat ++ ld8.nta r17=[r3],16 // fpsr ++ ;; ++ ld8.fill.nta r4=[r2],16 // r4 ++ ld8.fill.nta r5=[r3],16 // r5 (gp) ++ cmp.geu p8,p0=r10,r11 // p8 <- (ar.bsp >= jmpbuf.ar_bsp) ++ ;; ++ ld8.fill.nta r6=[r2],16 // r6 ++ ld8.fill.nta r7=[r3],16 // r7 ++ ;; ++ mov ar.unat=r16 // restore caller's unat ++ mov ar.fpsr=r17 // restore fpsr ++ ;; ++ ld8.nta r16=[r2],16 // b0 ++ ld8.nta r17=[r3],16 // b1 ++ ;; ++(p8) ld8 r26=[r11] // r26 <- *ia64_rse_rnat_addr(jmpbuf.ar_bsp) ++ mov ar.bspstore=r23 // restore ar.bspstore ++ ;; ++ ld8.nta r18=[r2],16 // b2 ++ ld8.nta r19=[r3],16 // b3 ++ ;; ++ ld8.nta r20=[r2],16 // b4 ++ ld8.nta r21=[r3],16 // b5 ++ ;; ++ ld8.nta r11=[r2],16 // ar.pfs ++ ld8.nta r22=[r3],56 // ar.lc ++ ;; ++ ld8.nta r24=[r2],32 // pr ++ mov b0=r16 ++ ;; ++ ldf.fill.nta f2=[r2],32 ++ ldf.fill.nta f3=[r3],32 ++ mov b1=r17 ++ ;; ++ ldf.fill.nta f4=[r2],32 ++ ldf.fill.nta f5=[r3],32 ++ mov b2=r18 ++ ;; ++ ldf.fill.nta f16=[r2],32 ++ ldf.fill.nta f17=[r3],32 ++ mov b3=r19 ++ ;; ++ ldf.fill.nta f18=[r2],32 ++ ldf.fill.nta f19=[r3],32 ++ mov b4=r20 ++ ;; ++ ldf.fill.nta f20=[r2],32 ++ ldf.fill.nta f21=[r3],32 ++ mov b5=r21 ++ ;; ++ ldf.fill.nta f22=[r2],32 ++ ldf.fill.nta f23=[r3],32 ++ mov ar.lc=r22 ++ ;; ++ ldf.fill.nta f24=[r2],32 ++ ldf.fill.nta f25=[r3],32 ++ cmp.eq p8,p9=0,in1 ++ ;; ++ ldf.fill.nta f26=[r2],32 ++ ldf.fill.nta f27=[r3],32 ++ mov ar.pfs=r11 ++ ;; ++ ldf.fill.nta f28=[r2],32 ++ ldf.fill.nta f29=[r3],32 ++ ;; ++ ldf.fill.nta f30=[r2] ++ ldf.fill.nta f31=[r3] ++(p8) mov r8=1 ++ ++ mov ar.rnat=r26 // restore ar.rnat ++ ;; ++ mov ar.rsc=r27 // restore ar.rsc ++(p9) mov r8=in1 ++ ++ invala // virt. -> phys. regnum mapping may change ++ mov pr=r24,-1 ++ br.ret.sptk.few rp ++END(kgdb_fault_longjmp) +diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb.c linux-2.6.22-591/arch/ia64/kernel/kgdb.c +--- linux-2.6.22-570/arch/ia64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/ia64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,944 @@ ++/* ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2, or (at your option) any ++ * later version. 
++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ */ ++ ++/* ++ * Copyright (C) 2000-2001 VERITAS Software Corporation. ++ * (c) Copyright 2005 Hewlett-Packard Development Company, L.P. ++ * Bob Picco ++ */ ++/* ++ * Contributor: Lake Stevens Instrument Division$ ++ * Written by: Glenn Engel $ ++ * Updated by: Amit Kale ++ * Modified for 386 by Jim Kingdon, Cygnus Support. ++ * Origianl kgdb, compatibility with 2.1.xx kernel by David Grothe ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define NUM_REGS 590 ++#define REGISTER_BYTES (NUM_REGS*8+128*8) ++#define REGISTER_BYTE(N) (((N) * 8) \ ++ + ((N) <= IA64_FR0_REGNUM ? \ ++ 0 : 8 * (((N) > IA64_FR127_REGNUM) ? 128 : (N) - IA64_FR0_REGNUM))) ++#define REGISTER_SIZE(N) \ ++ (((N) >= IA64_FR0_REGNUM && (N) <= IA64_FR127_REGNUM) ? 16 : 8) ++#define IA64_GR0_REGNUM 0 ++#define IA64_FR0_REGNUM 128 ++#define IA64_FR127_REGNUM (IA64_FR0_REGNUM+127) ++#define IA64_PR0_REGNUM 256 ++#define IA64_BR0_REGNUM 320 ++#define IA64_VFP_REGNUM 328 ++#define IA64_PR_REGNUM 330 ++#define IA64_IP_REGNUM 331 ++#define IA64_PSR_REGNUM 332 ++#define IA64_CFM_REGNUM 333 ++#define IA64_AR0_REGNUM 334 ++#define IA64_NAT0_REGNUM 462 ++#define IA64_NAT31_REGNUM (IA64_NAT0_REGNUM+31) ++#define IA64_NAT32_REGNUM (IA64_NAT0_REGNUM+32) ++#define IA64_RSC_REGNUM (IA64_AR0_REGNUM+16) ++#define IA64_BSP_REGNUM (IA64_AR0_REGNUM+17) ++#define IA64_BSPSTORE_REGNUM (IA64_AR0_REGNUM+18) ++#define IA64_RNAT_REGNUM (IA64_AR0_REGNUM+19) ++#define IA64_FCR_REGNUM (IA64_AR0_REGNUM+21) ++#define IA64_EFLAG_REGNUM (IA64_AR0_REGNUM+24) ++#define IA64_CSD_REGNUM (IA64_AR0_REGNUM+25) ++#define IA64_SSD_REGNUM (IA64_AR0_REGNUM+26) ++#define IA64_CFLG_REGNUM (IA64_AR0_REGNUM+27) ++#define IA64_FSR_REGNUM (IA64_AR0_REGNUM+28) ++#define IA64_FIR_REGNUM (IA64_AR0_REGNUM+29) ++#define IA64_FDR_REGNUM (IA64_AR0_REGNUM+30) ++#define IA64_CCV_REGNUM (IA64_AR0_REGNUM+32) ++#define IA64_UNAT_REGNUM (IA64_AR0_REGNUM+36) ++#define IA64_FPSR_REGNUM (IA64_AR0_REGNUM+40) ++#define IA64_ITC_REGNUM (IA64_AR0_REGNUM+44) ++#define IA64_PFS_REGNUM (IA64_AR0_REGNUM+64) ++#define IA64_LC_REGNUM (IA64_AR0_REGNUM+65) ++#define IA64_EC_REGNUM (IA64_AR0_REGNUM+66) ++ ++#define REGISTER_INDEX(N) (REGISTER_BYTE(N) / sizeof (unsigned long)) ++#define BREAK_INSTR_ALIGN (~0xfULL) ++ ++#define ptoff(V) ((unsigned int) &((struct pt_regs *)0x0)->V) ++struct reg_to_ptreg_index { ++ unsigned int reg; ++ unsigned int ptregoff; ++}; ++ ++static struct reg_to_ptreg_index gr_reg_to_ptreg_index[] = { ++ {IA64_GR0_REGNUM + 1, ptoff(r1)}, ++ {IA64_GR0_REGNUM + 2, ptoff(r2)}, ++ {IA64_GR0_REGNUM + 3, ptoff(r3)}, ++ {IA64_GR0_REGNUM + 8, ptoff(r8)}, ++ {IA64_GR0_REGNUM + 9, ptoff(r9)}, ++ {IA64_GR0_REGNUM + 10, ptoff(r10)}, ++ {IA64_GR0_REGNUM + 11, ptoff(r11)}, ++ {IA64_GR0_REGNUM + 12, ptoff(r12)}, ++ {IA64_GR0_REGNUM + 13, ptoff(r13)}, ++ {IA64_GR0_REGNUM + 14, ptoff(r14)}, ++ {IA64_GR0_REGNUM + 15, ptoff(r15)}, ++ {IA64_GR0_REGNUM + 16, ptoff(r16)}, ++ {IA64_GR0_REGNUM + 17, ptoff(r17)}, ++ {IA64_GR0_REGNUM + 18, ptoff(r18)}, ++ {IA64_GR0_REGNUM + 19, ptoff(r19)}, ++ {IA64_GR0_REGNUM + 20, ptoff(r20)}, ++ {IA64_GR0_REGNUM + 21, ptoff(r21)}, ++ {IA64_GR0_REGNUM + 22, 
ptoff(r22)}, ++ {IA64_GR0_REGNUM + 23, ptoff(r23)}, ++ {IA64_GR0_REGNUM + 24, ptoff(r24)}, ++ {IA64_GR0_REGNUM + 25, ptoff(r25)}, ++ {IA64_GR0_REGNUM + 26, ptoff(r26)}, ++ {IA64_GR0_REGNUM + 27, ptoff(r27)}, ++ {IA64_GR0_REGNUM + 28, ptoff(r28)}, ++ {IA64_GR0_REGNUM + 29, ptoff(r29)}, ++ {IA64_GR0_REGNUM + 30, ptoff(r30)}, ++ {IA64_GR0_REGNUM + 31, ptoff(r31)}, ++}; ++ ++static struct reg_to_ptreg_index br_reg_to_ptreg_index[] = { ++ {IA64_BR0_REGNUM, ptoff(b0)}, ++ {IA64_BR0_REGNUM + 6, ptoff(b6)}, ++ {IA64_BR0_REGNUM + 7, ptoff(b7)}, ++}; ++ ++static struct reg_to_ptreg_index ar_reg_to_ptreg_index[] = { ++ {IA64_PFS_REGNUM, ptoff(ar_pfs)}, ++ {IA64_UNAT_REGNUM, ptoff(ar_unat)}, ++ {IA64_RNAT_REGNUM, ptoff(ar_rnat)}, ++ {IA64_BSPSTORE_REGNUM, ptoff(ar_bspstore)}, ++ {IA64_RSC_REGNUM, ptoff(ar_rsc)}, ++ {IA64_CSD_REGNUM, ptoff(ar_csd)}, ++ {IA64_SSD_REGNUM, ptoff(ar_ssd)}, ++ {IA64_FPSR_REGNUM, ptoff(ar_fpsr)}, ++ {IA64_CCV_REGNUM, ptoff(ar_ccv)}, ++}; ++ ++extern atomic_t cpu_doing_single_step; ++ ++static int kgdb_gr_reg(int regnum, struct unw_frame_info *info, ++ unsigned long *reg, int rw) ++{ ++ char nat; ++ ++ if ((regnum >= IA64_GR0_REGNUM && regnum <= (IA64_GR0_REGNUM + 1)) || ++ (regnum >= (IA64_GR0_REGNUM + 4) && ++ regnum <= (IA64_GR0_REGNUM + 7))) ++ return !unw_access_gr(info, regnum - IA64_GR0_REGNUM, ++ reg, &nat, rw); ++ else ++ return 0; ++} ++static int kgdb_gr_ptreg(int regnum, struct pt_regs * ptregs, ++ struct unw_frame_info *info, unsigned long *reg, int rw) ++{ ++ int i, result = 1; ++ char nat; ++ ++ if (!((regnum >= (IA64_GR0_REGNUM + 2) && ++ regnum <= (IA64_GR0_REGNUM + 3)) || ++ (regnum >= (IA64_GR0_REGNUM + 8) && ++ regnum <= (IA64_GR0_REGNUM + 15)) || ++ (regnum >= (IA64_GR0_REGNUM + 16) && ++ regnum <= (IA64_GR0_REGNUM + 31)))) ++ return 0; ++ else if (rw && ptregs) { ++ for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++) ++ if (gr_reg_to_ptreg_index[i].reg == regnum) { ++ *((unsigned long *)(((void *)ptregs) + ++ gr_reg_to_ptreg_index[i].ptregoff)) = *reg; ++ break; ++ } ++ } else if (!rw && ptregs) { ++ for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++) ++ if (gr_reg_to_ptreg_index[i].reg == regnum) { ++ *reg = *((unsigned long *) ++ (((void *)ptregs) + ++ gr_reg_to_ptreg_index[i].ptregoff)); ++ break; ++ } ++ } else ++ result = !unw_access_gr(info, regnum - IA64_GR0_REGNUM, ++ reg, &nat, rw); ++ return result; ++} ++ ++static int kgdb_br_reg(int regnum, struct pt_regs * ptregs, ++ struct unw_frame_info *info, unsigned long *reg, int rw) ++{ ++ int i, result = 1; ++ ++ if (!(regnum >= IA64_BR0_REGNUM && regnum <= (IA64_BR0_REGNUM + 7))) ++ return 0; ++ ++ switch (regnum) { ++ case IA64_BR0_REGNUM: ++ case IA64_BR0_REGNUM + 6: ++ case IA64_BR0_REGNUM + 7: ++ if (rw) { ++ for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++) ++ if (br_reg_to_ptreg_index[i].reg == regnum) { ++ *((unsigned long *) ++ (((void *)ptregs) + ++ br_reg_to_ptreg_index[i].ptregoff)) = ++ *reg; ++ break; ++ } ++ } else ++ for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++) ++ if (br_reg_to_ptreg_index[i].reg == regnum) { ++ *reg = *((unsigned long *) ++ (((void *)ptregs) + ++ br_reg_to_ptreg_index[i]. 
++ ptregoff));
++ break;
++ }
++ break;
++ case IA64_BR0_REGNUM + 1:
++ case IA64_BR0_REGNUM + 2:
++ case IA64_BR0_REGNUM + 3:
++ case IA64_BR0_REGNUM + 4:
++ case IA64_BR0_REGNUM + 5:
++ result = !unw_access_br(info, regnum - IA64_BR0_REGNUM,
++ reg, rw);
++ break;
++ }
++
++ return result;
++}
++
++static int kgdb_fr_reg(int regnum, char *inbuffer, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg,
++ struct ia64_fpreg *freg, int rw)
++{
++ int result = 1;
++
++ if (!(regnum >= IA64_FR0_REGNUM && regnum <= (IA64_FR0_REGNUM + 127)))
++ return 0;
++
++ switch (regnum) {
++ case IA64_FR0_REGNUM + 6:
++ case IA64_FR0_REGNUM + 7:
++ case IA64_FR0_REGNUM + 8:
++ case IA64_FR0_REGNUM + 9:
++ case IA64_FR0_REGNUM + 10:
++ case IA64_FR0_REGNUM + 11:
++ case IA64_FR0_REGNUM + 12:
++ if (rw) {
++ char *ptr = inbuffer;
++
++ freg->u.bits[0] = *reg;
++ kgdb_hex2long(&ptr, &freg->u.bits[1]);
++ *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6))) =
++ *freg;
++ break;
++ } else if (!ptregs)
++ result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++ freg, rw);
++ else
++ *freg =
++ *(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6)));
++ break;
++ default:
++ if (!rw)
++ result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++ freg, rw);
++ else
++ result = 0;
++ break;
++ }
++
++ return result;
++}
++
++static int kgdb_ar_reg(int regnum, struct pt_regs * ptregs,
++ struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++ int result = 0, i;
++
++ if (!(regnum >= IA64_AR0_REGNUM && regnum <= IA64_EC_REGNUM))
++ return 0;
++
++ if (rw && ptregs) {
++ for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++ if (ar_reg_to_ptreg_index[i].reg == regnum) {
++ *((unsigned long *) (((void *)ptregs) +
++ ar_reg_to_ptreg_index[i].ptregoff)) =
++ *reg;
++ result = 1;
++ break;
++ }
++ } else if (ptregs) {
++ for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++ if (ar_reg_to_ptreg_index[i].reg == regnum) {
++ *reg = *((unsigned long *) (((void *)ptregs) +
++ ar_reg_to_ptreg_index[i].ptregoff));
++ result = 1;
++ break;
++ }
++ }
++
++ if (result)
++ return result;
++
++ result = 1;
++
++ switch (regnum) {
++ case IA64_CSD_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_CSD, reg, rw);
++ break;
++ case IA64_SSD_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_SSD, reg, rw);
++ break;
++ case IA64_UNAT_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_UNAT, reg, rw);
++ break;
++ case IA64_RNAT_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_RNAT, reg, rw);
++ break;
++ case IA64_BSPSTORE_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_BSPSTORE, reg, rw);
++ break;
++ case IA64_PFS_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_PFS, reg, rw);
++ break;
++ case IA64_LC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_LC, reg, rw);
++ break;
++ case IA64_EC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_EC, reg, rw);
++ break;
++ case IA64_FPSR_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_FPSR, reg, rw);
++ break;
++ case IA64_RSC_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_RSC, reg, rw);
++ break;
++ case IA64_CCV_REGNUM:
++ result = !unw_access_ar(info, UNW_AR_CCV, reg, rw);
++ break;
++ default:
++ result = 0;
++ }
++
++ return result;
++}
++
++void kgdb_get_reg(char *outbuffer, int regnum, struct unw_frame_info *info,
++ struct pt_regs *ptregs)
++{
++ unsigned long reg, size = 0, *mem = &reg;
++ struct ia64_fpreg freg;
++
++ if (kgdb_gr_reg(regnum, info, &reg, 0) ||
++ kgdb_gr_ptreg(regnum, ptregs, info, &reg, 0) ||
++ kgdb_br_reg(regnum, ptregs, info, &reg, 0) ||
++ kgdb_ar_reg(regnum, ptregs, info, &reg, 0))
++ size = sizeof(reg);
++ else if (kgdb_fr_reg(regnum, NULL, ptregs, info, &reg, &freg, 0)) {
++ size = sizeof(freg);
++ mem = (unsigned long *)&freg;
++ } else if (regnum == IA64_IP_REGNUM) {
++ if (!ptregs) {
++ unw_get_ip(info, &reg);
++ size = sizeof(reg);
++ } else {
++ reg = ptregs->cr_iip;
++ size = sizeof(reg);
++ }
++ } else if (regnum == IA64_CFM_REGNUM) {
++ if (!ptregs)
++ unw_get_cfm(info, &reg);
++ else
++ reg = ptregs->cr_ifs;
++ size = sizeof(reg);
++ } else if (regnum == IA64_PSR_REGNUM) {
++ if (!ptregs && kgdb_usethread)
++ ptregs = (struct pt_regs *)
++ ((unsigned long)kgdb_usethread +
++ IA64_STK_OFFSET) - 1;
++ if (ptregs)
++ reg = ptregs->cr_ipsr;
++ size = sizeof(reg);
++ } else if (regnum == IA64_PR_REGNUM) {
++ if (ptregs)
++ reg = ptregs->pr;
++ else
++ unw_access_pr(info, &reg, 0);
++ size = sizeof(reg);
++ } else if (regnum == IA64_BSP_REGNUM) {
++ unw_get_bsp(info, &reg);
++ size = sizeof(reg);
++ }
++
++ if (size) {
++ kgdb_mem2hex((char *) mem, outbuffer, size);
++ outbuffer[size*2] = 0;
++ }
++ else
++ strcpy(outbuffer, "E0");
++
++ return;
++}
++
++void kgdb_put_reg(char *inbuffer, char *outbuffer, int regnum,
++ struct unw_frame_info *info, struct pt_regs *ptregs)
++{
++ unsigned long reg;
++ struct ia64_fpreg freg;
++ char *ptr = inbuffer;
++
++ kgdb_hex2long(&ptr, &reg);
++ strcpy(outbuffer, "OK");
++
++ if (kgdb_gr_reg(regnum, info, &reg, 1) ||
++ kgdb_gr_ptreg(regnum, ptregs, info, &reg, 1) ||
++ kgdb_br_reg(regnum, ptregs, info, &reg, 1) ||
++ kgdb_fr_reg(regnum, inbuffer, ptregs, info, &reg, &freg, 1) ||
++ kgdb_ar_reg(regnum, ptregs, info, &reg, 1)) ;
++ else if (regnum == IA64_IP_REGNUM)
++ ptregs->cr_iip = reg;
++ else if (regnum == IA64_CFM_REGNUM)
++ ptregs->cr_ifs = reg;
++ else if (regnum == IA64_PSR_REGNUM)
++ ptregs->cr_ipsr = reg;
++ else if (regnum == IA64_PR_REGNUM)
++ ptregs->pr = reg;
++ else
++ strcpy(outbuffer, "E01");
++ return;
++}
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++
++}
++
++#define MAX_HW_BREAKPOINT (20)
++long hw_break_total_dbr, hw_break_total_ibr;
++#define HW_BREAKPOINT (hw_break_total_dbr + hw_break_total_ibr)
++#define WATCH_INSTRUCTION 0x0
++#define WATCH_WRITE 0x1
++#define WATCH_READ 0x2
++#define WATCH_ACCESS 0x3
++
++#define HWCAP_DBR ((1 << WATCH_WRITE) | (1 << WATCH_READ))
++#define HWCAP_IBR (1 << WATCH_INSTRUCTION)
++struct hw_breakpoint {
++ unsigned enabled;
++ unsigned long capable;
++ unsigned long type;
++ unsigned long mask;
++ unsigned long addr;
++} *breakinfo;
++
++static struct hw_breakpoint hwbreaks[MAX_HW_BREAKPOINT];
++
++enum instruction_type { A, I, M, F, B, L, X, u };
++
++static enum instruction_type bundle_encoding[32][3] = {
++ {M, I, I}, /* 00 */
++ {M, I, I}, /* 01 */
++ {M, I, I}, /* 02 */
++ {M, I, I}, /* 03 */
++ {M, L, X}, /* 04 */
++ {M, L, X}, /* 05 */
++ {u, u, u}, /* 06 */
++ {u, u, u}, /* 07 */
++ {M, M, I}, /* 08 */
++ {M, M, I}, /* 09 */
++ {M, M, I}, /* 0A */
++ {M, M, I}, /* 0B */
++ {M, F, I}, /* 0C */
++ {M, F, I}, /* 0D */
++ {M, M, F}, /* 0E */
++ {M, M, F}, /* 0F */
++ {M, I, B}, /* 10 */
++ {M, I, B}, /* 11 */
++ {M, B, B}, /* 12 */
++ {M, B, B}, /* 13 */
++ {u, u, u}, /* 14 */
++ {u, u, u}, /* 15 */
++ {B, B, B}, /* 16 */
++ {B, B, B}, /* 17 */
++ {M, M, B}, /* 18 */
++ {M, M, B}, /* 19 */
++ {u, u, u}, /* 1A */
++ {u, u, u}, /* 1B */
++ {M, F, B}, /* 1C */
++ {M, F,
B}, /* 1D */ ++ {u, u, u}, /* 1E */ ++ {u, u, u}, /* 1F */ ++}; ++ ++int kgdb_validate_break_address(unsigned long addr) ++{ ++ int error; ++ char tmp_variable[BREAK_INSTR_SIZE]; ++ error = kgdb_get_mem((char *)(addr & BREAK_INSTR_ALIGN), tmp_variable, ++ BREAK_INSTR_SIZE); ++ return error; ++} ++ ++int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) ++{ ++ extern unsigned long _start[]; ++ unsigned long slot = addr & BREAK_INSTR_ALIGN, bundle_addr; ++ unsigned long template; ++ struct bundle { ++ struct { ++ unsigned long long template:5; ++ unsigned long long slot0:41; ++ unsigned long long slot1_p0:64 - 46; ++ } quad0; ++ struct { ++ unsigned long long slot1_p1:41 - (64 - 46); ++ unsigned long long slot2:41; ++ } quad1; ++ } bundle; ++ int ret; ++ ++ bundle_addr = addr & ~0xFULL; ++ ++ if (bundle_addr == (unsigned long)_start) ++ return 0; ++ ++ ret = kgdb_get_mem((char *)bundle_addr, (char *)&bundle, ++ BREAK_INSTR_SIZE); ++ if (ret < 0) ++ return ret; ++ ++ if (slot > 2) ++ slot = 0; ++ ++ memcpy(saved_instr, &bundle, BREAK_INSTR_SIZE); ++ template = bundle.quad0.template; ++ ++ if (slot == 1 && bundle_encoding[template][1] == L) ++ slot = 2; ++ ++ switch (slot) { ++ case 0: ++ bundle.quad0.slot0 = BREAKNUM; ++ break; ++ case 1: ++ bundle.quad0.slot1_p0 = BREAKNUM; ++ bundle.quad1.slot1_p1 = (BREAKNUM >> (64 - 46)); ++ break; ++ case 2: ++ bundle.quad1.slot2 = BREAKNUM; ++ break; ++ } ++ ++ return kgdb_set_mem((char *)bundle_addr, (char *)&bundle, ++ BREAK_INSTR_SIZE); ++} ++ ++int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) ++{ ++ extern unsigned long _start[]; ++ ++ addr = addr & BREAK_INSTR_ALIGN; ++ if (addr == (unsigned long)_start) ++ return 0; ++ return kgdb_set_mem((char *)addr, (char *)bundle, BREAK_INSTR_SIZE); ++} ++ ++volatile static struct smp_unw { ++ struct unw_frame_info *unw; ++ struct task_struct *task; ++} smp_unw[NR_CPUS]; ++ ++static int inline kgdb_get_blocked_state(struct task_struct *p, ++ struct unw_frame_info *unw) ++{ ++ unsigned long ip; ++ int count = 0; ++ ++ unw_init_from_blocked_task(unw, p); ++ ip = 0UL; ++ do { ++ if (unw_unwind(unw) < 0) ++ return -1; ++ unw_get_ip(unw, &ip); ++ if (!in_sched_functions(ip)) ++ break; ++ } while (count++ < 16); ++ ++ if (!ip) ++ return -1; ++ else ++ return 0; ++} ++ ++static void inline kgdb_wait(struct pt_regs *regs) ++{ ++ unsigned long hw_breakpoint_status = ia64_getreg(_IA64_REG_PSR); ++ if (hw_breakpoint_status & IA64_PSR_DB) ++ ia64_setreg(_IA64_REG_PSR_L, ++ hw_breakpoint_status ^ IA64_PSR_DB); ++ kgdb_nmihook(smp_processor_id(), regs); ++ if (hw_breakpoint_status & IA64_PSR_DB) ++ ia64_setreg(_IA64_REG_PSR_L, hw_breakpoint_status); ++ ++ return; ++} ++ ++static void inline normalize(struct unw_frame_info *running, ++ struct pt_regs *regs) ++{ ++ unsigned long sp; ++ ++ do { ++ unw_get_sp(running, &sp); ++ if ((sp + 0x10) >= (unsigned long)regs) ++ break; ++ } while (unw_unwind(running) >= 0); ++ ++ return; ++} ++ ++static void kgdb_init_running(struct unw_frame_info *unw, void *data) ++{ ++ struct pt_regs *regs; ++ ++ regs = data; ++ normalize(unw, regs); ++ smp_unw[smp_processor_id()].unw = unw; ++ kgdb_wait(regs); ++} ++ ++void kgdb_wait_ipi(struct pt_regs *regs) ++{ ++ struct unw_frame_info unw; ++ ++ smp_unw[smp_processor_id()].task = current; ++ ++ if (user_mode(regs)) { ++ smp_unw[smp_processor_id()].unw = (struct unw_frame_info *)1; ++ kgdb_wait(regs); ++ } else { ++ if (current->state == TASK_RUNNING) ++ unw_init_running(kgdb_init_running, regs); ++ else { ++ if 
(kgdb_get_blocked_state(current, &unw)) ++ smp_unw[smp_processor_id()].unw = ++ (struct unw_frame_info *)1; ++ else ++ smp_unw[smp_processor_id()].unw = &unw; ++ kgdb_wait(regs); ++ } ++ } ++ ++ smp_unw[smp_processor_id()].unw = NULL; ++ return; ++} ++ ++void kgdb_roundup_cpus(unsigned long flags) ++{ ++ if (num_online_cpus() > 1) ++ smp_send_nmi_allbutself(); ++} ++ ++static volatile int kgdb_hwbreak_sstep[NR_CPUS]; ++ ++static int kgdb_notify(struct notifier_block *self, unsigned long cmd, ++ void *ptr) ++{ ++ struct die_args *args = ptr; ++ struct pt_regs *regs = args->regs; ++ unsigned long err = args->err; ++ ++ switch (cmd) { ++ default: ++ return NOTIFY_DONE; ++ case DIE_PAGE_FAULT_NO_CONTEXT: ++ if (atomic_read(&debugger_active) && kgdb_may_fault) { ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ return NOTIFY_STOP; ++ } ++ break; ++ case DIE_BREAK: ++ if (user_mode(regs) || err == 0x80001) ++ return NOTIFY_DONE; ++ break; ++ case DIE_FAULT: ++ if (user_mode(regs)) ++ return NOTIFY_DONE; ++ else if (err == 36 && kgdb_hwbreak_sstep[smp_processor_id()]) { ++ kgdb_hwbreak_sstep[smp_processor_id()] = 0; ++ regs->cr_ipsr &= ~IA64_PSR_SS; ++ return NOTIFY_STOP; ++ } ++ case DIE_MCA_MONARCH_PROCESS: ++ case DIE_INIT_MONARCH_PROCESS: ++ break; ++ } ++ ++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs); ++ return NOTIFY_STOP; ++} ++ ++static struct notifier_block kgdb_notifier = { ++ .notifier_call = kgdb_notify, ++}; ++ ++int kgdb_arch_init(void) ++{ ++ atomic_notifier_chain_register(&ia64die_chain, &kgdb_notifier); ++ return 0; ++} ++ ++static void do_kgdb_handle_exception(struct unw_frame_info *, void *data); ++ ++struct kgdb_state { ++ int e_vector; ++ int signo; ++ unsigned long err_code; ++ struct pt_regs *regs; ++ struct unw_frame_info *unw; ++ char *inbuf; ++ char *outbuf; ++ int unwind; ++ int ret; ++}; ++ ++static void inline kgdb_pc(struct pt_regs *regs, unsigned long pc) ++{ ++ regs->cr_iip = pc & ~0xf; ++ ia64_psr(regs)->ri = pc & 0x3; ++ return; ++} ++ ++int kgdb_arch_handle_exception(int e_vector, int signo, ++ int err_code, char *remcom_in_buffer, ++ char *remcom_out_buffer, ++ struct pt_regs *linux_regs) ++{ ++ struct kgdb_state info; ++ ++ info.e_vector = e_vector; ++ info.signo = signo; ++ info.err_code = err_code; ++ info.unw = (void *)0; ++ info.inbuf = remcom_in_buffer; ++ info.outbuf = remcom_out_buffer; ++ info.unwind = 0; ++ info.ret = -1; ++ ++ if (remcom_in_buffer[0] == 'c' || remcom_in_buffer[0] == 's') { ++ info.regs = linux_regs; ++ do_kgdb_handle_exception(NULL, &info); ++ } else if (kgdb_usethread == current) { ++ info.regs = linux_regs; ++ info.unwind = 1; ++ unw_init_running(do_kgdb_handle_exception, &info); ++ } else if (kgdb_usethread->state != TASK_RUNNING) { ++ struct unw_frame_info unw_info; ++ ++ if (kgdb_get_blocked_state(kgdb_usethread, &unw_info)) { ++ info.ret = 1; ++ goto bad; ++ } ++ info.regs = NULL; ++ do_kgdb_handle_exception(&unw_info, &info); ++ } else { ++ int i; ++ ++ for (i = 0; i < NR_CPUS; i++) ++ if (smp_unw[i].task == kgdb_usethread && smp_unw[i].unw ++ && smp_unw[i].unw != (struct unw_frame_info *)1) { ++ info.regs = NULL; ++ do_kgdb_handle_exception(smp_unw[i].unw, &info); ++ break; ++ } else { ++ info.ret = 1; ++ goto bad; ++ } ++ } ++ ++ bad: ++ if (info.ret != -1 && remcom_in_buffer[0] == 'p') { ++ unsigned long bad = 0xbad4badbadbadbadUL; ++ ++ printk("kgdb_arch_handle_exception: p packet bad (%s)\n", ++ remcom_in_buffer); ++ kgdb_mem2hex((char *)&bad, remcom_out_buffer, sizeof(bad)); ++ 
remcom_out_buffer[sizeof(bad) * 2] = 0;
++		info.ret = -1;
++	}
++	return info.ret;
++}
++
++/*
++ * This is done because I evidently made an incorrect 'p' encoding
++ * when my patch for gdb was committed. It was later corrected. This
++ * check supports both my wrong encoding of the register number and
++ * the correct encoding. Eventually this should be eliminated and
++ * kgdb_hex2long should be demarshalling the regnum.
++ */
++static inline int check_packet(unsigned int regnum, char *packet)
++{
++	static int check_done, swap;
++	unsigned long reglong;
++
++	if (likely(check_done)) {
++		if (swap) {
++			kgdb_hex2long(&packet, &reglong);
++			regnum = (int) reglong;
++		}
++
++	} else {
++		if (regnum > NUM_REGS) {
++			kgdb_hex2long(&packet, &reglong);
++			regnum = (int) reglong;
++			swap = 1;
++		}
++		check_done = 1;
++	}
++	return regnum;
++}
++
++static void do_kgdb_handle_exception(struct unw_frame_info *unw_info,
++				     void *data)
++{
++	long addr;
++	char *ptr;
++	unsigned long newPC;
++	int e_vector, signo;
++	unsigned long err_code;
++	struct pt_regs *linux_regs;
++	struct kgdb_state *info;
++	char *remcom_in_buffer, *remcom_out_buffer;
++
++	info = data;
++	info->unw = unw_info;
++	e_vector = info->e_vector;
++	signo = info->signo;
++	err_code = info->err_code;
++	remcom_in_buffer = info->inbuf;
++	remcom_out_buffer = info->outbuf;
++	linux_regs = info->regs;
++
++	if (info->unwind)
++		normalize(unw_info, linux_regs);
++
++	switch (remcom_in_buffer[0]) {
++	case 'p':
++	{
++		unsigned int regnum;
++
++		kgdb_hex2mem(&remcom_in_buffer[1], (char *)&regnum,
++			     sizeof(regnum));
++		regnum = check_packet(regnum, &remcom_in_buffer[1]);
++		if (regnum >= NUM_REGS) {
++			remcom_out_buffer[0] = 'E';
++			remcom_out_buffer[1] = 0;
++		} else
++			kgdb_get_reg(remcom_out_buffer, regnum,
++				     unw_info, linux_regs);
++		break;
++	}
++	case 'P':
++	{
++		unsigned int regno;
++		long v;
++		char *ptr;
++
++		ptr = &remcom_in_buffer[1];
++		if ((!kgdb_usethread || kgdb_usethread == current) &&
++		    kgdb_hex2long(&ptr, &v) &&
++		    *ptr++ == '=' && (v >= 0)) {
++			regno = (unsigned int)v;
++			regno = (regno >= NUM_REGS ?
0 : regno); ++ kgdb_put_reg(ptr, remcom_out_buffer, regno, ++ unw_info, linux_regs); ++ } else ++ strcpy(remcom_out_buffer, "E01"); ++ break; ++ } ++ case 'c': ++ case 's': ++ if (e_vector == TRAP_BRKPT && err_code == KGDBBREAKNUM) { ++ if (ia64_psr(linux_regs)->ri < 2) ++ kgdb_pc(linux_regs, linux_regs->cr_iip + ++ ia64_psr(linux_regs)->ri + 1); ++ else ++ kgdb_pc(linux_regs, linux_regs->cr_iip + 16); ++ } ++ ++ /* try to read optional parameter, pc unchanged if no parm */ ++ ptr = &remcom_in_buffer[1]; ++ if (kgdb_hex2long(&ptr, &addr)) { ++ linux_regs->cr_iip = addr; ++ } ++ newPC = linux_regs->cr_iip; ++ ++ /* clear the trace bit */ ++ linux_regs->cr_ipsr &= ~IA64_PSR_SS; ++ ++ atomic_set(&cpu_doing_single_step, -1); ++ ++ /* set the trace bit if we're stepping or took a hardware break */ ++ if (remcom_in_buffer[0] == 's' || e_vector == TRAP_HWBKPT) { ++ linux_regs->cr_ipsr |= IA64_PSR_SS; ++ debugger_step = 1; ++ if (kgdb_contthread) ++ atomic_set(&cpu_doing_single_step, ++ smp_processor_id()); ++ } ++ ++ kgdb_correct_hw_break(); ++ ++ /* if not hardware breakpoint, then reenable them */ ++ if (e_vector != TRAP_HWBKPT) ++ linux_regs->cr_ipsr |= IA64_PSR_DB; ++ else { ++ kgdb_hwbreak_sstep[smp_processor_id()] = 1; ++ linux_regs->cr_ipsr &= ~IA64_PSR_DB; ++ } ++ ++ info->ret = 0; ++ break; ++ default: ++ break; ++ } ++ ++ return; ++} ++ ++struct kgdb_arch arch_kgdb_ops = { ++ .gdb_bpt_instr = {0xcc}, ++}; +diff -Nurb linux-2.6.22-570/arch/ia64/kernel/smp.c linux-2.6.22-591/arch/ia64/kernel/smp.c +--- linux-2.6.22-570/arch/ia64/kernel/smp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ia64/kernel/smp.c 2007-12-21 15:36:11.000000000 -0500 +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + + /* + * Note: alignment of 4 entries/cacheline was empirically determined +@@ -79,6 +80,9 @@ + + #define IPI_CALL_FUNC 0 + #define IPI_CPU_STOP 1 ++#ifdef CONFIG_KGDB ++#define IPI_KGDB_INTERRUPT 2 ++#endif + #define IPI_KDUMP_CPU_STOP 3 + + /* This needs to be cacheline aligned because it is written to by *other* CPUs. */ +@@ -169,6 +173,11 @@ + case IPI_CPU_STOP: + stop_this_cpu(); + break; ++#ifdef CONFIG_KGDB ++ case IPI_KGDB_INTERRUPT: ++ kgdb_wait_ipi(get_irq_regs()); ++ break; ++#endif + #ifdef CONFIG_KEXEC + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); +@@ -399,6 +408,14 @@ + } + EXPORT_SYMBOL(smp_call_function_single); + ++#ifdef CONFIG_KGDB ++void ++smp_send_nmi_allbutself(void) ++{ ++ send_IPI_allbutself(IPI_KGDB_INTERRUPT); ++} ++#endif ++ + /* + * this function sends a 'generic call function' IPI to all other CPUs + * in the system. 
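
The 'c'/'s' handling above leans on the IA-64 bundle/slot program counter: cr_iip holds a 16-byte-aligned bundle address, psr.ri holds the slot (0-2), and kgdb_pc() packs both into the low bits of one value. A minimal user-space sketch of that arithmetic (helper names are illustrative, not part of the patch):

#include <stdio.h>

/* Pack a bundle address and slot the way kgdb_pc() splits them:
 * bundle address in the upper bits, slot number in bits 0-1. */
static unsigned long pack_pc(unsigned long iip, unsigned int ri)
{
	return (iip & ~0xfUL) | (ri & 0x3);
}

/* Advance past a break: slots 0 and 1 move within the bundle,
 * slot 2 wraps to slot 0 of the next 16-byte bundle. */
static unsigned long step_past_break(unsigned long iip, unsigned int ri)
{
	return ri < 2 ? pack_pc(iip, ri + 1) : pack_pc(iip + 16, 0);
}

int main(void)
{
	printf("%#lx\n", step_past_break(0x1000, 1));	/* 0x1002 */
	printf("%#lx\n", step_past_break(0x1000, 2));	/* 0x1010 */
	return 0;
}

This mirrors why the handler adds ri + 1 to cr_iip for slots 0 and 1 but jumps a full 16 bytes when the break sits in slot 2.
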
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/traps.c linux-2.6.22-591/arch/ia64/kernel/traps.c +--- linux-2.6.22-570/arch/ia64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/ia64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 +@@ -155,8 +155,12 @@ + break; + + default: +- if (break_num < 0x40000 || break_num > 0x100000) ++ if (break_num < 0x40000 || break_num > 0x100000) { ++ if (notify_die(DIE_BREAK, "bad break", regs, ++ break_num, TRAP_BRKPT, SIGTRAP) == NOTIFY_STOP) ++ return; + die_if_kernel("Bad break", regs, break_num); ++ } + + if (break_num < 0x80000) { + sig = SIGILL; code = __ILL_BREAK; +diff -Nurb linux-2.6.22-570/arch/ia64/mm/extable.c linux-2.6.22-591/arch/ia64/mm/extable.c +--- linux-2.6.22-570/arch/ia64/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ia64/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 +@@ -6,6 +6,7 @@ + */ + + #include ++#include + + #include + #include +@@ -73,6 +74,11 @@ + else + last = mid - 1; + } ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Not reached. */ ++#endif + return NULL; + } + +diff -Nurb linux-2.6.22-570/arch/ia64/mm/fault.c linux-2.6.22-591/arch/ia64/mm/fault.c +--- linux-2.6.22-570/arch/ia64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/ia64/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 +@@ -255,6 +255,10 @@ + */ + bust_spinlocks(1); + ++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, ++ isr, 14, SIGSEGV) == NOTIFY_STOP) ++ return; ++ + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); + else +diff -Nurb linux-2.6.22-570/arch/mips/Kconfig linux-2.6.22-591/arch/mips/Kconfig +--- linux-2.6.22-570/arch/mips/Kconfig 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -30,7 +30,6 @@ + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + help + The eXcite is a smart camera platform manufactured by + Basler Vision Technologies AG. +@@ -98,7 +97,6 @@ + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + help + This is an evaluation board based on the Galileo GT-64120 + single-chip system controller that contains a MIPS R5000 compatible +@@ -269,7 +267,6 @@ + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + help + The Ocelot is a MIPS-based Single Board Computer (SBC) made by + Momentum Computer . 
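
The extable.c and fault.c hooks above are what make the stub's memory probes survivable: kgdb_get_mem/kgdb_set_mem run with kgdb_may_fault set, and a kernel fault long-jumps back into the stub instead of oopsing. A rough user-space analogue of that pattern, substituting sigsetjmp/siglongjmp and a SIGSEGV handler for the kernel's kgdb_fault_longjmp (all names here are illustrative):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf fault_jmp;
static volatile sig_atomic_t may_fault;

static void segv_handler(int sig)
{
	if (may_fault)
		siglongjmp(fault_jmp, 1);	/* back into the probe */
	signal(sig, SIG_DFL);			/* not ours: die normally */
	raise(sig);
}

/* Returns 0 and fills *val, or -1 if reading addr faulted. */
static int probe_read(const unsigned char *addr, unsigned char *val)
{
	if (sigsetjmp(fault_jmp, 1)) {
		may_fault = 0;
		return -1;			/* arrived via the fault path */
	}
	may_fault = 1;
	*val = *addr;				/* may fault */
	may_fault = 0;
	return 0;
}

int main(void)
{
	unsigned char v;
	signal(SIGSEGV, segv_handler);
	/* reading a near-NULL address faults on most systems */
	printf("%d\n", probe_read((unsigned char *)16, &v));
	return 0;
}
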
+@@ -331,8 +328,6 @@ + select SYS_HAS_CPU_R5432 + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL +- select SYS_SUPPORTS_KGDB +- select SYS_SUPPORTS_KGDB + select SYS_SUPPORTS_LITTLE_ENDIAN + help + This enables support for the R5432-based NEC DDB Vrc-5477, +@@ -360,7 +355,6 @@ + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_HIGHMEM +- select SYS_SUPPORTS_KGDB + select SYS_SUPPORTS_SMP + help + Yosemite is an evaluation board for the RM9000x2 processor +@@ -440,7 +434,6 @@ + select SYS_HAS_CPU_R10000 + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + select SYS_SUPPORTS_NUMA + select SYS_SUPPORTS_SMP + select GENERIC_HARDIRQS_NO__DO_IRQ +@@ -490,7 +483,6 @@ + select SYS_HAS_CPU_SB1 + select SYS_SUPPORTS_BIG_ENDIAN + select SYS_SUPPORTS_HIGHMEM +- select SYS_SUPPORTS_KGDB + select SYS_SUPPORTS_LITTLE_ENDIAN + + config SIBYTE_SENTOSA +@@ -631,7 +623,6 @@ + select SYS_SUPPORTS_64BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + select GENERIC_HARDIRQS_NO__DO_IRQ + help + This Toshiba board is based on the TX4927 processor. Say Y here to +@@ -650,7 +641,6 @@ + select SYS_SUPPORTS_32BIT_KERNEL + select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_BIG_ENDIAN +- select SYS_SUPPORTS_KGDB + select GENERIC_HARDIRQS_NO__DO_IRQ + help + This Toshiba board is based on the TX4938 processor. Say Y here to +@@ -826,7 +816,6 @@ + + config DDB5XXX_COMMON + bool +- select SYS_SUPPORTS_KGDB + + config MIPS_BOARDS_GEN + bool +@@ -862,7 +851,6 @@ + select SYS_HAS_EARLY_PRINTK + select SYS_SUPPORTS_32BIT_KERNEL + select GENERIC_HARDIRQS_NO__DO_IRQ +- select SYS_SUPPORTS_KGDB + + config SWAP_IO_SPACE + bool +diff -Nurb linux-2.6.22-570/arch/mips/Kconfig.debug linux-2.6.22-591/arch/mips/Kconfig.debug +--- linux-2.6.22-570/arch/mips/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -46,28 +46,6 @@ + arch/mips/kernel/smtc.c. This debugging option result in significant + overhead so should be disabled in production kernels. + +-config KGDB +- bool "Remote GDB kernel debugging" +- depends on DEBUG_KERNEL && SYS_SUPPORTS_KGDB +- select DEBUG_INFO +- help +- If you say Y here, it will be possible to remotely debug the MIPS +- kernel using gdb. This enlarges your kernel image disk size by +- several megabytes and requires a machine with more than 16 MB, +- better 32 MB RAM to avoid excessive linking time. This is only +- useful for kernel hackers. If unsure, say N. +- +-config SYS_SUPPORTS_KGDB +- bool +- +-config GDB_CONSOLE +- bool "Console output to GDB" +- depends on KGDB +- help +- If you are using GDB for remote debugging over a serial port and +- would like kernel messages to be formatted into GDB $O packets so +- that GDB prints them as program output, say 'Y'. 
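
As the GDB_CONSOLE help text above describes, console output reaches gdb as $O packets: the letter 'O' followed by the hex-encoded message, which the stub then wraps in the usual $...#checksum envelope. A small sketch of the payload encoding (hypothetical helper, not taken from the stub):

#include <stdio.h>

/* Hex-encode a console message as an O-packet payload;
 * "Hello\n" becomes "O48656c6c6f0a". */
static void o_packet(const char *msg, char *out)
{
	*out++ = 'O';
	while (*msg)
		out += sprintf(out, "%02x", (unsigned char)*msg++);
	*out = '\0';
}

int main(void)
{
	char buf[64];
	o_packet("Hello\n", buf);
	printf("%s\n", buf);
	return 0;
}
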
+- + config SB1XXX_CORELIS + bool "Corelis Debugger" + depends on SIBYTE_SB1xxx_SOC +diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/Makefile linux-2.6.22-591/arch/mips/au1000/common/Makefile +--- linux-2.6.22-570/arch/mips/au1000/common/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/au1000/common/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -10,5 +10,4 @@ + au1xxx_irqmap.o clocks.o platform.o power.o setup.o \ + sleeper.o cputable.o dma.o dbdma.o gpio.o + +-obj-$(CONFIG_KGDB) += dbg_io.o + obj-$(CONFIG_PCI) += pci.o +diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c +--- linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/au1000/common/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,121 +0,0 @@ +- +-#include +-#include +- +-#ifdef CONFIG_KGDB +- +-/* +- * FIXME the user should be able to select the +- * uart to be used for debugging. +- */ +-#define DEBUG_BASE UART_DEBUG_BASE +-/**/ +- +-/* we need uint32 uint8 */ +-/* #include "types.h" */ +-typedef unsigned char uint8; +-typedef unsigned int uint32; +- +-#define UART16550_BAUD_2400 2400 +-#define UART16550_BAUD_4800 4800 +-#define UART16550_BAUD_9600 9600 +-#define UART16550_BAUD_19200 19200 +-#define UART16550_BAUD_38400 38400 +-#define UART16550_BAUD_57600 57600 +-#define UART16550_BAUD_115200 115200 +- +-#define UART16550_PARITY_NONE 0 +-#define UART16550_PARITY_ODD 0x08 +-#define UART16550_PARITY_EVEN 0x18 +-#define UART16550_PARITY_MARK 0x28 +-#define UART16550_PARITY_SPACE 0x38 +- +-#define UART16550_DATA_5BIT 0x0 +-#define UART16550_DATA_6BIT 0x1 +-#define UART16550_DATA_7BIT 0x2 +-#define UART16550_DATA_8BIT 0x3 +- +-#define UART16550_STOP_1BIT 0x0 +-#define UART16550_STOP_2BIT 0x4 +- +- +-#define UART_RX 0 /* Receive buffer */ +-#define UART_TX 4 /* Transmit buffer */ +-#define UART_IER 8 /* Interrupt Enable Register */ +-#define UART_IIR 0xC /* Interrupt ID Register */ +-#define UART_FCR 0x10 /* FIFO Control Register */ +-#define UART_LCR 0x14 /* Line Control Register */ +-#define UART_MCR 0x18 /* Modem Control Register */ +-#define UART_LSR 0x1C /* Line Status Register */ +-#define UART_MSR 0x20 /* Modem Status Register */ +-#define UART_CLK 0x28 /* Baud Rat4e Clock Divider */ +-#define UART_MOD_CNTRL 0x100 /* Module Control */ +- +-/* memory-mapped read/write of the port */ +-#define UART16550_READ(y) (au_readl(DEBUG_BASE + y) & 0xff) +-#define UART16550_WRITE(y,z) (au_writel(z&0xff, DEBUG_BASE + y)) +- +-extern unsigned long get_au1x00_uart_baud_base(void); +-extern unsigned long cal_r4koff(void); +- +-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) +-{ +- +- if (UART16550_READ(UART_MOD_CNTRL) != 0x3) { +- UART16550_WRITE(UART_MOD_CNTRL, 3); +- } +- cal_r4koff(); +- +- /* disable interrupts */ +- UART16550_WRITE(UART_IER, 0); +- +- /* set up baud rate */ +- { +- uint32 divisor; +- +- /* set divisor */ +- divisor = get_au1x00_uart_baud_base() / baud; +- UART16550_WRITE(UART_CLK, divisor & 0xffff); +- } +- +- /* set data format */ +- UART16550_WRITE(UART_LCR, (data | parity | stop)); +-} +- +-static int remoteDebugInitialized = 0; +- +-uint8 getDebugChar(void) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_115200, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, +- UART16550_STOP_1BIT); +- } +- +- while((UART16550_READ(UART_LSR) & 0x1) == 0); +- return UART16550_READ(UART_RX); +-} +- +- +-int 
putDebugChar(uint8 byte) +-{ +-// int i; +- +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_115200, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, +- UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(UART_LSR)&0x40) == 0); +- UART16550_WRITE(UART_TX, byte); +- //for (i=0;i<0xfff;i++); +- +- return 1; +-} +- +-#endif +diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/Makefile linux-2.6.22-591/arch/mips/basler/excite/Makefile +--- linux-2.6.22-570/arch/mips/basler/excite/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/basler/excite/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -5,5 +5,4 @@ + obj-$(CONFIG_BASLER_EXCITE) += excite_irq.o excite_prom.o excite_setup.o \ + excite_device.o excite_procfs.o + +-obj-$(CONFIG_KGDB) += excite_dbg_io.o + obj-m += excite_iodev.o +diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c +--- linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/basler/excite/excite_dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,121 +0,0 @@ +-/* +- * Copyright (C) 2004 by Basler Vision Technologies AG +- * Author: Thomas Koeller +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. 
+- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#if defined(CONFIG_SERIAL_8250) && CONFIG_SERIAL_8250_NR_UARTS > 1 +-#error Debug port used by serial driver +-#endif +- +-#define UART_CLK 25000000 +-#define BASE_BAUD (UART_CLK / 16) +-#define REGISTER_BASE_0 0x0208UL +-#define REGISTER_BASE_1 0x0238UL +- +-#define REGISTER_BASE_DBG REGISTER_BASE_1 +- +-#define CPRR 0x0004 +-#define UACFG 0x0200 +-#define UAINTS 0x0204 +-#define UARBR (REGISTER_BASE_DBG + 0x0000) +-#define UATHR (REGISTER_BASE_DBG + 0x0004) +-#define UADLL (REGISTER_BASE_DBG + 0x0008) +-#define UAIER (REGISTER_BASE_DBG + 0x000c) +-#define UADLH (REGISTER_BASE_DBG + 0x0010) +-#define UAIIR (REGISTER_BASE_DBG + 0x0014) +-#define UAFCR (REGISTER_BASE_DBG + 0x0018) +-#define UALCR (REGISTER_BASE_DBG + 0x001c) +-#define UAMCR (REGISTER_BASE_DBG + 0x0020) +-#define UALSR (REGISTER_BASE_DBG + 0x0024) +-#define UAMSR (REGISTER_BASE_DBG + 0x0028) +-#define UASCR (REGISTER_BASE_DBG + 0x002c) +- +-#define PARITY_NONE 0 +-#define PARITY_ODD 0x08 +-#define PARITY_EVEN 0x18 +-#define PARITY_MARK 0x28 +-#define PARITY_SPACE 0x38 +- +-#define DATA_5BIT 0x0 +-#define DATA_6BIT 0x1 +-#define DATA_7BIT 0x2 +-#define DATA_8BIT 0x3 +- +-#define STOP_1BIT 0x0 +-#define STOP_2BIT 0x4 +- +-#define BAUD_DBG 57600 +-#define PARITY_DBG PARITY_NONE +-#define DATA_DBG DATA_8BIT +-#define STOP_DBG STOP_1BIT +- +-/* Initialize the serial port for KGDB debugging */ +-void __init excite_kgdb_init(void) +-{ +- const u32 divisor = BASE_BAUD / BAUD_DBG; +- +- /* Take the UART out of reset */ +- titan_writel(0x00ff1cff, CPRR); +- titan_writel(0x00000000, UACFG); +- titan_writel(0x00000002, UACFG); +- +- titan_writel(0x0, UALCR); +- titan_writel(0x0, UAIER); +- +- /* Disable FIFOs */ +- titan_writel(0x00, UAFCR); +- +- titan_writel(0x80, UALCR); +- titan_writel(divisor & 0xff, UADLL); +- titan_writel((divisor & 0xff00) >> 8, UADLH); +- titan_writel(0x0, UALCR); +- +- titan_writel(DATA_DBG | PARITY_DBG | STOP_DBG, UALCR); +- +- /* Enable receiver interrupt */ +- titan_readl(UARBR); +- titan_writel(0x1, UAIER); +-} +- +-int getDebugChar(void) +-{ +- while (!(titan_readl(UALSR) & 0x1)); +- return titan_readl(UARBR); +-} +- +-int putDebugChar(int data) +-{ +- while (!(titan_readl(UALSR) & 0x20)); +- titan_writel(data, UATHR); +- return 1; +-} +- +-/* KGDB interrupt handler */ +-asmlinkage void excite_kgdb_inthdl(void) +-{ +- if (unlikely( +- ((titan_readl(UAIIR) & 0x7) == 4) +- && ((titan_readl(UARBR) & 0xff) == 0x3))) +- set_async_breakpoint(®s->cp0_epc); +-} +diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c +--- linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/basler/excite/excite_irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -50,10 +50,6 @@ + mips_cpu_irq_init(); + rm7k_cpu_irq_init(); + rm9k_cpu_irq_init(); +- +-#ifdef CONFIG_KGDB +- excite_kgdb_init(); +-#endif + } + + asmlinkage void plat_irq_dispatch(void) +@@ -90,9 +86,6 @@ + msgint = msgintflags & msgintmask & (0x1 << (TITAN_MSGINT % 0x20)); + if ((pending & (1 << TITAN_IRQ)) && msgint) { + ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10)); +-#if defined(CONFIG_KGDB) +- excite_kgdb_inthdl(); +-#endif + 
do_IRQ(TITAN_IRQ); + return; + } +diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c +--- linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/basler/excite/excite_setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -95,13 +95,13 @@ + /* Take the DUART out of reset */ + titan_writel(0x00ff1cff, CPRR); + +-#if defined(CONFIG_KGDB) || (CONFIG_SERIAL_8250_NR_UARTS > 1) ++#if (CONFIG_SERIAL_8250_NR_UARTS > 1) + /* Enable both ports */ + titan_writel(MASK_SER0 | MASK_SER1, UACFG); + #else + /* Enable port #0 only */ + titan_writel(MASK_SER0, UACFG); +-#endif /* defined(CONFIG_KGDB) */ ++#endif + + /* + * Set up serial port #0. Do not use autodetection; the result is +diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile +--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -5,4 +5,3 @@ + obj-y += irq.o irq_5477.o setup.o lcd44780.o + + obj-$(CONFIG_RUNTIME_DEBUG) += debug.o +-obj-$(CONFIG_KGDB) += kgdb_io.o +diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c +--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/ddb5xxx/ddb5477/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,136 +0,0 @@ +-/* +- * kgdb io functions for DDB5477. We use the second serial port (upper one). +- * +- * Copyright (C) 2001 MontaVista Software Inc. +- * Author: jsun@mvista.com or jsun@junsun.net +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. 
+- * +- */ +- +-/* ======================= CONFIG ======================== */ +- +-/* [jsun] we use the second serial port for kdb */ +-#define BASE 0xbfa04240 +-#define MAX_BAUD 115200 +- +-/* distance in bytes between two serial registers */ +-#define REG_OFFSET 8 +- +-/* +- * 0 - kgdb does serial init +- * 1 - kgdb skip serial init +- */ +-static int remoteDebugInitialized = 0; +- +-/* +- * the default baud rate *if* kgdb does serial init +- */ +-#define BAUD_DEFAULT UART16550_BAUD_38400 +- +-/* ======================= END OF CONFIG ======================== */ +- +-typedef unsigned char uint8; +-typedef unsigned int uint32; +- +-#define UART16550_BAUD_2400 2400 +-#define UART16550_BAUD_4800 4800 +-#define UART16550_BAUD_9600 9600 +-#define UART16550_BAUD_19200 19200 +-#define UART16550_BAUD_38400 38400 +-#define UART16550_BAUD_57600 57600 +-#define UART16550_BAUD_115200 115200 +- +-#define UART16550_PARITY_NONE 0 +-#define UART16550_PARITY_ODD 0x08 +-#define UART16550_PARITY_EVEN 0x18 +-#define UART16550_PARITY_MARK 0x28 +-#define UART16550_PARITY_SPACE 0x38 +- +-#define UART16550_DATA_5BIT 0x0 +-#define UART16550_DATA_6BIT 0x1 +-#define UART16550_DATA_7BIT 0x2 +-#define UART16550_DATA_8BIT 0x3 +- +-#define UART16550_STOP_1BIT 0x0 +-#define UART16550_STOP_2BIT 0x4 +- +-/* register offset */ +-#define OFS_RCV_BUFFER 0 +-#define OFS_TRANS_HOLD 0 +-#define OFS_SEND_BUFFER 0 +-#define OFS_INTR_ENABLE (1*REG_OFFSET) +-#define OFS_INTR_ID (2*REG_OFFSET) +-#define OFS_DATA_FORMAT (3*REG_OFFSET) +-#define OFS_LINE_CONTROL (3*REG_OFFSET) +-#define OFS_MODEM_CONTROL (4*REG_OFFSET) +-#define OFS_RS232_OUTPUT (4*REG_OFFSET) +-#define OFS_LINE_STATUS (5*REG_OFFSET) +-#define OFS_MODEM_STATUS (6*REG_OFFSET) +-#define OFS_RS232_INPUT (6*REG_OFFSET) +-#define OFS_SCRATCH_PAD (7*REG_OFFSET) +- +-#define OFS_DIVISOR_LSB (0*REG_OFFSET) +-#define OFS_DIVISOR_MSB (1*REG_OFFSET) +- +- +-/* memory-mapped read/write of the port */ +-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) +-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z) +- +-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) +-{ +- /* disable interrupts */ +- UART16550_WRITE(OFS_INTR_ENABLE, 0); +- +- /* set up baud rate */ +- { +- uint32 divisor; +- +- /* set DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x80); +- +- /* set divisor */ +- divisor = MAX_BAUD / baud; +- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); +- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); +- +- /* clear DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x0); +- } +- +- /* set data format */ +- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); +-} +- +- +-uint8 getDebugChar(void) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(BAUD_DEFAULT, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); +- return UART16550_READ(OFS_RCV_BUFFER); +-} +- +- +-int putDebugChar(uint8 byte) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(BAUD_DEFAULT, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); +- UART16550_WRITE(OFS_SEND_BUFFER, byte); +- return 1; +-} +diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile +--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -3,5 +3,3 @@ + # + + obj-y += irq.o prom.o reset.o setup.o +- +-obj-$(CONFIG_KGDB) += dbg_io.o +diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c +--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/gt64120/momenco_ocelot/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,121 +0,0 @@ +- +-#include /* For the serial port location and base baud */ +- +-/* --- CONFIG --- */ +- +-typedef unsigned char uint8; +-typedef unsigned int uint32; +- +-/* --- END OF CONFIG --- */ +- +-#define UART16550_BAUD_2400 2400 +-#define UART16550_BAUD_4800 4800 +-#define UART16550_BAUD_9600 9600 +-#define UART16550_BAUD_19200 19200 +-#define UART16550_BAUD_38400 38400 +-#define UART16550_BAUD_57600 57600 +-#define UART16550_BAUD_115200 115200 +- +-#define UART16550_PARITY_NONE 0 +-#define UART16550_PARITY_ODD 0x08 +-#define UART16550_PARITY_EVEN 0x18 +-#define UART16550_PARITY_MARK 0x28 +-#define UART16550_PARITY_SPACE 0x38 +- +-#define UART16550_DATA_5BIT 0x0 +-#define UART16550_DATA_6BIT 0x1 +-#define UART16550_DATA_7BIT 0x2 +-#define UART16550_DATA_8BIT 0x3 +- +-#define UART16550_STOP_1BIT 0x0 +-#define UART16550_STOP_2BIT 0x4 +- +-/* ----------------------------------------------------- */ +- +-/* === CONFIG === */ +- +-/* [jsun] we use the second serial port for kdb */ +-#define BASE OCELOT_SERIAL1_BASE +-#define MAX_BAUD OCELOT_BASE_BAUD +- +-/* === END OF CONFIG === */ +- +-#define REG_OFFSET 4 +- +-/* register offset */ +-#define OFS_RCV_BUFFER 0 +-#define OFS_TRANS_HOLD 0 +-#define OFS_SEND_BUFFER 0 +-#define OFS_INTR_ENABLE (1*REG_OFFSET) +-#define OFS_INTR_ID (2*REG_OFFSET) +-#define OFS_DATA_FORMAT (3*REG_OFFSET) +-#define OFS_LINE_CONTROL (3*REG_OFFSET) +-#define OFS_MODEM_CONTROL (4*REG_OFFSET) +-#define OFS_RS232_OUTPUT (4*REG_OFFSET) +-#define OFS_LINE_STATUS (5*REG_OFFSET) +-#define OFS_MODEM_STATUS (6*REG_OFFSET) +-#define OFS_RS232_INPUT (6*REG_OFFSET) +-#define OFS_SCRATCH_PAD (7*REG_OFFSET) +- +-#define OFS_DIVISOR_LSB (0*REG_OFFSET) +-#define OFS_DIVISOR_MSB (1*REG_OFFSET) +- +- +-/* memory-mapped read/write of the port */ +-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) +-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z) +- +-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) +-{ +- /* disable interrupts */ +- UART16550_WRITE(OFS_INTR_ENABLE, 0); +- +- /* set up baud rate */ +- { +- uint32 divisor; +- +- /* set DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x80); +- +- /* set divisor */ +- divisor = MAX_BAUD / baud; +- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); +- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); +- +- /* clear DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x0); +- } +- +- /* set data format */ +- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); +-} +- +-static int remoteDebugInitialized = 0; +- +-uint8 getDebugChar(void) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_38400, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); +- return UART16550_READ(OFS_RCV_BUFFER); +-} +- +- +-int putDebugChar(uint8 byte) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_38400, +- 
UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); +- UART16550_WRITE(OFS_SEND_BUFFER, byte); +- return 1; +-} +diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile +--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -3,4 +3,3 @@ + # + + obj-y += init.o irq.o setup.o +-obj-$(CONFIG_KGDB) += kgdb_io.o +diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c +--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/jmr3927/rbhma3100/kgdb_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,105 +0,0 @@ +-/* +- * BRIEF MODULE DESCRIPTION +- * Low level uart routines to directly access a TX[34]927 SIO. +- * +- * Copyright 2001 MontaVista Software Inc. +- * Author: MontaVista Software, Inc. +- * ahennessy@mvista.com or source@mvista.com +- * +- * Based on arch/mips/ddb5xxx/ddb5477/kgdb_io.c +- * +- * Copyright (C) 2000-2001 Toshiba Corporation +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 675 Mass Ave, Cambridge, MA 02139, USA. +- */ +- +-#include +- +-#define TIMEOUT 0xffffff +- +-static int remoteDebugInitialized = 0; +-static void debugInit(int baud); +- +-int putDebugChar(unsigned char c) +-{ +- int i = 0; +- +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(38400); +- } +- +- do { +- slow_down(); +- i++; +- if (i>TIMEOUT) { +- break; +- } +- } while (!(tx3927_sioptr(0)->cisr & TXx927_SICISR_TXALS)); +- tx3927_sioptr(0)->tfifo = c; +- +- return 1; +-} +- +-unsigned char getDebugChar(void) +-{ +- int i = 0; +- int dicr; +- char c; +- +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(38400); +- } +- +- /* diable RX int. */ +- dicr = tx3927_sioptr(0)->dicr; +- tx3927_sioptr(0)->dicr = 0; +- +- do { +- slow_down(); +- i++; +- if (i>TIMEOUT) { +- break; +- } +- } while (tx3927_sioptr(0)->disr & TXx927_SIDISR_UVALID) +- ; +- c = tx3927_sioptr(0)->rfifo; +- +- /* clear RX int. status */ +- tx3927_sioptr(0)->disr &= ~TXx927_SIDISR_RDIS; +- /* enable RX int. 
*/ +- tx3927_sioptr(0)->dicr = dicr; +- +- return c; +-} +- +-static void debugInit(int baud) +-{ +- tx3927_sioptr(0)->lcr = 0x020; +- tx3927_sioptr(0)->dicr = 0; +- tx3927_sioptr(0)->disr = 0x4100; +- tx3927_sioptr(0)->cisr = 0x014; +- tx3927_sioptr(0)->fcr = 0; +- tx3927_sioptr(0)->flcr = 0x02; +- tx3927_sioptr(0)->bgr = ((JMR3927_BASE_BAUD + baud / 2) / baud) | +- TXx927_SIBGR_BCLK_T0; +-} +diff -Nurb linux-2.6.22-570/arch/mips/kernel/Makefile linux-2.6.22-591/arch/mips/kernel/Makefile +--- linux-2.6.22-570/arch/mips/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -57,7 +57,8 @@ + obj-$(CONFIG_MIPS32_N32) += binfmt_elfn32.o scall64-n32.o signal_n32.o + obj-$(CONFIG_MIPS32_O32) += binfmt_elfo32.o scall64-o32.o + +-obj-$(CONFIG_KGDB) += gdb-low.o gdb-stub.o ++obj-$(CONFIG_KGDB) += kgdb_handler.o kgdb.o kgdb-jmp.o \ ++ kgdb-setjmp.o + obj-$(CONFIG_PROC_FS) += proc.o + + obj-$(CONFIG_64BIT) += cpu-bugs64.o +diff -Nurb linux-2.6.22-570/arch/mips/kernel/cpu-probe.c linux-2.6.22-591/arch/mips/kernel/cpu-probe.c +--- linux-2.6.22-570/arch/mips/kernel/cpu-probe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/kernel/cpu-probe.c 2007-12-21 15:36:11.000000000 -0500 +@@ -177,6 +177,17 @@ + + cpu_wait = r4k_wait; + break; ++ case CPU_20KC: ++ /* ++ * WAIT on Rev1.0 has E1, E2, E3 and E16. ++ * WAIT on Rev2.0 and Rev3.0 has E16. ++ * Rev3.1 WAIT is nop, why bother ++ */ ++ if ((c->processor_id & 0xff) <= 0x64) ++ break; ++ ++ cpu_wait = r4k_wait; ++ break; + case CPU_RM9000: + if ((c->processor_id & 0x00ff) >= 0x40) + cpu_wait = r4k_wait; +diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-low.S linux-2.6.22-591/arch/mips/kernel/gdb-low.S +--- linux-2.6.22-570/arch/mips/kernel/gdb-low.S 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/kernel/gdb-low.S 1969-12-31 19:00:00.000000000 -0500 +@@ -1,394 +0,0 @@ +-/* +- * gdb-low.S contains the low-level trap handler for the GDB stub. +- * +- * Copyright (C) 1995 Andreas Busse +- */ +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_32BIT +-#define DMFC0 mfc0 +-#define DMTC0 mtc0 +-#define LDC1 lwc1 +-#define SDC1 lwc1 +-#endif +-#ifdef CONFIG_64BIT +-#define DMFC0 dmfc0 +-#define DMTC0 dmtc0 +-#define LDC1 ldc1 +-#define SDC1 ldc1 +-#endif +- +-/* +- * [jsun] We reserves about 2x GDB_FR_SIZE in stack. The lower (addressed) +- * part is used to store registers and passed to exception handler. +- * The upper part is reserved for "call func" feature where gdb client +- * saves some of the regs, setups call frame and passes args. +- * +- * A trace shows about 200 bytes are used to store about half of all regs. +- * The rest should be big enough for frame setup and passing args. +- */ +- +-/* +- * The low level trap handler +- */ +- .align 5 +- NESTED(trap_low, GDB_FR_SIZE, sp) +- .set noat +- .set noreorder +- +- mfc0 k0, CP0_STATUS +- sll k0, 3 /* extract cu0 bit */ +- bltz k0, 1f +- move k1, sp +- +- /* +- * Called from user mode, go somewhere else. 
+- */ +- mfc0 k0, CP0_CAUSE +- andi k0, k0, 0x7c +-#ifdef CONFIG_64BIT +- dsll k0, k0, 1 +-#endif +- PTR_L k1, saved_vectors(k0) +- jr k1 +- nop +-1: +- move k0, sp +- PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above +- LONG_S k0, GDB_FR_REG29(sp) +- LONG_S $2, GDB_FR_REG2(sp) +- +-/* +- * First save the CP0 and special registers +- */ +- +- mfc0 v0, CP0_STATUS +- LONG_S v0, GDB_FR_STATUS(sp) +- mfc0 v0, CP0_CAUSE +- LONG_S v0, GDB_FR_CAUSE(sp) +- DMFC0 v0, CP0_EPC +- LONG_S v0, GDB_FR_EPC(sp) +- DMFC0 v0, CP0_BADVADDR +- LONG_S v0, GDB_FR_BADVADDR(sp) +- mfhi v0 +- LONG_S v0, GDB_FR_HI(sp) +- mflo v0 +- LONG_S v0, GDB_FR_LO(sp) +- +-/* +- * Now the integer registers +- */ +- +- LONG_S zero, GDB_FR_REG0(sp) /* I know... */ +- LONG_S $1, GDB_FR_REG1(sp) +- /* v0 already saved */ +- LONG_S $3, GDB_FR_REG3(sp) +- LONG_S $4, GDB_FR_REG4(sp) +- LONG_S $5, GDB_FR_REG5(sp) +- LONG_S $6, GDB_FR_REG6(sp) +- LONG_S $7, GDB_FR_REG7(sp) +- LONG_S $8, GDB_FR_REG8(sp) +- LONG_S $9, GDB_FR_REG9(sp) +- LONG_S $10, GDB_FR_REG10(sp) +- LONG_S $11, GDB_FR_REG11(sp) +- LONG_S $12, GDB_FR_REG12(sp) +- LONG_S $13, GDB_FR_REG13(sp) +- LONG_S $14, GDB_FR_REG14(sp) +- LONG_S $15, GDB_FR_REG15(sp) +- LONG_S $16, GDB_FR_REG16(sp) +- LONG_S $17, GDB_FR_REG17(sp) +- LONG_S $18, GDB_FR_REG18(sp) +- LONG_S $19, GDB_FR_REG19(sp) +- LONG_S $20, GDB_FR_REG20(sp) +- LONG_S $21, GDB_FR_REG21(sp) +- LONG_S $22, GDB_FR_REG22(sp) +- LONG_S $23, GDB_FR_REG23(sp) +- LONG_S $24, GDB_FR_REG24(sp) +- LONG_S $25, GDB_FR_REG25(sp) +- LONG_S $26, GDB_FR_REG26(sp) +- LONG_S $27, GDB_FR_REG27(sp) +- LONG_S $28, GDB_FR_REG28(sp) +- /* sp already saved */ +- LONG_S $30, GDB_FR_REG30(sp) +- LONG_S $31, GDB_FR_REG31(sp) +- +- CLI /* disable interrupts */ +- TRACE_IRQS_OFF +- +-/* +- * Followed by the floating point registers +- */ +- mfc0 v0, CP0_STATUS /* FPU enabled? 
*/ +- srl v0, v0, 16 +- andi v0, v0, (ST0_CU1 >> 16) +- +- beqz v0,2f /* disabled, skip */ +- nop +- +- SDC1 $0, GDB_FR_FPR0(sp) +- SDC1 $1, GDB_FR_FPR1(sp) +- SDC1 $2, GDB_FR_FPR2(sp) +- SDC1 $3, GDB_FR_FPR3(sp) +- SDC1 $4, GDB_FR_FPR4(sp) +- SDC1 $5, GDB_FR_FPR5(sp) +- SDC1 $6, GDB_FR_FPR6(sp) +- SDC1 $7, GDB_FR_FPR7(sp) +- SDC1 $8, GDB_FR_FPR8(sp) +- SDC1 $9, GDB_FR_FPR9(sp) +- SDC1 $10, GDB_FR_FPR10(sp) +- SDC1 $11, GDB_FR_FPR11(sp) +- SDC1 $12, GDB_FR_FPR12(sp) +- SDC1 $13, GDB_FR_FPR13(sp) +- SDC1 $14, GDB_FR_FPR14(sp) +- SDC1 $15, GDB_FR_FPR15(sp) +- SDC1 $16, GDB_FR_FPR16(sp) +- SDC1 $17, GDB_FR_FPR17(sp) +- SDC1 $18, GDB_FR_FPR18(sp) +- SDC1 $19, GDB_FR_FPR19(sp) +- SDC1 $20, GDB_FR_FPR20(sp) +- SDC1 $21, GDB_FR_FPR21(sp) +- SDC1 $22, GDB_FR_FPR22(sp) +- SDC1 $23, GDB_FR_FPR23(sp) +- SDC1 $24, GDB_FR_FPR24(sp) +- SDC1 $25, GDB_FR_FPR25(sp) +- SDC1 $26, GDB_FR_FPR26(sp) +- SDC1 $27, GDB_FR_FPR27(sp) +- SDC1 $28, GDB_FR_FPR28(sp) +- SDC1 $29, GDB_FR_FPR29(sp) +- SDC1 $30, GDB_FR_FPR30(sp) +- SDC1 $31, GDB_FR_FPR31(sp) +- +-/* +- * FPU control registers +- */ +- +- cfc1 v0, CP1_STATUS +- LONG_S v0, GDB_FR_FSR(sp) +- cfc1 v0, CP1_REVISION +- LONG_S v0, GDB_FR_FIR(sp) +- +-/* +- * Current stack frame ptr +- */ +- +-2: +- LONG_S sp, GDB_FR_FRP(sp) +- +-/* +- * CP0 registers (R4000/R4400 unused registers skipped) +- */ +- +- mfc0 v0, CP0_INDEX +- LONG_S v0, GDB_FR_CP0_INDEX(sp) +- mfc0 v0, CP0_RANDOM +- LONG_S v0, GDB_FR_CP0_RANDOM(sp) +- DMFC0 v0, CP0_ENTRYLO0 +- LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp) +- DMFC0 v0, CP0_ENTRYLO1 +- LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp) +- DMFC0 v0, CP0_CONTEXT +- LONG_S v0, GDB_FR_CP0_CONTEXT(sp) +- mfc0 v0, CP0_PAGEMASK +- LONG_S v0, GDB_FR_CP0_PAGEMASK(sp) +- mfc0 v0, CP0_WIRED +- LONG_S v0, GDB_FR_CP0_WIRED(sp) +- DMFC0 v0, CP0_ENTRYHI +- LONG_S v0, GDB_FR_CP0_ENTRYHI(sp) +- mfc0 v0, CP0_PRID +- LONG_S v0, GDB_FR_CP0_PRID(sp) +- +- .set at +- +-/* +- * Continue with the higher level handler +- */ +- +- move a0,sp +- +- jal handle_exception +- nop +- +-/* +- * Restore all writable registers, in reverse order +- */ +- +- .set noat +- +- LONG_L v0, GDB_FR_CP0_ENTRYHI(sp) +- LONG_L v1, GDB_FR_CP0_WIRED(sp) +- DMTC0 v0, CP0_ENTRYHI +- mtc0 v1, CP0_WIRED +- LONG_L v0, GDB_FR_CP0_PAGEMASK(sp) +- LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp) +- mtc0 v0, CP0_PAGEMASK +- DMTC0 v1, CP0_ENTRYLO1 +- LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp) +- LONG_L v1, GDB_FR_CP0_INDEX(sp) +- DMTC0 v0, CP0_ENTRYLO0 +- LONG_L v0, GDB_FR_CP0_CONTEXT(sp) +- mtc0 v1, CP0_INDEX +- DMTC0 v0, CP0_CONTEXT +- +- +-/* +- * Next, the floating point registers +- */ +- mfc0 v0, CP0_STATUS /* check if the FPU is enabled */ +- srl v0, v0, 16 +- andi v0, v0, (ST0_CU1 >> 16) +- +- beqz v0, 3f /* disabled, skip */ +- nop +- +- LDC1 $31, GDB_FR_FPR31(sp) +- LDC1 $30, GDB_FR_FPR30(sp) +- LDC1 $29, GDB_FR_FPR29(sp) +- LDC1 $28, GDB_FR_FPR28(sp) +- LDC1 $27, GDB_FR_FPR27(sp) +- LDC1 $26, GDB_FR_FPR26(sp) +- LDC1 $25, GDB_FR_FPR25(sp) +- LDC1 $24, GDB_FR_FPR24(sp) +- LDC1 $23, GDB_FR_FPR23(sp) +- LDC1 $22, GDB_FR_FPR22(sp) +- LDC1 $21, GDB_FR_FPR21(sp) +- LDC1 $20, GDB_FR_FPR20(sp) +- LDC1 $19, GDB_FR_FPR19(sp) +- LDC1 $18, GDB_FR_FPR18(sp) +- LDC1 $17, GDB_FR_FPR17(sp) +- LDC1 $16, GDB_FR_FPR16(sp) +- LDC1 $15, GDB_FR_FPR15(sp) +- LDC1 $14, GDB_FR_FPR14(sp) +- LDC1 $13, GDB_FR_FPR13(sp) +- LDC1 $12, GDB_FR_FPR12(sp) +- LDC1 $11, GDB_FR_FPR11(sp) +- LDC1 $10, GDB_FR_FPR10(sp) +- LDC1 $9, GDB_FR_FPR9(sp) +- LDC1 $8, GDB_FR_FPR8(sp) +- LDC1 $7, GDB_FR_FPR7(sp) +- LDC1 $6, GDB_FR_FPR6(sp) +- LDC1 $5, GDB_FR_FPR5(sp) +- LDC1 $4, 
GDB_FR_FPR4(sp) +- LDC1 $3, GDB_FR_FPR3(sp) +- LDC1 $2, GDB_FR_FPR2(sp) +- LDC1 $1, GDB_FR_FPR1(sp) +- LDC1 $0, GDB_FR_FPR0(sp) +- +-/* +- * Now the CP0 and integer registers +- */ +- +-3: +-#ifdef CONFIG_MIPS_MT_SMTC +- /* Read-modify write of Status must be atomic */ +- mfc0 t2, CP0_TCSTATUS +- ori t1, t2, TCSTATUS_IXMT +- mtc0 t1, CP0_TCSTATUS +- andi t2, t2, TCSTATUS_IXMT +- _ehb +- DMT 9 # dmt t1 +- jal mips_ihb +- nop +-#endif /* CONFIG_MIPS_MT_SMTC */ +- mfc0 t0, CP0_STATUS +- ori t0, 0x1f +- xori t0, 0x1f +- mtc0 t0, CP0_STATUS +-#ifdef CONFIG_MIPS_MT_SMTC +- andi t1, t1, VPECONTROL_TE +- beqz t1, 9f +- nop +- EMT # emt +-9: +- mfc0 t1, CP0_TCSTATUS +- xori t1, t1, TCSTATUS_IXMT +- or t1, t1, t2 +- mtc0 t1, CP0_TCSTATUS +- _ehb +-#endif /* CONFIG_MIPS_MT_SMTC */ +- LONG_L v0, GDB_FR_STATUS(sp) +- LONG_L v1, GDB_FR_EPC(sp) +- mtc0 v0, CP0_STATUS +- DMTC0 v1, CP0_EPC +- LONG_L v0, GDB_FR_HI(sp) +- LONG_L v1, GDB_FR_LO(sp) +- mthi v0 +- mtlo v1 +- LONG_L $31, GDB_FR_REG31(sp) +- LONG_L $30, GDB_FR_REG30(sp) +- LONG_L $28, GDB_FR_REG28(sp) +- LONG_L $27, GDB_FR_REG27(sp) +- LONG_L $26, GDB_FR_REG26(sp) +- LONG_L $25, GDB_FR_REG25(sp) +- LONG_L $24, GDB_FR_REG24(sp) +- LONG_L $23, GDB_FR_REG23(sp) +- LONG_L $22, GDB_FR_REG22(sp) +- LONG_L $21, GDB_FR_REG21(sp) +- LONG_L $20, GDB_FR_REG20(sp) +- LONG_L $19, GDB_FR_REG19(sp) +- LONG_L $18, GDB_FR_REG18(sp) +- LONG_L $17, GDB_FR_REG17(sp) +- LONG_L $16, GDB_FR_REG16(sp) +- LONG_L $15, GDB_FR_REG15(sp) +- LONG_L $14, GDB_FR_REG14(sp) +- LONG_L $13, GDB_FR_REG13(sp) +- LONG_L $12, GDB_FR_REG12(sp) +- LONG_L $11, GDB_FR_REG11(sp) +- LONG_L $10, GDB_FR_REG10(sp) +- LONG_L $9, GDB_FR_REG9(sp) +- LONG_L $8, GDB_FR_REG8(sp) +- LONG_L $7, GDB_FR_REG7(sp) +- LONG_L $6, GDB_FR_REG6(sp) +- LONG_L $5, GDB_FR_REG5(sp) +- LONG_L $4, GDB_FR_REG4(sp) +- LONG_L $3, GDB_FR_REG3(sp) +- LONG_L $2, GDB_FR_REG2(sp) +- LONG_L $1, GDB_FR_REG1(sp) +-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) +- LONG_L k0, GDB_FR_EPC(sp) +- LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */ +- jr k0 +- rfe +-#else +- LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */ +- +- .set mips3 +- eret +- .set mips0 +-#endif +- .set at +- .set reorder +- END(trap_low) +- +-LEAF(kgdb_read_byte) +-4: lb t0, (a0) +- sb t0, (a1) +- li v0, 0 +- jr ra +- .section __ex_table,"a" +- PTR 4b, kgdbfault +- .previous +- END(kgdb_read_byte) +- +-LEAF(kgdb_write_byte) +-5: sb a0, (a1) +- li v0, 0 +- jr ra +- .section __ex_table,"a" +- PTR 5b, kgdbfault +- .previous +- END(kgdb_write_byte) +- +- .type kgdbfault@function +- .ent kgdbfault +- +-kgdbfault: li v0, -EFAULT +- jr ra +- .end kgdbfault +diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-stub.c linux-2.6.22-591/arch/mips/kernel/gdb-stub.c +--- linux-2.6.22-570/arch/mips/kernel/gdb-stub.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/kernel/gdb-stub.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1154 +0,0 @@ +-/* +- * arch/mips/kernel/gdb-stub.c +- * +- * Originally written by Glenn Engel, Lake Stevens Instrument Division +- * +- * Contributed by HP Systems +- * +- * Modified for SPARC by Stu Grossman, Cygnus Support. +- * +- * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse +- * Send complaints, suggestions etc. to +- * +- * Copyright (C) 1995 Andreas Busse +- * +- * Copyright (C) 2003 MontaVista Software Inc. +- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net +- */ +- +-/* +- * To enable debugger support, two things need to happen. 
One, a +- * call to set_debug_traps() is necessary in order to allow any breakpoints +- * or error conditions to be properly intercepted and reported to gdb. +- * Two, a breakpoint needs to be generated to begin communication. This +- * is most easily accomplished by a call to breakpoint(). Breakpoint() +- * simulates a breakpoint by executing a BREAK instruction. +- * +- * +- * The following gdb commands are supported: +- * +- * command function Return value +- * +- * g return the value of the CPU registers hex data or ENN +- * G set the value of the CPU registers OK or ENN +- * +- * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN +- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN +- * +- * c Resume at current address SNN ( signal NN) +- * cAA..AA Continue at address AA..AA SNN +- * +- * s Step one instruction SNN +- * sAA..AA Step one instruction from AA..AA SNN +- * +- * k kill +- * +- * ? What was the last sigval ? SNN (signal NN) +- * +- * bBB..BB Set baud rate to BB..BB OK or BNN, then sets +- * baud rate +- * +- * All commands and responses are sent with a packet which includes a +- * checksum. A packet consists of +- * +- * $#. +- * +- * where +- * :: +- * :: < two hex digits computed as modulo 256 sum of > +- * +- * When a packet is received, it is first acknowledged with either '+' or '-'. +- * '+' indicates a successful transfer. '-' indicates a failed transfer. +- * +- * Example: +- * +- * Host: Reply: +- * $m0,10#2a +$00010203040506070809101112131415#42 +- * +- * +- * ============== +- * MORE EXAMPLES: +- * ============== +- * +- * For reference -- the following are the steps that one +- * company took (RidgeRun Inc) to get remote gdb debugging +- * going. In this scenario the host machine was a PC and the +- * target platform was a Galileo EVB64120A MIPS evaluation +- * board. +- * +- * Step 1: +- * First download gdb-5.0.tar.gz from the internet. +- * and then build/install the package. +- * +- * Example: +- * $ tar zxf gdb-5.0.tar.gz +- * $ cd gdb-5.0 +- * $ ./configure --target=mips-linux-elf +- * $ make +- * $ install +- * $ which mips-linux-elf-gdb +- * /usr/local/bin/mips-linux-elf-gdb +- * +- * Step 2: +- * Configure linux for remote debugging and build it. +- * +- * Example: +- * $ cd ~/linux +- * $ make menuconfig +- * $ make +- * +- * Step 3: +- * Download the kernel to the remote target and start +- * the kernel running. It will promptly halt and wait +- * for the host gdb session to connect. It does this +- * since the "Kernel Hacking" option has defined +- * CONFIG_KGDB which in turn enables your calls +- * to: +- * set_debug_traps(); +- * breakpoint(); +- * +- * Step 4: +- * Start the gdb session on the host. +- * +- * Example: +- * $ mips-linux-elf-gdb vmlinux +- * (gdb) set remotebaud 115200 +- * (gdb) target remote /dev/ttyS1 +- * ...at this point you are connected to +- * the remote target and can use gdb +- * in the normal fasion. Setting +- * breakpoints, single stepping, +- * printing variables, etc. 
+- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * external low-level support routines +- */ +- +-extern int putDebugChar(char c); /* write a single character */ +-extern char getDebugChar(void); /* read and return a single char */ +-extern void trap_low(void); +- +-/* +- * breakpoint and test functions +- */ +-extern void breakpoint(void); +-extern void breakinst(void); +-extern void async_breakpoint(void); +-extern void async_breakinst(void); +-extern void adel(void); +- +-/* +- * local prototypes +- */ +- +-static void getpacket(char *buffer); +-static void putpacket(char *buffer); +-static int computeSignal(int tt); +-static int hex(unsigned char ch); +-static int hexToInt(char **ptr, int *intValue); +-static int hexToLong(char **ptr, long *longValue); +-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault); +-void handle_exception(struct gdb_regs *regs); +- +-int kgdb_enabled; +- +-/* +- * spin locks for smp case +- */ +-static DEFINE_SPINLOCK(kgdb_lock); +-static raw_spinlock_t kgdb_cpulock[NR_CPUS] = { +- [0 ... NR_CPUS-1] = __RAW_SPIN_LOCK_UNLOCKED, +-}; +- +-/* +- * BUFMAX defines the maximum number of characters in inbound/outbound buffers +- * at least NUMREGBYTES*2 are needed for register packets +- */ +-#define BUFMAX 2048 +- +-static char input_buffer[BUFMAX]; +-static char output_buffer[BUFMAX]; +-static int initialized; /* !0 means we've been initialized */ +-static int kgdb_started; +-static const char hexchars[]="0123456789abcdef"; +- +-/* Used to prevent crashes in memory access. Note that they'll crash anyway if +- we haven't set up fault handlers yet... */ +-int kgdb_read_byte(unsigned char *address, unsigned char *dest); +-int kgdb_write_byte(unsigned char val, unsigned char *dest); +- +-/* +- * Convert ch from a hex digit to an int +- */ +-static int hex(unsigned char ch) +-{ +- if (ch >= 'a' && ch <= 'f') +- return ch-'a'+10; +- if (ch >= '0' && ch <= '9') +- return ch-'0'; +- if (ch >= 'A' && ch <= 'F') +- return ch-'A'+10; +- return -1; +-} +- +-/* +- * scan for the sequence $# +- */ +-static void getpacket(char *buffer) +-{ +- unsigned char checksum; +- unsigned char xmitcsum; +- int i; +- int count; +- unsigned char ch; +- +- do { +- /* +- * wait around for the start character, +- * ignore all other characters +- */ +- while ((ch = (getDebugChar() & 0x7f)) != '$') ; +- +- checksum = 0; +- xmitcsum = -1; +- count = 0; +- +- /* +- * now, read until a # or end of buffer is found +- */ +- while (count < BUFMAX) { +- ch = getDebugChar(); +- if (ch == '#') +- break; +- checksum = checksum + ch; +- buffer[count] = ch; +- count = count + 1; +- } +- +- if (count >= BUFMAX) +- continue; +- +- buffer[count] = 0; +- +- if (ch == '#') { +- xmitcsum = hex(getDebugChar() & 0x7f) << 4; +- xmitcsum |= hex(getDebugChar() & 0x7f); +- +- if (checksum != xmitcsum) +- putDebugChar('-'); /* failed checksum */ +- else { +- putDebugChar('+'); /* successful transfer */ +- +- /* +- * if a sequence char is present, +- * reply the sequence ID +- */ +- if (buffer[2] == ':') { +- putDebugChar(buffer[0]); +- putDebugChar(buffer[1]); +- +- /* +- * remove sequence chars from buffer +- */ +- count = strlen(buffer); +- for (i=3; i <= count; i++) +- buffer[i-3] = buffer[i]; +- } +- } +- } +- } +- while (checksum != xmitcsum); +-} +- +-/* +- * send the packet in buffer. 
+- */
+-static void putpacket(char *buffer)
+-{
+-        unsigned char checksum;
+-        int count;
+-        unsigned char ch;
+-
+-        /*
+-         * $<packet info>#<checksum>.
+-         */
+-
+-        do {
+-                putDebugChar('$');
+-                checksum = 0;
+-                count = 0;
+-
+-                while ((ch = buffer[count]) != 0) {
+-                        if (!(putDebugChar(ch)))
+-                                return;
+-                        checksum += ch;
+-                        count += 1;
+-                }
+-
+-                putDebugChar('#');
+-                putDebugChar(hexchars[checksum >> 4]);
+-                putDebugChar(hexchars[checksum & 0xf]);
+-
+-        }
+-        while ((getDebugChar() & 0x7f) != '+');
+-}
+-
+-
+-/*
+- * Convert the memory pointed to by mem into hex, placing result in buf.
+- * Return a pointer to the last char put in buf (null), in case of mem fault,
+- * return 0.
+- * may_fault is non-zero if we are reading from arbitrary memory, but is
+- * currently not used.
+- */
+-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault)
+-{
+-        unsigned char ch;
+-
+-        while (count-- > 0) {
+-                if (kgdb_read_byte(mem++, &ch) != 0)
+-                        return 0;
+-                *buf++ = hexchars[ch >> 4];
+-                *buf++ = hexchars[ch & 0xf];
+-        }
+-
+-        *buf = 0;
+-
+-        return buf;
+-}
+-
+-/*
+- * convert the hex array pointed to by buf into binary to be placed in mem
+- * return a pointer to the character AFTER the last byte written
+- * may_fault is non-zero if we are writing to arbitrary memory, but is
+- * currently not used.
+- */
+-static char *hex2mem(char *buf, char *mem, int count, int binary, int may_fault)
+-{
+-        int i;
+-        unsigned char ch;
+-
+-        for (i=0; i<count; i++)
+-        {
+-                if (binary) {
+-                        ch = *buf++;
+-                        if (ch == 0x7d)
+-                                ch = 0x20 ^ *buf++;
+-                }
+-                else {
+-                        ch = hex(*buf++) << 4;
+-                        ch |= hex(*buf++);
+-                }
+-                if (kgdb_write_byte(ch, mem++) != 0)
+-                        return 0;
+-        }
+-
+-        return mem;
+-}
+-
+-static struct hard_trap_info {
+-        unsigned char tt;       /* Trap type code for MIPS R3xxx and R4xxx */
+-        unsigned char signo;    /* Signal that we map this trap into */
+-} hard_trap_info[] = {
+-        { 6, SIGBUS },          /* instruction bus error */
+-        { 7, SIGBUS },          /* data bus error */
+-        { 9, SIGTRAP },         /* break */
+-        { 12, SIGFPE },         /* overflow */
+-        { 13, SIGTRAP },        /* trap */
+-        { 14, SIGSEGV },        /* virtual instruction cache coherency */
+-        { 15, SIGFPE },         /* floating point exception */
+-        { 23, SIGSEGV },        /* watch */
+-        { 31, SIGSEGV },        /* virtual data cache coherency */
+-        { 0, 0}                 /* Must be last */
+-};
+-
+-/* Save the normal trap handlers for user-mode traps. */
+-void *saved_vectors[32];
+-
+-/*
+- * Set up exception handlers for tracing and breakpoints
+- */
+-void set_debug_traps(void)
+-{
+-        struct hard_trap_info *ht;
+-        unsigned long flags;
+-        unsigned char c;
+-
+-        local_irq_save(flags);
+-        for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-                saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low);
+-
+-        putDebugChar('+'); /* 'hello world' */
+-        /*
+-         * In case GDB is started before us, ack any packets
+-         * (presumably "$?#xx") sitting there.
+-         */
+-        while((c = getDebugChar()) != '$');
+-        while((c = getDebugChar()) != '#');
+-        c = getDebugChar(); /* eat first csum byte */
+-        c = getDebugChar(); /* eat second csum byte */
+-        putDebugChar('+'); /* ack it */
+-
+-        initialized = 1;
+-        local_irq_restore(flags);
+-}
+-
+-void restore_debug_traps(void)
+-{
+-        struct hard_trap_info *ht;
+-        unsigned long flags;
+-
+-        local_irq_save(flags);
+-        for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-                set_except_vector(ht->tt, saved_vectors[ht->tt]);
+-        local_irq_restore(flags);
+-}
+-
+-/*
+- * Convert the MIPS hardware trap type code to a Unix signal number.
+- */
+-static int computeSignal(int tt)
+-{
+-        struct hard_trap_info *ht;
+-
+-        for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-                if (ht->tt == tt)
+-                        return ht->signo;
+-
+-        return SIGHUP;          /* default for things we don't know about */
+-}
+-
+-/*
+- * While we find nice hex chars, build an int.
+- * Return number of chars processed.
+- */
+-static int hexToInt(char **ptr, int *intValue)
+-{
+-        int numChars = 0;
+-        int hexValue;
+-
+-        *intValue = 0;
+-
+-        while (**ptr) {
+-                hexValue = hex(**ptr);
+-                if (hexValue < 0)
+-                        break;
+-
+-                *intValue = (*intValue << 4) | hexValue;
+-                numChars++;
+-
+-                (*ptr)++;
+-        }
+-
+-        return numChars;
+-}
+-
+-static int hexToLong(char **ptr, long *longValue)
+-{
+-        int numChars = 0;
+-        int hexValue;
+-
+-        *longValue = 0;
+-
+-        while (**ptr) {
+-                hexValue = hex(**ptr);
+-                if (hexValue < 0)
+-                        break;
+-
+-                *longValue = (*longValue << 4) | hexValue;
+-                numChars++;
+-
+-                (*ptr)++;
+-        }
+-
+-        return numChars;
+-}
+-
+-
+-#if 0
+-/*
+- * Print registers (on target console)
+- * Used only to debug the stub...
+- */ +-void show_gdbregs(struct gdb_regs * regs) +-{ +- /* +- * Saved main processor registers +- */ +- printk("$0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", +- regs->reg0, regs->reg1, regs->reg2, regs->reg3, +- regs->reg4, regs->reg5, regs->reg6, regs->reg7); +- printk("$8 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", +- regs->reg8, regs->reg9, regs->reg10, regs->reg11, +- regs->reg12, regs->reg13, regs->reg14, regs->reg15); +- printk("$16: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", +- regs->reg16, regs->reg17, regs->reg18, regs->reg19, +- regs->reg20, regs->reg21, regs->reg22, regs->reg23); +- printk("$24: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", +- regs->reg24, regs->reg25, regs->reg26, regs->reg27, +- regs->reg28, regs->reg29, regs->reg30, regs->reg31); +- +- /* +- * Saved cp0 registers +- */ +- printk("epc : %08lx\nStatus: %08lx\nCause : %08lx\n", +- regs->cp0_epc, regs->cp0_status, regs->cp0_cause); +-} +-#endif /* dead code */ +- +-/* +- * We single-step by setting breakpoints. When an exception +- * is handled, we need to restore the instructions hoisted +- * when the breakpoints were set. +- * +- * This is where we save the original instructions. +- */ +-static struct gdb_bp_save { +- unsigned long addr; +- unsigned int val; +-} step_bp[2]; +- +-#define BP 0x0000000d /* break opcode */ +- +-/* +- * Set breakpoint instructions for single stepping. +- */ +-static void single_step(struct gdb_regs *regs) +-{ +- union mips_instruction insn; +- unsigned long targ; +- int is_branch, is_cond, i; +- +- targ = regs->cp0_epc; +- insn.word = *(unsigned int *)targ; +- is_branch = is_cond = 0; +- +- switch (insn.i_format.opcode) { +- /* +- * jr and jalr are in r_format format. +- */ +- case spec_op: +- switch (insn.r_format.func) { +- case jalr_op: +- case jr_op: +- targ = *(®s->reg0 + insn.r_format.rs); +- is_branch = 1; +- break; +- } +- break; +- +- /* +- * This group contains: +- * bltz_op, bgez_op, bltzl_op, bgezl_op, +- * bltzal_op, bgezal_op, bltzall_op, bgezall_op. +- */ +- case bcond_op: +- is_branch = is_cond = 1; +- targ += 4 + (insn.i_format.simmediate << 2); +- break; +- +- /* +- * These are unconditional and in j_format. +- */ +- case jal_op: +- case j_op: +- is_branch = 1; +- targ += 4; +- targ >>= 28; +- targ <<= 28; +- targ |= (insn.j_format.target << 2); +- break; +- +- /* +- * These are conditional. +- */ +- case beq_op: +- case beql_op: +- case bne_op: +- case bnel_op: +- case blez_op: +- case blezl_op: +- case bgtz_op: +- case bgtzl_op: +- case cop0_op: +- case cop1_op: +- case cop2_op: +- case cop1x_op: +- is_branch = is_cond = 1; +- targ += 4 + (insn.i_format.simmediate << 2); +- break; +- } +- +- if (is_branch) { +- i = 0; +- if (is_cond && targ != (regs->cp0_epc + 8)) { +- step_bp[i].addr = regs->cp0_epc + 8; +- step_bp[i++].val = *(unsigned *)(regs->cp0_epc + 8); +- *(unsigned *)(regs->cp0_epc + 8) = BP; +- } +- step_bp[i].addr = targ; +- step_bp[i].val = *(unsigned *)targ; +- *(unsigned *)targ = BP; +- } else { +- step_bp[0].addr = regs->cp0_epc + 4; +- step_bp[0].val = *(unsigned *)(regs->cp0_epc + 4); +- *(unsigned *)(regs->cp0_epc + 4) = BP; +- } +-} +- +-/* +- * If asynchronously interrupted by gdb, then we need to set a breakpoint +- * at the interrupted instruction so that we wind up stopped with a +- * reasonable stack frame. +- */ +-static struct gdb_bp_save async_bp; +- +-/* +- * Swap the interrupted EPC with our asynchronous breakpoint routine. 
+- * This is safer than stuffing the breakpoint in-place, since no cache
+- * flushes (or resulting smp_call_functions) are required.  The
+- * assumption is that only one CPU will be handling asynchronous bp's,
+- * and only one can be active at a time.
+- */
+-extern spinlock_t smp_call_lock;
+-
+-void set_async_breakpoint(unsigned long *epc)
+-{
+-        /* skip breaking into userland */
+-        if ((*epc & 0x80000000) == 0)
+-                return;
+-
+-#ifdef CONFIG_SMP
+-        /* avoid deadlock if another CPU is inside smp_call_function (IPI in flight) */
+-        if (spin_is_locked(&smp_call_lock))
+-                return;
+-#endif
+-
+-        async_bp.addr = *epc;
+-        *epc = (unsigned long)async_breakpoint;
+-}
+-
+-static void kgdb_wait(void *arg)
+-{
+-        unsigned long flags;
+-        int cpu = smp_processor_id();
+-
+-        local_irq_save(flags);
+-
+-        __raw_spin_lock(&kgdb_cpulock[cpu]);
+-        __raw_spin_unlock(&kgdb_cpulock[cpu]);
+-
+-        local_irq_restore(flags);
+-}
+-
+-/*
+- * The GDB stub needs to call kgdb_wait on all processors with interrupts
+- * disabled, so it uses its own special variant of smp_call_function.
+- */
+-static int kgdb_smp_call_kgdb_wait(void)
+-{
+-#ifdef CONFIG_SMP
+-        struct call_data_struct data;
+-        int i, cpus = num_online_cpus() - 1;
+-        int cpu = smp_processor_id();
+-
+-        /*
+-         * Can die spectacularly if this CPU isn't yet marked online
+-         */
+-        BUG_ON(!cpu_online(cpu));
+-
+-        if (!cpus)
+-                return 0;
+-
+-        if (spin_is_locked(&smp_call_lock)) {
+-                /*
+-                 * Some other processor is trying to make us do something
+-                 * but we're not going to respond... give up
+-                 */
+-                return -1;
+-        }
+-
+-        /*
+-         * We will continue here, accepting the fact that
+-         * the kernel may deadlock if another CPU attempts
+-         * to call smp_call_function now...
+-         */
+-
+-        data.func = kgdb_wait;
+-        data.info = NULL;
+-        atomic_set(&data.started, 0);
+-        data.wait = 0;
+-
+-        spin_lock(&smp_call_lock);
+-        call_data = &data;
+-        mb();
+-
+-        /* Send a message to all other CPUs and wait for them to respond */
+-        for (i = 0; i < NR_CPUS; i++)
+-                if (cpu_online(i) && i != cpu)
+-                        core_send_ipi(i, SMP_CALL_FUNCTION);
+-
+-        /* Wait for response */
+-        /* FIXME: lock-up detection, backtrace on lock-up */
+-        while (atomic_read(&data.started) != cpus)
+-                barrier();
+-
+-        call_data = NULL;
+-        spin_unlock(&smp_call_lock);
+-#endif
+-
+-        return 0;
+-}
+-
+-/*
+- * This function does all command processing for interfacing to gdb.
+- */
+-void handle_exception(struct gdb_regs *regs)
+-{
+-        int trap;               /* Trap type */
+-        int sigval;
+-        long addr;
+-        int length;
+-        char *ptr;
+-        unsigned long *stack;
+-        int i;
+-        int bflag = 0;
+-
+-        kgdb_started = 1;
+-
+-        /*
+-         * acquire the big kgdb spinlock
+-         */
+-        if (!spin_trylock(&kgdb_lock)) {
+-                /*
+-                 * some other CPU has the lock, we should go back to
+-                 * receive the kgdb_wait IPI
+-                 */
+-                return;
+-        }
+-
+-        /*
+-         * If we're in async_breakpoint(), restore the real EPC from
+-         * the breakpoint.
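+-         * (set_async_breakpoint() redirected the interrupted EPC to
+-         * async_breakinst, so putting the saved address back makes gdb
+-         * report the stop at the instruction that was actually
+-         * interrupted.)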
+- */ +- if (regs->cp0_epc == (unsigned long)async_breakinst) { +- regs->cp0_epc = async_bp.addr; +- async_bp.addr = 0; +- } +- +- /* +- * acquire the CPU spinlocks +- */ +- for (i = num_online_cpus()-1; i >= 0; i--) +- if (__raw_spin_trylock(&kgdb_cpulock[i]) == 0) +- panic("kgdb: couldn't get cpulock %d\n", i); +- +- /* +- * force other cpus to enter kgdb +- */ +- kgdb_smp_call_kgdb_wait(); +- +- /* +- * If we're in breakpoint() increment the PC +- */ +- trap = (regs->cp0_cause & 0x7c) >> 2; +- if (trap == 9 && regs->cp0_epc == (unsigned long)breakinst) +- regs->cp0_epc += 4; +- +- /* +- * If we were single_stepping, restore the opcodes hoisted +- * for the breakpoint[s]. +- */ +- if (step_bp[0].addr) { +- *(unsigned *)step_bp[0].addr = step_bp[0].val; +- step_bp[0].addr = 0; +- +- if (step_bp[1].addr) { +- *(unsigned *)step_bp[1].addr = step_bp[1].val; +- step_bp[1].addr = 0; +- } +- } +- +- stack = (long *)regs->reg29; /* stack ptr */ +- sigval = computeSignal(trap); +- +- /* +- * reply to host that an exception has occurred +- */ +- ptr = output_buffer; +- +- /* +- * Send trap type (converted to signal) +- */ +- *ptr++ = 'T'; +- *ptr++ = hexchars[sigval >> 4]; +- *ptr++ = hexchars[sigval & 0xf]; +- +- /* +- * Send Error PC +- */ +- *ptr++ = hexchars[REG_EPC >> 4]; +- *ptr++ = hexchars[REG_EPC & 0xf]; +- *ptr++ = ':'; +- ptr = mem2hex((char *)®s->cp0_epc, ptr, sizeof(long), 0); +- *ptr++ = ';'; +- +- /* +- * Send frame pointer +- */ +- *ptr++ = hexchars[REG_FP >> 4]; +- *ptr++ = hexchars[REG_FP & 0xf]; +- *ptr++ = ':'; +- ptr = mem2hex((char *)®s->reg30, ptr, sizeof(long), 0); +- *ptr++ = ';'; +- +- /* +- * Send stack pointer +- */ +- *ptr++ = hexchars[REG_SP >> 4]; +- *ptr++ = hexchars[REG_SP & 0xf]; +- *ptr++ = ':'; +- ptr = mem2hex((char *)®s->reg29, ptr, sizeof(long), 0); +- *ptr++ = ';'; +- +- *ptr++ = 0; +- putpacket(output_buffer); /* send it off... 
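+- * A break (trap 9), for instance, maps to SIGTRAP (5), so the reply
+- * built above starts with "T05", followed by one "NN:<hex value>;"
+- * triplet for each of EPC, frame pointer and stack pointer.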
*/ +- +- /* +- * Wait for input from remote GDB +- */ +- while (1) { +- output_buffer[0] = 0; +- getpacket(input_buffer); +- +- switch (input_buffer[0]) +- { +- case '?': +- output_buffer[0] = 'S'; +- output_buffer[1] = hexchars[sigval >> 4]; +- output_buffer[2] = hexchars[sigval & 0xf]; +- output_buffer[3] = 0; +- break; +- +- /* +- * Detach debugger; let CPU run +- */ +- case 'D': +- putpacket(output_buffer); +- goto finish_kgdb; +- break; +- +- case 'd': +- /* toggle debug flag */ +- break; +- +- /* +- * Return the value of the CPU registers +- */ +- case 'g': +- ptr = output_buffer; +- ptr = mem2hex((char *)®s->reg0, ptr, 32*sizeof(long), 0); /* r0...r31 */ +- ptr = mem2hex((char *)®s->cp0_status, ptr, 6*sizeof(long), 0); /* cp0 */ +- ptr = mem2hex((char *)®s->fpr0, ptr, 32*sizeof(long), 0); /* f0...31 */ +- ptr = mem2hex((char *)®s->cp1_fsr, ptr, 2*sizeof(long), 0); /* cp1 */ +- ptr = mem2hex((char *)®s->frame_ptr, ptr, 2*sizeof(long), 0); /* frp */ +- ptr = mem2hex((char *)®s->cp0_index, ptr, 16*sizeof(long), 0); /* cp0 */ +- break; +- +- /* +- * set the value of the CPU registers - return OK +- */ +- case 'G': +- { +- ptr = &input_buffer[1]; +- hex2mem(ptr, (char *)®s->reg0, 32*sizeof(long), 0, 0); +- ptr += 32*(2*sizeof(long)); +- hex2mem(ptr, (char *)®s->cp0_status, 6*sizeof(long), 0, 0); +- ptr += 6*(2*sizeof(long)); +- hex2mem(ptr, (char *)®s->fpr0, 32*sizeof(long), 0, 0); +- ptr += 32*(2*sizeof(long)); +- hex2mem(ptr, (char *)®s->cp1_fsr, 2*sizeof(long), 0, 0); +- ptr += 2*(2*sizeof(long)); +- hex2mem(ptr, (char *)®s->frame_ptr, 2*sizeof(long), 0, 0); +- ptr += 2*(2*sizeof(long)); +- hex2mem(ptr, (char *)®s->cp0_index, 16*sizeof(long), 0, 0); +- strcpy(output_buffer,"OK"); +- } +- break; +- +- /* +- * mAA..AA,LLLL Read LLLL bytes at address AA..AA +- */ +- case 'm': +- ptr = &input_buffer[1]; +- +- if (hexToLong(&ptr, &addr) +- && *ptr++ == ',' +- && hexToInt(&ptr, &length)) { +- if (mem2hex((char *)addr, output_buffer, length, 1)) +- break; +- strcpy (output_buffer, "E03"); +- } else +- strcpy(output_buffer,"E01"); +- break; +- +- /* +- * XAA..AA,LLLL: Write LLLL escaped binary bytes at address AA.AA +- */ +- case 'X': +- bflag = 1; +- /* fall through */ +- +- /* +- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK +- */ +- case 'M': +- ptr = &input_buffer[1]; +- +- if (hexToLong(&ptr, &addr) +- && *ptr++ == ',' +- && hexToInt(&ptr, &length) +- && *ptr++ == ':') { +- if (hex2mem(ptr, (char *)addr, length, bflag, 1)) +- strcpy(output_buffer, "OK"); +- else +- strcpy(output_buffer, "E03"); +- } +- else +- strcpy(output_buffer, "E02"); +- break; +- +- /* +- * cAA..AA Continue at address AA..AA(optional) +- */ +- case 'c': +- /* try to read optional parameter, pc unchanged if no parm */ +- +- ptr = &input_buffer[1]; +- if (hexToLong(&ptr, &addr)) +- regs->cp0_epc = addr; +- +- goto exit_kgdb_exception; +- break; +- +- /* +- * kill the program; let us try to restart the machine +- * Reset the whole machine. +- */ +- case 'k': +- case 'r': +- machine_restart("kgdb restarts machine"); +- break; +- +- /* +- * Step to next instruction +- */ +- case 's': +- /* +- * There is no single step insn in the MIPS ISA, so we +- * use breakpoints and continue, instead. 
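+- * single_step() plants BREAK opcodes at the instruction(s) that can
+- * execute next: the fall-through address (when it is reachable) and,
+- * for a branch, the branch target as well.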
+- */ +- single_step(regs); +- goto exit_kgdb_exception; +- /* NOTREACHED */ +- break; +- +- /* +- * Set baud rate (bBB) +- * FIXME: Needs to be written +- */ +- case 'b': +- { +-#if 0 +- int baudrate; +- extern void set_timer_3(); +- +- ptr = &input_buffer[1]; +- if (!hexToInt(&ptr, &baudrate)) +- { +- strcpy(output_buffer,"B01"); +- break; +- } +- +- /* Convert baud rate to uart clock divider */ +- +- switch (baudrate) +- { +- case 38400: +- baudrate = 16; +- break; +- case 19200: +- baudrate = 33; +- break; +- case 9600: +- baudrate = 65; +- break; +- default: +- baudrate = 0; +- strcpy(output_buffer,"B02"); +- goto x1; +- } +- +- if (baudrate) { +- putpacket("OK"); /* Ack before changing speed */ +- set_timer_3(baudrate); /* Set it */ +- } +-#endif +- } +- break; +- +- } /* switch */ +- +- /* +- * reply to the request +- */ +- +- putpacket(output_buffer); +- +- } /* while */ +- +- return; +- +-finish_kgdb: +- restore_debug_traps(); +- +-exit_kgdb_exception: +- /* release locks so other CPUs can go */ +- for (i = num_online_cpus()-1; i >= 0; i--) +- __raw_spin_unlock(&kgdb_cpulock[i]); +- spin_unlock(&kgdb_lock); +- +- __flush_cache_all(); +- return; +-} +- +-/* +- * This function will generate a breakpoint exception. It is used at the +- * beginning of a program to sync up with a debugger and can be used +- * otherwise as a quick means to stop program execution and "break" into +- * the debugger. +- */ +-void breakpoint(void) +-{ +- if (!initialized) +- return; +- +- __asm__ __volatile__( +- ".globl breakinst\n\t" +- ".set\tnoreorder\n\t" +- "nop\n" +- "breakinst:\tbreak\n\t" +- "nop\n\t" +- ".set\treorder" +- ); +-} +- +-/* Nothing but the break; don't pollute any registers */ +-void async_breakpoint(void) +-{ +- __asm__ __volatile__( +- ".globl async_breakinst\n\t" +- ".set\tnoreorder\n\t" +- "nop\n" +- "async_breakinst:\tbreak\n\t" +- "nop\n\t" +- ".set\treorder" +- ); +-} +- +-void adel(void) +-{ +- __asm__ __volatile__( +- ".globl\tadel\n\t" +- "lui\t$8,0x8000\n\t" +- "lw\t$9,1($8)\n\t" +- ); +-} +- +-/* +- * malloc is needed by gdb client in "call func()", even a private one +- * will make gdb happy +- */ +-static void * __attribute_used__ malloc(size_t size) +-{ +- return kmalloc(size, GFP_ATOMIC); +-} +- +-static void __attribute_used__ free (void *where) +-{ +- kfree(where); +-} +- +-#ifdef CONFIG_GDB_CONSOLE +- +-void gdb_putsn(const char *str, int l) +-{ +- char outbuf[18]; +- +- if (!kgdb_started) +- return; +- +- outbuf[0]='O'; +- +- while(l) { +- int i = (l>8)?8:l; +- mem2hex((char *)str, &outbuf[1], i, 0); +- outbuf[(i*2)+1]=0; +- putpacket(outbuf); +- str += i; +- l -= i; +- } +-} +- +-static void gdb_console_write(struct console *con, const char *s, unsigned n) +-{ +- gdb_putsn(s, n); +-} +- +-static struct console gdb_console = { +- .name = "gdb", +- .write = gdb_console_write, +- .flags = CON_PRINTBUFFER, +- .index = -1 +-}; +- +-static int __init register_gdb_console(void) +-{ +- register_console(&gdb_console); +- +- return 0; +-} +- +-console_initcall(register_gdb_console); +- +-#endif +diff -Nurb linux-2.6.22-570/arch/mips/kernel/irq.c linux-2.6.22-591/arch/mips/kernel/irq.c +--- linux-2.6.22-570/arch/mips/kernel/irq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -25,6 +25,10 @@ + #include + #include + #include ++#include ++ ++/* Keep track of if we've done certain initialization already or not. 
*/ ++int kgdb_early_setup; + + static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; + +@@ -138,28 +142,23 @@ + atomic_inc(&irq_err_count); + } + +-#ifdef CONFIG_KGDB +-extern void breakpoint(void); +-extern void set_debug_traps(void); +- +-static int kgdb_flag = 1; +-static int __init nokgdb(char *str) ++void __init init_IRQ(void) + { +- kgdb_flag = 0; +- return 1; +-} +-__setup("nokgdb", nokgdb); ++ ++#ifdef CONFIG_KGDB ++ if (kgdb_early_setup) ++ return; + #endif + +-void __init init_IRQ(void) +-{ + arch_init_irq(); + ++ + #ifdef CONFIG_KGDB +- if (kgdb_flag) { +- printk("Wait for gdb client connection ...\n"); +- set_debug_traps(); +- breakpoint(); +- } ++ /* ++ * We have been called before kgdb_arch_init(). Hence, ++ * we dont want the traps to be reinitialized ++ */ ++ if (kgdb_early_setup == 0) ++ kgdb_early_setup = 1; + #endif + } +diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c +--- linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/kernel/kgdb-jmp.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,110 @@ ++/* ++ * arch/mips/kernel/kgdb-jmp.c ++ * ++ * Save and restore system registers so that within a limited frame we ++ * may have a fault and "jump back" to a known safe location. ++ * ++ * Author: Tom Rini ++ * Author: Manish Lachwani ++ * ++ * Cribbed from glibc, which carries the following: ++ * Copyright (C) 1996, 1997, 2000, 2002, 2003 Free Software Foundation, Inc. ++ * Copyright (C) 2005-2006 by MontaVista Software. ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of ++ * any kind, whether express or implied. ++ */ ++ ++#include ++ ++#ifdef CONFIG_64BIT ++/* ++ * MIPS 64-bit ++ */ ++ ++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp) ++{ ++ __asm__ __volatile__ ("sd $gp, %0" : : "m" (curr_context[0])); ++ __asm__ __volatile__ ("sd $16, %0" : : "m" (curr_context[1])); ++ __asm__ __volatile__ ("sd $17, %0" : : "m" (curr_context[2])); ++ __asm__ __volatile__ ("sd $18, %0" : : "m" (curr_context[3])); ++ __asm__ __volatile__ ("sd $19, %0" : : "m" (curr_context[4])); ++ __asm__ __volatile__ ("sd $20, %0" : : "m" (curr_context[5])); ++ __asm__ __volatile__ ("sd $21, %0" : : "m" (curr_context[6])); ++ __asm__ __volatile__ ("sd $22, %0" : : "m" (curr_context[7])); ++ __asm__ __volatile__ ("sd $23, %0" : : "m" (curr_context[8])); ++ __asm__ __volatile__ ("sd $31, %0" : : "m" (curr_context[9])); ++ curr_context[10] = sp; ++ curr_context[11] = fp; ++ ++ return 0; ++} ++ ++void kgdb_fault_longjmp(unsigned long *curr_context) ++{ ++ __asm__ __volatile__ ("ld $gp, %0" : : "m" (curr_context[0])); ++ __asm__ __volatile__ ("ld $16, %0" : : "m" (curr_context[1])); ++ __asm__ __volatile__ ("ld $17, %0" : : "m" (curr_context[2])); ++ __asm__ __volatile__ ("ld $18, %0" : : "m" (curr_context[3])); ++ __asm__ __volatile__ ("ld $19, %0" : : "m" (curr_context[4])); ++ __asm__ __volatile__ ("ld $20, %0" : : "m" (curr_context[5])); ++ __asm__ __volatile__ ("ld $21, %0" : : "m" (curr_context[6])); ++ __asm__ __volatile__ ("ld $22, %0" : : "m" (curr_context[7])); ++ __asm__ __volatile__ ("ld $23, %0" : : "m" (curr_context[8])); ++ __asm__ __volatile__ ("ld $25, %0" : : "m" (curr_context[9])); ++ __asm__ __volatile__ ("ld $29, %0\n\t" ++ "ld $30, %1\n\t" : : ++ "m" (curr_context[10]), "m" (curr_context[11])); ++ ++ __asm__ __volatile__ ("dli $2, 
1"); ++ __asm__ __volatile__ ("j $25"); ++ ++ for (;;); ++} ++#else ++/* ++ * MIPS 32-bit ++ */ ++ ++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp) ++{ ++ __asm__ __volatile__("sw $gp, %0" : : "m" (curr_context[0])); ++ __asm__ __volatile__("sw $16, %0" : : "m" (curr_context[1])); ++ __asm__ __volatile__("sw $17, %0" : : "m" (curr_context[2])); ++ __asm__ __volatile__("sw $18, %0" : : "m" (curr_context[3])); ++ __asm__ __volatile__("sw $19, %0" : : "m" (curr_context[4])); ++ __asm__ __volatile__("sw $20, %0" : : "m" (curr_context[5])); ++ __asm__ __volatile__("sw $21, %0" : : "m" (curr_context[6])); ++ __asm__ __volatile__("sw $22, %0" : : "m" (curr_context[7])); ++ __asm__ __volatile__("sw $23, %0" : : "m" (curr_context[8])); ++ __asm__ __volatile__("sw $31, %0" : : "m" (curr_context[9])); ++ curr_context[10] = sp; ++ curr_context[11] = fp; ++ ++ return 0; ++} ++ ++void kgdb_fault_longjmp(unsigned long *curr_context) ++{ ++ __asm__ __volatile__("lw $gp, %0" : : "m" (curr_context[0])); ++ __asm__ __volatile__("lw $16, %0" : : "m" (curr_context[1])); ++ __asm__ __volatile__("lw $17, %0" : : "m" (curr_context[2])); ++ __asm__ __volatile__("lw $18, %0" : : "m" (curr_context[3])); ++ __asm__ __volatile__("lw $19, %0" : : "m" (curr_context[4])); ++ __asm__ __volatile__("lw $20, %0" : : "m" (curr_context[5])); ++ __asm__ __volatile__("lw $21, %0" : : "m" (curr_context[6])); ++ __asm__ __volatile__("lw $22, %0" : : "m" (curr_context[7])); ++ __asm__ __volatile__("lw $23, %0" : : "m" (curr_context[8])); ++ __asm__ __volatile__("lw $25, %0" : : "m" (curr_context[9])); ++ ++ __asm__ __volatile__("lw $29, %0\n\t" ++ "lw $30, %1\n\t" : : ++ "m" (curr_context[10]), "m" (curr_context[11])); ++ ++ __asm__ __volatile__("li $2, 1"); ++ __asm__ __volatile__("jr $25"); ++ ++ for (;;); ++} ++#endif +diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S +--- linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/kernel/kgdb-setjmp.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,28 @@ ++/* ++ * arch/mips/kernel/kgdb-jmp.c ++ * ++ * Save and restore system registers so that within a limited frame we ++ * may have a fault and "jump back" to a known safe location. ++ * ++ * Copyright (C) 2005 by MontaVista Software. ++ * Author: Manish Lachwani (mlachwani@mvista.com) ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of ++ * any kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++ .ent kgdb_fault_setjmp,0 ++ENTRY (kgdb_fault_setjmp) ++ move a1, sp ++ move a2, fp ++#ifdef CONFIG_64BIT ++ nop ++#endif ++ j kgdb_fault_setjmp_aux ++ .end kgdb_fault_setjmp +diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb.c linux-2.6.22-591/arch/mips/kernel/kgdb.c +--- linux-2.6.22-570/arch/mips/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,299 @@ ++/* ++ * arch/mips/kernel/kgdb.c ++ * ++ * Originally written by Glenn Engel, Lake Stevens Instrument Division ++ * ++ * Contributed by HP Systems ++ * ++ * Modified for SPARC by Stu Grossman, Cygnus Support. ++ * ++ * Modified for Linux/MIPS (and MIPS in general) by Andreas Busse ++ * Send complaints, suggestions etc. 
to ++ * ++ * Copyright (C) 1995 Andreas Busse ++ * ++ * Copyright (C) 2003 MontaVista Software Inc. ++ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net ++ * ++ * Copyright (C) 2004-2005 MontaVista Software Inc. ++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* for linux pt_regs struct */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct hard_trap_info { ++ unsigned char tt; /* Trap type code for MIPS R3xxx and R4xxx */ ++ unsigned char signo; /* Signal that we map this trap into */ ++} hard_trap_info[] = { ++ { 6, SIGBUS }, /* instruction bus error */ ++ { 7, SIGBUS }, /* data bus error */ ++ { 9, SIGTRAP }, /* break */ ++/* { 11, SIGILL }, */ /* CPU unusable */ ++ { 12, SIGFPE }, /* overflow */ ++ { 13, SIGTRAP }, /* trap */ ++ { 14, SIGSEGV }, /* virtual instruction cache coherency */ ++ { 15, SIGFPE }, /* floating point exception */ ++ { 23, SIGSEGV }, /* watch */ ++ { 31, SIGSEGV }, /* virtual data cache coherency */ ++ { 0, 0} /* Must be last */ ++}; ++ ++/* Save the normal trap handlers for user-mode traps. */ ++void *saved_vectors[32]; ++ ++extern void trap_low(void); ++extern void breakinst(void); ++extern void init_IRQ(void); ++ ++void kgdb_call_nmi_hook(void *ignored) ++{ ++ kgdb_nmihook(smp_processor_id(), (void *)0); ++} ++ ++void kgdb_roundup_cpus(unsigned long flags) ++{ ++ local_irq_enable(); ++ smp_call_function(kgdb_call_nmi_hook, 0, 0, 0); ++ local_irq_disable(); ++} ++ ++static int compute_signal(int tt) ++{ ++ struct hard_trap_info *ht; ++ ++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++) ++ if (ht->tt == tt) ++ return ht->signo; ++ ++ return SIGHUP; /* default for things we don't know about */ ++} ++ ++/* ++ * Set up exception handlers for tracing and breakpoints ++ */ ++void handle_exception(struct pt_regs *regs) ++{ ++ int trap = (regs->cp0_cause & 0x7c) >> 2; ++ ++ if (fixup_exception(regs)) { ++ return; ++ } ++ ++ if (atomic_read(&debugger_active)) ++ kgdb_nmihook(smp_processor_id(), regs); ++ ++ if (atomic_read(&kgdb_setting_breakpoint)) ++ if ((trap == 9) && (regs->cp0_epc == (unsigned long)breakinst)) ++ regs->cp0_epc += 4; ++ ++ kgdb_handle_exception(0, compute_signal(trap), 0, regs); ++ ++ /* In SMP mode, __flush_cache_all does IPI */ ++ local_irq_enable(); ++ __flush_cache_all(); ++} ++ ++void set_debug_traps(void) ++{ ++ struct hard_trap_info *ht; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ ++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++) ++ saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low); ++ ++ local_irq_restore(flags); ++} ++ ++#if 0 ++/* This should be called before we exit kgdb_handle_exception() I believe. 
++ * -- Tom ++ */ ++void restore_debug_traps(void) ++{ ++ struct hard_trap_info *ht; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++) ++ set_except_vector(ht->tt, saved_vectors[ht->tt]); ++ local_irq_restore(flags); ++} ++#endif ++ ++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ int reg; ++ gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; ++ ++ for (reg = 0; reg < 32; reg++) ++ *(ptr++) = regs->regs[reg]; ++ ++ *(ptr++) = regs->cp0_status; ++ *(ptr++) = regs->lo; ++ *(ptr++) = regs->hi; ++ *(ptr++) = regs->cp0_badvaddr; ++ *(ptr++) = regs->cp0_cause; ++ *(ptr++) = regs->cp0_epc; ++ ++ return; ++} ++ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ ++ int reg; ++ const gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; ++ ++ for (reg = 0; reg < 32; reg++) ++ regs->regs[reg] = *(ptr++); ++ ++ regs->cp0_status = *(ptr++); ++ regs->lo = *(ptr++); ++ regs->hi = *(ptr++); ++ regs->cp0_badvaddr = *(ptr++); ++ regs->cp0_cause = *(ptr++); ++ regs->cp0_epc = *(ptr++); ++ ++ return; ++} ++ ++/* ++ * Similar to regs_to_gdb_regs() except that process is sleeping and so ++ * we may not be able to get all the info. ++ */ ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) ++{ ++ int reg; ++ struct thread_info *ti = task_thread_info(p); ++ unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32; ++ struct pt_regs *regs = (struct pt_regs *)ksp - 1; ++ gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs; ++ ++ for (reg = 0; reg < 16; reg++) ++ *(ptr++) = regs->regs[reg]; ++ ++ /* S0 - S7 */ ++ for (reg = 16; reg < 24; reg++) ++ *(ptr++) = regs->regs[reg]; ++ ++ for (reg = 24; reg < 28; reg++) ++ *(ptr++) = 0; ++ ++ /* GP, SP, FP, RA */ ++ for (reg = 28; reg < 32; reg++) ++ *(ptr++) = regs->regs[reg]; ++ ++ *(ptr++) = regs->cp0_status; ++ *(ptr++) = regs->lo; ++ *(ptr++) = regs->hi; ++ *(ptr++) = regs->cp0_badvaddr; ++ *(ptr++) = regs->cp0_cause; ++ *(ptr++) = regs->cp0_epc; ++ ++ return; ++} ++ ++/* ++ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled, ++ * then try to fall into the debugger ++ */ ++static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, ++ void *ptr) ++{ ++ struct die_args *args = (struct die_args *)ptr; ++ struct pt_regs *regs = args->regs; ++ int trap = (regs->cp0_cause & 0x7c) >> 2; ++ ++ /* See if KGDB is interested. */ ++ if (user_mode(regs)) ++ /* Userpace events, ignore. 
*/ ++ return NOTIFY_DONE; ++ ++ kgdb_handle_exception(trap, compute_signal(trap), 0, regs); ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block kgdb_notifier = { ++ .notifier_call = kgdb_mips_notify, ++}; ++ ++/* ++ * Handle the 's' and 'c' commands ++ */ ++int kgdb_arch_handle_exception(int vector, int signo, int err_code, ++ char *remcom_in_buffer, char *remcom_out_buffer, ++ struct pt_regs *regs) ++{ ++ char *ptr; ++ unsigned long address; ++ int cpu = smp_processor_id(); ++ ++ switch (remcom_in_buffer[0]) { ++ case 's': ++ case 'c': ++ /* handle the optional parameter */ ++ ptr = &remcom_in_buffer[1]; ++ if (kgdb_hex2long(&ptr, &address)) ++ regs->cp0_epc = address; ++ ++ atomic_set(&cpu_doing_single_step, -1); ++ if (remcom_in_buffer[0] == 's') ++ if (kgdb_contthread) ++ atomic_set(&cpu_doing_single_step, cpu); ++ ++ return 0; ++ } ++ ++ return -1; ++} ++ ++struct kgdb_arch arch_kgdb_ops = { ++#ifdef CONFIG_CPU_LITTLE_ENDIAN ++ .gdb_bpt_instr = {0xd}, ++#else ++ .gdb_bpt_instr = {0x00, 0x00, 0x00, 0x0d}, ++#endif ++}; ++ ++/* ++ * We use kgdb_early_setup so that functions we need to call now don't ++ * cause trouble when called again later. ++ */ ++__init int kgdb_arch_init(void) ++{ ++ /* Board-specifics. */ ++ /* Force some calls to happen earlier. */ ++ if (kgdb_early_setup == 0) { ++ trap_init(); ++ init_IRQ(); ++ kgdb_early_setup = 1; ++ } ++ ++ /* Set our traps. */ ++ /* This needs to be done more finely grained again, paired in ++ * a before/after in kgdb_handle_exception(...) -- Tom */ ++ set_debug_traps(); ++ register_die_notifier(&kgdb_notifier); ++ ++ return 0; ++} +diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S +--- linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/kernel/kgdb_handler.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,339 @@ ++/* ++ * arch/mips/kernel/kgdb_handler.S ++ * ++ * Copyright (C) 2007 Wind River Systems, Inc ++ * ++ * Copyright (C) 2004-2005 MontaVista Software Inc. ++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com ++ * ++ * This file is licensed under the terms of the GNU General Public ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++/* ++ * Trap Handler for the new KGDB framework. The main KGDB handler is ++ * handle_exception that will be called from here ++ * ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_32BIT ++#define DMFC0 mfc0 ++#define DMTC0 mtc0 ++#define LDC1 lwc1 ++#define SDC1 swc1 ++#endif ++#ifdef CONFIG_64BIT ++#define DMFC0 dmfc0 ++#define DMTC0 dmtc0 ++#define LDC1 ldc1 ++#define SDC1 sdc1 ++#endif ++ ++#include ++ ++/* ++ * [jsun] We reserves about 2x GDB_FR_SIZE in stack. The lower (addressed) ++ * part is used to store registers and passed to exception handler. ++ * The upper part is reserved for "call func" feature where gdb client ++ * saves some of the regs, setups call frame and passes args. ++ * ++ * A trace shows about 200 bytes are used to store about half of all regs. ++ * The rest should be big enough for frame setup and passing args. ++ */ ++ ++/* ++ * The low level trap handler ++ */ ++ .align 5 ++ NESTED(trap_low, GDB_FR_SIZE, sp) ++ .set noat ++ .set noreorder ++ ++ mfc0 k0, CP0_STATUS ++ sll k0, 3 /* extract cu0 bit */ ++ bltz k0, 1f ++ move k1, sp ++ ++ /* ++ * Called from user mode, go somewhere else. 
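++ * (CP0_CAUSE & 0x7c) is the ExcCode field already shifted left by
++ * two, so it indexes the saved_vectors[] table of handler pointers
++ * directly as a byte offset on 32-bit (and is doubled once more below
++ * for 64-bit pointers).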
++ */ ++#if defined(CONFIG_32BIT) ++ lui k1, %hi(saved_vectors) ++ mfc0 k0, CP0_CAUSE ++ andi k0, k0, 0x7c ++ add k1, k1, k0 ++ lw k0, %lo(saved_vectors)(k1) ++#elif defined(CONFIG_64BIT) && defined(CONFIG_BUILD_ELF64) ++ DMFC0 k0, CP0_CAUSE ++ lui k1, %highest(saved_vectors) ++ andi k0, k0, 0x7c /* mask exception type */ ++ dsll k0, 1 /* turn into byte offset */ ++ daddiu k1, %higher(saved_vectors) ++ dsll k1, k1, 16 ++ daddiu k1, %hi(saved_vectors) ++ dsll k1, k1, 16 ++ daddu k1, k1, k0 ++ LONG_L k0, %lo(saved_vectors)(k1) ++#else ++#error "MIPS configuration is unsupported for kgdb!!" ++#endif ++ jr k0 ++ nop ++1: ++ move k0, sp ++ PTR_SUBU sp, k1, GDB_FR_SIZE*2 # see comment above ++ LONG_S k0, GDB_FR_REG29(sp) ++ LONG_S $2, GDB_FR_REG2(sp) ++ ++/* ++ * First save the CP0 and special registers ++ */ ++ ++ mfc0 v0, CP0_STATUS ++ LONG_S v0, GDB_FR_STATUS(sp) ++ mfc0 v0, CP0_CAUSE ++ LONG_S v0, GDB_FR_CAUSE(sp) ++ DMFC0 v0, CP0_EPC ++ LONG_S v0, GDB_FR_EPC(sp) ++ DMFC0 v0, CP0_BADVADDR ++ LONG_S v0, GDB_FR_BADVADDR(sp) ++ mfhi v0 ++ LONG_S v0, GDB_FR_HI(sp) ++ mflo v0 ++ LONG_S v0, GDB_FR_LO(sp) ++ ++/* ++ * Now the integer registers ++ */ ++ ++ LONG_S zero, GDB_FR_REG0(sp) /* I know... */ ++ LONG_S $1, GDB_FR_REG1(sp) ++ /* v0 already saved */ ++ LONG_S $3, GDB_FR_REG3(sp) ++ LONG_S $4, GDB_FR_REG4(sp) ++ LONG_S $5, GDB_FR_REG5(sp) ++ LONG_S $6, GDB_FR_REG6(sp) ++ LONG_S $7, GDB_FR_REG7(sp) ++ LONG_S $8, GDB_FR_REG8(sp) ++ LONG_S $9, GDB_FR_REG9(sp) ++ LONG_S $10, GDB_FR_REG10(sp) ++ LONG_S $11, GDB_FR_REG11(sp) ++ LONG_S $12, GDB_FR_REG12(sp) ++ LONG_S $13, GDB_FR_REG13(sp) ++ LONG_S $14, GDB_FR_REG14(sp) ++ LONG_S $15, GDB_FR_REG15(sp) ++ LONG_S $16, GDB_FR_REG16(sp) ++ LONG_S $17, GDB_FR_REG17(sp) ++ LONG_S $18, GDB_FR_REG18(sp) ++ LONG_S $19, GDB_FR_REG19(sp) ++ LONG_S $20, GDB_FR_REG20(sp) ++ LONG_S $21, GDB_FR_REG21(sp) ++ LONG_S $22, GDB_FR_REG22(sp) ++ LONG_S $23, GDB_FR_REG23(sp) ++ LONG_S $24, GDB_FR_REG24(sp) ++ LONG_S $25, GDB_FR_REG25(sp) ++ LONG_S $26, GDB_FR_REG26(sp) ++ LONG_S $27, GDB_FR_REG27(sp) ++ LONG_S $28, GDB_FR_REG28(sp) ++ /* sp already saved */ ++ LONG_S $30, GDB_FR_REG30(sp) ++ LONG_S $31, GDB_FR_REG31(sp) ++ ++ CLI /* disable interrupts */ ++ ++/* ++ * Followed by the floating point registers ++ */ ++ mfc0 v0, CP0_STATUS /* FPU enabled? 
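++ * Testing (Status >> 16) & (ST0_CU1 >> 16) checks the coprocessor-1
++ * usable bit; the FP context is only saved when the FPU is enabled.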
*/ ++ srl v0, v0, 16 ++ andi v0, v0, (ST0_CU1 >> 16) ++ ++ beqz v0,3f /* disabled, skip */ ++ nop ++ ++ li t0, 0 ++#ifdef CONFIG_64BIT ++ mfc0 t0, CP0_STATUS ++#endif ++ fpu_save_double_kgdb sp t0 t1 # clobbers t1 ++ ++ ++/* ++ * Current stack frame ptr ++ */ ++ ++3: ++ LONG_S sp, GDB_FR_FRP(sp) ++ ++/* ++ * CP0 registers (R4000/R4400 unused registers skipped) ++ */ ++ ++ mfc0 v0, CP0_INDEX ++ LONG_S v0, GDB_FR_CP0_INDEX(sp) ++ mfc0 v0, CP0_RANDOM ++ LONG_S v0, GDB_FR_CP0_RANDOM(sp) ++ DMFC0 v0, CP0_ENTRYLO0 ++ LONG_S v0, GDB_FR_CP0_ENTRYLO0(sp) ++ DMFC0 v0, CP0_ENTRYLO1 ++ LONG_S v0, GDB_FR_CP0_ENTRYLO1(sp) ++ DMFC0 v0, CP0_CONTEXT ++ LONG_S v0, GDB_FR_CP0_CONTEXT(sp) ++ mfc0 v0, CP0_PAGEMASK ++ LONG_S v0, GDB_FR_CP0_PAGEMASK(sp) ++ mfc0 v0, CP0_WIRED ++ LONG_S v0, GDB_FR_CP0_WIRED(sp) ++ DMFC0 v0, CP0_ENTRYHI ++ LONG_S v0, GDB_FR_CP0_ENTRYHI(sp) ++ mfc0 v0, CP0_PRID ++ LONG_S v0, GDB_FR_CP0_PRID(sp) ++ ++ .set at ++ ++/* ++ * Continue with the higher level handler ++ */ ++ ++ move a0,sp ++ ++ jal handle_exception ++ nop ++ ++/* ++ * Restore all writable registers, in reverse order ++ */ ++ ++ .set noat ++ ++ LONG_L v0, GDB_FR_CP0_ENTRYHI(sp) ++ LONG_L v1, GDB_FR_CP0_WIRED(sp) ++ DMTC0 v0, CP0_ENTRYHI ++ mtc0 v1, CP0_WIRED ++ LONG_L v0, GDB_FR_CP0_PAGEMASK(sp) ++ LONG_L v1, GDB_FR_CP0_ENTRYLO1(sp) ++ mtc0 v0, CP0_PAGEMASK ++ DMTC0 v1, CP0_ENTRYLO1 ++ LONG_L v0, GDB_FR_CP0_ENTRYLO0(sp) ++ LONG_L v1, GDB_FR_CP0_INDEX(sp) ++ DMTC0 v0, CP0_ENTRYLO0 ++ LONG_L v0, GDB_FR_CP0_CONTEXT(sp) ++ mtc0 v1, CP0_INDEX ++ DMTC0 v0, CP0_CONTEXT ++ ++ ++/* ++ * Next, the floating point registers ++ */ ++ mfc0 v0, CP0_STATUS /* check if the FPU is enabled */ ++ srl v0, v0, 16 ++ andi v0, v0, (ST0_CU1 >> 16) ++ ++ beqz v0, 3f /* disabled, skip */ ++ nop ++ ++ li t0, 0 ++#ifdef CONFIG_64BIT ++ mfc0 t0, CP0_STATUS ++#endif ++ fpu_restore_double_kgdb sp t0 t1 # clobbers t1 ++ ++ ++/* ++ * Now the CP0 and integer registers ++ */ ++ ++3: ++ mfc0 t0, CP0_STATUS ++ ori t0, 0x1f ++ xori t0, 0x1f ++ mtc0 t0, CP0_STATUS ++ ++ LONG_L v0, GDB_FR_STATUS(sp) ++ LONG_L v1, GDB_FR_EPC(sp) ++ mtc0 v0, CP0_STATUS ++ DMTC0 v1, CP0_EPC ++ LONG_L v0, GDB_FR_HI(sp) ++ LONG_L v1, GDB_FR_LO(sp) ++ mthi v0 ++ mtlo v1 ++ LONG_L $31, GDB_FR_REG31(sp) ++ LONG_L $30, GDB_FR_REG30(sp) ++ LONG_L $28, GDB_FR_REG28(sp) ++ LONG_L $27, GDB_FR_REG27(sp) ++ LONG_L $26, GDB_FR_REG26(sp) ++ LONG_L $25, GDB_FR_REG25(sp) ++ LONG_L $24, GDB_FR_REG24(sp) ++ LONG_L $23, GDB_FR_REG23(sp) ++ LONG_L $22, GDB_FR_REG22(sp) ++ LONG_L $21, GDB_FR_REG21(sp) ++ LONG_L $20, GDB_FR_REG20(sp) ++ LONG_L $19, GDB_FR_REG19(sp) ++ LONG_L $18, GDB_FR_REG18(sp) ++ LONG_L $17, GDB_FR_REG17(sp) ++ LONG_L $16, GDB_FR_REG16(sp) ++ LONG_L $15, GDB_FR_REG15(sp) ++ LONG_L $14, GDB_FR_REG14(sp) ++ LONG_L $13, GDB_FR_REG13(sp) ++ LONG_L $12, GDB_FR_REG12(sp) ++ LONG_L $11, GDB_FR_REG11(sp) ++ LONG_L $10, GDB_FR_REG10(sp) ++ LONG_L $9, GDB_FR_REG9(sp) ++ LONG_L $8, GDB_FR_REG8(sp) ++ LONG_L $7, GDB_FR_REG7(sp) ++ LONG_L $6, GDB_FR_REG6(sp) ++ LONG_L $5, GDB_FR_REG5(sp) ++ LONG_L $4, GDB_FR_REG4(sp) ++ LONG_L $3, GDB_FR_REG3(sp) ++ LONG_L $2, GDB_FR_REG2(sp) ++ LONG_L $1, GDB_FR_REG1(sp) ++#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX) ++ LONG_L k0, GDB_FR_EPC(sp) ++ LONG_L $29, GDB_FR_REG29(sp) /* Deallocate stack */ ++ jr k0 ++ rfe ++#else ++ LONG_L sp, GDB_FR_REG29(sp) /* Deallocate stack */ ++ ++ .set mips3 ++ eret ++ .set mips0 ++#endif ++ .set at ++ .set reorder ++ END(trap_low) ++ ++LEAF(kgdb_read_byte) ++4: lb t0, (a0) ++ sb t0, (a1) ++ li v0, 0 ++ jr ra ++ 
.section __ex_table,"a" ++ PTR 4b, kgdbfault ++ .previous ++ END(kgdb_read_byte) ++ ++LEAF(kgdb_write_byte) ++5: sb a0, (a1) ++ li v0, 0 ++ jr ra ++ .section __ex_table,"a" ++ PTR 5b, kgdbfault ++ .previous ++ END(kgdb_write_byte) ++ ++ .type kgdbfault@function ++ .ent kgdbfault ++ ++kgdbfault: li v0, -EFAULT ++ jr ra ++ .end kgdbfault +diff -Nurb linux-2.6.22-570/arch/mips/kernel/traps.c linux-2.6.22-591/arch/mips/kernel/traps.c +--- linux-2.6.22-570/arch/mips/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/mips/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 +@@ -10,6 +10,8 @@ + * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com + * Copyright (C) 2000, 01 MIPS Technologies, Inc. + * Copyright (C) 2002, 2003, 2004, 2005 Maciej W. Rozycki ++ * ++ * KGDB specific changes - Manish Lachwani (mlachwani@mvista.com) + */ + #include + #include +@@ -21,6 +23,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -42,6 +45,7 @@ + #include + #include + #include ++#include + + extern asmlinkage void handle_int(void); + extern asmlinkage void handle_tlbm(void); +@@ -1445,6 +1449,11 @@ + extern char except_vec4; + unsigned long i; + ++#if defined(CONFIG_KGDB) ++ if (kgdb_early_setup) ++ return; /* Already done */ ++#endif ++ + if (cpu_has_veic || cpu_has_vint) + ebase = (unsigned long) alloc_bootmem_low_pages (0x200 + VECTORSPACING*64); + else +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile +--- linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -17,4 +17,3 @@ + # + + obj-y := atlas_int.o atlas_setup.o +-obj-$(CONFIG_KGDB) += atlas_gdb.o +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c +--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_gdb.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,97 +0,0 @@ +-/* +- * Carsten Langgaard, carstenl@mips.com +- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. +- * +- * This program is free software; you can distribute it and/or modify it +- * under the terms of the GNU General Public License (Version 2) as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * for more details. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +- * +- * This is the interface to the remote debugger stub. +- */ +-#include +-#include +-#include +- +-#define INB(a) inb((unsigned long)a) +-#define OUTB(x,a) outb(x,(unsigned long)a) +- +-/* +- * This is the interface to the remote debugger stub +- * if the Philips part is used for the debug port, +- * called from the platform setup code. 
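+- * The baud-rate divider programmed below follows the usual 16x
+- * oversampling rule: ((ATLAS_SAA9730_BAUDCLOCK / baud) / 16) - 1.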
+- */ +-void *saa9730_base = (void *)ATLAS_SAA9730_REG; +- +-static int saa9730_kgdb_active = 0; +- +-#define SAA9730_BAUDCLOCK(baud) (((ATLAS_SAA9730_BAUDCLOCK/(baud))/16)-1) +- +-int saa9730_kgdb_hook(int speed) +-{ +- int baudclock; +- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); +- +- /* +- * Clear all interrupts +- */ +- (void) INB(&kgdb_uart->Lsr); +- (void) INB(&kgdb_uart->Msr); +- (void) INB(&kgdb_uart->Thr_Rbr); +- (void) INB(&kgdb_uart->Iir_Fcr); +- +- /* +- * Now, initialize the UART +- */ +- /* 8 data bits, one stop bit, no parity */ +- OUTB(SAA9730_LCR_DATA8, &kgdb_uart->Lcr); +- +- baudclock = SAA9730_BAUDCLOCK(speed); +- +- OUTB((baudclock >> 16) & 0xff, &kgdb_uart->BaudDivMsb); +- OUTB( baudclock & 0xff, &kgdb_uart->BaudDivLsb); +- +- /* Set RTS/DTR active */ +- OUTB(SAA9730_MCR_DTR | SAA9730_MCR_RTS, &kgdb_uart->Mcr); +- saa9730_kgdb_active = 1; +- +- return speed; +-} +- +-int saa9730_putDebugChar(char c) +-{ +- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); +- +- if (!saa9730_kgdb_active) { /* need to init device first */ +- return 0; +- } +- +- while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_THRE)) +- ; +- OUTB(c, &kgdb_uart->Thr_Rbr); +- +- return 1; +-} +- +-char saa9730_getDebugChar(void) +-{ +- t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR); +- char c; +- +- if (!saa9730_kgdb_active) { /* need to init device first */ +- return 0; +- } +- while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_DR)) +- ; +- +- c = INB(&kgdb_uart->Thr_Rbr); +- return(c); +-} +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c +--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/atlas/atlas_setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -37,10 +37,6 @@ + extern void mips_time_init(void); + extern unsigned long mips_rtc_get_time(void); + +-#ifdef CONFIG_KGDB +-extern void kgdb_config(void); +-#endif +- + static void __init serial_init(void); + + const char *get_system_type(void) +@@ -58,9 +54,6 @@ + + serial_init (); + +-#ifdef CONFIG_KGDB +- kgdb_config(); +-#endif + mips_reboot_setup(); + + board_time_init = mips_time_init; +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c +--- linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/generic/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,133 +0,0 @@ +-/* +- * Carsten Langgaard, carstenl@mips.com +- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. +- * +- * This program is free software; you can distribute it and/or modify it +- * under the terms of the GNU General Public License (Version 2) as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * for more details. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+- * +- * This is the interface to the remote debugger stub. +- */ +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-static struct serial_state rs_table[] = { +- SERIAL_PORT_DFNS /* Defined in serial.h */ +-}; +- +-static struct async_struct kdb_port_info = {0}; +- +-int (*generic_putDebugChar)(char); +-char (*generic_getDebugChar)(void); +- +-static __inline__ unsigned int serial_in(struct async_struct *info, int offset) +-{ +- return inb(info->port + offset); +-} +- +-static __inline__ void serial_out(struct async_struct *info, int offset, +- int value) +-{ +- outb(value, info->port+offset); +-} +- +-int rs_kgdb_hook(int tty_no, int speed) { +- int t; +- struct serial_state *ser = &rs_table[tty_no]; +- +- kdb_port_info.state = ser; +- kdb_port_info.magic = SERIAL_MAGIC; +- kdb_port_info.port = ser->port; +- kdb_port_info.flags = ser->flags; +- +- /* +- * Clear all interrupts +- */ +- serial_in(&kdb_port_info, UART_LSR); +- serial_in(&kdb_port_info, UART_RX); +- serial_in(&kdb_port_info, UART_IIR); +- serial_in(&kdb_port_info, UART_MSR); +- +- /* +- * Now, initialize the UART +- */ +- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); /* reset DLAB */ +- if (kdb_port_info.flags & ASYNC_FOURPORT) { +- kdb_port_info.MCR = UART_MCR_DTR | UART_MCR_RTS; +- t = UART_MCR_DTR | UART_MCR_OUT1; +- } else { +- kdb_port_info.MCR +- = UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2; +- t = UART_MCR_DTR | UART_MCR_RTS; +- } +- +- kdb_port_info.MCR = t; /* no interrupts, please */ +- serial_out(&kdb_port_info, UART_MCR, kdb_port_info.MCR); +- +- /* +- * and set the speed of the serial port +- */ +- if (speed == 0) +- speed = 9600; +- +- t = kdb_port_info.state->baud_base / speed; +- /* set DLAB */ +- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB); +- serial_out(&kdb_port_info, UART_DLL, t & 0xff);/* LS of divisor */ +- serial_out(&kdb_port_info, UART_DLM, t >> 8); /* MS of divisor */ +- /* reset DLAB */ +- serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8); +- +- return speed; +-} +- +-int putDebugChar(char c) +-{ +- return generic_putDebugChar(c); +-} +- +-char getDebugChar(void) +-{ +- return generic_getDebugChar(); +-} +- +-int rs_putDebugChar(char c) +-{ +- +- if (!kdb_port_info.state) { /* need to init device first */ +- return 0; +- } +- +- while ((serial_in(&kdb_port_info, UART_LSR) & UART_LSR_THRE) == 0) +- ; +- +- serial_out(&kdb_port_info, UART_TX, c); +- +- return 1; +-} +- +-char rs_getDebugChar(void) +-{ +- if (!kdb_port_info.state) { /* need to init device first */ +- return 0; +- } +- +- while (!(serial_in(&kdb_port_info, UART_LSR) & 1)) +- ; +- +- return serial_in(&kdb_port_info, UART_RX); +-} +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/init.c linux-2.6.22-591/arch/mips/mips-boards/generic/init.c +--- linux-2.6.22-570/arch/mips/mips-boards/generic/init.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/generic/init.c 2007-12-21 15:36:11.000000000 -0500 +@@ -37,15 +37,6 @@ + + #include + +-#ifdef CONFIG_KGDB +-extern int rs_kgdb_hook(int, int); +-extern int rs_putDebugChar(char); +-extern char rs_getDebugChar(void); +-extern int saa9730_kgdb_hook(int); +-extern int saa9730_putDebugChar(char); +-extern char saa9730_getDebugChar(void); +-#endif +- + int prom_argc; + int *_prom_argv, *_prom_envp; + +@@ -173,59 +164,6 @@ + } + #endif + +-#ifdef CONFIG_KGDB +-void __init kgdb_config (void) +-{ +- extern int (*generic_putDebugChar)(char); +- extern char (*generic_getDebugChar)(void); +- char *argptr; +- 
int line, speed; +- +- argptr = prom_getcmdline(); +- if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { +- argptr += strlen("kgdb=ttyS"); +- if (*argptr != '0' && *argptr != '1') +- printk("KGDB: Unknown serial line /dev/ttyS%c, " +- "falling back to /dev/ttyS1\n", *argptr); +- line = *argptr == '0' ? 0 : 1; +- printk("KGDB: Using serial line /dev/ttyS%d for session\n", line); +- +- speed = 0; +- if (*++argptr == ',') +- { +- int c; +- while ((c = *++argptr) && ('0' <= c && c <= '9')) +- speed = speed * 10 + c - '0'; +- } +-#ifdef CONFIG_MIPS_ATLAS +- if (line == 1) { +- speed = saa9730_kgdb_hook(speed); +- generic_putDebugChar = saa9730_putDebugChar; +- generic_getDebugChar = saa9730_getDebugChar; +- } +- else +-#endif +- { +- speed = rs_kgdb_hook(line, speed); +- generic_putDebugChar = rs_putDebugChar; +- generic_getDebugChar = rs_getDebugChar; +- } +- +- pr_info("KGDB: Using serial line /dev/ttyS%d at %d for " +- "session, please connect your debugger\n", +- line ? 1 : 0, speed); +- +- { +- char *s; +- for (s = "Please connect GDB to this port\r\n"; *s; ) +- generic_putDebugChar (*s++); +- } +- +- /* Breakpoint is invoked after interrupts are initialised */ +- } +-} +-#endif +- + void __init mips_nmi_setup (void) + { + void *base; +diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c +--- linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mips-boards/malta/malta_setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -39,10 +39,6 @@ + extern void mips_time_init(void); + extern unsigned long mips_rtc_get_time(void); + +-#ifdef CONFIG_KGDB +-extern void kgdb_config(void); +-#endif +- + struct resource standard_io_resources[] = { + { .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY }, + { .name = "timer", .start = 0x40, .end = 0x5f, .flags = IORESOURCE_BUSY }, +@@ -99,10 +95,6 @@ + */ + enable_dma(4); + +-#ifdef CONFIG_KGDB +- kgdb_config (); +-#endif +- + if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) { + char *argptr; + +diff -Nurb linux-2.6.22-570/arch/mips/mm/extable.c linux-2.6.22-591/arch/mips/mm/extable.c +--- linux-2.6.22-570/arch/mips/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 +@@ -3,6 +3,7 @@ + */ + #include + #include ++#include + #include + #include + +@@ -16,6 +17,12 @@ + + return 1; + } ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) ++ /* Restore our previous state. */ ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Not reached. 
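++ * kgdb_fault_longjmp() restores the state saved earlier by
++ * kgdb_fault_setjmp() and resumes there with a nonzero return, so
++ * the faulting access inside the stub is abandoned cleanly.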
*/ ++#endif + + return 0; + } +diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile +--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -4,5 +4,3 @@ + + obj-y += cpci-irq.o irq.o platform.o prom.o reset.o \ + setup.o uart-irq.o +- +-obj-$(CONFIG_KGDB) += dbg_io.o +diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c +--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/momentum/ocelot_c/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,121 +0,0 @@ +- +-#include /* For the serial port location and base baud */ +- +-/* --- CONFIG --- */ +- +-typedef unsigned char uint8; +-typedef unsigned int uint32; +- +-/* --- END OF CONFIG --- */ +- +-#define UART16550_BAUD_2400 2400 +-#define UART16550_BAUD_4800 4800 +-#define UART16550_BAUD_9600 9600 +-#define UART16550_BAUD_19200 19200 +-#define UART16550_BAUD_38400 38400 +-#define UART16550_BAUD_57600 57600 +-#define UART16550_BAUD_115200 115200 +- +-#define UART16550_PARITY_NONE 0 +-#define UART16550_PARITY_ODD 0x08 +-#define UART16550_PARITY_EVEN 0x18 +-#define UART16550_PARITY_MARK 0x28 +-#define UART16550_PARITY_SPACE 0x38 +- +-#define UART16550_DATA_5BIT 0x0 +-#define UART16550_DATA_6BIT 0x1 +-#define UART16550_DATA_7BIT 0x2 +-#define UART16550_DATA_8BIT 0x3 +- +-#define UART16550_STOP_1BIT 0x0 +-#define UART16550_STOP_2BIT 0x4 +- +-/* ----------------------------------------------------- */ +- +-/* === CONFIG === */ +- +-/* [jsun] we use the second serial port for kdb */ +-#define BASE OCELOT_SERIAL1_BASE +-#define MAX_BAUD OCELOT_BASE_BAUD +- +-/* === END OF CONFIG === */ +- +-#define REG_OFFSET 4 +- +-/* register offset */ +-#define OFS_RCV_BUFFER 0 +-#define OFS_TRANS_HOLD 0 +-#define OFS_SEND_BUFFER 0 +-#define OFS_INTR_ENABLE (1*REG_OFFSET) +-#define OFS_INTR_ID (2*REG_OFFSET) +-#define OFS_DATA_FORMAT (3*REG_OFFSET) +-#define OFS_LINE_CONTROL (3*REG_OFFSET) +-#define OFS_MODEM_CONTROL (4*REG_OFFSET) +-#define OFS_RS232_OUTPUT (4*REG_OFFSET) +-#define OFS_LINE_STATUS (5*REG_OFFSET) +-#define OFS_MODEM_STATUS (6*REG_OFFSET) +-#define OFS_RS232_INPUT (6*REG_OFFSET) +-#define OFS_SCRATCH_PAD (7*REG_OFFSET) +- +-#define OFS_DIVISOR_LSB (0*REG_OFFSET) +-#define OFS_DIVISOR_MSB (1*REG_OFFSET) +- +- +-/* memory-mapped read/write of the port */ +-#define UART16550_READ(y) (*((volatile uint8*)(BASE + y))) +-#define UART16550_WRITE(y, z) ((*((volatile uint8*)(BASE + y))) = z) +- +-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop) +-{ +- /* disable interrupts */ +- UART16550_WRITE(OFS_INTR_ENABLE, 0); +- +- /* set up baud rate */ +- { +- uint32 divisor; +- +- /* set DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x80); +- +- /* set divisor */ +- divisor = MAX_BAUD / baud; +- UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff); +- UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8); +- +- /* clear DIAB bit */ +- UART16550_WRITE(OFS_LINE_CONTROL, 0x0); +- } +- +- /* set data format */ +- UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop); +-} +- +-static int remoteDebugInitialized = 0; +- +-uint8 getDebugChar(void) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_38400, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, 
UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0); +- return UART16550_READ(OFS_RCV_BUFFER); +-} +- +- +-int putDebugChar(uint8 byte) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(UART16550_BAUD_38400, +- UART16550_DATA_8BIT, +- UART16550_PARITY_NONE, UART16550_STOP_1BIT); +- } +- +- while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0); +- UART16550_WRITE(OFS_SEND_BUFFER, byte); +- return 1; +-} +diff -Nurb linux-2.6.22-570/arch/mips/pci/fixup-atlas.c linux-2.6.22-591/arch/mips/pci/fixup-atlas.c +--- linux-2.6.22-570/arch/mips/pci/fixup-atlas.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/pci/fixup-atlas.c 2007-12-21 15:36:11.000000000 -0500 +@@ -68,24 +68,3 @@ + { + return 0; + } +- +-#ifdef CONFIG_KGDB +-/* +- * The PCI scan may have moved the saa9730 I/O address, so reread +- * the address here. +- * This does mean that it's not possible to debug the PCI bus configuration +- * code, but it is better than nothing... +- */ +- +-static void atlas_saa9730_base_fixup (struct pci_dev *pdev) +-{ +- extern void *saa9730_base; +- if (pdev->bus == 0 && PCI_SLOT(pdev->devfn) == 19) +- (void) pci_read_config_dword (pdev, 0x14, (u32 *)&saa9730_base); +- printk ("saa9730_base = %x\n", saa9730_base); +-} +- +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PHILIPS, PCI_DEVICE_ID_PHILIPS_SAA9730, +- atlas_saa9730_base_fixup); +- +-#endif +diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile +--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -24,4 +24,3 @@ + + obj-y := setup.o prom.o int.o reset.o time.o proc.o platform.o + obj-$(CONFIG_PCI) += pci.o +-obj-$(CONFIG_KGDB) += gdb_hook.o +diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c +--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/gdb_hook.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,109 +0,0 @@ +-/* +- * Carsten Langgaard, carstenl@mips.com +- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. +- * +- * ######################################################################## +- * +- * This program is free software; you can distribute it and/or modify it +- * under the terms of the GNU General Public License (Version 2) as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope it will be useful, but WITHOUT +- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +- * for more details. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. +- * +- * ######################################################################## +- * +- * This is the interface to the remote debugger stub. 
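+- * Both directions are polled: transmit waits for room in the TX FIFO,
+- * receive spins until the RX FIFO level is nonzero and then advances
+- * the read pointer by hand.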
+- * +- */ +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include +- +-static struct serial_state rs_table[IP3106_NR_PORTS] = { +-}; +-static struct async_struct kdb_port_info = {0}; +- +-void rs_kgdb_hook(int tty_no) +-{ +- struct serial_state *ser = &rs_table[tty_no]; +- +- kdb_port_info.state = ser; +- kdb_port_info.magic = SERIAL_MAGIC; +- kdb_port_info.port = tty_no; +- kdb_port_info.flags = ser->flags; +- +- /* +- * Clear all interrupts +- */ +- /* Clear all the transmitter FIFO counters (pointer and status) */ +- ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_TX_RST; +- /* Clear all the receiver FIFO counters (pointer and status) */ +- ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_RX_RST; +- /* Clear all interrupts */ +- ip3106_iclr(UART_BASE, tty_no) = IP3106_UART_INT_ALLRX | +- IP3106_UART_INT_ALLTX; +- +- /* +- * Now, initialize the UART +- */ +- ip3106_lcr(UART_BASE, tty_no) = IP3106_UART_LCR_8BIT; +- ip3106_baud(UART_BASE, tty_no) = 5; // 38400 Baud +-} +- +-int putDebugChar(char c) +-{ +- /* Wait until FIFO not full */ +- while (((ip3106_fifo(UART_BASE, kdb_port_info.port) & IP3106_UART_FIFO_TXFIFO) >> 16) >= 16) +- ; +- /* Send one char */ +- ip3106_fifo(UART_BASE, kdb_port_info.port) = c; +- +- return 1; +-} +- +-char getDebugChar(void) +-{ +- char ch; +- +- /* Wait until there is a char in the FIFO */ +- while (!((ip3106_fifo(UART_BASE, kdb_port_info.port) & +- IP3106_UART_FIFO_RXFIFO) >> 8)) +- ; +- /* Read one char */ +- ch = ip3106_fifo(UART_BASE, kdb_port_info.port) & +- IP3106_UART_FIFO_RBRTHR; +- /* Advance the RX FIFO read pointer */ +- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_NEXT; +- return (ch); +-} +- +-void rs_disable_debug_interrupts(void) +-{ +- ip3106_ien(UART_BASE, kdb_port_info.port) = 0; /* Disable all interrupts */ +-} +- +-void rs_enable_debug_interrupts(void) +-{ +- /* Clear all the transmitter FIFO counters (pointer and status) */ +- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_TX_RST; +- /* Clear all the receiver FIFO counters (pointer and status) */ +- ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_RST; +- /* Clear all interrupts */ +- ip3106_iclr(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX | +- IP3106_UART_INT_ALLTX; +- ip3106_ien(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX; /* Enable RX interrupts */ +-} +diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c +--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/philips/pnx8550/common/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -145,16 +145,5 @@ + ip3106_baud(UART_BASE, pnx8550_console_port) = 5; + } + +-#ifdef CONFIG_KGDB +- argptr = prom_getcmdline(); +- if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) { +- int line; +- argptr += strlen("kgdb=ttyS"); +- line = *argptr == '0' ? 0 : 1; +- rs_kgdb_hook(line); +- pr_info("KGDB: Using ttyS%i for session, " +- "please connect your debugger\n", line ? 
1 : 0); +- } +-#endif + return; + } +diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile +--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -4,5 +4,4 @@ + + obj-y += irq.o i2c-yosemite.o prom.o py-console.o setup.o + +-obj-$(CONFIG_KGDB) += dbg_io.o + obj-$(CONFIG_SMP) += smp.o +diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c +--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,180 +0,0 @@ +-/* +- * Copyright 2003 PMC-Sierra +- * Author: Manish Lachwani (lachwani@pmc-sierra.com) +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN +- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 675 Mass Ave, Cambridge, MA 02139, USA. +- */ +- +-/* +- * Support for KGDB for the Yosemite board. We make use of single serial +- * port to be used for KGDB as well as console. The second serial port +- * seems to be having a problem. Single IRQ is allocated for both the +- * ports. Hence, the interrupt routing code needs to figure out whether +- * the interrupt came from channel A or B. +- */ +- +-#include +- +-/* +- * Baud rate, Parity, Data and Stop bit settings for the +- * serial port on the Yosemite. Note that the Early printk +- * patch has been added. 
So, we should be all set to go +- */ +-#define YOSEMITE_BAUD_2400 2400 +-#define YOSEMITE_BAUD_4800 4800 +-#define YOSEMITE_BAUD_9600 9600 +-#define YOSEMITE_BAUD_19200 19200 +-#define YOSEMITE_BAUD_38400 38400 +-#define YOSEMITE_BAUD_57600 57600 +-#define YOSEMITE_BAUD_115200 115200 +- +-#define YOSEMITE_PARITY_NONE 0 +-#define YOSEMITE_PARITY_ODD 0x08 +-#define YOSEMITE_PARITY_EVEN 0x18 +-#define YOSEMITE_PARITY_MARK 0x28 +-#define YOSEMITE_PARITY_SPACE 0x38 +- +-#define YOSEMITE_DATA_5BIT 0x0 +-#define YOSEMITE_DATA_6BIT 0x1 +-#define YOSEMITE_DATA_7BIT 0x2 +-#define YOSEMITE_DATA_8BIT 0x3 +- +-#define YOSEMITE_STOP_1BIT 0x0 +-#define YOSEMITE_STOP_2BIT 0x4 +- +-/* This is crucial */ +-#define SERIAL_REG_OFS 0x1 +- +-#define SERIAL_RCV_BUFFER 0x0 +-#define SERIAL_TRANS_HOLD 0x0 +-#define SERIAL_SEND_BUFFER 0x0 +-#define SERIAL_INTR_ENABLE (1 * SERIAL_REG_OFS) +-#define SERIAL_INTR_ID (2 * SERIAL_REG_OFS) +-#define SERIAL_DATA_FORMAT (3 * SERIAL_REG_OFS) +-#define SERIAL_LINE_CONTROL (3 * SERIAL_REG_OFS) +-#define SERIAL_MODEM_CONTROL (4 * SERIAL_REG_OFS) +-#define SERIAL_RS232_OUTPUT (4 * SERIAL_REG_OFS) +-#define SERIAL_LINE_STATUS (5 * SERIAL_REG_OFS) +-#define SERIAL_MODEM_STATUS (6 * SERIAL_REG_OFS) +-#define SERIAL_RS232_INPUT (6 * SERIAL_REG_OFS) +-#define SERIAL_SCRATCH_PAD (7 * SERIAL_REG_OFS) +- +-#define SERIAL_DIVISOR_LSB (0 * SERIAL_REG_OFS) +-#define SERIAL_DIVISOR_MSB (1 * SERIAL_REG_OFS) +- +-/* +- * Functions to READ and WRITE to serial port 0 +- */ +-#define SERIAL_READ(ofs) (*((volatile unsigned char*) \ +- (TITAN_SERIAL_BASE + ofs))) +- +-#define SERIAL_WRITE(ofs, val) ((*((volatile unsigned char*) \ +- (TITAN_SERIAL_BASE + ofs))) = val) +- +-/* +- * Functions to READ and WRITE to serial port 1 +- */ +-#define SERIAL_READ_1(ofs) (*((volatile unsigned char*) \ +- (TITAN_SERIAL_BASE_1 + ofs))) +- +-#define SERIAL_WRITE_1(ofs, val) ((*((volatile unsigned char*) \ +- (TITAN_SERIAL_BASE_1 + ofs))) = val) +- +-/* +- * Second serial port initialization +- */ +-void init_second_port(void) +-{ +- /* Disable Interrupts */ +- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); +- SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0x0); +- +- { +- unsigned int divisor; +- +- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x80); +- divisor = TITAN_SERIAL_BASE_BAUD / YOSEMITE_BAUD_115200; +- SERIAL_WRITE_1(SERIAL_DIVISOR_LSB, divisor & 0xff); +- +- SERIAL_WRITE_1(SERIAL_DIVISOR_MSB, +- (divisor & 0xff00) >> 8); +- SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0); +- } +- +- SERIAL_WRITE_1(SERIAL_DATA_FORMAT, YOSEMITE_DATA_8BIT | +- YOSEMITE_PARITY_NONE | YOSEMITE_STOP_1BIT); +- +- /* Enable Interrupts */ +- SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0xf); +-} +- +-/* Initialize the serial port for KGDB debugging */ +-void debugInit(unsigned int baud, unsigned char data, unsigned char parity, +- unsigned char stop) +-{ +- /* Disable Interrupts */ +- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); +- SERIAL_WRITE(SERIAL_INTR_ENABLE, 0x0); +- +- { +- unsigned int divisor; +- +- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x80); +- +- divisor = TITAN_SERIAL_BASE_BAUD / baud; +- SERIAL_WRITE(SERIAL_DIVISOR_LSB, divisor & 0xff); +- +- SERIAL_WRITE(SERIAL_DIVISOR_MSB, (divisor & 0xff00) >> 8); +- SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0); +- } +- +- SERIAL_WRITE(SERIAL_DATA_FORMAT, data | parity | stop); +-} +- +-static int remoteDebugInitialized = 0; +- +-unsigned char getDebugChar(void) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(YOSEMITE_BAUD_115200, +- YOSEMITE_DATA_8BIT, +- YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); +- 
} +- +- while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x1) == 0); +- return SERIAL_READ(SERIAL_RCV_BUFFER); +-} +- +-int putDebugChar(unsigned char byte) +-{ +- if (!remoteDebugInitialized) { +- remoteDebugInitialized = 1; +- debugInit(YOSEMITE_BAUD_115200, +- YOSEMITE_DATA_8BIT, +- YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT); +- } +- +- while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x20) == 0); +- SERIAL_WRITE(SERIAL_SEND_BUFFER, byte); +- +- return 1; +-} +diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c +--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/pmc-sierra/yosemite/irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -137,10 +137,6 @@ + } + } + +-#ifdef CONFIG_KGDB +-extern void init_second_port(void); +-#endif +- + /* + * Initialize the next level interrupt handler + */ +@@ -152,11 +148,6 @@ + rm7k_cpu_irq_init(); + rm9k_cpu_irq_init(); + +-#ifdef CONFIG_KGDB +- /* At this point, initialize the second serial port */ +- init_second_port(); +-#endif +- + #ifdef CONFIG_GDB_CONSOLE + register_gdb_console(); + #endif +diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c +--- linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sgi-ip22/ip22-setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -101,30 +101,6 @@ + add_preferred_console("arc", 0, NULL); + } + +-#ifdef CONFIG_KGDB +- { +- char *kgdb_ttyd = prom_getcmdline(); +- +- if ((kgdb_ttyd = strstr(kgdb_ttyd, "kgdb=ttyd")) != NULL) { +- int line; +- kgdb_ttyd += strlen("kgdb=ttyd"); +- if (*kgdb_ttyd != '1' && *kgdb_ttyd != '2') +- printk(KERN_INFO "KGDB: Uknown serial line /dev/ttyd%c" +- ", falling back to /dev/ttyd1\n", *kgdb_ttyd); +- line = *kgdb_ttyd == '2' ? 0 : 1; +- printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " +- "session\n", line ? 1 : 2); +- rs_kgdb_hook(line); +- +- printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for " +- "session, please connect your debugger\n", line ? 1:2); +- +- kgdb_enabled = 1; +- /* Breakpoints and stuff are in sgi_irq_setup() */ +- } +- } +-#endif +- + #if defined(CONFIG_VT) && defined(CONFIG_SGI_NEWPORT_CONSOLE) + { + ULONG *gfxinfo; +diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/Makefile linux-2.6.22-591/arch/mips/sgi-ip27/Makefile +--- linux-2.6.22-570/arch/mips/sgi-ip27/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sgi-ip27/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -7,5 +7,4 @@ + ip27-xtalk.o + + obj-$(CONFIG_EARLY_PRINTK) += ip27-console.o +-obj-$(CONFIG_KGDB) += ip27-dbgio.o + obj-$(CONFIG_SMP) += ip27-smp.o +diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c +--- linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sgi-ip27/ip27-dbgio.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,60 +0,0 @@ +-/* +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN +- * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +- * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 675 Mass Ave, Cambridge, MA 02139, USA. +- * +- * Copyright 2004 Ralf Baechle +- */ +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-#define IOC3_CLK (22000000 / 3) +-#define IOC3_FLAGS (0) +- +-static inline struct ioc3_uartregs *console_uart(void) +-{ +- struct ioc3 *ioc3; +- +- ioc3 = (struct ioc3 *)KL_CONFIG_CH_CONS_INFO(get_nasid())->memory_base; +- +- return &ioc3->sregs.uarta; +-} +- +-unsigned char getDebugChar(void) +-{ +- struct ioc3_uartregs *uart = console_uart(); +- +- while ((uart->iu_lsr & UART_LSR_DR) == 0); +- return uart->iu_rbr; +-} +- +-void putDebugChar(unsigned char c) +-{ +- struct ioc3_uartregs *uart = console_uart(); +- +- while ((uart->iu_lsr & UART_LSR_THRE) == 0); +- uart->iu_thr = c; +-} +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c +--- linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/bcm1480/irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -57,30 +57,6 @@ + extern unsigned long ht_eoi_space; + #endif + +-#ifdef CONFIG_KGDB +-#include +-extern void breakpoint(void); +-static int kgdb_irq; +-#ifdef CONFIG_GDB_CONSOLE +-extern void register_gdb_console(void); +-#endif +- +-/* kgdb is on when configured. Pass "nokgdb" kernel arg to turn it off */ +-static int kgdb_flag = 1; +-static int __init nokgdb(char *str) +-{ +- kgdb_flag = 0; +- return 1; +-} +-__setup("nokgdb", nokgdb); +- +-/* Default to UART1 */ +-int kgdb_port = 1; +-#ifdef CONFIG_SIBYTE_SB1250_DUART +-extern char sb1250_duart_present[]; +-#endif +-#endif +- + static struct irq_chip bcm1480_irq_type = { + .name = "BCM1480-IMR", + .ack = ack_bcm1480_irq, +@@ -394,62 +370,11 @@ + * does its own management of IP7. 
+ */ + +-#ifdef CONFIG_KGDB +- imask |= STATUSF_IP6; +-#endif + /* Enable necessary IPs, disable the rest */ + change_c0_status(ST0_IM, imask); + +-#ifdef CONFIG_KGDB +- if (kgdb_flag) { +- kgdb_irq = K_BCM1480_INT_UART_0 + kgdb_port; +- +-#ifdef CONFIG_SIBYTE_SB1250_DUART +- sb1250_duart_present[kgdb_port] = 0; +-#endif +- /* Setup uart 1 settings, mapper */ +- /* QQQ FIXME */ +- __raw_writeq(M_DUART_IMR_BRK, IO_SPACE_BASE + A_DUART_IMRREG(kgdb_port)); +- +- bcm1480_steal_irq(kgdb_irq); +- __raw_writeq(IMR_IP6_VAL, +- IO_SPACE_BASE + A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) + +- (kgdb_irq<<3)); +- bcm1480_unmask_irq(0, kgdb_irq); +- +-#ifdef CONFIG_GDB_CONSOLE +- register_gdb_console(); +-#endif +- printk("Waiting for GDB on UART port %d\n", kgdb_port); +- set_debug_traps(); +- breakpoint(); +- } +-#endif + } + +-#ifdef CONFIG_KGDB +- +-#include +- +-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +- +-static void bcm1480_kgdb_interrupt(void) +-{ +- /* +- * Clear break-change status (allow some time for the remote +- * host to stop the break, since we would see another +- * interrupt on the end-of-break too) +- */ +- kstat.irqs[smp_processor_id()][kgdb_irq]++; +- mdelay(500); +- duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | +- M_DUART_RX_EN | M_DUART_TX_EN); +- set_async_breakpoint(&get_irq_regs()->cp0_epc); +-} +- +-#endif /* CONFIG_KGDB */ +- + extern void bcm1480_timer_interrupt(void); + extern void bcm1480_mailbox_interrupt(void); + +@@ -478,11 +403,6 @@ + bcm1480_mailbox_interrupt(); + #endif + +-#ifdef CONFIG_KGDB +- else if (pending & CAUSEF_IP6) +- bcm1480_kgdb_interrupt(); /* KGDB (uart 1) */ +-#endif +- + else if (pending & CAUSEF_IP2) { + unsigned long long mask_h, mask_l; + unsigned long base; +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c +--- linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/cfe/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -58,10 +58,6 @@ + extern unsigned long initrd_start, initrd_end; + #endif + +-#ifdef CONFIG_KGDB +-extern int kgdb_port; +-#endif +- + static void ATTRIB_NORET cfe_linux_exit(void *arg) + { + int warm = *(int *)arg; +@@ -242,9 +238,6 @@ + int argc = fw_arg0; + char **envp = (char **) fw_arg2; + int *prom_vec = (int *) fw_arg3; +-#ifdef CONFIG_KGDB +- char *arg; +-#endif + + _machine_restart = cfe_linux_restart; + _machine_halt = cfe_linux_halt; +@@ -308,13 +301,6 @@ + } + } + +-#ifdef CONFIG_KGDB +- if ((arg = strstr(arcs_cmdline,"kgdb=duart")) != NULL) +- kgdb_port = (arg[10] == '0') ? 
0 : 1; +- else +- kgdb_port = 1; +-#endif +- + #ifdef CONFIG_BLK_DEV_INITRD + { + char *ptr; +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile +--- linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -3,3 +3,4 @@ + obj-$(CONFIG_SMP) += smp.o + obj-$(CONFIG_SIBYTE_STANDALONE) += prom.o + obj-$(CONFIG_SIBYTE_BUS_WATCHER) += bus_watcher.o ++obj-$(CONFIG_KGDB_SIBYTE) += kgdb_sibyte.o +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c +--- linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -56,16 +57,6 @@ + extern unsigned long ldt_eoi_space; + #endif + +-#ifdef CONFIG_KGDB +-static int kgdb_irq; +- +-/* Default to UART1 */ +-int kgdb_port = 1; +-#ifdef CONFIG_SIBYTE_SB1250_DUART +-extern char sb1250_duart_present[]; +-#endif +-#endif +- + static struct irq_chip sb1250_irq_type = { + .name = "SB1250-IMR", + .ack = ack_sb1250_irq, +@@ -304,6 +295,11 @@ + unsigned int imask = STATUSF_IP4 | STATUSF_IP3 | STATUSF_IP2 | + STATUSF_IP1 | STATUSF_IP0; + ++#ifdef CONFIG_KGDB ++ if (kgdb_early_setup) ++ return; ++#endif ++ + /* Default everything to IP2 */ + for (i = 0; i < SB1250_NR_IRQS; i++) { /* was I0 */ + __raw_writeq(IMR_IP2_VAL, +@@ -349,58 +345,16 @@ + * does its own management of IP7. + */ + +-#ifdef CONFIG_KGDB ++#ifdef CONFIG_KGDB_SIBYTE + imask |= STATUSF_IP6; + #endif + /* Enable necessary IPs, disable the rest */ + change_c0_status(ST0_IM, imask); +- +-#ifdef CONFIG_KGDB +- if (kgdb_flag) { +- kgdb_irq = K_INT_UART_0 + kgdb_port; +- +-#ifdef CONFIG_SIBYTE_SB1250_DUART +- sb1250_duart_present[kgdb_port] = 0; +-#endif +- /* Setup uart 1 settings, mapper */ +- __raw_writeq(M_DUART_IMR_BRK, +- IOADDR(A_DUART_IMRREG(kgdb_port))); +- +- sb1250_steal_irq(kgdb_irq); +- __raw_writeq(IMR_IP6_VAL, +- IOADDR(A_IMR_REGISTER(0, +- R_IMR_INTERRUPT_MAP_BASE) + +- (kgdb_irq << 3))); +- sb1250_unmask_irq(0, kgdb_irq); +- } +-#endif + } + +-#ifdef CONFIG_KGDB +- +-#include +- +-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +- +-static void sb1250_kgdb_interrupt(void) +-{ +- /* +- * Clear break-change status (allow some time for the remote +- * host to stop the break, since we would see another +- * interrupt on the end-of-break too) +- */ +- kstat_this_cpu.irqs[kgdb_irq]++; +- mdelay(500); +- duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | +- M_DUART_RX_EN | M_DUART_TX_EN); +- set_async_breakpoint(&get_irq_regs()->cp0_epc); +-} +- +-#endif /* CONFIG_KGDB */ +- + extern void sb1250_timer_interrupt(void); + extern void sb1250_mailbox_interrupt(void); ++extern void sb1250_kgdb_interrupt(void); + + asmlinkage void plat_irq_dispatch(void) + { +@@ -437,7 +391,7 @@ + sb1250_mailbox_interrupt(); + #endif + +-#ifdef CONFIG_KGDB ++#ifdef CONFIG_KGDB_SIBYTE + else if (pending & CAUSEF_IP6) /* KGDB (uart 1) */ + sb1250_kgdb_interrupt(); + #endif +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c +--- linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c 1969-12-31 19:00:00.000000000 
-0500 ++++ linux-2.6.22-591/arch/mips/sibyte/sb1250/kgdb_sibyte.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,144 @@ ++/* ++ * arch/mips/sibyte/sb1250/kgdb_sibyte.c ++ * ++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com ++ * ++ * 2004 (c) MontaVista Software, Inc. This file is licensed under ++ * the terms of the GNU General Public License version 2. This program ++ * is licensed "as is" without any warranty of any kind, whether express ++ * or implied. ++ */ ++ ++/* ++ * Support for KGDB on the Broadcom Sibyte. The SWARM board ++ * for example does not have an 8250/16550 compatible serial ++ * port. Hence, we need to have a driver for the serial ++ * ports to handle KGDB. This board needs nothing in addition ++ * to what is normally provided by the gdb portion of the stub. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int kgdb_port = 1; ++static int kgdb_irq; ++ ++extern char sb1250_duart_present[]; ++extern int sb1250_steal_irq(int irq); ++ ++/* Forward declarations. */ ++static void kgdbsibyte_init_duart(void); ++static int kgdb_init_io(void); ++ ++#define IMR_IP6_VAL K_INT_MAP_I4 ++#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) ++#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) ++ ++static void kgdbsibyte_write_char(u8 c) ++{ ++ while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0) ; ++ duart_out(R_DUART_TX_HOLD, c); ++} ++ ++static int kgdbsibyte_read_char(void) ++{ ++ int ret_char; ++ unsigned int status; ++ ++ do { ++ status = duart_in(R_DUART_STATUS); ++ } while ((status & M_DUART_RX_RDY) == 0); ++ ++ /* ++ * Check for framing error ++ */ ++ if (status & M_DUART_FRM_ERR) { ++ kgdbsibyte_init_duart(); ++ kgdbsibyte_write_char('-'); ++ return '-'; ++ } ++ ++ ret_char = duart_in(R_DUART_RX_HOLD); ++ ++ return ret_char; ++} ++ ++void sb1250_kgdb_interrupt(void) ++{ ++ int kgdb_irq = K_INT_UART_0 + kgdb_port; ++ ++ /* ++ * Clear break-change status (allow some time for the remote ++ * host to stop the break, since we would see another ++ * interrupt on the end-of-break too) ++ */ ++ kstat_this_cpu.irqs[kgdb_irq]++; ++ mdelay(500); ++ duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT | ++ M_DUART_RX_EN | M_DUART_TX_EN); ++ breakpoint(); ++ ++} ++ ++/* ++ * We use port #1 and we set it for 115200 BAUD, 8n1. ++ */ ++static void kgdbsibyte_init_duart(void) ++{ ++ /* Set 8n1. */ ++ duart_out(R_DUART_MODE_REG_1, ++ V_DUART_BITS_PER_CHAR_8 | V_DUART_PARITY_MODE_NONE); ++ duart_out(R_DUART_MODE_REG_2, M_DUART_STOP_BIT_LEN_1); ++ /* Set baud rate of 115200. */ ++ duart_out(R_DUART_CLK_SEL, V_DUART_BAUD_RATE(115200)); ++ /* Enable rx and tx */ ++ duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); ++} ++ ++static int kgdb_init_io(void) ++{ ++#ifdef CONFIG_SIBYTE_SB1250_DUART ++ sb1250_duart_present[kgdb_port] = 0; ++#endif ++ ++ kgdbsibyte_init_duart(); ++ ++ return 0; ++} ++ ++/* ++ * Hookup our IRQ line. We will already have been initialized at ++ * this point. ++ */ ++static void __init kgdbsibyte_hookup_irq(void) ++{ ++ /* Steal the IRQ. 
*/ ++ kgdb_irq = K_INT_UART_0 + kgdb_port; ++ ++ /* Setup uart 1 settings, mapper */ ++ __raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port))); ++ ++ sb1250_steal_irq(kgdb_irq); ++ ++ __raw_writeq(IMR_IP6_VAL, ++ IOADDR(A_IMR_REGISTER(0, R_IMR_INTERRUPT_MAP_BASE) + ++ (kgdb_irq << 3))); ++ ++ sb1250_unmask_irq(0, kgdb_irq); ++} ++ ++struct kgdb_io kgdb_io_ops = { ++ .read_char = kgdbsibyte_read_char, ++ .write_char = kgdbsibyte_write_char, ++ .init = kgdb_init_io, ++ .late_init = kgdbsibyte_hookup_irq, ++}; +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile +--- linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/swarm/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -1,3 +1 @@ + lib-y = setup.o rtc_xicor1241.o rtc_m41t81.o +- +-lib-$(CONFIG_KGDB) += dbg_io.o +diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c +--- linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/sibyte/swarm/dbg_io.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,76 +0,0 @@ +-/* +- * kgdb debug routines for SiByte boards. +- * +- * Copyright (C) 2001 MontaVista Software Inc. +- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- */ +- +-/* -------------------- BEGINNING OF CONFIG --------------------- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * We use the second serial port for kgdb traffic. +- * 115200, 8, N, 1. 
+- */ +- +-#define BAUD_RATE 115200 +-#define CLK_DIVISOR V_DUART_BAUD_RATE(BAUD_RATE) +-#define DATA_BITS V_DUART_BITS_PER_CHAR_8 /* or 7 */ +-#define PARITY V_DUART_PARITY_MODE_NONE /* or even */ +-#define STOP_BITS M_DUART_STOP_BIT_LEN_1 /* or 2 */ +- +-static int duart_initialized = 0; /* 0: need to be init'ed by kgdb */ +- +-/* -------------------- END OF CONFIG --------------------- */ +-extern int kgdb_port; +- +-#define duart_out(reg, val) csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +-#define duart_in(reg) csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg))) +- +-void putDebugChar(unsigned char c); +-unsigned char getDebugChar(void); +-static void +-duart_init(int clk_divisor, int data, int parity, int stop) +-{ +- duart_out(R_DUART_MODE_REG_1, data | parity); +- duart_out(R_DUART_MODE_REG_2, stop); +- duart_out(R_DUART_CLK_SEL, clk_divisor); +- +- duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN); /* enable rx and tx */ +-} +- +-void +-putDebugChar(unsigned char c) +-{ +- if (!duart_initialized) { +- duart_initialized = 1; +- duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); +- } +- while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0); +- duart_out(R_DUART_TX_HOLD, c); +-} +- +-unsigned char +-getDebugChar(void) +-{ +- if (!duart_initialized) { +- duart_initialized = 1; +- duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS); +- } +- while ((duart_in(R_DUART_STATUS) & M_DUART_RX_RDY) == 0) ; +- return duart_in(R_DUART_RX_HOLD); +-} +- +diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/Makefile linux-2.6.22-591/arch/mips/tx4927/common/Makefile +--- linux-2.6.22-570/arch/mips/tx4927/common/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4927/common/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -9,4 +9,3 @@ + obj-y += tx4927_prom.o tx4927_setup.o tx4927_irq.o + + obj-$(CONFIG_TOSHIBA_FPCIB0) += smsc_fdc37m81x.o +-obj-$(CONFIG_KGDB) += tx4927_dbgio.o +diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c +--- linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4927/common/tx4927_dbgio.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,47 +0,0 @@ +-/* +- * linux/arch/mips/tx4927/common/tx4927_dbgio.c +- * +- * kgdb interface for gdb +- * +- * Author: MontaVista Software, Inc. +- * source@mvista.com +- * +- * Copyright 2001-2002 MontaVista Software Inc. +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 675 Mass Ave, Cambridge, MA 02139, USA. +- */ +- +-#include +-#include +-#include +- +-u8 getDebugChar(void) +-{ +- extern u8 txx9_sio_kdbg_rd(void); +- return (txx9_sio_kdbg_rd()); +-} +- +- +-int putDebugChar(u8 byte) +-{ +- extern int txx9_sio_kdbg_wr( u8 ch ); +- return (txx9_sio_kdbg_wr(byte)); +-} +diff -Nurb linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c +--- linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -76,7 +76,7 @@ + #include + #include + #endif +-#ifdef CONFIG_SERIAL_TXX9 ++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) + #include + #include + #include +@@ -973,9 +973,10 @@ + + #endif /* CONFIG_PCI */ + +-#ifdef CONFIG_SERIAL_TXX9 ++#if defined (CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) + { + extern int early_serial_txx9_setup(struct uart_port *port); ++ extern int txx9_kgdb_add_port(int n, struct uart_port *port); + int i; + struct uart_port req; + for(i = 0; i < 2; i++) { +@@ -987,7 +988,12 @@ + req.irq = 32 + i; + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = 50000000; ++#ifdef CONFIG_SERIAL_TXX9 + early_serial_txx9_setup(&req); ++#endif ++#ifdef CONFIG_KGDB_TXX9 ++ txx9_kgdb_add_port(i, &req); ++#endif + } + } + #ifdef CONFIG_SERIAL_TXX9_CONSOLE +@@ -996,7 +1002,7 @@ + strcat(argptr, " console=ttyS0,38400"); + } + #endif +-#endif ++#endif /* defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) */ + + #ifdef CONFIG_ROOT_NFS + argptr = prom_getcmdline(); +diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/Makefile linux-2.6.22-591/arch/mips/tx4938/common/Makefile +--- linux-2.6.22-570/arch/mips/tx4938/common/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4938/common/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -7,5 +7,4 @@ + # + + obj-y += prom.o setup.o irq.o rtc_rx5c348.o +-obj-$(CONFIG_KGDB) += dbgio.o + +diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c +--- linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4938/common/dbgio.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,50 +0,0 @@ +-/* +- * linux/arch/mips/tx4938/common/dbgio.c +- * +- * kgdb interface for gdb +- * +- * Author: MontaVista Software, Inc. +- * source@mvista.com +- * +- * Copyright 2005 MontaVista Software Inc. +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of the GNU General Public License as published by the +- * Free Software Foundation; either version 2 of the License, or (at your +- * option) any later version. +- * +- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +- * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write to the Free Software Foundation, Inc., +- * 675 Mass Ave, Cambridge, MA 02139, USA. +- * +- * Support for TX4938 in 2.6 - Hiroshi DOYU +- */ +- +-#include +-#include +-#include +- +-extern u8 txx9_sio_kdbg_rd(void); +-extern int txx9_sio_kdbg_wr( u8 ch ); +- +-u8 getDebugChar(void) +-{ +- return (txx9_sio_kdbg_rd()); +-} +- +-int putDebugChar(u8 byte) +-{ +- return (txx9_sio_kdbg_wr(byte)); +-} +- +diff -Nurb linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c +--- linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/mips/tx4938/toshiba_rbtx4938/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -30,7 +30,7 @@ + #include + #include + #include +-#ifdef CONFIG_SERIAL_TXX9 ++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) + #include + #include + #include +@@ -924,9 +924,10 @@ + set_io_port_base(RBTX4938_ETHER_BASE); + #endif + +-#ifdef CONFIG_SERIAL_TXX9 ++#if defined (CONFIG_SERIAL_TXX9) || defined (CONFIG_KGDB_TXX9) + { + extern int early_serial_txx9_setup(struct uart_port *port); ++ extern int txx9_kgdb_add_port(int n, struct uart_port *port); + int i; + struct uart_port req; + for(i = 0; i < 2; i++) { +@@ -938,7 +939,12 @@ + req.irq = 32 + i; + req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/; + req.uartclk = 50000000; ++#ifdef CONFIG_SERIAL_TXX9 + early_serial_txx9_setup(&req); ++#endif ++#ifdef CONFIG_KGDB_TXX9 ++ txx9_kgdb_add_port(i, &req); ++#endif + } + } + #ifdef CONFIG_SERIAL_TXX9_CONSOLE +diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig linux-2.6.22-591/arch/powerpc/Kconfig +--- linux-2.6.22-570/arch/powerpc/Kconfig 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -4,12 +4,7 @@ + + mainmenu "Linux/PowerPC Kernel Configuration" + +-config PPC64 +- bool "64-bit kernel" +- default n +- help +- This option selects whether a 32-bit or a 64-bit kernel +- will be built. ++source "arch/powerpc/platforms/Kconfig.cputype" + + config PPC_PM_NEEDS_RTC_LIB + bool +@@ -132,123 +127,6 @@ + depends on PPC64 && (BROKEN || (PPC_PMAC64 && EXPERIMENTAL)) + default y + +-menu "Processor support" +-choice +- prompt "Processor Type" +- depends on PPC32 +- default 6xx +- +-config CLASSIC32 +- bool "52xx/6xx/7xx/74xx" +- select PPC_FPU +- select 6xx +- help +- There are four families of PowerPC chips supported. The more common +- types (601, 603, 604, 740, 750, 7400), the Motorola embedded +- versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC +- embedded versions (403 and 405) and the high end 64 bit Power +- processors (POWER 3, POWER4, and IBM PPC970 also known as G5). +- +- This option is the catch-all for 6xx types, including some of the +- embedded versions. 
Unless there is see an option for the specific +- chip family you are using, you want this option. +- +- You do not want this if you are building a kernel for a 64 bit +- IBM RS/6000 or an Apple G5, choose 6xx. +- +- If unsure, select this option +- +- Note that the kernel runs in 32-bit mode even on 64-bit chips. +- +-config PPC_82xx +- bool "Freescale 82xx" +- select 6xx +- select PPC_FPU +- +-config PPC_83xx +- bool "Freescale 83xx" +- select 6xx +- select FSL_SOC +- select 83xx +- select PPC_FPU +- select WANT_DEVICE_TREE +- +-config PPC_85xx +- bool "Freescale 85xx" +- select E500 +- select FSL_SOC +- select 85xx +- select WANT_DEVICE_TREE +- +-config PPC_86xx +- bool "Freescale 86xx" +- select 6xx +- select FSL_SOC +- select FSL_PCIE +- select PPC_FPU +- select ALTIVEC +- help +- The Freescale E600 SoCs have 74xx cores. +- +-config PPC_8xx +- bool "Freescale 8xx" +- select FSL_SOC +- select 8xx +- +-config 40x +- bool "AMCC 40x" +- select PPC_DCR_NATIVE +- +-config 44x +- bool "AMCC 44x" +- select PPC_DCR_NATIVE +- select WANT_DEVICE_TREE +- +-config E200 +- bool "Freescale e200" +- +-endchoice +- +-config POWER4_ONLY +- bool "Optimize for POWER4" +- depends on PPC64 +- default n +- ---help--- +- Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. +- The resulting binary will not work on POWER3 or RS64 processors +- when compiled with binutils 2.15 or later. +- +-config POWER3 +- bool +- depends on PPC64 +- default y if !POWER4_ONLY +- +-config POWER4 +- depends on PPC64 +- def_bool y +- +-config 6xx +- bool +- +-# this is temp to handle compat with arch=ppc +-config 8xx +- bool +- +-# this is temp to handle compat with arch=ppc +-config 83xx +- bool +- +-# this is temp to handle compat with arch=ppc +-config 85xx +- bool +- +-config E500 +- bool +- +-config PPC_FPU +- bool +- default y if PPC64 +- + config PPC_DCR_NATIVE + bool + default n +@@ -267,134 +145,6 @@ + depends on PPC64 # not supported on 32 bits yet + default n + +-config 4xx +- bool +- depends on 40x || 44x +- default y +- +-config BOOKE +- bool +- depends on E200 || E500 || 44x +- default y +- +-config FSL_BOOKE +- bool +- depends on E200 || E500 +- default y +- +-config PTE_64BIT +- bool +- depends on 44x || E500 +- default y if 44x +- default y if E500 && PHYS_64BIT +- +-config PHYS_64BIT +- bool 'Large physical address support' if E500 +- depends on 44x || E500 +- select RESOURCES_64BIT +- default y if 44x +- ---help--- +- This option enables kernel support for larger than 32-bit physical +- addresses. This features is not be available on all e500 cores. +- +- If in doubt, say N here. +- +-config ALTIVEC +- bool "AltiVec Support" +- depends on CLASSIC32 || POWER4 +- ---help--- +- This option enables kernel support for the Altivec extensions to the +- PowerPC processor. The kernel currently supports saving and restoring +- altivec registers, and turning on the 'altivec enable' bit so user +- processes can execute altivec instructions. +- +- This option is only usefully if you have a processor that supports +- altivec (G4, otherwise known as 74xx series), but does not have +- any affect on a non-altivec cpu (it does, however add code to the +- kernel). +- +- If in doubt, say Y here. +- +-config SPE +- bool "SPE Support" +- depends on E200 || E500 +- default y +- ---help--- +- This option enables kernel support for the Signal Processing +- Extensions (SPE) to the PowerPC processor. 
The kernel currently +- supports saving and restoring SPE registers, and turning on the +- 'spe enable' bit so user processes can execute SPE instructions. +- +- This option is only useful if you have a processor that supports +- SPE (e500, otherwise known as 85xx series), but does not have any +- effect on a non-spe cpu (it does, however add code to the kernel). +- +- If in doubt, say Y here. +- +-config PPC_STD_MMU +- bool +- depends on 6xx || POWER3 || POWER4 || PPC64 +- default y +- +-config PPC_STD_MMU_32 +- def_bool y +- depends on PPC_STD_MMU && PPC32 +- +-config PPC_MM_SLICES +- bool +- default y if HUGETLB_PAGE +- default n +- +-config VIRT_CPU_ACCOUNTING +- bool "Deterministic task and CPU time accounting" +- depends on PPC64 +- default y +- help +- Select this option to enable more accurate task and CPU time +- accounting. This is done by reading a CPU counter on each +- kernel entry and exit and on transitions within the kernel +- between system, softirq and hardirq state, so there is a +- small performance impact. This also enables accounting of +- stolen time on logically-partitioned systems running on +- IBM POWER5-based machines. +- +- If in doubt, say Y here. +- +-config SMP +- depends on PPC_STD_MMU +- bool "Symmetric multi-processing support" +- ---help--- +- This enables support for systems with more than one CPU. If you have +- a system with only one CPU, say N. If you have a system with more +- than one CPU, say Y. Note that the kernel does not currently +- support SMP machines with 603/603e/603ev or PPC750 ("G3") processors +- since they have inadequate hardware support for multiprocessor +- operation. +- +- If you say N here, the kernel will run on single and multiprocessor +- machines, but will use only one CPU of a multiprocessor machine. If +- you say Y here, the kernel will run on single-processor machines. +- On a single-processor machine, the kernel will run faster if you say +- N here. +- +- If you don't know what to do here, say N. +- +-config NR_CPUS +- int "Maximum number of CPUs (2-128)" +- range 2 128 +- depends on SMP +- default "32" if PPC64 +- default "4" +- +-config NOT_COHERENT_CACHE +- bool +- depends on 4xx || 8xx || E200 +- default y +- +-config CONFIG_CHECK_CACHE_COHERENCY +- bool +-endmenu +- + source "init/Kconfig" + + source "arch/powerpc/platforms/Kconfig" +@@ -686,9 +436,9 @@ + bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_86xx \ + || PPC_MPC52xx || (EMBEDDED && (PPC_PSERIES || PPC_ISERIES)) \ + || MPC7448HPC2 || PPC_PS3 || PPC_HOLLY +- default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx \ ++ default y if !40x && !CPM2 && !8xx && !PPC_83xx \ + && !PPC_85xx && !PPC_86xx +- default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS ++ default PCI_PERMEDIA if !4xx && !CPM2 && !8xx + default PCI_QSPAN if !4xx && !CPM2 && 8xx + select ARCH_SUPPORTS_MSI + help +diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig.debug linux-2.6.22-591/arch/powerpc/Kconfig.debug +--- linux-2.6.22-570/arch/powerpc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -41,52 +41,9 @@ + This option will add a small amount of overhead to all hypervisor + calls. + +-config DEBUGGER +- bool "Enable debugger hooks" +- depends on DEBUG_KERNEL +- help +- Include in-kernel hooks for kernel debuggers. Unless you are +- intending to debug the kernel, say N here. 
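[Aside on the hunks above and below: the same conversion repeats across every board this patch touches. Each of the deleted dbg_io.c files carried its own blocking getDebugChar()/putDebugChar() pair plus an obj-$(CONFIG_KGDB) Makefile hook, and here the powerpc Kconfig likewise drops its private KGDB options (the per-ttyS serial-port choice and KGDB_CONSOLE) in favour of the generic stub, with DEBUGGER re-derived from KGDB || XMON further down. Under the new scheme a board instead packages its polled I/O as callbacks in a struct kgdb_io, as the added kgdb_sibyte.c does. Below is a minimal sketch of that shape for a memory-mapped 16550-style UART; the MMIO base and register offsets are placeholder values, and the struct kgdb_io fields are the ones this patch itself uses (read_char, write_char, init, late_init), not a guaranteed mainline API.

#include <linux/types.h>
#include <linux/kgdb.h>		/* struct kgdb_io, from the kgdb patchset */

/* Hypothetical MMIO base and 16550 register layout, for illustration. */
#define EX_UART_BASE	0xbfd003f8UL
#define EX_UART_LSR	5		/* line status register */
#define EX_LSR_DR	0x01		/* receiver data ready */
#define EX_LSR_THRE	0x20		/* transmit holding register empty */

static inline u8 ex_uart_in(int ofs)
{
	return *(volatile u8 *)(EX_UART_BASE + ofs);
}

static inline void ex_uart_out(int ofs, u8 val)
{
	*(volatile u8 *)(EX_UART_BASE + ofs) = val;
}

/* Poll for a received byte, as the deleted getDebugChar() loops did. */
static int ex_kgdb_read_char(void)
{
	while ((ex_uart_in(EX_UART_LSR) & EX_LSR_DR) == 0)
		;
	return ex_uart_in(0);		/* receive buffer at offset 0 */
}

/* Poll for transmit space, as the deleted putDebugChar() loops did. */
static void ex_kgdb_write_char(u8 c)
{
	while ((ex_uart_in(EX_UART_LSR) & EX_LSR_THRE) == 0)
		;
	ex_uart_out(0, c);		/* transmit holding register at offset 0 */
}

/* Baud/parity setup would go here; kgdbsibyte_init_duart() is the model. */
static int ex_kgdb_init(void)
{
	return 0;
}

/* The generic stub finds the backend through this global, exactly as
 * kgdb_sibyte.c exports its kgdb_io_ops; late_init (used there to hook
 * the break interrupt) is omitted from this sketch. */
struct kgdb_io kgdb_io_ops = {
	.read_char	= ex_kgdb_read_char,
	.write_char	= ex_kgdb_write_char,
	.init		= ex_kgdb_init,
};
]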
+- +-config KGDB +- bool "Include kgdb kernel debugger" +- depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx) +- select DEBUG_INFO +- help +- Include in-kernel hooks for kgdb, the Linux kernel source level +- debugger. See for more information. +- Unless you are intending to debug the kernel, say N here. +- +-choice +- prompt "Serial Port" +- depends on KGDB +- default KGDB_TTYS1 +- +-config KGDB_TTYS0 +- bool "ttyS0" +- +-config KGDB_TTYS1 +- bool "ttyS1" +- +-config KGDB_TTYS2 +- bool "ttyS2" +- +-config KGDB_TTYS3 +- bool "ttyS3" +- +-endchoice +- +-config KGDB_CONSOLE +- bool "Enable serial console thru kgdb port" +- depends on KGDB && 8xx || CPM2 +- help +- If you enable this, all serial console messages will be sent +- over the gdb stub. +- If unsure, say N. +- + config XMON + bool "Include xmon kernel debugger" +- depends on DEBUGGER ++ depends on DEBUG_KERNEL + help + Include in-kernel hooks for the xmon kernel monitor/debugger. + Unless you are intending to debug the kernel, say N here. +@@ -116,6 +73,11 @@ + to say Y here, unless you're building for a memory-constrained + system. + ++config DEBUGGER ++ bool ++ depends on KGDB || XMON ++ default y ++ + config IRQSTACKS + bool "Use separate kernel stacks when processing interrupts" + depends on PPC64 +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.c linux-2.6.22-591/arch/powerpc/boot/44x.c +--- linux-2.6.22-570/arch/powerpc/boot/44x.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/44x.c 2007-12-21 15:36:11.000000000 -0500 +@@ -38,3 +38,48 @@ + + dt_fixup_memory(0, memsize); + } ++ ++#define SPRN_DBCR0 0x134 ++#define DBCR0_RST_SYSTEM 0x30000000 ++ ++void ibm44x_dbcr_reset(void) ++{ ++ unsigned long tmp; ++ ++ asm volatile ( ++ "mfspr %0,%1\n" ++ "oris %0,%0,%2@h\n" ++ "mtspr %1,%0" ++ : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM) ++ ); ++ ++} ++ ++/* Read 4xx EBC bus bridge registers to get mappings of the peripheral ++ * banks into the OPB address space */ ++void ibm4xx_fixup_ebc_ranges(const char *ebc) ++{ ++ void *devp; ++ u32 bxcr; ++ u32 ranges[EBC_NUM_BANKS*4]; ++ u32 *p = ranges; ++ int i; ++ ++ for (i = 0; i < EBC_NUM_BANKS; i++) { ++ mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i)); ++ bxcr = mfdcr(DCRN_EBC0_CFGDATA); ++ ++ if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) { ++ *p++ = i; ++ *p++ = 0; ++ *p++ = bxcr & EBC_BXCR_BAS; ++ *p++ = EBC_BXCR_BANK_SIZE(bxcr); ++ } ++ } ++ ++ devp = finddevice(ebc); ++ if (! 
devp) ++ fatal("Couldn't locate EBC node %s\n\r", ebc); ++ ++ setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32)); ++} +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.h linux-2.6.22-591/arch/powerpc/boot/44x.h +--- linux-2.6.22-570/arch/powerpc/boot/44x.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/44x.h 2007-12-21 15:36:11.000000000 -0500 +@@ -11,6 +11,9 @@ + #define _PPC_BOOT_44X_H_ + + void ibm44x_fixup_memsize(void); ++void ibm4xx_fixup_ebc_ranges(const char *ebc); ++ ++void ibm44x_dbcr_reset(void); + void ebony_init(void *mac0, void *mac1); + + #endif /* _PPC_BOOT_44X_H_ */ +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/Makefile linux-2.6.22-591/arch/powerpc/boot/Makefile +--- linux-2.6.22-570/arch/powerpc/boot/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -43,8 +43,8 @@ + + src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \ + ns16550.c serial.c simple_alloc.c div64.S util.S \ +- gunzip_util.c elf_util.c $(zlib) devtree.c \ +- 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c ++ gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \ ++ 44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c + src-plat := of.c cuboot-83xx.c cuboot-85xx.c holly.c \ + cuboot-ebony.c treeboot-ebony.c prpmc2800.c + src-boot := $(src-wlib) $(src-plat) empty.c +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c +--- linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-83xx.c 2007-12-21 15:36:11.000000000 -0500 +@@ -12,12 +12,12 @@ + + #include "ops.h" + #include "stdio.h" ++#include "cuboot.h" + + #define TARGET_83xx + #include "ppcboot.h" + + static bd_t bd; +-extern char _end[]; + extern char _dtb_start[], _dtb_end[]; + + static void platform_fixups(void) +@@ -52,16 +52,7 @@ + void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) + { +- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; +- unsigned long avail_ram = end_of_ram - (unsigned long)_end; +- +- memcpy(&bd, (bd_t *)r3, sizeof(bd)); +- loader_info.initrd_addr = r4; +- loader_info.initrd_size = r4 ? r5 - r4 : 0; +- loader_info.cmdline = (char *)r6; +- loader_info.cmdline_len = r7 - r6; +- +- simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); ++ CUBOOT_INIT(); + ft_init(_dtb_start, _dtb_end - _dtb_start, 32); + serial_console_init(); + platform_ops.fixups = platform_fixups; +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c +--- linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-85xx.c 2007-12-21 15:36:11.000000000 -0500 +@@ -12,12 +12,12 @@ + + #include "ops.h" + #include "stdio.h" ++#include "cuboot.h" + + #define TARGET_85xx + #include "ppcboot.h" + + static bd_t bd; +-extern char _end[]; + extern char _dtb_start[], _dtb_end[]; + + static void platform_fixups(void) +@@ -53,16 +53,7 @@ + void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) + { +- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; +- unsigned long avail_ram = end_of_ram - (unsigned long)_end; +- +- memcpy(&bd, (bd_t *)r3, sizeof(bd)); +- loader_info.initrd_addr = r4; +- loader_info.initrd_size = r4 ? 
r5 - r4 : 0; +- loader_info.cmdline = (char *)r6; +- loader_info.cmdline_len = r7 - r6; +- +- simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); ++ CUBOOT_INIT(); + ft_init(_dtb_start, _dtb_end - _dtb_start, 32); + serial_console_init(); + platform_ops.fixups = platform_fixups; +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c +--- linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/cuboot-ebony.c 2007-12-21 15:36:11.000000000 -0500 +@@ -15,28 +15,16 @@ + #include "ops.h" + #include "stdio.h" + #include "44x.h" ++#include "cuboot.h" + + #define TARGET_44x + #include "ppcboot.h" + + static bd_t bd; +-extern char _end[]; +- +-BSS_STACK(4096); + + void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) + { +- unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize; +- unsigned long avail_ram = end_of_ram - (unsigned long)_end; +- +- memcpy(&bd, (bd_t *)r3, sizeof(bd)); +- loader_info.initrd_addr = r4; +- loader_info.initrd_size = r4 ? r5 : 0; +- loader_info.cmdline = (char *)r6; +- loader_info.cmdline_len = r7 - r6; +- +- simple_alloc_init(_end, avail_ram, 32, 64); +- ++ CUBOOT_INIT(); + ebony_init(&bd.bi_enetaddr, &bd.bi_enet1addr); + } +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.c linux-2.6.22-591/arch/powerpc/boot/cuboot.c +--- linux-2.6.22-570/arch/powerpc/boot/cuboot.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/boot/cuboot.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,35 @@ ++/* ++ * Compatibility for old (not device tree aware) U-Boot versions ++ * ++ * Author: Scott Wood ++ * Consolidated using macros by David Gibson ++ * ++ * Copyright 2007 David Gibson, IBM Corporation. ++ * Copyright (c) 2007 Freescale Semiconductor, Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ */ ++ ++#include "ops.h" ++#include "stdio.h" ++ ++#include "ppcboot.h" ++ ++extern char _end[]; ++extern char _dtb_start[], _dtb_end[]; ++ ++void cuboot_init(unsigned long r4, unsigned long r5, ++ unsigned long r6, unsigned long r7, ++ unsigned long end_of_ram) ++{ ++ unsigned long avail_ram = end_of_ram - (unsigned long)_end; ++ ++ loader_info.initrd_addr = r4; ++ loader_info.initrd_size = r4 ? 
r5 - r4 : 0; ++ loader_info.cmdline = (char *)r6; ++ loader_info.cmdline_len = r7 - r6; ++ ++ simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64); ++} +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.h linux-2.6.22-591/arch/powerpc/boot/cuboot.h +--- linux-2.6.22-570/arch/powerpc/boot/cuboot.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/boot/cuboot.h 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,14 @@ ++#ifndef _PPC_BOOT_CUBOOT_H_ ++#define _PPC_BOOT_CUBOOT_H_ ++ ++void cuboot_init(unsigned long r4, unsigned long r5, ++ unsigned long r6, unsigned long r7, ++ unsigned long end_of_ram); ++ ++#define CUBOOT_INIT() \ ++ do { \ ++ memcpy(&bd, (bd_t *)r3, sizeof(bd)); \ ++ cuboot_init(r4, r5, r6, r7, bd.bi_memstart + bd.bi_memsize); \ ++ } while (0) ++ ++#endif /* _PPC_BOOT_CUBOOT_H_ */ +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dcr.h linux-2.6.22-591/arch/powerpc/boot/dcr.h +--- linux-2.6.22-570/arch/powerpc/boot/dcr.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/dcr.h 2007-12-21 15:36:11.000000000 -0500 +@@ -26,6 +26,43 @@ + #define SDRAM_CONFIG_BANK_SIZE(reg) \ + (0x00400000 << ((reg & SDRAM_CONFIG_SIZE_MASK) >> 17)) + ++/* 440GP External Bus Controller (EBC) */ ++#define DCRN_EBC0_CFGADDR 0x012 ++#define DCRN_EBC0_CFGDATA 0x013 ++#define EBC_NUM_BANKS 8 ++#define EBC_B0CR 0x00 ++#define EBC_B1CR 0x01 ++#define EBC_B2CR 0x02 ++#define EBC_B3CR 0x03 ++#define EBC_B4CR 0x04 ++#define EBC_B5CR 0x05 ++#define EBC_B6CR 0x06 ++#define EBC_B7CR 0x07 ++#define EBC_BXCR(n) (n) ++#define EBC_BXCR_BAS 0xfff00000 ++#define EBC_BXCR_BS 0x000e0000 ++#define EBC_BXCR_BANK_SIZE(reg) \ ++ (0x100000 << (((reg) & EBC_BXCR_BS) >> 17)) ++#define EBC_BXCR_BU 0x00018000 ++#define EBC_BXCR_BU_OFF 0x00000000 ++#define EBC_BXCR_BU_RO 0x00008000 ++#define EBC_BXCR_BU_WO 0x00010000 ++#define EBC_BXCR_BU_RW 0x00018000 ++#define EBC_BXCR_BW 0x00006000 ++#define EBC_B0AP 0x10 ++#define EBC_B1AP 0x11 ++#define EBC_B2AP 0x12 ++#define EBC_B3AP 0x13 ++#define EBC_B4AP 0x14 ++#define EBC_B5AP 0x15 ++#define EBC_B6AP 0x16 ++#define EBC_B7AP 0x17 ++#define EBC_BXAP(n) (0x10+(n)) ++#define EBC_BEAR 0x20 ++#define EBC_BESR 0x21 ++#define EBC_CFG 0x23 ++#define EBC_CID 0x24 ++ + /* 440GP Clock, PM, chip control */ + #define DCRN_CPC0_SR 0x0b0 + #define DCRN_CPC0_ER 0x0b1 +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts +--- linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/dts/ebony.dts 2007-12-21 15:36:11.000000000 -0500 +@@ -135,11 +135,9 @@ + #address-cells = <2>; + #size-cells = <1>; + clock-frequency = <0>; // Filled in by zImage +- ranges = <0 00000000 fff00000 100000 +- 1 00000000 48000000 100000 +- 2 00000000 ff800000 400000 +- 3 00000000 48200000 100000 +- 7 00000000 48300000 100000>; ++ // ranges property is supplied by zImage ++ // based on firmware's configuration of the ++ // EBC bridge + interrupts = <5 4>; + interrupt-parent = <&UIC1>; + +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts +--- linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/dts/holly.dts 2007-12-21 15:36:11.000000000 -0500 +@@ -46,7 +46,7 @@ + + tsi109@c0000000 { + device_type = "tsi-bridge"; +- compatible = "tsi-bridge"; ++ compatible = "tsi109-bridge", "tsi108-bridge"; + #address-cells = <1>; + #size-cells = 
<1>; + ranges = <00000000 c0000000 00010000>; +@@ -54,52 +54,55 @@ + + i2c@7000 { + device_type = "i2c"; +- compatible = "tsi-i2c"; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ compatible = "tsi109-i2c", "tsi108-i2c"; ++ interrupt-parent = <&MPIC>; + interrupts = ; + reg = <7000 400>; + }; + +- mdio@6000 { ++ MDIO: mdio@6000 { + device_type = "mdio"; +- compatible = "tsi-ethernet"; +- +- PHY1: ethernet-phy@6000 { +- device_type = "ethernet-phy"; +- compatible = "bcm54xx"; ++ compatible = "tsi109-mdio", "tsi108-mdio"; + reg = <6000 50>; +- phy-id = <1>; ++ #address-cells = <1>; ++ #size-cells = <0>; ++ ++ PHY1: ethernet-phy@1 { ++ compatible = "bcm5461a"; ++ reg = <1>; ++ txc-rxc-delay-disable; + }; + +- PHY2: ethernet-phy@6400 { +- device_type = "ethernet-phy"; +- compatible = "bcm54xx"; +- reg = <6000 50>; +- phy-id = <2>; ++ PHY2: ethernet-phy@2 { ++ compatible = "bcm5461a"; ++ reg = <2>; ++ txc-rxc-delay-disable; + }; + }; + + ethernet@6200 { + device_type = "network"; +- compatible = "tsi-ethernet"; ++ compatible = "tsi109-ethernet", "tsi108-ethernet"; + #address-cells = <1>; + #size-cells = <0>; + reg = <6000 200>; + local-mac-address = [ 00 00 00 00 00 00 ]; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + interrupts = <10 2>; ++ mdio-handle = <&MDIO>; + phy-handle = <&PHY1>; + }; + + ethernet@6600 { + device_type = "network"; +- compatible = "tsi-ethernet"; ++ compatible = "tsi109-ethernet", "tsi108-ethernet"; + #address-cells = <1>; + #size-cells = <0>; + reg = <6400 200>; + local-mac-address = [ 00 00 00 00 00 00 ]; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + interrupts = <11 2>; ++ mdio-handle = <&MDIO>; + phy-handle = <&PHY2>; + }; + +@@ -110,7 +113,7 @@ + virtual-reg = ; + clock-frequency = <3F9C6000>; + current-speed = <1c200>; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + interrupts = ; + }; + +@@ -121,7 +124,7 @@ + virtual-reg = ; + clock-frequency = <3F9C6000>; + current-speed = <1c200>; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + interrupts = ; + }; + +@@ -136,7 +139,7 @@ + + pci@1000 { + device_type = "pci"; +- compatible = "tsi109"; ++ compatible = "tsi109-pci", "tsi108-pci"; + #interrupt-cells = <1>; + #size-cells = <2>; + #address-cells = <3>; +@@ -150,7 +153,7 @@ + ranges = <02000000 0 40000000 40000000 0 10000000 + 01000000 0 00000000 7e000000 0 00010000>; + clock-frequency = <7f28154>; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + interrupts = <17 2>; + interrupt-map-mask = ; + /*----------------------------------------------------+ +@@ -186,13 +189,12 @@ + #address-cells = <0>; + #interrupt-cells = <2>; + interrupts = <17 2>; +- interrupt-parent = < &/tsi109@c0000000/pic@7400 >; ++ interrupt-parent = <&MPIC>; + }; + }; + }; + + chosen { + linux,stdout-path = "/tsi109@c0000000/serial@7808"; +- bootargs = "console=ttyS0,115200"; + }; + }; +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts +--- linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/dts/mpc7448hpc2.dts 2007-12-21 15:36:11.000000000 -0500 +@@ -45,7 +45,7 @@ + #address-cells = <1>; + #size-cells = <1>; + #interrupt-cells = <2>; +- device_type = "tsi-bridge"; ++ device_type = "tsi108-bridge"; + ranges = <00000000 c0000000 00010000>; + reg = ; + 
bus-frequency = <0>; +@@ -55,27 +55,26 @@ + interrupts = ; + reg = <7000 400>; + device_type = "i2c"; +- compatible = "tsi-i2c"; ++ compatible = "tsi108-i2c"; + }; + +- mdio@6000 { ++ MDIO: mdio@6000 { + device_type = "mdio"; +- compatible = "tsi-ethernet"; ++ compatible = "tsi108-mdio"; ++ reg = <6000 50>; ++ #address-cells = <1>; ++ #size-cells = <0>; + +- phy8: ethernet-phy@6000 { ++ phy8: ethernet-phy@8 { + interrupt-parent = <&mpic>; + interrupts = <2 1>; +- reg = <6000 50>; +- phy-id = <8>; +- device_type = "ethernet-phy"; ++ reg = <8>; + }; + +- phy9: ethernet-phy@6400 { ++ phy9: ethernet-phy@9 { + interrupt-parent = <&mpic>; + interrupts = <2 1>; +- reg = <6000 50>; +- phy-id = <9>; +- device_type = "ethernet-phy"; ++ reg = <9>; + }; + + }; +@@ -83,12 +82,12 @@ + ethernet@6200 { + #size-cells = <0>; + device_type = "network"; +- model = "TSI-ETH"; +- compatible = "tsi-ethernet"; ++ compatible = "tsi108-ethernet"; + reg = <6000 200>; + address = [ 00 06 D2 00 00 01 ]; + interrupts = <10 2>; + interrupt-parent = <&mpic>; ++ mdio-handle = <&MDIO>; + phy-handle = <&phy8>; + }; + +@@ -96,12 +95,12 @@ + #address-cells = <1>; + #size-cells = <0>; + device_type = "network"; +- model = "TSI-ETH"; +- compatible = "tsi-ethernet"; ++ compatible = "tsi108-ethernet"; + reg = <6400 200>; + address = [ 00 06 D2 00 00 02 ]; + interrupts = <11 2>; + interrupt-parent = <&mpic>; ++ mdio-handle = <&MDIO>; + phy-handle = <&phy9>; + }; + +@@ -135,7 +134,7 @@ + big-endian; + }; + pci@1000 { +- compatible = "tsi10x"; ++ compatible = "tsi108-pci"; + device_type = "pci"; + #interrupt-cells = <1>; + #size-cells = <2>; +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ebony.c linux-2.6.22-591/arch/powerpc/boot/ebony.c +--- linux-2.6.22-570/arch/powerpc/boot/ebony.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/ebony.c 2007-12-21 15:36:11.000000000 -0500 +@@ -100,28 +100,13 @@ + ibm440gp_fixup_clocks(sysclk, 6 * 1843200); + ibm44x_fixup_memsize(); + dt_fixup_mac_addresses(ebony_mac0, ebony_mac1); +-} +- +-#define SPRN_DBCR0 0x134 +-#define DBCR0_RST_SYSTEM 0x30000000 +- +-static void ebony_exit(void) +-{ +- unsigned long tmp; +- +- asm volatile ( +- "mfspr %0,%1\n" +- "oris %0,%0,%2@h\n" +- "mtspr %1,%0" +- : "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM) +- ); +- ++ ibm4xx_fixup_ebc_ranges("/plb/opb/ebc"); + } + + void ebony_init(void *mac0, void *mac1) + { + platform_ops.fixups = ebony_fixups; +- platform_ops.exit = ebony_exit; ++ platform_ops.exit = ibm44x_dbcr_reset; + ebony_mac0 = mac0; + ebony_mac1 = mac1; + ft_init(_dtb_start, _dtb_end - _dtb_start, 32); +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.c linux-2.6.22-591/arch/powerpc/boot/of.c +--- linux-2.6.22-570/arch/powerpc/boot/of.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/boot/of.c 2007-12-21 15:36:11.000000000 -0500 +@@ -15,8 +15,7 @@ + #include "page.h" + #include "ops.h" + +-typedef void *ihandle; +-typedef void *phandle; ++#include "of.h" + + extern char _end[]; + +@@ -25,154 +24,10 @@ + #define RAM_END (512<<20) /* Fixme: use OF */ + #define ONE_MB 0x100000 + +-int (*prom) (void *); + + + static unsigned long claim_base; + +-static int call_prom(const char *service, int nargs, int nret, ...) 
+-{ +- int i; +- struct prom_args { +- const char *service; +- int nargs; +- int nret; +- unsigned int args[12]; +- } args; +- va_list list; +- +- args.service = service; +- args.nargs = nargs; +- args.nret = nret; +- +- va_start(list, nret); +- for (i = 0; i < nargs; i++) +- args.args[i] = va_arg(list, unsigned int); +- va_end(list); +- +- for (i = 0; i < nret; i++) +- args.args[nargs+i] = 0; +- +- if (prom(&args) < 0) +- return -1; +- +- return (nret > 0)? args.args[nargs]: 0; +-} +- +-static int call_prom_ret(const char *service, int nargs, int nret, +- unsigned int *rets, ...) +-{ +- int i; +- struct prom_args { +- const char *service; +- int nargs; +- int nret; +- unsigned int args[12]; +- } args; +- va_list list; +- +- args.service = service; +- args.nargs = nargs; +- args.nret = nret; +- +- va_start(list, rets); +- for (i = 0; i < nargs; i++) +- args.args[i] = va_arg(list, unsigned int); +- va_end(list); +- +- for (i = 0; i < nret; i++) +- args.args[nargs+i] = 0; +- +- if (prom(&args) < 0) +- return -1; +- +- if (rets != (void *) 0) +- for (i = 1; i < nret; ++i) +- rets[i-1] = args.args[nargs+i]; +- +- return (nret > 0)? args.args[nargs]: 0; +-} +- +-/* +- * Older OF's require that when claiming a specific range of addresses, +- * we claim the physical space in the /memory node and the virtual +- * space in the chosen mmu node, and then do a map operation to +- * map virtual to physical. +- */ +-static int need_map = -1; +-static ihandle chosen_mmu; +-static phandle memory; +- +-/* returns true if s2 is a prefix of s1 */ +-static int string_match(const char *s1, const char *s2) +-{ +- for (; *s2; ++s2) +- if (*s1++ != *s2) +- return 0; +- return 1; +-} +- +-static int check_of_version(void) +-{ +- phandle oprom, chosen; +- char version[64]; +- +- oprom = finddevice("/openprom"); +- if (oprom == (phandle) -1) +- return 0; +- if (getprop(oprom, "model", version, sizeof(version)) <= 0) +- return 0; +- version[sizeof(version)-1] = 0; +- printf("OF version = '%s'\r\n", version); +- if (!string_match(version, "Open Firmware, 1.") +- && !string_match(version, "FirmWorks,3.")) +- return 0; +- chosen = finddevice("/chosen"); +- if (chosen == (phandle) -1) { +- chosen = finddevice("/chosen@0"); +- if (chosen == (phandle) -1) { +- printf("no chosen\n"); +- return 0; +- } +- } +- if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) { +- printf("no mmu\n"); +- return 0; +- } +- memory = (ihandle) call_prom("open", 1, 1, "/memory"); +- if (memory == (ihandle) -1) { +- memory = (ihandle) call_prom("open", 1, 1, "/memory@0"); +- if (memory == (ihandle) -1) { +- printf("no memory node\n"); +- return 0; +- } +- } +- printf("old OF detected\r\n"); +- return 1; +-} +- +-static void *claim(unsigned long virt, unsigned long size, unsigned long align) +-{ +- int ret; +- unsigned int result; +- +- if (need_map < 0) +- need_map = check_of_version(); +- if (align || !need_map) +- return (void *) call_prom("claim", 3, 1, virt, size, align); +- +- ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory, +- align, size, virt); +- if (ret != 0 || result == -1) +- return (void *) -1; +- ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu, +- align, size, virt); +- /* 0x12 == coherent + read/write */ +- ret = call_prom("call-method", 6, 1, "map", chosen_mmu, +- 0x12, size, virt, virt); +- return (void *) virt; +-} +- + static void *of_try_claim(unsigned long size) + { + unsigned long addr = 0; +@@ -184,7 +39,7 @@ + #ifdef DEBUG + printf(" trying: 0x%08lx\n\r", claim_base); 
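+ 		/* The enclosing loop probes upward from claim_base in
+ 		 * ONE_MB steps until the firmware accepts a claim or
+ 		 * RAM_END is reached. */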
+ #endif +- addr = (unsigned long)claim(claim_base, size, 0); ++ addr = (unsigned long)of_claim(claim_base, size, 0); + if ((void *)addr != (void *)-1) + break; + } +@@ -218,52 +73,24 @@ + return p; + } + +-static void of_exit(void) +-{ +- call_prom("exit", 0, 0); +-} +- + /* + * OF device tree routines + */ + static void *of_finddevice(const char *name) + { +- return (phandle) call_prom("finddevice", 1, 1, name); ++ return (phandle) of_call_prom("finddevice", 1, 1, name); + } + + static int of_getprop(const void *phandle, const char *name, void *buf, + const int buflen) + { +- return call_prom("getprop", 4, 1, phandle, name, buf, buflen); ++ return of_call_prom("getprop", 4, 1, phandle, name, buf, buflen); + } + + static int of_setprop(const void *phandle, const char *name, const void *buf, + const int buflen) + { +- return call_prom("setprop", 4, 1, phandle, name, buf, buflen); +-} +- +-/* +- * OF console routines +- */ +-static void *of_stdout_handle; +- +-static int of_console_open(void) +-{ +- void *devp; +- +- if (((devp = finddevice("/chosen")) != NULL) +- && (getprop(devp, "stdout", &of_stdout_handle, +- sizeof(of_stdout_handle)) +- == sizeof(of_stdout_handle))) +- return 0; +- +- return -1; +-} +- +-static void of_console_write(char *buf, int len) +-{ +- call_prom("write", 3, 1, of_stdout_handle, buf, len); ++ return of_call_prom("setprop", 4, 1, phandle, name, buf, buflen); + } + + void platform_init(unsigned long a1, unsigned long a2, void *promptr) +@@ -277,10 +104,9 @@ + dt_ops.getprop = of_getprop; + dt_ops.setprop = of_setprop; + +- console_ops.open = of_console_open; +- console_ops.write = of_console_write; ++ of_console_init(); + +- prom = (int (*)(void *))promptr; ++ of_init(promptr); + loader_info.promptr = promptr; + if (a1 && a2 && a2 != 0xdeadbeef) { + loader_info.initrd_addr = a1; +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.h linux-2.6.22-591/arch/powerpc/boot/of.h +--- linux-2.6.22-570/arch/powerpc/boot/of.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/boot/of.h 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,15 @@ ++#ifndef _PPC_BOOT_OF_H_ ++#define _PPC_BOOT_OF_H_ ++ ++typedef void *phandle; ++typedef void *ihandle; ++ ++void of_init(void *promptr); ++int of_call_prom(const char *service, int nargs, int nret, ...); ++void *of_claim(unsigned long virt, unsigned long size, unsigned long align); ++void of_exit(void); ++ ++/* Console functions */ ++void of_console_init(void); ++ ++#endif /* _PPC_BOOT_OF_H_ */ +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ofconsole.c linux-2.6.22-591/arch/powerpc/boot/ofconsole.c +--- linux-2.6.22-570/arch/powerpc/boot/ofconsole.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/boot/ofconsole.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,45 @@ ++/* ++ * OF console routines ++ * ++ * Copyright (C) Paul Mackerras 1997. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++#include ++#include "types.h" ++#include "elf.h" ++#include "string.h" ++#include "stdio.h" ++#include "page.h" ++#include "ops.h" ++ ++#include "of.h" ++ ++static void *of_stdout_handle; ++ ++static int of_console_open(void) ++{ ++ void *devp; ++ ++ if (((devp = finddevice("/chosen")) != NULL) ++ && (getprop(devp, "stdout", &of_stdout_handle, ++ sizeof(of_stdout_handle)) ++ == sizeof(of_stdout_handle))) ++ return 0; ++ ++ return -1; ++} ++ ++static void of_console_write(char *buf, int len) ++{ ++ of_call_prom("write", 3, 1, of_stdout_handle, buf, len); ++} ++ ++void of_console_init(void) ++{ ++ console_ops.open = of_console_open; ++ console_ops.write = of_console_write; ++} +diff -Nurb linux-2.6.22-570/arch/powerpc/boot/oflib.c linux-2.6.22-591/arch/powerpc/boot/oflib.c +--- linux-2.6.22-570/arch/powerpc/boot/oflib.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/boot/oflib.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,172 @@ ++/* ++ * Copyright (C) Paul Mackerras 1997. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++#include ++#include "types.h" ++#include "elf.h" ++#include "string.h" ++#include "stdio.h" ++#include "page.h" ++#include "ops.h" ++ ++#include "of.h" ++ ++static int (*prom) (void *); ++ ++void of_init(void *promptr) ++{ ++ prom = (int (*)(void *))promptr; ++} ++ ++int of_call_prom(const char *service, int nargs, int nret, ...) ++{ ++ int i; ++ struct prom_args { ++ const char *service; ++ int nargs; ++ int nret; ++ unsigned int args[12]; ++ } args; ++ va_list list; ++ ++ args.service = service; ++ args.nargs = nargs; ++ args.nret = nret; ++ ++ va_start(list, nret); ++ for (i = 0; i < nargs; i++) ++ args.args[i] = va_arg(list, unsigned int); ++ va_end(list); ++ ++ for (i = 0; i < nret; i++) ++ args.args[nargs+i] = 0; ++ ++ if (prom(&args) < 0) ++ return -1; ++ ++ return (nret > 0)? args.args[nargs]: 0; ++} ++ ++static int of_call_prom_ret(const char *service, int nargs, int nret, ++ unsigned int *rets, ...) ++{ ++ int i; ++ struct prom_args { ++ const char *service; ++ int nargs; ++ int nret; ++ unsigned int args[12]; ++ } args; ++ va_list list; ++ ++ args.service = service; ++ args.nargs = nargs; ++ args.nret = nret; ++ ++ va_start(list, rets); ++ for (i = 0; i < nargs; i++) ++ args.args[i] = va_arg(list, unsigned int); ++ va_end(list); ++ ++ for (i = 0; i < nret; i++) ++ args.args[nargs+i] = 0; ++ ++ if (prom(&args) < 0) ++ return -1; ++ ++ if (rets != (void *) 0) ++ for (i = 1; i < nret; ++i) ++ rets[i-1] = args.args[nargs+i]; ++ ++ return (nret > 0)? args.args[nargs]: 0; ++} ++ ++/* returns true if s2 is a prefix of s1 */ ++static int string_match(const char *s1, const char *s2) ++{ ++ for (; *s2; ++s2) ++ if (*s1++ != *s2) ++ return 0; ++ return 1; ++} ++ ++/* ++ * Older OF's require that when claiming a specific range of addresses, ++ * we claim the physical space in the /memory node and the virtual ++ * space in the chosen mmu node, and then do a map operation to ++ * map virtual to physical. 
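++ * That takes three client-interface calls here: a "claim" call-method
++ * on the /memory ihandle, a "claim" call-method on the chosen mmu
++ * ihandle, and finally a "map" call-method with mode 0x12
++ * (coherent + read/write).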
++ */ ++static int need_map = -1; ++static ihandle chosen_mmu; ++static phandle memory; ++ ++static int check_of_version(void) ++{ ++ phandle oprom, chosen; ++ char version[64]; ++ ++ oprom = finddevice("/openprom"); ++ if (oprom == (phandle) -1) ++ return 0; ++ if (getprop(oprom, "model", version, sizeof(version)) <= 0) ++ return 0; ++ version[sizeof(version)-1] = 0; ++ printf("OF version = '%s'\r\n", version); ++ if (!string_match(version, "Open Firmware, 1.") ++ && !string_match(version, "FirmWorks,3.")) ++ return 0; ++ chosen = finddevice("/chosen"); ++ if (chosen == (phandle) -1) { ++ chosen = finddevice("/chosen@0"); ++ if (chosen == (phandle) -1) { ++ printf("no chosen\n"); ++ return 0; ++ } ++ } ++ if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) { ++ printf("no mmu\n"); ++ return 0; ++ } ++ memory = (ihandle) of_call_prom("open", 1, 1, "/memory"); ++ if (memory == (ihandle) -1) { ++ memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0"); ++ if (memory == (ihandle) -1) { ++ printf("no memory node\n"); ++ return 0; ++ } ++ } ++ printf("old OF detected\r\n"); ++ return 1; ++} ++ ++void *of_claim(unsigned long virt, unsigned long size, unsigned long align) ++{ ++ int ret; ++ unsigned int result; ++ ++ if (need_map < 0) ++ need_map = check_of_version(); ++ if (align || !need_map) ++ return (void *) of_call_prom("claim", 3, 1, virt, size, align); ++ ++ ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory, ++ align, size, virt); ++ if (ret != 0 || result == -1) ++ return (void *) -1; ++ ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu, ++ align, size, virt); ++ /* 0x12 == coherent + read/write */ ++ ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu, ++ 0x12, size, virt, virt); ++ return (void *) virt; ++} ++ ++void of_exit(void) ++{ ++ of_call_prom("exit", 0, 0); ++} +diff -Nurb linux-2.6.22-570/arch/powerpc/configs/holly_defconfig linux-2.6.22-591/arch/powerpc/configs/holly_defconfig +--- linux-2.6.22-570/arch/powerpc/configs/holly_defconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/configs/holly_defconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -190,7 +190,8 @@ + # CONFIG_RESOURCES_64BIT is not set + CONFIG_ZONE_DMA_FLAG=1 + CONFIG_PROC_DEVICETREE=y +-# CONFIG_CMDLINE_BOOL is not set ++CONFIG_CMDLINE_BOOL=y ++CONFIG_CMDLINE="console=ttyS0,115200" + # CONFIG_PM is not set + # CONFIG_SECCOMP is not set + # CONFIG_WANT_DEVICE_TREE is not set +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/Makefile linux-2.6.22-591/arch/powerpc/kernel/Makefile +--- linux-2.6.22-570/arch/powerpc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -12,7 +12,8 @@ + + obj-y := semaphore.o cputable.o ptrace.o syscalls.o \ + irq.o align.o signal_32.o pmc.o vdso.o \ +- init_task.o process.o systbl.o idle.o ++ init_task.o process.o systbl.o idle.o \ ++ signal.o + obj-y += vdso32/ + obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \ + signal_64.o ptrace32.o \ +@@ -62,10 +63,16 @@ + obj-$(CONFIG_KPROBES) += kprobes.o + obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o + ++ifeq ($(CONFIG_PPC32),y) ++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o ++else ++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp64.o ++endif ++ + module-$(CONFIG_PPC64) += module_64.o + obj-$(CONFIG_MODULES) += $(module-y) + +-pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o ++pci64-$(CONFIG_PPC64) += pci_64.o pci_dn.o isa-bridge.o + 
pci32-$(CONFIG_PPC32) := pci_32.o + obj-$(CONFIG_PCI) += $(pci64-y) $(pci32-y) + obj-$(CONFIG_PCI_MSI) += msi.o +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/head_32.S linux-2.6.22-591/arch/powerpc/kernel/head_32.S +--- linux-2.6.22-570/arch/powerpc/kernel/head_32.S 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/head_32.S 2007-12-21 15:36:11.000000000 -0500 +@@ -9,7 +9,6 @@ + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This file contains the low-level support and setup for the + * PowerPC platform, including trap and interrupt dispatch. +@@ -32,10 +31,6 @@ + #include + #include + +-#ifdef CONFIG_APUS +-#include +-#endif +- + /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ + #define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ +@@ -92,11 +87,6 @@ + * r4: virtual address of boot_infos_t + * r5: 0 + * +- * APUS +- * r3: 'APUS' +- * r4: physical address of memory base +- * Linux/m68k style BootInfo structure at &_end. +- * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout +@@ -150,14 +140,6 @@ + */ + bl early_init + +-#ifdef CONFIG_APUS +-/* On APUS the __va/__pa constants need to be set to the correct +- * values before continuing. +- */ +- mr r4,r30 +- bl fix_mem_constants +-#endif /* CONFIG_APUS */ +- + /* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ +@@ -167,7 +149,7 @@ + bl flush_tlbs + + bl initial_bats +-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) ++#if defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat + #endif + +@@ -183,7 +165,6 @@ + #endif /* CONFIG_6xx */ + + +-#ifndef CONFIG_APUS + /* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses +@@ -196,7 +177,6 @@ + addis r4,r3,KERNELBASE@h /* current address of _start */ + cmpwi 0,r4,0 /* are we already running at 0? */ + bne relocate_kernel +-#endif /* CONFIG_APUS */ + /* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. +@@ -881,85 +861,6 @@ + addi r6,r6,4 + blr + +-#ifdef CONFIG_APUS +-/* +- * On APUS the physical base address of the kernel is not known at compile +- * time, which means the __pa/__va constants used are incorrect. In the +- * __init section is recorded the virtual addresses of instructions using +- * these constants, so all that has to be done is fix these before +- * continuing the kernel boot. +- * +- * r4 = The physical address of the kernel base. +- */ +-fix_mem_constants: +- mr r10,r4 +- addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ +- neg r11,r10 /* phys_to_virt constant */ +- +- lis r12,__vtop_table_begin@h +- ori r12,r12,__vtop_table_begin@l +- add r12,r12,r10 /* table begin phys address */ +- lis r13,__vtop_table_end@h +- ori r13,r13,__vtop_table_end@l +- add r13,r13,r10 /* table end phys address */ +- subi r12,r12,4 +- subi r13,r13,4 +-1: lwzu r14,4(r12) /* virt address of instruction */ +- add r14,r14,r10 /* phys address of instruction */ +- lwz r15,0(r14) /* instruction, now insert top */ +- rlwimi r15,r10,16,16,31 /* half of vp const in low half */ +- stw r15,0(r14) /* of instruction and restore. 
*/ +- dcbst r0,r14 /* write it to memory */ +- sync +- icbi r0,r14 /* flush the icache line */ +- cmpw r12,r13 +- bne 1b +- sync /* additional sync needed on g4 */ +- isync +- +-/* +- * Map the memory where the exception handlers will +- * be copied to when hash constants have been patched. +- */ +-#ifdef CONFIG_APUS_FAST_EXCEPT +- lis r8,0xfff0 +-#else +- lis r8,0 +-#endif +- ori r8,r8,0x2 /* 128KB, supervisor */ +- mtspr SPRN_DBAT3U,r8 +- mtspr SPRN_DBAT3L,r8 +- +- lis r12,__ptov_table_begin@h +- ori r12,r12,__ptov_table_begin@l +- add r12,r12,r10 /* table begin phys address */ +- lis r13,__ptov_table_end@h +- ori r13,r13,__ptov_table_end@l +- add r13,r13,r10 /* table end phys address */ +- subi r12,r12,4 +- subi r13,r13,4 +-1: lwzu r14,4(r12) /* virt address of instruction */ +- add r14,r14,r10 /* phys address of instruction */ +- lwz r15,0(r14) /* instruction, now insert top */ +- rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ +- stw r15,0(r14) /* of instruction and restore. */ +- dcbst r0,r14 /* write it to memory */ +- sync +- icbi r0,r14 /* flush the icache line */ +- cmpw r12,r13 +- bne 1b +- +- sync /* additional sync needed on g4 */ +- isync /* No speculative loading until now */ +- blr +- +-/*********************************************************************** +- * Please note that on APUS the exception handlers are located at the +- * physical address 0xfff0000. For this reason, the exception handlers +- * cannot use relative branches to access the code below. +- ***********************************************************************/ +-#endif /* CONFIG_APUS */ +- + #ifdef CONFIG_SMP + #ifdef CONFIG_GEMINI + .globl __secondary_start_gemini +@@ -1135,19 +1036,6 @@ + bl __save_cpu_setup + bl MMU_init + +-#ifdef CONFIG_APUS +- /* Copy exception code to exception vector base on APUS. */ +- lis r4,KERNELBASE@h +-#ifdef CONFIG_APUS_FAST_EXCEPT +- lis r3,0xfff0 /* Copy to 0xfff00000 */ +-#else +- lis r3,0 /* Copy to 0x00000000 */ +-#endif +- li r5,0x4000 /* # bytes of memory to copy */ +- li r6,0 +- bl copy_and_flush /* copy the first 0x4000 bytes */ +-#endif /* CONFIG_APUS */ +- + /* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers +@@ -1324,11 +1212,7 @@ + #else + ori r8,r8,2 /* R/W access */ + #endif /* CONFIG_SMP */ +-#ifdef CONFIG_APUS +- ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ +-#else + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ +-#endif /* CONFIG_APUS */ + + mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ +@@ -1338,7 +1222,7 @@ + blr + + +-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) ++#ifdef CONFIG_BOOTX_TEXT + setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c +@@ -1362,7 +1246,7 @@ + 1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr +-#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ ++#endif /* CONFIG_BOOTX_TEXT */ + + #ifdef CONFIG_8260 + /* Jump into the system reset for the rom. 
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/irq.c linux-2.6.22-591/arch/powerpc/kernel/irq.c +--- linux-2.6.22-570/arch/powerpc/kernel/irq.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/irq.c 2007-12-21 15:36:11.000000000 -0500 +@@ -7,7 +7,6 @@ + * Copyright (C) 1996-2001 Cort Dougan + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c +--- linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/isa-bridge.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,271 @@ ++/* ++ * Routines for tracking a legacy ISA bridge ++ * ++ * Copyrigh 2007 Benjamin Herrenschmidt , IBM Corp. ++ * ++ * Some bits and pieces moved over from pci_64.c ++ * ++ * Copyrigh 2003 Anton Blanchard , IBM Corp. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#define DEBUG ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned long isa_io_base; /* NULL if no ISA bus */ ++EXPORT_SYMBOL(isa_io_base); ++ ++/* Cached ISA bridge dev. */ ++static struct device_node *isa_bridge_devnode; ++struct pci_dev *isa_bridge_pcidev; ++EXPORT_SYMBOL_GPL(isa_bridge_pcidev); ++ ++#define ISA_SPACE_MASK 0x1 ++#define ISA_SPACE_IO 0x1 ++ ++static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, ++ unsigned long phb_io_base_phys) ++{ ++ /* We should get some saner parsing here and remove these structs */ ++ struct pci_address { ++ u32 a_hi; ++ u32 a_mid; ++ u32 a_lo; ++ }; ++ ++ struct isa_address { ++ u32 a_hi; ++ u32 a_lo; ++ }; ++ ++ struct isa_range { ++ struct isa_address isa_addr; ++ struct pci_address pci_addr; ++ unsigned int size; ++ }; ++ ++ const struct isa_range *range; ++ unsigned long pci_addr; ++ unsigned int isa_addr; ++ unsigned int size; ++ int rlen = 0; ++ ++ range = of_get_property(isa_node, "ranges", &rlen); ++ if (range == NULL || (rlen < sizeof(struct isa_range))) ++ goto inval_range; ++ ++ /* From "ISA Binding to 1275" ++ * The ranges property is laid out as an array of elements, ++ * each of which comprises: ++ * cells 0 - 1: an ISA address ++ * cells 2 - 4: a PCI address ++ * (size depending on dev->n_addr_cells) ++ * cell 5: the size of the range ++ */ ++ if ((range->isa_addr.a_hi && ISA_SPACE_MASK) != ISA_SPACE_IO) { ++ range++; ++ rlen -= sizeof(struct isa_range); ++ if (rlen < sizeof(struct isa_range)) ++ goto inval_range; ++ } ++ if ((range->isa_addr.a_hi && ISA_SPACE_MASK) != ISA_SPACE_IO) ++ goto inval_range; ++ ++ isa_addr = range->isa_addr.a_lo; ++ pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | ++ range->pci_addr.a_lo; ++ ++ /* Assume these are both zero. Note: We could fix that and ++ * do a proper parsing instead ... 
oh well, that will do for ++ * now as nobody uses fancy mappings for ISA bridges ++ */ ++ if ((pci_addr != 0) || (isa_addr != 0)) { ++ printk(KERN_ERR "unexpected isa to pci mapping: %s\n", ++ __FUNCTION__); ++ return; ++ } ++ ++ /* Align size and make sure it's cropped to 64K */ ++ size = PAGE_ALIGN(range->size); ++ if (size > 0x10000) ++ size = 0x10000; ++ ++ printk(KERN_ERR "no ISA IO ranges or unexpected isa range," ++ "mapping 64k\n"); ++ ++ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, ++ size, _PAGE_NO_CACHE|_PAGE_GUARDED); ++ return; ++ ++inval_range: ++ printk(KERN_ERR "no ISA IO ranges or unexpected isa range," ++ "mapping 64k\n"); ++ __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, ++ 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED); ++} ++ ++ ++/** ++ * isa_bridge_find_early - Find and map the ISA IO space early before ++ * main PCI discovery. This is optionally called by ++ * the arch code when adding PCI PHBs to get early ++ * access to ISA IO ports ++ */ ++void __init isa_bridge_find_early(struct pci_controller *hose) ++{ ++ struct device_node *np, *parent = NULL, *tmp; ++ ++ /* If we already have an ISA bridge, bail off */ ++ if (isa_bridge_devnode != NULL) ++ return; ++ ++ /* For each "isa" node in the system. Note : we do a search by ++ * type and not by name. It might be better to do by name but that's ++ * what the code used to do and I don't want to break too much at ++ * once. We can look into changing that separately ++ */ ++ for_each_node_by_type(np, "isa") { ++ /* Look for our hose being a parent */ ++ for (parent = of_get_parent(np); parent;) { ++ if (parent == hose->arch_data) { ++ of_node_put(parent); ++ break; ++ } ++ tmp = parent; ++ parent = of_get_parent(parent); ++ of_node_put(tmp); ++ } ++ if (parent != NULL) ++ break; ++ } ++ if (np == NULL) ++ return; ++ isa_bridge_devnode = np; ++ ++ /* Now parse the "ranges" property and setup the ISA mapping */ ++ pci_process_ISA_OF_ranges(np, hose->io_base_phys); ++ ++ /* Set the global ISA io base to indicate we have an ISA bridge */ ++ isa_io_base = ISA_IO_BASE; ++ ++ pr_debug("ISA bridge (early) is %s\n", np->full_name); ++} ++ ++/** ++ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of ++ * a new ISA bridge ++ */ ++static void __devinit isa_bridge_find_late(struct pci_dev *pdev, ++ struct device_node *devnode) ++{ ++ struct pci_controller *hose = pci_bus_to_host(pdev->bus); ++ ++ /* Store ISA device node and PCI device */ ++ isa_bridge_devnode = of_node_get(devnode); ++ isa_bridge_pcidev = pdev; ++ ++ /* Now parse the "ranges" property and setup the ISA mapping */ ++ pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); ++ ++ /* Set the global ISA io base to indicate we have an ISA bridge */ ++ isa_io_base = ISA_IO_BASE; ++ ++ pr_debug("ISA bridge (late) is %s on %s\n", ++ devnode->full_name, pci_name(pdev)); ++} ++ ++/** ++ * isa_bridge_remove - Remove/unmap an ISA bridge ++ */ ++static void isa_bridge_remove(void) ++{ ++ pr_debug("ISA bridge removed !\n"); ++ ++ /* Clear the global ISA io base to indicate that we have no more ++ * ISA bridge. Note that drivers don't quite handle that, though ++ * we should probably do something about it. But do we ever really ++ * have ISA bridges being removed on machines using legacy devices ? 
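++ * In practice the base is simply reset to ISA_IO_BASE here and the
++ * fixed 64K window is unmapped below.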
++ */ ++ isa_io_base = ISA_IO_BASE; ++ ++ /* Clear references to the bridge */ ++ of_node_put(isa_bridge_devnode); ++ isa_bridge_devnode = NULL; ++ isa_bridge_pcidev = NULL; ++ ++ /* Unmap the ISA area */ ++ __iounmap_at((void *)ISA_IO_BASE, 0x10000); ++} ++ ++/** ++ * isa_bridge_notify - Get notified of PCI devices addition/removal ++ */ ++static int __devinit isa_bridge_notify(struct notifier_block *nb, ++ unsigned long action, void *data) ++{ ++ struct device *dev = data; ++ struct pci_dev *pdev = to_pci_dev(dev); ++ struct device_node *devnode = pci_device_to_OF_node(pdev); ++ ++ switch(action) { ++ case BUS_NOTIFY_ADD_DEVICE: ++ /* Check if we have an early ISA device, without PCI dev */ ++ if (isa_bridge_devnode && isa_bridge_devnode == devnode && ++ !isa_bridge_pcidev) { ++ pr_debug("ISA bridge PCI attached: %s\n", ++ pci_name(pdev)); ++ isa_bridge_pcidev = pdev; ++ } ++ ++ /* Check if we have no ISA device, and this happens to be one, ++ * register it as such if it has an OF device ++ */ ++ if (!isa_bridge_devnode && devnode && devnode->type && ++ !strcmp(devnode->type, "isa")) ++ isa_bridge_find_late(pdev, devnode); ++ ++ return 0; ++ case BUS_NOTIFY_DEL_DEVICE: ++ /* Check if this our existing ISA device */ ++ if (pdev == isa_bridge_pcidev || ++ (devnode && devnode == isa_bridge_devnode)) ++ isa_bridge_remove(); ++ return 0; ++ } ++ return 0; ++} ++ ++static struct notifier_block isa_bridge_notifier = { ++ .notifier_call = isa_bridge_notify ++}; ++ ++/** ++ * isa_bridge_init - register to be notified of ISA bridge addition/removal ++ * ++ */ ++static int __init isa_bridge_init(void) ++{ ++ if (firmware_has_feature(FW_FEATURE_ISERIES)) ++ return 0; ++ bus_register_notifier(&pci_bus_type, &isa_bridge_notifier); ++ return 0; ++} ++arch_initcall(isa_bridge_init); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb.c linux-2.6.22-591/arch/powerpc/kernel/kgdb.c +--- linux-2.6.22-570/arch/powerpc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,499 @@ ++/* ++ * arch/powerpc/kernel/kgdb.c ++ * ++ * PowerPC backend to the KGDB stub. ++ * ++ * Maintainer: Tom Rini ++ * ++ * Copied from arch/ppc/kernel/kgdb.c, updated for ppc64 ++ * ++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) ++ * Copyright (C) 2003 Timesys Corporation. ++ * Copyright (C) 2004-2006 MontaVista Software, Inc. ++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) ++ * PPC32 support restored by Vitaly Wool and ++ * Sergei Shtylyov ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * This table contains the mapping between PowerPC hardware trap types, and ++ * signals, which are primarily what GDB understands. GDB and the kernel ++ * don't always agree on values, so we use constants taken from gdb-6.2. 
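++ * Trap types with no entry in this table are reported to gdb as
++ * SIGHUP (see computeSignal() below).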
++ */ ++static struct hard_trap_info ++{ ++ unsigned int tt; /* Trap type code for powerpc */ ++ unsigned char signo; /* Signal that we map this trap into */ ++} hard_trap_info[] = { ++ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ ++ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ ++ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ ++ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ ++ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ ++ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ ++ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ ++ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ ++ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ ++ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ ++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) ++ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ ++#if defined(CONFIG_FSL_BOOKE) ++ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ ++ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ ++ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ ++ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ ++ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ ++ { 0x2060, 0x0e /* SIGILL */ }, /* performace monitor */ ++ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ ++ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ ++ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ ++#else ++ { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */ ++ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ ++ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ ++ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ ++ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ ++#endif ++#else ++ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ ++#if defined(CONFIG_8xx) ++ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ ++#else ++ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ ++ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ ++ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ ++#if defined(CONFIG_PPC64) ++ { 0x1200, 0x05 /* SIGILL */ }, /* system error */ ++ { 0x1500, 0x04 /* SIGILL */ }, /* soft patch */ ++ { 0x1600, 0x04 /* SIGILL */ }, /* maintenance */ ++ { 0x1700, 0x08 /* SIGFPE */ }, /* altivec assist */ ++ { 0x1800, 0x04 /* SIGILL */ }, /* thermal */ ++#else ++ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */ ++ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ ++ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ ++ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ ++#endif ++#endif ++#endif ++ { 0x0000, 0x00 } /* Must be last */ ++}; ++ ++extern atomic_t cpu_doing_single_step; ++ ++static int computeSignal(unsigned int tt) ++{ ++ struct hard_trap_info *ht; ++ ++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++) ++ if (ht->tt == tt) ++ return ht->signo; ++ ++ return SIGHUP; /* default for things we don't know about */ ++} ++ ++static int kgdb_call_nmi_hook(struct pt_regs *regs) ++{ ++ kgdb_nmihook(smp_processor_id(), regs); ++ return 0; ++} ++ ++#ifdef CONFIG_SMP ++void kgdb_roundup_cpus(unsigned long flags) ++{ ++ smp_send_debugger_break(MSG_ALL_BUT_SELF); ++} ++#endif ++ ++/* KGDB functions to use existing PowerPC64 hooks. 
*/ ++static int kgdb_debugger(struct pt_regs *regs) ++{ ++ return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++} ++ ++static int kgdb_breakpoint(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, SIGTRAP, 0, regs); ++ ++ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) ++ regs->nip += 4; ++ ++ return 1; ++} ++ ++static int kgdb_singlestep(struct pt_regs *regs) ++{ ++ struct thread_info *thread_info, *exception_thread_info; ++ ++ if (user_mode(regs)) ++ return 0; ++ ++ /* ++ * On Book E and perhaps other processsors, singlestep is handled on ++ * the critical exception stack. This causes current_thread_info() ++ * to fail, since it it locates the thread_info by masking off ++ * the low bits of the current stack pointer. We work around ++ * this issue by copying the thread_info from the kernel stack ++ * before calling kgdb_handle_exception, and copying it back ++ * afterwards. On most processors the copy is avoided since ++ * exception_thread_info == thread_info. ++ */ ++ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); ++ exception_thread_info = current_thread_info(); ++ ++ if (thread_info != exception_thread_info) ++ memcpy(exception_thread_info, thread_info, sizeof *thread_info); ++ ++ kgdb_handle_exception(0, SIGTRAP, 0, regs); ++ ++ if (thread_info != exception_thread_info) ++ memcpy(thread_info, exception_thread_info, sizeof *thread_info); ++ ++ return 1; ++} ++ ++int kgdb_iabr_match(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++ return 1; ++} ++ ++int kgdb_dabr_match(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++ return 1; ++} ++ ++#define PACK64(ptr,src) do { *(ptr++) = (src); } while(0) ++ ++#define PACK32(ptr,src) do { \ ++ u32 *ptr32; \ ++ ptr32 = (u32 *)ptr; \ ++ *(ptr32++) = (src); \ ++ ptr = (unsigned long *)ptr32; \ ++ } while(0) ++ ++ ++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ unsigned long *ptr = gdb_regs; ++ int reg; ++ ++ memset(gdb_regs, 0, NUMREGBYTES); ++ ++ for (reg = 0; reg < 32; reg++) ++ PACK64(ptr, regs->gpr[reg]); ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ PACK64(ptr, current->thread.evr[reg]); ++#else ++ ptr += 32; ++#endif ++#else ++ /* fp registers not used by kernel, leave zero */ ++ ptr += 32 * 8 / sizeof(long); ++#endif ++ ++ PACK64(ptr, regs->nip); ++ PACK64(ptr, regs->msr); ++ PACK32(ptr, regs->ccr); ++ PACK64(ptr, regs->link); ++ PACK64(ptr, regs->ctr); ++ PACK32(ptr, regs->xer); ++ ++#if 0 ++ Following are in struct thread_struct, not struct pt_regs, ++ ignoring for now since kernel does not use them. Would it ++ make sense to get them from the thread that kgdb is set to? ++ ++ If this code is enabled, update the definition of NUMREGBYTES to ++ include the vector registers and vector state registers. 
++ ++ PACK32(ptr, current->thread->fpscr); ++ ++ /* vr registers not used by kernel, leave zero */ ++ ptr += 32 * 16 / sizeof(long); ++ ++#ifdef CONFIG_ALTIVEC ++ PACK32(ptr, current->thread->vscr); ++ PACK32(ptr, current->thread->vrsave); ++#else ++ ptr += 2 * 4 / sizeof(long); ++#endif ++#else ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ PACK32(ptr, current->thread.acc >> 32); ++ PACK32(ptr, current->thread.acc & 0xffffffff); ++ PACK64(ptr, current->thread.spefscr); ++#else ++ ptr += 2 + 1; ++#endif ++#else ++ /* fpscr not used by kernel, leave zero */ ++ PACK32(ptr, 0); ++#endif ++#endif ++ ++ BUG_ON((unsigned long)ptr > ++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); ++} ++ ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) ++{ ++ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + ++ STACK_FRAME_OVERHEAD); ++ unsigned long *ptr = gdb_regs; ++ int reg; ++ ++ memset(gdb_regs, 0, NUMREGBYTES); ++ ++ /* Regs GPR0-2 */ ++ for (reg = 0; reg < 3; reg++) ++ PACK64(ptr, regs->gpr[reg]); ++ ++ /* Regs GPR3-13 are caller saved, not in regs->gpr[] */ ++ ptr += 11; ++ ++ /* Regs GPR14-31 */ ++ for (reg = 14; reg < 32; reg++) ++ PACK64(ptr, regs->gpr[reg]); ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ PACK64(ptr, p->thread.evr[reg]); ++#else ++ ptr += 32; ++#endif ++#else ++ /* fp registers not used by kernel, leave zero */ ++ ptr += 32 * 8 / sizeof(long); ++#endif ++ ++ PACK64(ptr, regs->nip); ++ PACK64(ptr, regs->msr); ++ PACK32(ptr, regs->ccr); ++ PACK64(ptr, regs->link); ++ PACK64(ptr, regs->ctr); ++ PACK32(ptr, regs->xer); ++ ++#if 0 ++ Following are in struct thread_struct, not struct pt_regs, ++ ignoring for now since kernel does not use them. Would it ++ make sense to get them from the thread that kgdb is set to? ++ ++ If this code is enabled, update the definition of NUMREGBYTES to ++ include the vector registers and vector state registers. 
++ ++ PACK32(ptr, p->thread->fpscr); ++ ++ /* vr registers not used by kernel, leave zero */ ++ ptr += 32 * 16 / sizeof(long); ++ ++#ifdef CONFIG_ALTIVEC ++ PACK32(ptr, p->thread->vscr); ++ PACK32(ptr, p->thread->vrsave); ++#else ++ ptr += 2 * 4 / sizeof(long); ++#endif ++#else ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ PACK32(ptr, p->thread.acc >> 32); ++ PACK32(ptr, p->thread.acc & 0xffffffff); ++ PACK64(ptr, p->thread.spefscr); ++#else ++ ptr += 2 + 1; ++#endif ++#else ++ /* fpscr not used by kernel, leave zero */ ++ PACK32(ptr, 0); ++#endif ++#endif ++ ++ BUG_ON((unsigned long)ptr > ++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); ++} ++ ++#define UNPACK64(dest,ptr) do { dest = *(ptr++); } while(0) ++ ++#define UNPACK32(dest,ptr) do { \ ++ u32 *ptr32; \ ++ ptr32 = (u32 *)ptr; \ ++ dest = *(ptr32++); \ ++ ptr = (unsigned long *)ptr32; \ ++ } while(0) ++ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ unsigned long *ptr = gdb_regs; ++ int reg; ++#ifdef CONFIG_SPE ++ union { ++ u32 v32[2]; ++ u64 v64; ++ } acc; ++#endif ++ ++ for (reg = 0; reg < 32; reg++) ++ UNPACK64(regs->gpr[reg], ptr); ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ UNPACK64(current->thread.evr[reg], ptr); ++#else ++ ptr += 32; ++#endif ++#else ++ /* fp registers not used by kernel, leave zero */ ++ ptr += 32 * 8 / sizeof(int); ++#endif ++ ++ UNPACK64(regs->nip, ptr); ++ UNPACK64(regs->msr, ptr); ++ UNPACK32(regs->ccr, ptr); ++ UNPACK64(regs->link, ptr); ++ UNPACK64(regs->ctr, ptr); ++ UNPACK32(regs->xer, ptr); ++ ++#if 0 ++ Following are in struct thread_struct, not struct pt_regs, ++ ignoring for now since kernel does not use them. Would it ++ make sense to get them from the thread that kgdb is set to? ++ ++ If this code is enabled, update the definition of NUMREGBYTES to ++ include the vector registers and vector state registers. ++ ++ /* fpscr, vscr, vrsave not used by kernel, leave unchanged */ ++ ++ UNPACK32(current->thread->fpscr, ptr); ++ ++ /* vr registers not used by kernel, leave zero */ ++ ptr += 32 * 16 / sizeof(long); ++ ++#ifdef CONFIG_ALTIVEC ++ UNPACK32(current->thread->vscr, ptr); ++ UNPACK32(current->thread->vrsave, ptr); ++#else ++ ptr += 2 * 4 / sizeof(long); ++#endif ++#else ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ UNPACK32(acc.v32[0], ptr); ++ UNPACK32(acc.v32[1], ptr); ++ current->thread.acc = acc.v64; ++ UNPACK64(current->thread.spefscr, ptr); ++#else ++ ptr += 2 + 1; ++#endif ++#endif ++#endif ++ ++ BUG_ON((unsigned long)ptr > ++ (unsigned long)(((void *)gdb_regs) + NUMREGBYTES)); ++} ++ ++/* ++ * This function does PowerPC specific procesing for interfacing to gdb. ++ */ ++int kgdb_arch_handle_exception(int vector, int signo, int err_code, ++ char *remcom_in_buffer, char *remcom_out_buffer, ++ struct pt_regs *linux_regs) ++{ ++ char *ptr = &remcom_in_buffer[1]; ++ unsigned long addr; ++ ++ switch (remcom_in_buffer[0]) { ++ /* ++ * sAA..AA Step one instruction from AA..AA ++ * This will return an error to gdb .. 
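++ * The 'c' (continue) packet is handled by the same case; only 's'
++ * additionally sets the trace bit below.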
++ */ ++ case 's': ++ case 'c': ++ /* handle the optional parameter */ ++ if (kgdb_hex2long(&ptr, &addr)) ++ linux_regs->nip = addr; ++ ++ atomic_set(&cpu_doing_single_step, -1); ++ /* set the trace bit if we're stepping */ ++ if (remcom_in_buffer[0] == 's') { ++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) ++ mtspr(SPRN_DBCR0, ++ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); ++ linux_regs->msr |= MSR_DE; ++#else ++ linux_regs->msr |= MSR_SE; ++#endif ++ debugger_step = 1; ++ if (kgdb_contthread) ++ atomic_set(&cpu_doing_single_step, ++ smp_processor_id()); ++ } ++ return 0; ++ } ++ ++ return -1; ++} ++ ++/* ++ * Global data ++ */ ++struct kgdb_arch arch_kgdb_ops = { ++ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, ++}; ++ ++int kgdb_not_implemented(struct pt_regs *regs) ++{ ++ return 0; ++} ++ ++int kgdb_arch_init(void) ++{ ++#ifdef CONFIG_XMON ++#error Both XMON and KGDB selected in .config. Unselect one of them. ++#endif ++ ++ __debugger_ipi = kgdb_call_nmi_hook; ++ __debugger = kgdb_debugger; ++ __debugger_bpt = kgdb_breakpoint; ++ __debugger_sstep = kgdb_singlestep; ++ __debugger_iabr_match = kgdb_iabr_match; ++ __debugger_dabr_match = kgdb_dabr_match; ++ __debugger_fault_handler = kgdb_not_implemented; ++ ++ return 0; ++} ++ ++arch_initcall(kgdb_arch_init); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S +--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (C) 1996 Paul Mackerras ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++ ++ .text ++ ++/* ++ * Save/restore state in case a memory access causes a fault. ++ * ++ * int kgdb_fault_setjmp(unsigned long *curr_context); ++ * void kgdb_fault_longjmp(unsigned long *curr_context); ++ */ ++ ++_GLOBAL(kgdb_fault_setjmp) ++ mflr r0 ++ stw r0,0(r3) ++ stw r1,4(r3) ++ stw r2,8(r3) ++ mfcr r0 ++ stw r0,12(r3) ++ stmw r13,16(r3) ++ li r3,0 ++ blr ++ ++_GLOBAL(kgdb_fault_longjmp) ++ lmw r13,16(r3) ++ lwz r0,12(r3) ++ mtcrf 0x38,r0 ++ lwz r0,0(r3) ++ lwz r1,4(r3) ++ lwz r2,8(r3) ++ mtlr r0 ++ mr r3,r1 ++ blr +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S +--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/kgdb_setjmp64.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (C) 1996 Paul Mackerras ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++ ++ .text ++ ++/* ++ * Save/restore state in case a memory access causes a fault. 
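++ * Only lr, cr, r1, r2 and the non-volatile GPRs r13-r31 are saved;
++ * the volatile registers do not need to survive the longjmp.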
++ * ++ * int kgdb_fault_setjmp(unsigned long *curr_context); ++ * void kgdb_fault_longjmp(unsigned long *curr_context); ++ */ ++ ++_GLOBAL(kgdb_fault_setjmp) ++ mflr r0 ++ std r0,0(r3) ++ std r1,8(r3) ++ std r2,16(r3) ++ mfcr r0 ++ std r0,24(r3) ++ std r13,32(r3) ++ std r14,40(r3) ++ std r15,48(r3) ++ std r16,56(r3) ++ std r17,64(r3) ++ std r18,72(r3) ++ std r19,80(r3) ++ std r20,88(r3) ++ std r21,96(r3) ++ std r22,104(r3) ++ std r23,112(r3) ++ std r24,120(r3) ++ std r25,128(r3) ++ std r26,136(r3) ++ std r27,144(r3) ++ std r28,152(r3) ++ std r29,160(r3) ++ std r30,168(r3) ++ std r31,176(r3) ++ li r3,0 ++ blr ++ ++_GLOBAL(kgdb_fault_longjmp) ++ ld r13,32(r3) ++ ld r14,40(r3) ++ ld r15,48(r3) ++ ld r16,56(r3) ++ ld r17,64(r3) ++ ld r18,72(r3) ++ ld r19,80(r3) ++ ld r20,88(r3) ++ ld r21,96(r3) ++ ld r22,104(r3) ++ ld r23,112(r3) ++ ld r24,120(r3) ++ ld r25,128(r3) ++ ld r26,136(r3) ++ ld r27,144(r3) ++ ld r28,152(r3) ++ ld r29,160(r3) ++ ld r30,168(r3) ++ ld r31,176(r3) ++ ld r0,24(r3) ++ mtcrf 0x38,r0 ++ ld r0,0(r3) ++ ld r1,8(r3) ++ ld r2,16(r3) ++ mtlr r0 ++ mr r3,r1 ++ blr +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c +--- linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/legacy_serial.c 2007-12-21 15:36:11.000000000 -0500 +@@ -11,6 +11,9 @@ + #include + #include + #include ++#ifdef CONFIG_KGDB_8250 ++#include ++#endif + + #undef DEBUG + +@@ -487,6 +490,9 @@ + fixup_port_pio(i, np, port); + if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI)) + fixup_port_mmio(i, np, port); ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_platform_port(i, port); ++#endif + } + + DBG("Registering platform serial ports\n"); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/misc_32.S linux-2.6.22-591/arch/powerpc/kernel/misc_32.S +--- linux-2.6.22-570/arch/powerpc/kernel/misc_32.S 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/misc_32.S 2007-12-21 15:36:11.000000000 -0500 +@@ -392,7 +392,7 @@ + mtspr SPRN_L1CSR0,r3 + isync + blr +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) + mfspr r3,SPRN_L1CSR1 + ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR + mtspr SPRN_L1CSR1,r3 +@@ -419,7 +419,7 @@ + _GLOBAL(__flush_icache_range) + BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 +@@ -514,8 +514,8 @@ + */ + _GLOBAL(__flush_dcache_icache) + BEGIN_FTR_SECTION +- blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++ blr ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + rlwinm r3,r3,0,0,19 /* Get page base address */ + li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + mtctr r4 +@@ -543,7 +543,7 @@ + _GLOBAL(__flush_dcache_icache_phys) + BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + mfmsr r10 + rlwinm r0,r10,0,28,26 /* clear DR */ + mtmsr r0 +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/of_platform.c linux-2.6.22-591/arch/powerpc/kernel/of_platform.c +--- linux-2.6.22-570/arch/powerpc/kernel/of_platform.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/of_platform.c 2007-12-21 15:36:11.000000000 -0500 +@@ -427,14 +427,6 @@ + /* Process "ranges" property */ + 
pci_process_bridge_OF_ranges(phb, dev->node, 0); + +- /* Setup IO space. We use the non-dynamic version of that code here, +- * which doesn't quite support unplugging. Next kernel release will +- * have a better fix for this. +- * Note also that we don't do ISA, this will also be fixed with a +- * more massive rework. +- */ +- pci_setup_phb_io(phb, pci_io_base == 0); +- + /* Init pci_dn data structures */ + pci_devs_phb_init_dynamic(phb); + +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/pci_64.c linux-2.6.22-591/arch/powerpc/kernel/pci_64.c +--- linux-2.6.22-570/arch/powerpc/kernel/pci_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/pci_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -11,7 +11,7 @@ + * 2 of the License, or (at your option) any later version. + */ + +-#undef DEBUG ++#define DEBUG + + #include + #include +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -41,35 +42,26 @@ + + unsigned long pci_probe_only = 1; + int pci_assign_all_buses = 0; +-static int pci_initial_scan_done; + + static void fixup_resource(struct resource *res, struct pci_dev *dev); + static void do_bus_setup(struct pci_bus *bus); +-static void phbs_remap_io(void); + + /* pci_io_base -- the base address from which io bars are offsets. + * This is the lowest I/O base address (so bar values are always positive), + * and it *must* be the start of ISA space if an ISA bus exists because +- * ISA drivers use hard coded offsets. If no ISA bus exists a dummy +- * page is mapped and isa_io_limit prevents access to it. ++ * ISA drivers use hard coded offsets. If no ISA bus exists nothing ++ * is mapped on the first 64K of IO space + */ +-unsigned long isa_io_base; /* NULL if no ISA bus */ +-EXPORT_SYMBOL(isa_io_base); +-unsigned long pci_io_base; ++unsigned long pci_io_base = ISA_IO_BASE; + EXPORT_SYMBOL(pci_io_base); + +-void iSeries_pcibios_init(void); +- + LIST_HEAD(hose_list); + + static struct dma_mapping_ops *pci_dma_ops; + ++/* XXX kill that some day ... */ + int global_phb_number; /* Global phb counter */ + +-/* Cached ISA bridge dev. 
*/ +-struct pci_dev *ppc64_isabridge_dev = NULL; +-EXPORT_SYMBOL_GPL(ppc64_isabridge_dev); +- + void set_pci_dma_ops(struct dma_mapping_ops *dma_ops) + { + pci_dma_ops = dma_ops; +@@ -100,7 +92,7 @@ + return; + + if (res->flags & IORESOURCE_IO) +- offset = (unsigned long)hose->io_base_virt - pci_io_base; ++ offset = (unsigned long)hose->io_base_virt - _IO_BASE; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; +@@ -119,7 +111,7 @@ + return; + + if (res->flags & IORESOURCE_IO) +- offset = (unsigned long)hose->io_base_virt - pci_io_base; ++ offset = (unsigned long)hose->io_base_virt - _IO_BASE; + + if (res->flags & IORESOURCE_MEM) + offset = hose->pci_mem_offset; +@@ -156,7 +148,7 @@ + + if (res->flags & IORESOURCE_IO) { + unsigned long offset = (unsigned long)hose->io_base_virt - +- pci_io_base; ++ _IO_BASE; + /* Make sure we start at our min on all hoses */ + if (start - offset < PCIBIOS_MIN_IO) + start = PCIBIOS_MIN_IO + offset; +@@ -535,10 +527,16 @@ + bus->secondary = hose->first_busno; + hose->bus = bus; + ++ if (!firmware_has_feature(FW_FEATURE_ISERIES)) ++ pcibios_map_io_space(bus); ++ + bus->resource[0] = res = &hose->io_resource; +- if (res->flags && request_resource(&ioport_resource, res)) ++ if (res->flags && request_resource(&ioport_resource, res)) { + printk(KERN_ERR "Failed to request PCI IO region " + "on PCI domain %04x\n", hose->global_number); ++ DBG("res->start = 0x%016lx, res->end = 0x%016lx\n", ++ res->start, res->end); ++ } + + for (i = 0; i < 3; ++i) { + res = &hose->mem_resources[i]; +@@ -596,17 +594,6 @@ + if (ppc_md.pcibios_fixup) + ppc_md.pcibios_fixup(); + +- /* Cache the location of the ISA bridge (if we have one) */ +- ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL); +- if (ppc64_isabridge_dev != NULL) +- printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev)); +- +- if (!firmware_has_feature(FW_FEATURE_ISERIES)) +- /* map in PCI I/O space */ +- phbs_remap_io(); +- +- pci_initial_scan_done = 1; +- + printk(KERN_DEBUG "PCI: Probing PCI hardware done\n"); + + return 0; +@@ -711,7 +698,7 @@ + #endif + res_bit = IORESOURCE_MEM; + } else { +- io_offset = (unsigned long)hose->io_base_virt - pci_io_base; ++ io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + *offset += io_offset; + res_bit = IORESOURCE_IO; + } +@@ -881,76 +868,6 @@ + device_create_file(&pdev->dev, &dev_attr_devspec); + } + +-#define ISA_SPACE_MASK 0x1 +-#define ISA_SPACE_IO 0x1 +- +-static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node, +- unsigned long phb_io_base_phys, +- void __iomem * phb_io_base_virt) +-{ +- /* Remove these asap */ +- +- struct pci_address { +- u32 a_hi; +- u32 a_mid; +- u32 a_lo; +- }; +- +- struct isa_address { +- u32 a_hi; +- u32 a_lo; +- }; +- +- struct isa_range { +- struct isa_address isa_addr; +- struct pci_address pci_addr; +- unsigned int size; +- }; +- +- const struct isa_range *range; +- unsigned long pci_addr; +- unsigned int isa_addr; +- unsigned int size; +- int rlen = 0; +- +- range = of_get_property(isa_node, "ranges", &rlen); +- if (range == NULL || (rlen < sizeof(struct isa_range))) { +- printk(KERN_ERR "no ISA ranges or unexpected isa range size," +- "mapping 64k\n"); +- __ioremap_explicit(phb_io_base_phys, +- (unsigned long)phb_io_base_virt, +- 0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED); +- return; +- } +- +- /* From "ISA Binding to 1275" +- * The ranges property is laid out as an array of elements, +- * each of which comprises: +- * cells 0 - 1: an ISA address +- * cells 2 - 
4: a PCI address +- * (size depending on dev->n_addr_cells) +- * cell 5: the size of the range +- */ +- if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) { +- isa_addr = range->isa_addr.a_lo; +- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | +- range->pci_addr.a_lo; +- +- /* Assume these are both zero */ +- if ((pci_addr != 0) || (isa_addr != 0)) { +- printk(KERN_ERR "unexpected isa to pci mapping: %s\n", +- __FUNCTION__); +- return; +- } +- +- size = PAGE_ALIGN(range->size); +- +- __ioremap_explicit(phb_io_base_phys, +- (unsigned long) phb_io_base_virt, +- size, _PAGE_NO_CACHE | _PAGE_GUARDED); +- } +-} +- + void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int prim) + { +@@ -1045,155 +962,122 @@ + } + } + +-void __devinit pci_setup_phb_io(struct pci_controller *hose, int primary) +-{ +- unsigned long size = hose->pci_io_size; +- unsigned long io_virt_offset; +- struct resource *res; +- struct device_node *isa_dn; +- +- if (size == 0) +- return; +- +- hose->io_base_virt = reserve_phb_iospace(size); +- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", +- hose->global_number, hose->io_base_phys, +- (unsigned long) hose->io_base_virt); +- +- if (primary) { +- pci_io_base = (unsigned long)hose->io_base_virt; +- isa_dn = of_find_node_by_type(NULL, "isa"); +- if (isa_dn) { +- isa_io_base = pci_io_base; +- pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys, +- hose->io_base_virt); +- of_node_put(isa_dn); +- } +- } +- +- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; +- res = &hose->io_resource; +- res->start += io_virt_offset; +- res->end += io_virt_offset; +- +- /* If this is called after the initial PCI scan, then we need to +- * proceed to IO mappings now +- */ +- if (pci_initial_scan_done) +- __ioremap_explicit(hose->io_base_phys, +- (unsigned long)hose->io_base_virt, +- hose->pci_io_size, +- _PAGE_NO_CACHE | _PAGE_GUARDED); +-} ++#ifdef CONFIG_HOTPLUG + +-void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose, +- int primary) ++int pcibios_unmap_io_space(struct pci_bus *bus) + { +- unsigned long size = hose->pci_io_size; +- unsigned long io_virt_offset; +- struct resource *res; ++ struct pci_controller *hose; + +- if (size == 0) +- return; ++ WARN_ON(bus == NULL); + +- hose->io_base_virt = __ioremap(hose->io_base_phys, size, +- _PAGE_NO_CACHE | _PAGE_GUARDED); +- DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n", +- hose->global_number, hose->io_base_phys, +- (unsigned long) hose->io_base_virt); +- +- if (primary) +- pci_io_base = (unsigned long)hose->io_base_virt; +- +- io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base; +- res = &hose->io_resource; +- res->start += io_virt_offset; +- res->end += io_virt_offset; +-} ++ /* If this is not a PHB, we only flush the hash table over ++ * the area mapped by this bridge. 
We don't play with the PTE ++ * mappings since we might have to deal with sub-page alignments ++ * so flushing the hash table is the only sane way to make sure ++ * that no hash entries are covering that removed bridge area ++ * while still allowing other busses overlapping those pages ++ */ ++ if (bus->self) { ++ struct resource *res = bus->resource[0]; + ++ DBG("IO unmapping for PCI-PCI bridge %s\n", ++ pci_name(bus->self)); + +-static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys, +- unsigned long *start_virt, unsigned long *size) +-{ +- struct pci_controller *hose = pci_bus_to_host(bus); +- struct resource *res; ++ __flush_hash_table_range(&init_mm, res->start + _IO_BASE, ++ res->end - res->start + 1); ++ return 0; ++ } + +- if (bus->self) +- res = bus->resource[0]; +- else +- /* Root Bus */ +- res = &hose->io_resource; ++ /* Get the host bridge */ ++ hose = pci_bus_to_host(bus); + +- if (res->end == 0 && res->start == 0) +- return 1; ++ /* Check if we have IOs allocated */ ++ if (hose->io_base_alloc == 0) ++ return 0; + +- *start_virt = pci_io_base + res->start; +- *start_phys = *start_virt + hose->io_base_phys +- - (unsigned long) hose->io_base_virt; ++ DBG("IO unmapping for PHB %s\n", ++ ((struct device_node *)hose->arch_data)->full_name); ++ DBG(" alloc=0x%p\n", hose->io_base_alloc); + +- if (res->end > res->start) +- *size = res->end - res->start + 1; +- else { +- printk("%s(): unexpected region 0x%lx->0x%lx\n", +- __FUNCTION__, res->start, res->end); +- return 1; +- } ++ /* This is a PHB, we fully unmap the IO area */ ++ vunmap(hose->io_base_alloc); + + return 0; + } ++EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); + +-int unmap_bus_range(struct pci_bus *bus) +-{ +- unsigned long start_phys; +- unsigned long start_virt; +- unsigned long size; ++#endif /* CONFIG_HOTPLUG */ + +- if (!bus) { +- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); +- return 1; +- } ++int __devinit pcibios_map_io_space(struct pci_bus *bus) ++{ ++ struct vm_struct *area; ++ unsigned long phys_page; ++ unsigned long size_page; ++ unsigned long io_virt_offset; ++ struct pci_controller *hose; + +- if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) +- return 1; +- if (__iounmap_explicit((void __iomem *) start_virt, size)) +- return 1; ++ WARN_ON(bus == NULL); + ++ /* If this is not a PHB, nothing to do, page tables still exist and ++ * thus HPTEs will be faulted in when needed ++ */ ++ if (bus->self) { ++ DBG("IO mapping for PCI-PCI bridge %s\n", ++ pci_name(bus->self)); ++ DBG(" virt=0x%016lx...0x%016lx\n", ++ bus->resource[0]->start + _IO_BASE, ++ bus->resource[0]->end + _IO_BASE); + return 0; +-} +-EXPORT_SYMBOL(unmap_bus_range); +- +-int remap_bus_range(struct pci_bus *bus) +-{ +- unsigned long start_phys; +- unsigned long start_virt; +- unsigned long size; +- +- if (!bus) { +- printk(KERN_ERR "%s() expected bus\n", __FUNCTION__); +- return 1; + } + ++ /* Get the host bridge */ ++ hose = pci_bus_to_host(bus); ++ phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE); ++ size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE); + +- if (get_bus_io_range(bus, &start_phys, &start_virt, &size)) +- return 1; +- if (start_phys == 0) +- return 1; +- printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size); +- if (__ioremap_explicit(start_phys, start_virt, size, +- _PAGE_NO_CACHE | _PAGE_GUARDED)) +- return 1; ++ /* Make sure IO area address is clear */ ++ hose->io_base_alloc = NULL; + ++ /* If there's no IO to map on that bus, get away too */ ++ if
(hose->pci_io_size == 0 || hose->io_base_phys == 0) + return 0; +-} +-EXPORT_SYMBOL(remap_bus_range); + +-static void phbs_remap_io(void) +-{ +- struct pci_controller *hose, *tmp; ++ /* Let's allocate some IO space for that guy. We don't pass ++ * VM_IOREMAP because we don't care about alignment tricks that ++ * the core does in that case. Maybe we should due to stupid card ++ * with incomplete address decoding but I'd rather not deal with ++ * those outside of the reserved 64K legacy region. ++ */ ++ area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); ++ if (area == NULL) ++ return -ENOMEM; ++ hose->io_base_alloc = area->addr; ++ hose->io_base_virt = (void __iomem *)(area->addr + ++ hose->io_base_phys - phys_page); ++ ++ DBG("IO mapping for PHB %s\n", ++ ((struct device_node *)hose->arch_data)->full_name); ++ DBG(" phys=0x%016lx, virt=0x%p (alloc=0x%p)\n", ++ hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); ++ DBG(" size=0x%016lx (alloc=0x%016lx)\n", ++ hose->pci_io_size, size_page); ++ ++ /* Establish the mapping */ ++ if (__ioremap_at(phys_page, area->addr, size_page, ++ _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL) ++ return -ENOMEM; ++ ++ /* Fixup hose IO resource */ ++ io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE; ++ hose->io_resource.start += io_virt_offset; ++ hose->io_resource.end += io_virt_offset; + +- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) +- remap_bus_range(hose->bus); ++ DBG(" hose->io_resource=0x%016lx...0x%016lx\n", ++ hose->io_resource.start, hose->io_resource.end); ++ ++ return 0; + } ++EXPORT_SYMBOL_GPL(pcibios_map_io_space); + + static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev) + { +@@ -1201,8 +1085,7 @@ + unsigned long offset; + + if (res->flags & IORESOURCE_IO) { +- offset = (unsigned long)hose->io_base_virt - pci_io_base; +- ++ offset = (unsigned long)hose->io_base_virt - _IO_BASE; + res->start += offset; + res->end += offset; + } else if (res->flags & IORESOURCE_MEM) { +@@ -1217,9 +1100,20 @@ + /* Update device resources. 
*/ + int i; + +- for (i = 0; i < PCI_NUM_RESOURCES; i++) +- if (dev->resource[i].flags) +- fixup_resource(&dev->resource[i], dev); ++ DBG("%s: Fixup resources:\n", pci_name(dev)); ++ for (i = 0; i < PCI_NUM_RESOURCES; i++) { ++ struct resource *res = &dev->resource[i]; ++ if (!res->flags) ++ continue; ++ ++ DBG(" 0x%02x < %08lx:0x%016lx...0x%016lx\n", ++ i, res->flags, res->start, res->end); ++ ++ fixup_resource(res, dev); ++ ++ DBG(" > %08lx:0x%016lx...0x%016lx\n", ++ res->flags, res->start, res->end); ++ } + } + EXPORT_SYMBOL(pcibios_fixup_device_resources); + +@@ -1360,7 +1254,7 @@ + return; + + if (rsrc->flags & IORESOURCE_IO) +- offset = (unsigned long)hose->io_base_virt - pci_io_base; ++ offset = (unsigned long)hose->io_base_virt - _IO_BASE; + + /* We pass a fully fixed up address to userland for MMIO instead of + * a BAR value because X is lame and expects to be able to use that +@@ -1410,7 +1304,7 @@ + if (address >= hose->io_base_phys && + address < (hose->io_base_phys + hose->pci_io_size)) { + unsigned long base = +- (unsigned long)hose->io_base_virt - pci_io_base; ++ (unsigned long)hose->io_base_virt - _IO_BASE; + return base + (address - hose->io_base_phys); + } + } +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/process.c linux-2.6.22-591/arch/powerpc/kernel/process.c +--- linux-2.6.22-570/arch/powerpc/kernel/process.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/process.c 2007-12-21 15:36:11.000000000 -0500 +@@ -219,22 +219,26 @@ + } + #endif /* CONFIG_SMP */ + +-#ifdef CONFIG_PPC_MERGE /* XXX for now */ + int set_dabr(unsigned long dabr) + { ++#ifdef CONFIG_PPC_MERGE /* XXX for now */ + if (ppc_md.set_dabr) + return ppc_md.set_dabr(dabr); ++#endif + ++ /* XXX should we have a CPU_FTR_HAS_DABR ? */ ++#if defined(CONFIG_PPC64) || defined(CONFIG_6xx) + mtspr(SPRN_DABR, dabr); ++#endif + return 0; + } +-#endif + + #ifdef CONFIG_PPC64 + DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array); +-static DEFINE_PER_CPU(unsigned long, current_dabr); + #endif + ++static DEFINE_PER_CPU(unsigned long, current_dabr); ++ + struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) + { +@@ -299,12 +303,10 @@ + + #endif /* CONFIG_SMP */ + +-#ifdef CONFIG_PPC64 /* for now */ + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { + set_dabr(new->thread.dabr); + __get_cpu_var(current_dabr) = new->thread.dabr; + } +-#endif /* CONFIG_PPC64 */ + + new_thread = &new->thread; + old_thread = &current->thread; +@@ -474,12 +476,10 @@ + + discard_lazy_cpu_state(); + +-#ifdef CONFIG_PPC64 /* for now */ + if (current->thread.dabr) { + current->thread.dabr = 0; + set_dabr(0); + } +-#endif + } + + void +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/prom_init.c linux-2.6.22-591/arch/powerpc/kernel/prom_init.c +--- linux-2.6.22-570/arch/powerpc/kernel/prom_init.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/prom_init.c 2007-12-21 15:36:11.000000000 -0500 +@@ -635,6 +635,7 @@ + /* ibm,dynamic-reconfiguration-memory property supported */ + #define OV5_DRCONF_MEMORY 0x20 + #define OV5_LARGE_PAGES 0x10 /* large pages supported */ ++#define OV5_DONATE_DEDICATE_CPU 0x02 /* donate dedicated CPU support */ + /* PCIe/MSI support.
Without MSI full PCIe is not supported */ + #ifdef CONFIG_PCI_MSI + #define OV5_MSI 0x01 /* PCIe/MSI support */ +@@ -685,7 +686,8 @@ + /* option vector 5: PAPR/OF options */ + 3 - 2, /* length */ + 0, /* don't ignore, don't halt */ +- OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_MSI, ++ OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | ++ OV5_DONATE_DEDICATE_CPU | OV5_MSI, + }; + + /* Old method - ELF header with PT_NOTE sections */ +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h +--- linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace-common.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,161 +0,0 @@ +-/* +- * Copyright (c) 2002 Stephen Rothwell, IBM Coproration +- * Extracted from ptrace.c and ptrace32.c +- * +- * This file is subject to the terms and conditions of the GNU General +- * Public License. See the file README.legal in the main directory of +- * this archive for more details. +- */ +- +-#ifndef _PPC64_PTRACE_COMMON_H +-#define _PPC64_PTRACE_COMMON_H +- +-#include +- +-/* +- * Set of msr bits that gdb can change on behalf of a process. +- */ +-#define MSR_DEBUGCHANGE (MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1) +- +-/* +- * Get contents of register REGNO in task TASK. +- */ +-static inline unsigned long get_reg(struct task_struct *task, int regno) +-{ +- unsigned long tmp = 0; +- +- /* +- * Put the correct FP bits in, they might be wrong as a result +- * of our lazy FP restore. +- */ +- if (regno == PT_MSR) { +- tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; +- tmp |= task->thread.fpexc_mode; +- } else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) { +- tmp = ((unsigned long *)task->thread.regs)[regno]; +- } +- +- return tmp; +-} +- +-/* +- * Write contents of register REGNO in task TASK. +- */ +-static inline int put_reg(struct task_struct *task, int regno, +- unsigned long data) +-{ +- if (regno < PT_SOFTE) { +- if (regno == PT_MSR) +- data = (data & MSR_DEBUGCHANGE) +- | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); +- ((unsigned long *)task->thread.regs)[regno] = data; +- return 0; +- } +- return -EIO; +-} +- +-static inline void set_single_step(struct task_struct *task) +-{ +- struct pt_regs *regs = task->thread.regs; +- if (regs != NULL) +- regs->msr |= MSR_SE; +- set_tsk_thread_flag(task, TIF_SINGLESTEP); +-} +- +-static inline void clear_single_step(struct task_struct *task) +-{ +- struct pt_regs *regs = task->thread.regs; +- if (regs != NULL) +- regs->msr &= ~MSR_SE; +- clear_tsk_thread_flag(task, TIF_SINGLESTEP); +-} +- +-#ifdef CONFIG_ALTIVEC +-/* +- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. +- * The transfer totals 34 quadword. Quadwords 0-31 contain the +- * corresponding vector registers. Quadword 32 contains the vscr as the +- * last word (offset 12) within that quadword. Quadword 33 contains the +- * vrsave as the first word (offset 0) within the quadword. +- * +- * This definition of the VMX state is compatible with the current PPC32 +- * ptrace interface. This allows signal handling and ptrace to use the +- * same structures. This also simplifies the implementation of a bi-arch +- * (combined (32- and 64-bit) gdb. 
+- */ +- +-/* +- * Get contents of AltiVec register state in task TASK +- */ +-static inline int get_vrregs(unsigned long __user *data, +- struct task_struct *task) +-{ +- unsigned long regsize; +- +- /* copy AltiVec registers VR[0] .. VR[31] */ +- regsize = 32 * sizeof(vector128); +- if (copy_to_user(data, task->thread.vr, regsize)) +- return -EFAULT; +- data += (regsize / sizeof(unsigned long)); +- +- /* copy VSCR */ +- regsize = 1 * sizeof(vector128); +- if (copy_to_user(data, &task->thread.vscr, regsize)) +- return -EFAULT; +- data += (regsize / sizeof(unsigned long)); +- +- /* copy VRSAVE */ +- if (put_user(task->thread.vrsave, (u32 __user *)data)) +- return -EFAULT; +- +- return 0; +-} +- +-/* +- * Write contents of AltiVec register state into task TASK. +- */ +-static inline int set_vrregs(struct task_struct *task, +- unsigned long __user *data) +-{ +- unsigned long regsize; +- +- /* copy AltiVec registers VR[0] .. VR[31] */ +- regsize = 32 * sizeof(vector128); +- if (copy_from_user(task->thread.vr, data, regsize)) +- return -EFAULT; +- data += (regsize / sizeof(unsigned long)); +- +- /* copy VSCR */ +- regsize = 1 * sizeof(vector128); +- if (copy_from_user(&task->thread.vscr, data, regsize)) +- return -EFAULT; +- data += (regsize / sizeof(unsigned long)); +- +- /* copy VRSAVE */ +- if (get_user(task->thread.vrsave, (u32 __user *)data)) +- return -EFAULT; +- +- return 0; +-} +-#endif +- +-static inline int ptrace_set_debugreg(struct task_struct *task, +- unsigned long addr, unsigned long data) +-{ +- /* We only support one DABR and no IABRS at the moment */ +- if (addr > 0) +- return -EINVAL; +- +- /* The bottom 3 bits are flags */ +- if ((data & ~0x7UL) >= TASK_SIZE) +- return -EIO; +- +- /* Ensure translation is on */ +- if (data && !(data & DABR_TRANSLATION)) +- return -EIO; +- +- task->thread.dabr = data; +- return 0; +-} +- +-#endif /* _PPC64_PTRACE_COMMON_H */ +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace.c linux-2.6.22-591/arch/powerpc/kernel/ptrace.c +--- linux-2.6.22-570/arch/powerpc/kernel/ptrace.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace.c 2007-12-21 15:36:11.000000000 -0500 +@@ -35,11 +35,11 @@ + #include + #include + +-#ifdef CONFIG_PPC64 +-#include "ptrace-common.h" +-#endif ++/* ++ * does not yet catch signals sent when the child dies. ++ * in exit.c or in signal.c. ++ */ + +-#ifdef CONFIG_PPC32 + /* + * Set of msr bits that gdb can change on behalf of a process. + */ +@@ -48,65 +48,117 @@ + #else + #define MSR_DEBUGCHANGE (MSR_SE | MSR_BE) + #endif +-#endif /* CONFIG_PPC32 */ + + /* +- * does not yet catch signals sent when the child dies. +- * in exit.c or in signal.c. ++ * Max register writeable via put_reg + */ +- + #ifdef CONFIG_PPC32 ++#define PT_MAX_PUT_REG PT_MQ ++#else ++#define PT_MAX_PUT_REG PT_CCR ++#endif ++ + /* + * Get contents of register REGNO in task TASK. 
+ */ +-static inline unsigned long get_reg(struct task_struct *task, int regno) ++unsigned long ptrace_get_reg(struct task_struct *task, int regno) + { +- if (regno < sizeof(struct pt_regs) / sizeof(unsigned long) +- && task->thread.regs != NULL) ++ unsigned long tmp = 0; ++ ++ if (task->thread.regs == NULL) ++ return -EIO; ++ ++ if (regno == PT_MSR) { ++ tmp = ((unsigned long *)task->thread.regs)[PT_MSR]; ++ return tmp | task->thread.fpexc_mode; ++ } ++ ++ if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) + return ((unsigned long *)task->thread.regs)[regno]; +- return (0); ++ ++ return -EIO; + } + + /* + * Write contents of register REGNO in task TASK. + */ +-static inline int put_reg(struct task_struct *task, int regno, +- unsigned long data) ++int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data) + { +- if (regno <= PT_MQ && task->thread.regs != NULL) { ++ if (task->thread.regs == NULL) ++ return -EIO; ++ ++ if (regno <= PT_MAX_PUT_REG || regno == PT_TRAP) { + if (regno == PT_MSR) + data = (data & MSR_DEBUGCHANGE) + | (task->thread.regs->msr & ~MSR_DEBUGCHANGE); ++ /* We prevent mucking around with the reserved area of trap ++ * which are used internally by the kernel ++ */ ++ if (regno == PT_TRAP) ++ data &= 0xfff0; + ((unsigned long *)task->thread.regs)[regno] = data; + return 0; + } + return -EIO; + } + ++ ++static int get_fpregs(void __user *data, struct task_struct *task, ++ int has_fpscr) ++{ ++ unsigned int count = has_fpscr ? 33 : 32; ++ ++ if (copy_to_user(data, task->thread.fpr, count * sizeof(double))) ++ return -EFAULT; ++ return 0; ++} ++ ++static int set_fpregs(void __user *data, struct task_struct *task, ++ int has_fpscr) ++{ ++ unsigned int count = has_fpscr ? 33 : 32; ++ ++ if (copy_from_user(task->thread.fpr, data, count * sizeof(double))) ++ return -EFAULT; ++ return 0; ++} ++ ++ + #ifdef CONFIG_ALTIVEC + /* ++ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go. ++ * The transfer totals 34 quadword. Quadwords 0-31 contain the ++ * corresponding vector registers. Quadword 32 contains the vscr as the ++ * last word (offset 12) within that quadword. Quadword 33 contains the ++ * vrsave as the first word (offset 0) within the quadword. ++ * ++ * This definition of the VMX state is compatible with the current PPC32 ++ * ptrace interface. This allows signal handling and ptrace to use the ++ * same structures. This also simplifies the implementation of a bi-arch ++ * (combined (32- and 64-bit) gdb. ++ */ ++ ++/* + * Get contents of AltiVec register state in task TASK + */ +-static inline int get_vrregs(unsigned long __user *data, struct task_struct *task) ++static int get_vrregs(unsigned long __user *data, struct task_struct *task) + { +- int i, j; +- +- if (!access_ok(VERIFY_WRITE, data, 133 * sizeof(unsigned long))) +- return -EFAULT; ++ unsigned long regsize; + + /* copy AltiVec registers VR[0] .. 
VR[31] */ +- for (i = 0; i < 32; i++) +- for (j = 0; j < 4; j++, data++) +- if (__put_user(task->thread.vr[i].u[j], data)) ++ regsize = 32 * sizeof(vector128); ++ if (copy_to_user(data, task->thread.vr, regsize)) + return -EFAULT; ++ data += (regsize / sizeof(unsigned long)); + + /* copy VSCR */ +- for (i = 0; i < 4; i++, data++) +- if (__put_user(task->thread.vscr.u[i], data)) ++ regsize = 1 * sizeof(vector128); ++ if (copy_to_user(data, &task->thread.vscr, regsize)) + return -EFAULT; ++ data += (regsize / sizeof(unsigned long)); + + /* copy VRSAVE */ +- if (__put_user(task->thread.vrsave, data)) ++ if (put_user(task->thread.vrsave, (u32 __user *)data)) + return -EFAULT; + + return 0; +@@ -115,31 +167,29 @@ + /* + * Write contents of AltiVec register state into task TASK. + */ +-static inline int set_vrregs(struct task_struct *task, unsigned long __user *data) ++static int set_vrregs(struct task_struct *task, unsigned long __user *data) + { +- int i, j; +- +- if (!access_ok(VERIFY_READ, data, 133 * sizeof(unsigned long))) +- return -EFAULT; ++ unsigned long regsize; + + /* copy AltiVec registers VR[0] .. VR[31] */ +- for (i = 0; i < 32; i++) +- for (j = 0; j < 4; j++, data++) +- if (__get_user(task->thread.vr[i].u[j], data)) ++ regsize = 32 * sizeof(vector128); ++ if (copy_from_user(task->thread.vr, data, regsize)) + return -EFAULT; ++ data += (regsize / sizeof(unsigned long)); + + /* copy VSCR */ +- for (i = 0; i < 4; i++, data++) +- if (__get_user(task->thread.vscr.u[i], data)) ++ regsize = 1 * sizeof(vector128); ++ if (copy_from_user(&task->thread.vscr, data, regsize)) + return -EFAULT; ++ data += (regsize / sizeof(unsigned long)); + + /* copy VRSAVE */ +- if (__get_user(task->thread.vrsave, data)) ++ if (get_user(task->thread.vrsave, (u32 __user *)data)) + return -EFAULT; + + return 0; + } +-#endif ++#endif /* CONFIG_ALTIVEC */ + + #ifdef CONFIG_SPE + +@@ -156,7 +206,7 @@ + /* + * Get contents of SPE register state in task TASK. + */ +-static inline int get_evrregs(unsigned long *data, struct task_struct *task) ++static int get_evrregs(unsigned long *data, struct task_struct *task) + { + int i; + +@@ -182,7 +232,7 @@ + /* + * Write contents of SPE register state into task TASK. + */ +-static inline int set_evrregs(struct task_struct *task, unsigned long *data) ++static int set_evrregs(struct task_struct *task, unsigned long *data) + { + int i; + +@@ -205,8 +255,8 @@ + } + #endif /* CONFIG_SPE */ + +-static inline void +-set_single_step(struct task_struct *task) ++ ++static void set_single_step(struct task_struct *task) + { + struct pt_regs *regs = task->thread.regs; + +@@ -221,8 +271,7 @@ + set_tsk_thread_flag(task, TIF_SINGLESTEP); + } + +-static inline void +-clear_single_step(struct task_struct *task) ++static void clear_single_step(struct task_struct *task) + { + struct pt_regs *regs = task->thread.regs; + +@@ -236,7 +285,25 @@ + } + clear_tsk_thread_flag(task, TIF_SINGLESTEP); + } +-#endif /* CONFIG_PPC32 */ ++ ++static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, ++ unsigned long data) ++{ ++ /* We only support one DABR and no IABRS at the moment */ ++ if (addr > 0) ++ return -EINVAL; ++ ++ /* The bottom 3 bits are flags */ ++ if ((data & ~0x7UL) >= TASK_SIZE) ++ return -EIO; ++ ++ /* Ensure translation is on */ ++ if (data && !(data & DABR_TRANSLATION)) ++ return -EIO; ++ ++ task->thread.dabr = data; ++ return 0; ++} + + /* + * Called by kernel/ptrace.c when detaching.. 
+@@ -249,6 +316,62 @@ + clear_single_step(child); + } + ++/* ++ * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, ++ * we mark them as obsolete now, they will be removed in a future version ++ */ ++static long arch_ptrace_old(struct task_struct *child, long request, long addr, ++ long data) ++{ ++ int ret = -EPERM; ++ ++ switch(request) { ++ case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ ++ int i; ++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; ++ unsigned long __user *tmp = (unsigned long __user *)addr; ++ ++ for (i = 0; i < 32; i++) { ++ ret = put_user(*reg, tmp); ++ if (ret) ++ break; ++ reg++; ++ tmp++; ++ } ++ break; ++ } ++ ++ case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ ++ int i; ++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; ++ unsigned long __user *tmp = (unsigned long __user *)addr; ++ ++ for (i = 0; i < 32; i++) { ++ ret = get_user(*reg, tmp); ++ if (ret) ++ break; ++ reg++; ++ tmp++; ++ } ++ break; ++ } ++ ++ case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ ++ flush_fp_to_thread(child); ++ ret = get_fpregs((void __user *)addr, child, 0); ++ break; ++ } ++ ++ case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ ++ flush_fp_to_thread(child); ++ ret = set_fpregs((void __user *)addr, child, 0); ++ break; ++ } ++ ++ } ++ return ret; ++} ++ + long arch_ptrace(struct task_struct *child, long request, long addr, long data) + { + int ret = -EPERM; +@@ -284,11 +407,9 @@ + #endif + break; + +-#ifdef CONFIG_PPC32 + CHECK_FULL_REGS(child->thread.regs); +-#endif + if (index < PT_FPR0) { +- tmp = get_reg(child, (int) index); ++ tmp = ptrace_get_reg(child, (int) index); + } else { + flush_fp_to_thread(child); + tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0]; +@@ -323,13 +444,9 @@ + #endif + break; + +-#ifdef CONFIG_PPC32 + CHECK_FULL_REGS(child->thread.regs); +-#endif +- if (index == PT_ORIG_R3) +- break; + if (index < PT_FPR0) { +- ret = put_reg(child, index, data); ++ ret = ptrace_put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + ((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data; +@@ -384,7 +501,6 @@ + break; + } + +-#ifdef CONFIG_PPC64 + case PTRACE_GET_DEBUGREG: { + ret = -EINVAL; + /* We only support one DABR and no IABRS at the moment */ +@@ -398,73 +514,61 @@ + case PTRACE_SET_DEBUGREG: + ret = ptrace_set_debugreg(child, addr, data); + break; +-#endif + + case PTRACE_DETACH: + ret = ptrace_detach(child, data); + break; + +- case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; +- unsigned long __user *tmp = (unsigned long __user *)addr; +- +- for (i = 0; i < 32; i++) { +- ret = put_user(*reg, tmp); +- if (ret) ++#ifdef CONFIG_PPC64 ++ case PTRACE_GETREGS64: ++#endif ++ case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ ++ int ui; ++ if (!access_ok(VERIFY_WRITE, (void __user *)data, ++ sizeof(struct pt_regs))) { ++ ret = -EIO; + break; +- reg++; +- tmp++; + } +- break; ++ ret = 0; ++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ++ ret |= __put_user(ptrace_get_reg(child, ui), ++ (unsigned long __user *) data); ++ data += sizeof(long); + } +- +- case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. 
*/ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; +- unsigned long __user *tmp = (unsigned long __user *)addr; +- +- for (i = 0; i < 32; i++) { +- ret = get_user(*reg, tmp); +- if (ret) + break; +- reg++; +- tmp++; + } ++ ++#ifdef CONFIG_PPC64 ++ case PTRACE_SETREGS64: ++#endif ++ case PTRACE_SETREGS: { /* Set all gp regs in the child. */ ++ unsigned long tmp; ++ int ui; ++ if (!access_ok(VERIFY_READ, (void __user *)data, ++ sizeof(struct pt_regs))) { ++ ret = -EIO; + break; + } +- +- case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; +- unsigned long __user *tmp = (unsigned long __user *)addr; +- +- flush_fp_to_thread(child); +- +- for (i = 0; i < 32; i++) { +- ret = put_user(*reg, tmp); ++ ret = 0; ++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ++ ret = __get_user(tmp, (unsigned long __user *) data); + if (ret) + break; +- reg++; +- tmp++; ++ ptrace_put_reg(child, ui, tmp); ++ data += sizeof(long); + } + break; + } + +- case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; +- unsigned long __user *tmp = (unsigned long __user *)addr; +- ++ case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */ + flush_fp_to_thread(child); +- +- for (i = 0; i < 32; i++) { +- ret = get_user(*reg, tmp); +- if (ret) ++ ret = get_fpregs((void __user *)data, child, 1); + break; +- reg++; +- tmp++; + } ++ ++ case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */ ++ flush_fp_to_thread(child); ++ ret = set_fpregs((void __user *)data, child, 1); + break; + } + +@@ -499,11 +603,18 @@ + break; + #endif + ++ /* Old reverse args ptrace calls */ ++ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */ ++ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ ++ case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */ ++ case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */ ++ ret = arch_ptrace_old(child, request, addr, data); ++ break; ++ + default: + ret = ptrace_request(child, request, addr, data); + break; + } +- + return ret; + } + +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c +--- linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/ptrace32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -33,13 +33,55 @@ + #include + #include + +-#include "ptrace-common.h" +- + /* + * does not yet catch signals sent when the child dies. + * in exit.c or in signal.c. + */ + ++/* ++ * Here are the old "legacy" powerpc specific getregs/setregs ptrace calls, ++ * we mark them as obsolete now, they will be removed in a future version ++ */ ++static long compat_ptrace_old(struct task_struct *child, long request, ++ long addr, long data) ++{ ++ int ret = -EPERM; ++ ++ switch(request) { ++ case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ ++ int i; ++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; ++ unsigned int __user *tmp = (unsigned int __user *)addr; ++ ++ for (i = 0; i < 32; i++) { ++ ret = put_user(*reg, tmp); ++ if (ret) ++ break; ++ reg++; ++ tmp++; ++ } ++ break; ++ } ++ ++ case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31.
*/ ++ int i; ++ unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; ++ unsigned int __user *tmp = (unsigned int __user *)addr; ++ ++ for (i = 0; i < 32; i++) { ++ ret = get_user(*reg, tmp); ++ if (ret) ++ break; ++ reg++; ++ tmp++; ++ } ++ break; ++ } ++ ++ } ++ return ret; ++} ++ + long compat_sys_ptrace(int request, int pid, unsigned long addr, + unsigned long data) + { +@@ -123,7 +165,7 @@ + break; + + if (index < PT_FPR0) { +- tmp = get_reg(child, index); ++ tmp = ptrace_get_reg(child, index); + } else { + flush_fp_to_thread(child); + /* +@@ -162,7 +204,9 @@ + else + part = 0; /* want the 1st half of the register (left-most). */ + +- /* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */ ++ /* Validate the input - check to see if address is on the wrong boundary ++ * or beyond the end of the user area ++ */ + if ((addr & 3) || numReg > PT_FPSCR) + break; + +@@ -170,7 +214,7 @@ + flush_fp_to_thread(child); + tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0]; + } else { /* register within PT_REGS struct */ +- tmp = get_reg(child, numReg); ++ tmp = ptrace_get_reg(child, numReg); + } + reg32bits = ((u32*)&tmp)[part]; + ret = put_user(reg32bits, (u32 __user *)data); +@@ -226,10 +270,8 @@ + if ((addr & 3) || (index > PT_FPSCR32)) + break; + +- if (index == PT_ORIG_R3) +- break; + if (index < PT_FPR0) { +- ret = put_reg(child, index, data); ++ ret = ptrace_put_reg(child, index, data); + } else { + flush_fp_to_thread(child); + /* +@@ -258,70 +300,25 @@ + /* Determine which register the user wants */ + index = (u64)addr >> 2; + numReg = index / 2; ++ + /* + * Validate the input - check to see if address is on the + * wrong boundary or beyond the end of the user area + */ + if ((addr & 3) || (numReg > PT_FPSCR)) + break; +- /* Insure it is a register we let them change */ +- if ((numReg == PT_ORIG_R3) +- || ((numReg > PT_CCR) && (numReg < PT_FPR0))) +- break; +- if (numReg >= PT_FPR0) { +- flush_fp_to_thread(child); +- } +- if (numReg == PT_MSR) +- data = (data & MSR_DEBUGCHANGE) +- | (child->thread.regs->msr & ~MSR_DEBUGCHANGE); +- ((u32*)child->thread.regs)[index] = data; +- ret = 0; +- break; +- } +- +- case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ +- case PTRACE_CONT: { /* restart after signal. */ +- ret = -EIO; +- if (!valid_signal(data)) +- break; +- if (request == PTRACE_SYSCALL) +- set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); ++ if (numReg < PT_FPR0) { ++ unsigned long freg = ptrace_get_reg(child, numReg); ++ if (index % 2) ++ freg = (freg & ~0xfffffffful) | (data & 0xfffffffful); + else +- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +- child->exit_code = data; +- /* make sure the single step bit is not set. */ +- clear_single_step(child); +- wake_up_process(child); +- ret = 0; +- break; +- } +- +- /* +- * make the child exit. Best I can do is send it a sigkill. +- * perhaps it should be put in the status that it wants to +- * exit. +- */ +- case PTRACE_KILL: { ++ freg = (freg & 0xfffffffful) | (data << 32); ++ ret = ptrace_put_reg(child, numReg, freg); ++ } else { ++ flush_fp_to_thread(child); ++ ((unsigned int *)child->thread.regs)[index] = data; + ret = 0; +- if (child->exit_state == EXIT_ZOMBIE) /* already dead */ +- break; +- child->exit_code = SIGKILL; +- /* make sure the single step bit is not set. */ +- clear_single_step(child); +- wake_up_process(child); +- break; + } +- +- case PTRACE_SINGLESTEP: { /* set the trap flag. 
*/ +- ret = -EIO; +- if (!valid_signal(data)) +- break; +- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +- set_single_step(child); +- child->exit_code = data; +- /* give it a chance to run. */ +- wake_up_process(child); +- ret = 0; + break; + } + +@@ -334,95 +331,67 @@ + break; + } + +- case PTRACE_SET_DEBUGREG: +- ret = ptrace_set_debugreg(child, addr, data); +- break; +- +- case PTRACE_DETACH: +- ret = ptrace_detach(child, data); ++ case PTRACE_GETEVENTMSG: ++ ret = put_user(child->ptrace_message, (unsigned int __user *) data); + break; + +- case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; +- unsigned int __user *tmp = (unsigned int __user *)addr; +- +- for (i = 0; i < 32; i++) { +- ret = put_user(*reg, tmp); +- if (ret) +- break; +- reg++; +- tmp++; +- } ++ case PTRACE_GETREGS: { /* Get all pt_regs from the child. */ ++ int ui; ++ if (!access_ok(VERIFY_WRITE, (void __user *)data, ++ PT_REGS_COUNT * sizeof(int))) { ++ ret = -EIO; + break; + } +- +- case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.regs)[0]; +- unsigned int __user *tmp = (unsigned int __user *)addr; +- +- for (i = 0; i < 32; i++) { +- ret = get_user(*reg, tmp); +- if (ret) +- break; +- reg++; +- tmp++; ++ ret = 0; ++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ++ ret |= __put_user(ptrace_get_reg(child, ui), ++ (unsigned int __user *) data); ++ data += sizeof(int); + } + break; + } + +- case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; +- unsigned int __user *tmp = (unsigned int __user *)addr; +- +- flush_fp_to_thread(child); +- +- for (i = 0; i < 32; i++) { +- ret = put_user(*reg, tmp); +- if (ret) +- break; +- reg++; +- tmp++; +- } ++ case PTRACE_SETREGS: { /* Set all gp regs in the child. */ ++ unsigned long tmp; ++ int ui; ++ if (!access_ok(VERIFY_READ, (void __user *)data, ++ PT_REGS_COUNT * sizeof(int))) { ++ ret = -EIO; + break; + } +- +- case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */ +- int i; +- unsigned long *reg = &((unsigned long *)child->thread.fpr)[0]; +- unsigned int __user *tmp = (unsigned int __user *)addr; +- +- flush_fp_to_thread(child); +- +- for (i = 0; i < 32; i++) { +- ret = get_user(*reg, tmp); ++ ret = 0; ++ for (ui = 0; ui < PT_REGS_COUNT; ui ++) { ++ ret = __get_user(tmp, (unsigned int __user *) data); + if (ret) + break; +- reg++; +- tmp++; ++ ptrace_put_reg(child, ui, tmp); ++ data += sizeof(int); + } + break; + } + +- case PTRACE_GETEVENTMSG: +- ret = put_user(child->ptrace_message, (unsigned int __user *) data); +- break; +- +-#ifdef CONFIG_ALTIVEC ++ case PTRACE_GETFPREGS: ++ case PTRACE_SETFPREGS: + case PTRACE_GETVRREGS: +- /* Get the child altivec register state. */ +- flush_altivec_to_thread(child); +- ret = get_vrregs((unsigned long __user *)data, child); ++ case PTRACE_SETVRREGS: ++ case PTRACE_GETREGS64: ++ case PTRACE_SETREGS64: ++ case PPC_PTRACE_GETFPREGS: ++ case PPC_PTRACE_SETFPREGS: ++ case PTRACE_KILL: ++ case PTRACE_SINGLESTEP: ++ case PTRACE_DETACH: ++ case PTRACE_SET_DEBUGREG: ++ case PTRACE_SYSCALL: ++ case PTRACE_CONT: ++ ret = arch_ptrace(child, request, addr, data); + break; + +- case PTRACE_SETVRREGS: +- /* Set the child altivec register state. */ +- flush_altivec_to_thread(child); +- ret = set_vrregs(child, (unsigned long __user *)data); ++ /* Old reverse args ptrace callss */ ++ case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. 
*/ ++ case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */ ++ ret = compat_ptrace_old(child, request, addr, data); + break; +-#endif + + default: + ret = ptrace_request(child, request, addr, data); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c +--- linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/rtas_pci.c 2007-12-21 15:36:11.000000000 -0500 +@@ -278,10 +278,8 @@ + { + struct device_node *node; + struct pci_controller *phb; +- unsigned int index; + struct device_node *root = of_find_node_by_path("/"); + +- index = 0; + for (node = of_get_next_child(root, NULL); + node != NULL; + node = of_get_next_child(root, node)) { +@@ -295,8 +293,7 @@ + continue; + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, node, 0); +- pci_setup_phb_io(phb, index == 0); +- index++; ++ isa_bridge_find_early(phb); + } + + of_node_put(root); +@@ -335,7 +332,7 @@ + return 1; + } + +- rc = unmap_bus_range(b); ++ rc = pcibios_unmap_io_space(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __FUNCTION__, b->name); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/setup_32.c linux-2.6.22-591/arch/powerpc/kernel/setup_32.c +--- linux-2.6.22-570/arch/powerpc/kernel/setup_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/setup_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -45,10 +45,6 @@ + + #define DBG(fmt...) + +-#if defined CONFIG_KGDB +-#include +-#endif +- + extern void bootx_init(unsigned long r4, unsigned long phys); + + struct ide_machdep_calls ppc_ide_md; +@@ -245,30 +241,16 @@ + + xmon_setup(); + +-#if defined(CONFIG_KGDB) +- if (ppc_md.kgdb_map_scc) +- ppc_md.kgdb_map_scc(); +- set_debug_traps(); +- if (strstr(cmd_line, "gdb")) { +- if (ppc_md.progress) +- ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); +- printk("kgdb breakpoint activated\n"); +- breakpoint(); +- } +-#endif +- + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ +- if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; +- } else +- ucache_bsize = dcache_bsize = icache_bsize +- = cur_cpu_spec->dcache_bsize; ++ if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) ++ ucache_bsize = icache_bsize = dcache_bsize; + + /* reboot on panic */ + panic_timeout = 180; +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.c linux-2.6.22-591/arch/powerpc/kernel/signal.c +--- linux-2.6.22-570/arch/powerpc/kernel/signal.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,180 @@ ++/* ++ * Common signal handling code for both 32 and 64 bits ++ * ++ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation ++ * Extracted from signal_32.c and signal_64.c ++ * ++ * This file is subject to the terms and conditions of the GNU General ++ * Public License. See the file README.legal in the main directory of ++ * this archive for more details.
++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "signal.h" ++ ++/* ++ * Allocate space for the signal frame ++ */ ++void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ++ size_t frame_size) ++{ ++ unsigned long oldsp, newsp; ++ ++ /* Default to using normal stack */ ++ oldsp = regs->gpr[1]; ++ ++ /* Check for alt stack */ ++ if ((ka->sa.sa_flags & SA_ONSTACK) && ++ current->sas_ss_size && !on_sig_stack(oldsp)) ++ oldsp = (current->sas_ss_sp + current->sas_ss_size); ++ ++ /* Get aligned frame */ ++ newsp = (oldsp - frame_size) & ~0xFUL; ++ ++ /* Check access */ ++ if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp)) ++ return NULL; ++ ++ return (void __user *)newsp; ++} ++ ++ ++/* ++ * Restore the user process's signal mask ++ */ ++void restore_sigmask(sigset_t *set) ++{ ++ sigdelsetmask(set, ~_BLOCKABLE); ++ spin_lock_irq(&current->sighand->siglock); ++ current->blocked = *set; ++ recalc_sigpending(); ++ spin_unlock_irq(&current->sighand->siglock); ++} ++ ++static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka, ++ int has_handler) ++{ ++ unsigned long ret = regs->gpr[3]; ++ int restart = 1; ++ ++ /* syscall ? */ ++ if (TRAP(regs) != 0x0C00) ++ return; ++ ++ /* error signalled ? */ ++ if (!(regs->ccr & 0x10000000)) ++ return; ++ ++ switch (ret) { ++ case ERESTART_RESTARTBLOCK: ++ case ERESTARTNOHAND: ++ /* ERESTARTNOHAND means that the syscall should only be ++ * restarted if there was no handler for the signal, and since ++ * we only get here if there is a handler, we don't restart. ++ */ ++ restart = !has_handler; ++ break; ++ case ERESTARTSYS: ++ /* ERESTARTSYS means to restart the syscall if there is no ++ * handler or the handler was registered with SA_RESTART ++ */ ++ restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0; ++ break; ++ case ERESTARTNOINTR: ++ /* ERESTARTNOINTR means that the syscall should be ++ * called again after the signal handler returns. ++ */ ++ break; ++ default: ++ return; ++ } ++ if (restart) { ++ if (ret == ERESTART_RESTARTBLOCK) ++ regs->gpr[0] = __NR_restart_syscall; ++ else ++ regs->gpr[3] = regs->orig_gpr3; ++ regs->nip -= 4; ++ regs->result = 0; ++ } else { ++ regs->result = -EINTR; ++ regs->gpr[3] = EINTR; ++ regs->ccr |= 0x10000000; ++ } ++} ++ ++int do_signal(sigset_t *oldset, struct pt_regs *regs) ++{ ++ siginfo_t info; ++ int signr; ++ struct k_sigaction ka; ++ int ret; ++ int is32 = is_32bit_task(); ++ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) ++ oldset = &current->saved_sigmask; ++ else if (!oldset) ++ oldset = &current->blocked; ++ ++ signr = get_signal_to_deliver(&info, &ka, regs, NULL); ++ ++ /* Is there any syscall restart business here ? */ ++ check_syscall_restart(regs, &ka, signr > 0); ++ ++ if (signr <= 0) { ++ /* No signal to deliver -- put the saved sigmask back */ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) { ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); ++ } ++ return 0; /* no signals delivered */ ++ } ++ ++ /* ++ * Reenable the DABR before delivering the signal to ++ * user space. The DABR will have been cleared if it ++ * triggered inside the kernel.
++ */ ++ if (current->thread.dabr) ++ set_dabr(current->thread.dabr); ++ ++ if (is32) { ++ if (ka.sa.sa_flags & SA_SIGINFO) ++ ret = handle_rt_signal32(signr, &ka, &info, oldset, ++ regs); ++ else ++ ret = handle_signal32(signr, &ka, &info, oldset, ++ regs); ++ } else { ++ ret = handle_rt_signal64(signr, &ka, &info, oldset, regs); ++ } ++ ++ if (ret) { ++ spin_lock_irq(&current->sighand->siglock); ++ sigorsets(&current->blocked, &current->blocked, ++ &ka.sa.sa_mask); ++ if (!(ka.sa.sa_flags & SA_NODEFER)) ++ sigaddset(&current->blocked, signr); ++ recalc_sigpending(); ++ spin_unlock_irq(&current->sighand->siglock); ++ ++ /* ++ * A signal was successfully delivered; the saved sigmask is in ++ * its frame, and we can clear the TIF_RESTORE_SIGMASK flag. ++ */ ++ if (test_thread_flag(TIF_RESTORE_SIGMASK)) ++ clear_thread_flag(TIF_RESTORE_SIGMASK); ++ } ++ ++ return ret; ++} ++ ++long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, ++ unsigned long r5, unsigned long r6, unsigned long r7, ++ unsigned long r8, struct pt_regs *regs) ++{ ++ return do_sigaltstack(uss, uoss, regs->gpr[1]); ++} +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.h linux-2.6.22-591/arch/powerpc/kernel/signal.h +--- linux-2.6.22-570/arch/powerpc/kernel/signal.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/signal.h 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation ++ * Extracted from signal_32.c and signal_64.c ++ * ++ * This file is subject to the terms and conditions of the GNU General ++ * Public License. See the file README.legal in the main directory of ++ * this archive for more details. ++ */ ++ ++#ifndef _POWERPC_ARCH_SIGNAL_H ++#define _POWERPC_ARCH_SIGNAL_H ++ ++#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) ++ ++extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, ++ size_t frame_size); ++extern void restore_sigmask(sigset_t *set); ++ ++extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, ++ siginfo_t *info, sigset_t *oldset, ++ struct pt_regs *regs); ++ ++extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, ++ siginfo_t *info, sigset_t *oldset, ++ struct pt_regs *regs); ++ ++ ++#ifdef CONFIG_PPC64 ++ ++static inline int is_32bit_task(void) ++{ ++ return test_thread_flag(TIF_32BIT); ++} ++ ++extern int handle_rt_signal64(int signr, struct k_sigaction *ka, ++ siginfo_t *info, sigset_t *set, ++ struct pt_regs *regs); ++ ++#else /* CONFIG_PPC64 */ ++ ++static inline int is_32bit_task(void) ++{ ++ return 1; ++} ++ ++static inline int handle_rt_signal64(int signr, struct k_sigaction *ka, ++ siginfo_t *info, sigset_t *set, ++ struct pt_regs *regs) ++{ ++ return -EFAULT; ++} ++ ++#endif /* !defined(CONFIG_PPC64) */ ++ ++#endif /* _POWERPC_ARCH_SIGNAL_H */ +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_32.c linux-2.6.22-591/arch/powerpc/kernel/signal_32.c +--- linux-2.6.22-570/arch/powerpc/kernel/signal_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/signal_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -51,12 +51,11 @@ + #include + #endif + +-#undef DEBUG_SIG ++#include "signal.h" + +-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) ++#undef DEBUG_SIG + + #ifdef CONFIG_PPC64 +-#define do_signal do_signal32 + #define sys_sigsuspend compat_sys_sigsuspend + #define sys_rt_sigsuspend compat_sys_rt_sigsuspend + #define sys_rt_sigreturn compat_sys_rt_sigreturn +@@ -231,8 +230,6 @@ + + #endif /*
CONFIG_PPC64 */ + +-int do_signal(sigset_t *oldset, struct pt_regs *regs); +- + /* + * Atomically swap in the new signal mask, and wait for a signal. + */ +@@ -251,14 +248,6 @@ + return -ERESTARTNOHAND; + } + +-#ifdef CONFIG_PPC32 +-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, int r5, +- int r6, int r7, int r8, struct pt_regs *regs) +-{ +- return do_sigaltstack(uss, uoss, regs->gpr[1]); +-} +-#endif +- + long sys_sigaction(int sig, struct old_sigaction __user *act, + struct old_sigaction __user *oact) + { +@@ -293,14 +282,17 @@ + /* + * When we have signals to deliver, we set up on the + * user stack, going down from the original stack pointer: +- * a sigregs struct ++ * an ABI gap of 56 words ++ * an mcontext struct + * a sigcontext struct + * a gap of __SIGNAL_FRAMESIZE bytes + * +- * Each of these things must be a multiple of 16 bytes in size. ++ * Each of these things must be a multiple of 16 bytes in size. The following ++ * structure represents all of this except the __SIGNAL_FRAMESIZE gap + * + */ +-struct sigregs { ++struct sigframe { ++ struct sigcontext sctx; /* the sigcontext */ + struct mcontext mctx; /* all the register values */ + /* + * Programs using the rs6000/xcoff abi can save up to 19 gp +@@ -703,44 +695,22 @@ + } + #endif /* CONFIG_PPC64 */ + +- +-/* +- * Restore the user process's signal mask +- */ +-#ifdef CONFIG_PPC64 +-extern void restore_sigmask(sigset_t *set); +-#else /* CONFIG_PPC64 */ +-static void restore_sigmask(sigset_t *set) +-{ +- sigdelsetmask(set, ~_BLOCKABLE); +- spin_lock_irq(&current->sighand->siglock); +- current->blocked = *set; +- recalc_sigpending(); +- spin_unlock_irq(&current->sighand->siglock); +-} +-#endif +- + /* + * Set up a signal frame for a "real-time" signal handler + * (one which gets siginfo).
+ */ +-static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka, ++int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, + siginfo_t *info, sigset_t *oldset, +- struct pt_regs *regs, unsigned long newsp) ++ struct pt_regs *regs) + { + struct rt_sigframe __user *rt_sf; + struct mcontext __user *frame; +- unsigned long origsp = newsp; ++ unsigned long newsp = 0; + + /* Set up Signal Frame */ + /* Put a Real Time Context onto stack */ +- newsp -= sizeof(*rt_sf); +- rt_sf = (struct rt_sigframe __user *)newsp; +- +- /* create a stack frame for the caller of the handler */ +- newsp -= __SIGNAL_FRAMESIZE + 16; +- +- if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp)) ++ rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf)); ++ if (unlikely(rt_sf == NULL)) + goto badframe; + + /* Put the siginfo & fill in most of the ucontext */ +@@ -770,8 +740,12 @@ + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + ++ /* create a stack frame for the caller of the handler */ ++ newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16); + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; ++ ++ /* Fill registers for signal handler */ + regs->gpr[1] = newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) &rt_sf->info; +@@ -1015,27 +989,18 @@ + /* + * OK, we're invoking a handler + */ +-static int handle_signal(unsigned long sig, struct k_sigaction *ka, +- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs, +- unsigned long newsp) ++int handle_signal32(unsigned long sig, struct k_sigaction *ka, ++ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) + { + struct sigcontext __user *sc; +- struct sigregs __user *frame; +- unsigned long origsp = newsp; ++ struct sigframe __user *frame; ++ unsigned long newsp = 0; + + /* Set up Signal Frame */ +- newsp -= sizeof(struct sigregs); +- frame = (struct sigregs __user *) newsp; +- +- /* Put a sigcontext on the stack */ +- newsp -= sizeof(*sc); +- sc = (struct sigcontext __user *) newsp; +- +- /* create a stack frame for the caller of the handler */ +- newsp -= __SIGNAL_FRAMESIZE; +- +- if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp)) ++ frame = get_sigframe(ka, regs, sizeof(*frame)); ++ if (unlikely(frame == NULL)) + goto badframe; ++ sc = (struct sigcontext __user *) &frame->sctx; + + #if _NSIG != 64 + #error "Please adjust handle_signal()" +@@ -1047,7 +1012,7 @@ + #else + || __put_user(oldset->sig[1], &sc->_unused[3]) + #endif +- || __put_user(to_user_ptr(frame), &sc->regs) ++ || __put_user(to_user_ptr(&frame->mctx), &sc->regs) + || __put_user(sig, &sc->signal)) + goto badframe; + +@@ -1063,8 +1028,11 @@ + + current->thread.fpscr.val = 0; /* turn off all fp exceptions */ + ++ /* create a stack frame for the caller of the handler */ ++ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + if (put_user(regs->gpr[1], (u32 __user *)newsp)) + goto badframe; ++ + regs->gpr[1] = newsp; + regs->gpr[3] = sig; + regs->gpr[4] = (unsigned long) sc; +@@ -1126,106 +1094,3 @@ + force_sig(SIGSEGV, current); + return 0; + } +- +-/* +- * Note that 'init' is a special process: it doesn't get signals it doesn't +- * want to handle. Thus you cannot kill init even with a SIGKILL even by +- * mistake. 
+- */ +-int do_signal(sigset_t *oldset, struct pt_regs *regs) +-{ +- siginfo_t info; +- struct k_sigaction ka; +- unsigned int newsp; +- int signr, ret; +- +-#ifdef CONFIG_PPC32 +- if (try_to_freeze()) { +- signr = 0; +- if (!signal_pending(current)) +- goto no_signal; +- } +-#endif +- +- if (test_thread_flag(TIF_RESTORE_SIGMASK)) +- oldset = &current->saved_sigmask; +- else if (!oldset) +- oldset = &current->blocked; +- +- signr = get_signal_to_deliver(&info, &ka, regs, NULL); +-#ifdef CONFIG_PPC32 +-no_signal: +-#endif +- if (TRAP(regs) == 0x0C00 /* System Call! */ +- && regs->ccr & 0x10000000 /* error signalled */ +- && ((ret = regs->gpr[3]) == ERESTARTSYS +- || ret == ERESTARTNOHAND || ret == ERESTARTNOINTR +- || ret == ERESTART_RESTARTBLOCK)) { +- +- if (signr > 0 +- && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK +- || (ret == ERESTARTSYS +- && !(ka.sa.sa_flags & SA_RESTART)))) { +- /* make the system call return an EINTR error */ +- regs->result = -EINTR; +- regs->gpr[3] = EINTR; +- /* note that the cr0.SO bit is already set */ +- } else { +- regs->nip -= 4; /* Back up & retry system call */ +- regs->result = 0; +- regs->trap = 0; +- if (ret == ERESTART_RESTARTBLOCK) +- regs->gpr[0] = __NR_restart_syscall; +- else +- regs->gpr[3] = regs->orig_gpr3; +- } +- } +- +- if (signr == 0) { +- /* No signal to deliver -- put the saved sigmask back */ +- if (test_thread_flag(TIF_RESTORE_SIGMASK)) { +- clear_thread_flag(TIF_RESTORE_SIGMASK); +- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); +- } +- return 0; /* no signals delivered */ +- } +- +- if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size +- && !on_sig_stack(regs->gpr[1])) +- newsp = current->sas_ss_sp + current->sas_ss_size; +- else +- newsp = regs->gpr[1]; +- newsp &= ~0xfUL; +- +-#ifdef CONFIG_PPC64 +- /* +- * Reenable the DABR before delivering the signal to +- * user space. The DABR will have been cleared if it +- * triggered inside the kernel. +- */ +- if (current->thread.dabr) +- set_dabr(current->thread.dabr); +-#endif +- +- /* Whee! Actually deliver the signal.
*/ +- if (ka.sa.sa_flags & SA_SIGINFO) +- ret = handle_rt_signal(signr, &ka, &info, oldset, regs, newsp); +- else +- ret = handle_signal(signr, &ka, &info, oldset, regs, newsp); +- +- if (ret) { +- spin_lock_irq(&current->sighand->siglock); +- sigorsets(&current->blocked, &current->blocked, +- &ka.sa.sa_mask); +- if (!(ka.sa.sa_flags & SA_NODEFER)) +- sigaddset(&current->blocked, signr); +- recalc_sigpending(); +- spin_unlock_irq(&current->sighand->siglock); +- /* A signal was successfully delivered; the saved sigmask is in +- its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ +- if (test_thread_flag(TIF_RESTORE_SIGMASK)) +- clear_thread_flag(TIF_RESTORE_SIGMASK); +- } +- +- return ret; +-} +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_64.c linux-2.6.22-591/arch/powerpc/kernel/signal_64.c +--- linux-2.6.22-570/arch/powerpc/kernel/signal_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/kernel/signal_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -34,9 +34,9 @@ + #include + #include + +-#define DEBUG_SIG 0 ++#include "signal.h" + +-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) ++#define DEBUG_SIG 0 + + #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) + #define FP_REGS_SIZE sizeof(elf_fpregset_t) +@@ -64,14 +64,6 @@ + char abigap[288]; + } __attribute__ ((aligned (16))); + +-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5, +- unsigned long r6, unsigned long r7, unsigned long r8, +- struct pt_regs *regs) +-{ +- return do_sigaltstack(uss, uoss, regs->gpr[1]); +-} +- +- + /* + * Set up the sigcontext for the signal frame. + */ +@@ -208,25 +200,6 @@ + } + + /* +- * Allocate space for the signal frame +- */ +-static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +- size_t frame_size) +-{ +- unsigned long newsp; +- +- /* Default to using normal stack */ +- newsp = regs->gpr[1]; +- +- if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) { +- if (! on_sig_stack(regs->gpr[1])) +- newsp = (current->sas_ss_sp + current->sas_ss_size); +- } +- +- return (void __user *)((newsp - frame_size) & -16ul); +-} +- +-/* + * Setup the trampoline code on the stack + */ + static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) +@@ -253,19 +226,6 @@ + } + + /* +- * Restore the user process's signal mask (also used by signal32.c) +- */ +-void restore_sigmask(sigset_t *set) +-{ +- sigdelsetmask(set, ~_BLOCKABLE); +- spin_lock_irq(&current->sighand->siglock); +- current->blocked = *set; +- recalc_sigpending(); +- spin_unlock_irq(&current->sighand->siglock); +-} +- +- +-/* + * Handle {get,set,swap}_context operations + */ + int sys_swapcontext(struct ucontext __user *old_ctx, +@@ -359,7 +319,7 @@ + return 0; + } + +-static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info, ++int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) + { + /* Handler is *really* a pointer to the function descriptor for +@@ -373,8 +333,7 @@ + long err = 0; + + frame = get_sigframe(ka, regs, sizeof(*frame)); +- +- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) ++ if (unlikely(frame == NULL)) + goto badframe; + + err |= __put_user(&frame->info, &frame->pinfo); +@@ -411,7 +370,7 @@ + funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler; + + /* Allocate a dummy caller frame for the signal handler.
*/ +- newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE; ++ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE; + err |= put_user(regs->gpr[1], (unsigned long __user *)newsp); + + /* Set up "regs" so we "return" to the signal handler. */ +@@ -442,134 +401,3 @@ + force_sigsegv(signr, current); + return 0; + } +- +- +-/* +- * OK, we're invoking a handler +- */ +-static int handle_signal(unsigned long sig, struct k_sigaction *ka, +- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) +-{ +- int ret; +- +- /* Set up Signal Frame */ +- ret = setup_rt_frame(sig, ka, info, oldset, regs); +- +- if (ret) { +- spin_lock_irq(&current->sighand->siglock); +- sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); +- if (!(ka->sa.sa_flags & SA_NODEFER)) +- sigaddset(&current->blocked,sig); +- recalc_sigpending(); +- spin_unlock_irq(&current->sighand->siglock); +- } +- +- return ret; +-} +- +-static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) +-{ +- switch ((int)regs->result) { +- case -ERESTART_RESTARTBLOCK: +- case -ERESTARTNOHAND: +- /* ERESTARTNOHAND means that the syscall should only be +- * restarted if there was no handler for the signal, and since +- * we only get here if there is a handler, we dont restart. +- */ +- regs->result = -EINTR; +- regs->gpr[3] = EINTR; +- regs->ccr |= 0x10000000; +- break; +- case -ERESTARTSYS: +- /* ERESTARTSYS means to restart the syscall if there is no +- * handler or the handler was registered with SA_RESTART +- */ +- if (!(ka->sa.sa_flags & SA_RESTART)) { +- regs->result = -EINTR; +- regs->gpr[3] = EINTR; +- regs->ccr |= 0x10000000; +- break; +- } +- /* fallthrough */ +- case -ERESTARTNOINTR: +- /* ERESTARTNOINTR means that the syscall should be +- * called again after the signal handler returns. +- */ +- regs->gpr[3] = regs->orig_gpr3; +- regs->nip -= 4; +- regs->result = 0; +- break; +- } +-} +- +-/* +- * Note that 'init' is a special process: it doesn't get signals it doesn't +- * want to handle. Thus you cannot kill init even with a SIGKILL even by +- * mistake. +- */ +-int do_signal(sigset_t *oldset, struct pt_regs *regs) +-{ +- siginfo_t info; +- int signr; +- struct k_sigaction ka; +- +- /* +- * If the current thread is 32 bit - invoke the +- * 32 bit signal handling code +- */ +- if (test_thread_flag(TIF_32BIT)) +- return do_signal32(oldset, regs); +- +- if (test_thread_flag(TIF_RESTORE_SIGMASK)) +- oldset = &current->saved_sigmask; +- else if (!oldset) +- oldset = &current->blocked; +- +- signr = get_signal_to_deliver(&info, &ka, regs, NULL); +- if (signr > 0) { +- int ret; +- +- /* Whee! Actually deliver the signal. */ +- if (TRAP(regs) == 0x0C00) +- syscall_restart(regs, &ka); +- +- /* +- * Reenable the DABR before delivering the signal to +- * user space. The DABR will have been cleared if it +- * triggered inside the kernel. +- */ +- if (current->thread.dabr) +- set_dabr(current->thread.dabr); +- +- ret = handle_signal(signr, &ka, &info, oldset, regs); +- +- /* If a signal was successfully delivered, the saved sigmask is in +- its frame, and we can clear the TIF_RESTORE_SIGMASK flag */ +- if (ret && test_thread_flag(TIF_RESTORE_SIGMASK)) +- clear_thread_flag(TIF_RESTORE_SIGMASK); +- +- return ret; +- } +- +- if (TRAP(regs) == 0x0C00) { /* System Call!
*/ +- if ((int)regs->result == -ERESTARTNOHAND || +- (int)regs->result == -ERESTARTSYS || +- (int)regs->result == -ERESTARTNOINTR) { +- regs->gpr[3] = regs->orig_gpr3; +- regs->nip -= 4; /* Back up & retry system call */ +- regs->result = 0; +- } else if ((int)regs->result == -ERESTART_RESTARTBLOCK) { +- regs->gpr[0] = __NR_restart_syscall; +- regs->nip -= 4; +- regs->result = 0; +- } +- } +- /* No signal to deliver -- put the saved sigmask back */ +- if (test_thread_flag(TIF_RESTORE_SIGMASK)) { +- clear_thread_flag(TIF_RESTORE_SIGMASK); +- sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); +- } +- +- return 0; +-} +-EXPORT_SYMBOL(do_signal); +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c +--- linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/sys_ppc32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -773,6 +773,13 @@ + return sys_truncate(path, (high << 32) | low); + } + ++asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, ++ u32 lenhi, u32 lenlo) ++{ ++ return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, ++ ((loff_t)lenhi << 32) | lenlo); ++} ++ + asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high, + unsigned long low) + { +diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/vdso.c linux-2.6.22-591/arch/powerpc/kernel/vdso.c +--- linux-2.6.22-570/arch/powerpc/kernel/vdso.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/kernel/vdso.c 2007-12-21 15:36:11.000000000 -0500 +@@ -671,7 +671,7 @@ + /* + * Fill up the "systemcfg" stuff for backward compatiblity + */ +- strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); ++ strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); + vdso_data->version.major = SYSTEMCFG_MAJOR; + vdso_data->version.minor = SYSTEMCFG_MINOR; + vdso_data->processor = mfspr(SPRN_PVR); +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c +--- linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/44x_mmu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -12,7 +12,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c +--- linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/4xx_mmu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -9,7 +9,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
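Annotation: the new compat_sys_fallocate() above splices a 64-bit offset and length back together from the two 32-bit register halves a 32-bit task passes in; the cast before the shift is what keeps the high word. Minimal self-contained check of that idiom:

#include <assert.h>
#include <stdint.h>

static int64_t pack_hi_lo(uint32_t hi, uint32_t lo)
{
	/* without the 64-bit cast, "hi << 32" would truncate/overflow */
	return ((int64_t)hi << 32) | lo;
}

int main(void)
{
	assert(pack_hi_lo(0x00000001u, 0x80000000u) == 0x180000000LL);
	assert(pack_hi_lo(0, 42) == 42);
	return 0;
}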
+ * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/Makefile linux-2.6.22-591/arch/powerpc/mm/Makefile +--- linux-2.6.22-570/arch/powerpc/mm/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -11,8 +11,7 @@ + hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o + obj-$(CONFIG_PPC64) += init_64.o pgtable_64.o mmu_context_64.o \ + hash_utils_64.o hash_low_64.o tlb_64.o \ +- slb_low.o slb.o stab.o mmap.o imalloc.o \ +- $(hash-y) ++ slb_low.o slb.o stab.o mmap.o $(hash-y) + obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o tlb_32.o + obj-$(CONFIG_40x) += 4xx_mmu.o + obj-$(CONFIG_44x) += 44x_mmu.o +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fault.c linux-2.6.22-591/arch/powerpc/mm/fault.c +--- linux-2.6.22-570/arch/powerpc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -381,7 +382,7 @@ + printk("VM: killing process %s(%d:#%u)\n", + current->comm, current->pid, current->xid); + if (user_mode(regs)) +- do_exit(SIGKILL); ++ do_group_exit(SIGKILL); + return SIGKILL; + + do_sigbus: +@@ -412,6 +413,13 @@ + return; + } + ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) ++ /* Restore our previous state. */ ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Not reached. */ ++#endif ++ + /* kernel has accessed a bad area */ + + switch (regs->trap) { +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c +--- linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/fsl_booke_mmu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -14,7 +14,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
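Annotation: the fault-path change above swaps do_exit(SIGKILL) for do_group_exit(SIGKILL), so an out-of-memory kill takes down every thread in the group rather than only the faulting thread. A rough user-space analogy, purely illustrative:

#include <signal.h>
#include <unistd.h>

int main(void)
{
	/* kill() targets the whole process (every thread) -- the analogue
	 * of do_group_exit(); pthread_kill()/pthread_exit() affect one
	 * thread only, which is closer to the old do_exit() behaviour. */
	kill(getpid(), SIGKILL);
	return 0;	/* never reached */
}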
+ * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c +--- linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/hash_native_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -104,7 +104,7 @@ + spin_unlock(&native_tlbie_lock); + } + +-static inline void native_lock_hpte(hpte_t *hptep) ++static inline void native_lock_hpte(struct hash_pte *hptep) + { + unsigned long *word = &hptep->v; + +@@ -116,7 +116,7 @@ + } + } + +-static inline void native_unlock_hpte(hpte_t *hptep) ++static inline void native_unlock_hpte(struct hash_pte *hptep) + { + unsigned long *word = &hptep->v; + +@@ -128,7 +128,7 @@ + unsigned long pa, unsigned long rflags, + unsigned long vflags, int psize) + { +- hpte_t *hptep = htab_address + hpte_group; ++ struct hash_pte *hptep = htab_address + hpte_group; + unsigned long hpte_v, hpte_r; + int i; + +@@ -177,7 +177,7 @@ + + static long native_hpte_remove(unsigned long hpte_group) + { +- hpte_t *hptep; ++ struct hash_pte *hptep; + int i; + int slot_offset; + unsigned long hpte_v; +@@ -217,7 +217,7 @@ + static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int psize, int local) + { +- hpte_t *hptep = htab_address + slot; ++ struct hash_pte *hptep = htab_address + slot; + unsigned long hpte_v, want_v; + int ret = 0; + +@@ -233,15 +233,14 @@ + /* Even if we miss, we need to invalidate the TLB */ + if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { + DBG_LOW(" -> miss\n"); +- native_unlock_hpte(hptep); + ret = -1; + } else { + DBG_LOW(" -> hit\n"); + /* Update the HPTE */ + hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | + (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); +- native_unlock_hpte(hptep); + } ++ native_unlock_hpte(hptep); + + /* Ensure it is out of the tlb too. 
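Annotation: native_lock_hpte()/native_unlock_hpte() above implement a bit spinlock directly in the HPTE's first word (note the hunk also consolidates the two unlock paths of native_hpte_updatepp() into a single unlock after the if/else). A user-space sketch of the same lock/unlock pattern using C11 atomics; the bit index is an assumption for illustration:

#include <stdatomic.h>

#define HPTE_LOCK_BIT 3		/* assumed software-reserved bit */

static void hpte_lock(_Atomic unsigned long *word)
{
	const unsigned long mask = 1ul << HPTE_LOCK_BIT;

	/* spin until our fetch_or is the one that set the bit */
	while (atomic_fetch_or_explicit(word, mask,
					memory_order_acquire) & mask)
		;	/* busy-wait; the kernel relaxes with cpu_relax() */
}

static void hpte_unlock(_Atomic unsigned long *word)
{
	atomic_fetch_and_explicit(word, ~(1ul << HPTE_LOCK_BIT),
				  memory_order_release);
}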
*/ + tlbie(va, psize, local); +@@ -251,7 +250,7 @@ + + static long native_hpte_find(unsigned long va, int psize) + { +- hpte_t *hptep; ++ struct hash_pte *hptep; + unsigned long hash; + unsigned long i, j; + long slot; +@@ -294,7 +293,7 @@ + { + unsigned long vsid, va; + long slot; +- hpte_t *hptep; ++ struct hash_pte *hptep; + + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0x0fffffff); +@@ -315,7 +314,7 @@ + static void native_hpte_invalidate(unsigned long slot, unsigned long va, + int psize, int local) + { +- hpte_t *hptep = htab_address + slot; ++ struct hash_pte *hptep = htab_address + slot; + unsigned long hpte_v; + unsigned long want_v; + unsigned long flags; +@@ -345,7 +344,7 @@ + #define LP_BITS 8 + #define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT) + +-static void hpte_decode(hpte_t *hpte, unsigned long slot, ++static void hpte_decode(struct hash_pte *hpte, unsigned long slot, + int *psize, unsigned long *va) + { + unsigned long hpte_r = hpte->r; +@@ -415,7 +414,7 @@ + static void native_hpte_clear(void) + { + unsigned long slot, slots, flags; +- hpte_t *hptep = htab_address; ++ struct hash_pte *hptep = htab_address; + unsigned long hpte_v, va; + unsigned long pteg_count; + int psize; +@@ -462,7 +461,7 @@ + static void native_flush_hash_range(unsigned long number, int local) + { + unsigned long va, hash, index, hidx, shift, slot; +- hpte_t *hptep; ++ struct hash_pte *hptep; + unsigned long hpte_v; + unsigned long want_v; + unsigned long flags; +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c +--- linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/hash_utils_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -87,7 +87,7 @@ + static unsigned long _SDR1; + struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; + +-hpte_t *htab_address; ++struct hash_pte *htab_address; + unsigned long htab_size_bytes; + unsigned long htab_hash_mask; + int mmu_linear_psize = MMU_PAGE_4K; +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/imalloc.c linux-2.6.22-591/arch/powerpc/mm/imalloc.c +--- linux-2.6.22-570/arch/powerpc/mm/imalloc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/imalloc.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,313 +0,0 @@ +-/* +- * c 2001 PPC 64 Team, IBM Corp +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. 
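Annotation: the mechanical hpte_t -> struct hash_pte rename running through these hunks changes only the spelling of the type; the layout stays the two architected 64-bit words of a hashed page table entry, as the ->v/->r accesses in the surrounding code show. For reference (field comments are a summary, not the full bit layout):

struct hash_pte {
	unsigned long v;	/* word 0: AVPN, valid/secondary bits, lock */
	unsigned long r;	/* word 1: real page number, protection, R/C */
};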
+- */ +- +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include "mmu_decl.h" +- +-static DEFINE_MUTEX(imlist_mutex); +-struct vm_struct * imlist = NULL; +- +-static int get_free_im_addr(unsigned long size, unsigned long *im_addr) +-{ +- unsigned long addr; +- struct vm_struct **p, *tmp; +- +- addr = ioremap_bot; +- for (p = &imlist; (tmp = *p) ; p = &tmp->next) { +- if (size + addr < (unsigned long) tmp->addr) +- break; +- if ((unsigned long)tmp->addr >= ioremap_bot) +- addr = tmp->size + (unsigned long) tmp->addr; +- if (addr >= IMALLOC_END-size) +- return 1; +- } +- *im_addr = addr; +- +- return 0; +-} +- +-/* Return whether the region described by v_addr and size is a subset +- * of the region described by parent +- */ +-static inline int im_region_is_subset(unsigned long v_addr, unsigned long size, +- struct vm_struct *parent) +-{ +- return (int) (v_addr >= (unsigned long) parent->addr && +- v_addr < (unsigned long) parent->addr + parent->size && +- size < parent->size); +-} +- +-/* Return whether the region described by v_addr and size is a superset +- * of the region described by child +- */ +-static int im_region_is_superset(unsigned long v_addr, unsigned long size, +- struct vm_struct *child) +-{ +- struct vm_struct parent; +- +- parent.addr = (void *) v_addr; +- parent.size = size; +- +- return im_region_is_subset((unsigned long) child->addr, child->size, +- &parent); +-} +- +-/* Return whether the region described by v_addr and size overlaps +- * the region described by vm. Overlapping regions meet the +- * following conditions: +- * 1) The regions share some part of the address space +- * 2) The regions aren't identical +- * 3) Neither region is a subset of the other +- */ +-static int im_region_overlaps(unsigned long v_addr, unsigned long size, +- struct vm_struct *vm) +-{ +- if (im_region_is_superset(v_addr, size, vm)) +- return 0; +- +- return (v_addr + size > (unsigned long) vm->addr + vm->size && +- v_addr < (unsigned long) vm->addr + vm->size) || +- (v_addr < (unsigned long) vm->addr && +- v_addr + size > (unsigned long) vm->addr); +-} +- +-/* Determine imalloc status of region described by v_addr and size. +- * Can return one of the following: +- * IM_REGION_UNUSED - Entire region is unallocated in imalloc space. +- * IM_REGION_SUBSET - Region is a subset of a region that is already +- * allocated in imalloc space. +- * vm will be assigned to a ptr to the parent region. +- * IM_REGION_EXISTS - Exact region already allocated in imalloc space. +- * vm will be assigned to a ptr to the existing imlist +- * member. +- * IM_REGION_OVERLAPS - Region overlaps an allocated region in imalloc space. +- * IM_REGION_SUPERSET - Region is a superset of a region that is already +- * allocated in imalloc space. 
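Annotation: get_free_im_addr() in the imalloc code being retired here is a first-fit scan over an address-sorted list of live regions. A simplified, generic restatement of the search with the imalloc specifics stripped out:

struct range { unsigned long addr, size; struct range *next; };

/* Find the lowest hole of 'size' bytes in [base, limit); 'busy' is
 * sorted by address.  Returns 0 and fills *out, or 1 if nothing fits. */
static int first_fit(const struct range *busy, unsigned long base,
		     unsigned long limit, unsigned long size,
		     unsigned long *out)
{
	unsigned long addr = base;
	const struct range *r;

	for (r = busy; r; r = r->next) {
		if (addr + size <= r->addr)
			break;				/* hole before r fits */
		if (r->addr + r->size > addr)
			addr = r->addr + r->size;	/* jump past r */
		if (addr >= limit - size)
			return 1;			/* out of space */
	}
	*out = addr;
	return 0;
}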
+- */ +-static int im_region_status(unsigned long v_addr, unsigned long size, +- struct vm_struct **vm) +-{ +- struct vm_struct *tmp; +- +- for (tmp = imlist; tmp; tmp = tmp->next) +- if (v_addr < (unsigned long) tmp->addr + tmp->size) +- break; +- +- *vm = NULL; +- if (tmp) { +- if (im_region_overlaps(v_addr, size, tmp)) +- return IM_REGION_OVERLAP; +- +- *vm = tmp; +- if (im_region_is_subset(v_addr, size, tmp)) { +- /* Return with tmp pointing to superset */ +- return IM_REGION_SUBSET; +- } +- if (im_region_is_superset(v_addr, size, tmp)) { +- /* Return with tmp pointing to first subset */ +- return IM_REGION_SUPERSET; +- } +- else if (v_addr == (unsigned long) tmp->addr && +- size == tmp->size) { +- /* Return with tmp pointing to exact region */ +- return IM_REGION_EXISTS; +- } +- } +- +- return IM_REGION_UNUSED; +-} +- +-static struct vm_struct * split_im_region(unsigned long v_addr, +- unsigned long size, struct vm_struct *parent) +-{ +- struct vm_struct *vm1 = NULL; +- struct vm_struct *vm2 = NULL; +- struct vm_struct *new_vm = NULL; +- +- vm1 = kmalloc(sizeof(*vm1), GFP_KERNEL); +- if (vm1 == NULL) { +- printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); +- return NULL; +- } +- +- if (v_addr == (unsigned long) parent->addr) { +- /* Use existing parent vm_struct to represent child, allocate +- * new one for the remainder of parent range +- */ +- vm1->size = parent->size - size; +- vm1->addr = (void *) (v_addr + size); +- vm1->next = parent->next; +- +- parent->size = size; +- parent->next = vm1; +- new_vm = parent; +- } else if (v_addr + size == (unsigned long) parent->addr + +- parent->size) { +- /* Allocate new vm_struct to represent child, use existing +- * parent one for remainder of parent range +- */ +- vm1->size = size; +- vm1->addr = (void *) v_addr; +- vm1->next = parent->next; +- new_vm = vm1; +- +- parent->size -= size; +- parent->next = vm1; +- } else { +- /* Allocate two new vm_structs for the new child and +- * uppermost remainder, and use existing parent one for the +- * lower remainder of parent range +- */ +- vm2 = kmalloc(sizeof(*vm2), GFP_KERNEL); +- if (vm2 == NULL) { +- printk(KERN_ERR "%s() out of memory\n", __FUNCTION__); +- kfree(vm1); +- return NULL; +- } +- +- vm1->size = size; +- vm1->addr = (void *) v_addr; +- vm1->next = vm2; +- new_vm = vm1; +- +- vm2->size = ((unsigned long) parent->addr + parent->size) - +- (v_addr + size); +- vm2->addr = (void *) v_addr + size; +- vm2->next = parent->next; +- +- parent->size = v_addr - (unsigned long) parent->addr; +- parent->next = vm1; +- } +- +- return new_vm; +-} +- +-static struct vm_struct * __add_new_im_area(unsigned long req_addr, +- unsigned long size) +-{ +- struct vm_struct **p, *tmp, *area; +- +- for (p = &imlist; (tmp = *p) ; p = &tmp->next) { +- if (req_addr + size <= (unsigned long)tmp->addr) +- break; +- } +- +- area = kmalloc(sizeof(*area), GFP_KERNEL); +- if (!area) +- return NULL; +- area->flags = 0; +- area->addr = (void *)req_addr; +- area->size = size; +- area->next = *p; +- *p = area; +- +- return area; +-} +- +-static struct vm_struct * __im_get_area(unsigned long req_addr, +- unsigned long size, +- int criteria) +-{ +- struct vm_struct *tmp; +- int status; +- +- status = im_region_status(req_addr, size, &tmp); +- if ((criteria & status) == 0) { +- return NULL; +- } +- +- switch (status) { +- case IM_REGION_UNUSED: +- tmp = __add_new_im_area(req_addr, size); +- break; +- case IM_REGION_SUBSET: +- tmp = split_im_region(req_addr, size, tmp); +- break; +- case IM_REGION_EXISTS: +- /* Return 
requested region */ +- break; +- case IM_REGION_SUPERSET: +- /* Return first existing subset of requested region */ +- break; +- default: +- printk(KERN_ERR "%s() unexpected imalloc region status\n", +- __FUNCTION__); +- tmp = NULL; +- } +- +- return tmp; +-} +- +-struct vm_struct * im_get_free_area(unsigned long size) +-{ +- struct vm_struct *area; +- unsigned long addr; +- +- mutex_lock(&imlist_mutex); +- if (get_free_im_addr(size, &addr)) { +- printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n", +- __FUNCTION__, size); +- area = NULL; +- goto next_im_done; +- } +- +- area = __im_get_area(addr, size, IM_REGION_UNUSED); +- if (area == NULL) { +- printk(KERN_ERR +- "%s() cannot obtain area for addr 0x%lx size 0x%lx\n", +- __FUNCTION__, addr, size); +- } +-next_im_done: +- mutex_unlock(&imlist_mutex); +- return area; +-} +- +-struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, +- int criteria) +-{ +- struct vm_struct *area; +- +- mutex_lock(&imlist_mutex); +- area = __im_get_area(v_addr, size, criteria); +- mutex_unlock(&imlist_mutex); +- return area; +-} +- +-void im_free(void * addr) +-{ +- struct vm_struct **p, *tmp; +- +- if (!addr) +- return; +- if ((unsigned long) addr & ~PAGE_MASK) { +- printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__, addr); +- return; +- } +- mutex_lock(&imlist_mutex); +- for (p = &imlist ; (tmp = *p) ; p = &tmp->next) { +- if (tmp->addr == addr) { +- *p = tmp->next; +- unmap_vm_area(tmp); +- kfree(tmp); +- mutex_unlock(&imlist_mutex); +- return; +- } +- } +- mutex_unlock(&imlist_mutex); +- printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__, +- addr); +-} +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_32.c linux-2.6.22-591/arch/powerpc/mm/init_32.c +--- linux-2.6.22-570/arch/powerpc/mm/init_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/init_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -5,7 +5,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_64.c linux-2.6.22-591/arch/powerpc/mm/init_64.c +--- linux-2.6.22-570/arch/powerpc/mm/init_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/init_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -5,7 +5,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mem.c linux-2.6.22-591/arch/powerpc/mm/mem.c +--- linux-2.6.22-570/arch/powerpc/mm/mem.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/mem.c 2007-12-21 15:36:11.000000000 -0500 +@@ -5,7 +5,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
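Annotation: im_free() just above walks the list with a pointer-to-a-pointer, which lets one loop unlink either the head or an interior node without a special case. The idiom in isolation:

#include <stddef.h>

struct node { void *addr; struct node *next; };

/* Unlink and return the node whose ->addr matches, or NULL. */
static struct node *unlink_by_addr(struct node **head, void *addr)
{
	struct node **p, *tmp;

	for (p = head; (tmp = *p) != NULL; p = &tmp->next) {
		if (tmp->addr == addr) {
			*p = tmp->next;	/* works for head and interior */
			return tmp;
		}
	}
	return NULL;
}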
+ * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c +--- linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/mmu_context_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -11,7 +11,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h +--- linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/mmu_decl.h 2007-12-21 15:36:11.000000000 -0500 +@@ -8,7 +8,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +@@ -40,8 +39,8 @@ + extern unsigned long ioremap_base; + extern unsigned int rtas_data, rtas_size; + +-struct _PTE; +-extern struct _PTE *Hash, *Hash_end; ++struct hash_pte; ++extern struct hash_pte *Hash, *Hash_end; + extern unsigned long Hash_size, Hash_mask; + + extern unsigned int num_tlbcam_entries; +@@ -90,16 +89,4 @@ + else + _tlbie(va); + } +-#else /* CONFIG_PPC64 */ +-/* imalloc region types */ +-#define IM_REGION_UNUSED 0x1 +-#define IM_REGION_SUBSET 0x2 +-#define IM_REGION_EXISTS 0x4 +-#define IM_REGION_OVERLAP 0x8 +-#define IM_REGION_SUPERSET 0x10 +- +-extern struct vm_struct * im_get_free_area(unsigned long size); +-extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size, +- int region_type); +-extern void im_free(void *addr); + #endif +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c +--- linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/pgtable_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -8,7 +8,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +@@ -37,7 +36,6 @@ + unsigned long ioremap_base; + unsigned long ioremap_bot; + EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ +-int io_bat_index; + + #if defined(CONFIG_6xx) || defined(CONFIG_POWER3) + #define HAVE_BATS 1 +@@ -300,51 +298,6 @@ + } + } + +-/* is x a power of 4? */ +-#define is_power_of_4(x) is_power_of_2(x) && (ffs(x) & 1) +- +-/* +- * Set up a mapping for a block of I/O. +- * virt, phys, size must all be page-aligned. +- * This should only be called before ioremap is called. +- */ +-void __init io_block_mapping(unsigned long virt, phys_addr_t phys, +- unsigned int size, int flags) +-{ +- int i; +- +- if (virt > KERNELBASE && virt < ioremap_bot) +- ioremap_bot = ioremap_base = virt; +- +-#ifdef HAVE_BATS +- /* +- * Use a BAT for this if possible... 
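Annotation: the is_power_of_4() macro deleted above, besides losing its only user, had a precedence hazard: without parentheses around the whole expansion, "!is_power_of_4(x)" expands to "!is_power_of_2(x) && (ffs(x) & 1)", negating only the first half. A safe inline-function version, for the record:

#include <stdbool.h>
#include <strings.h>		/* ffs() */

static inline bool is_power_of_2(unsigned int x)
{
	return x != 0 && (x & (x - 1)) == 0;
}

static inline bool is_power_of_4(unsigned int x)
{
	/* powers of 4 have their single set bit at an even position,
	 * so ffs() returns an odd index: 1 -> 1, 4 -> 3, 16 -> 5 ... */
	return is_power_of_2(x) && (ffs((int)x) & 1);
}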
+- */ +- if (io_bat_index < 2 && is_power_of_2(size) +- && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { +- setbat(io_bat_index, virt, phys, size, flags); +- ++io_bat_index; +- return; +- } +-#endif /* HAVE_BATS */ +- +-#ifdef HAVE_TLBCAM +- /* +- * Use a CAM for this if possible... +- */ +- if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) +- && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { +- settlbcam(tlbcam_index, virt, phys, size, flags, 0); +- ++tlbcam_index; +- return; +- } +-#endif /* HAVE_TLBCAM */ +- +- /* No BATs available, put it in the page tables. */ +- for (i = 0; i < size; i += PAGE_SIZE) +- map_page(virt + i, phys + i, flags); +-} +- + /* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to +@@ -379,82 +332,6 @@ + return(retval); + } + +-/* Find physical address for this virtual address. Normally used by +- * I/O functions, but anyone can call it. +- */ +-unsigned long iopa(unsigned long addr) +-{ +- unsigned long pa; +- +- /* I don't know why this won't work on PMacs or CHRP. It +- * appears there is some bug, or there is some implicit +- * mapping done not properly represented by BATs or in page +- * tables.......I am actively working on resolving this, but +- * can't hold up other stuff. -- Dan +- */ +- pte_t *pte; +- struct mm_struct *mm; +- +- /* Check the BATs */ +- pa = v_mapped_by_bats(addr); +- if (pa) +- return pa; +- +- /* Allow mapping of user addresses (within the thread) +- * for DMA if necessary. +- */ +- if (addr < TASK_SIZE) +- mm = current->mm; +- else +- mm = &init_mm; +- +- pa = 0; +- if (get_pteptr(mm, addr, &pte, NULL)) { +- pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); +- pte_unmap(pte); +- } +- +- return(pa); +-} +- +-/* This is will find the virtual address for a physical one.... +- * Swiped from APUS, could be dangerous :-). +- * This is only a placeholder until I really find a way to make this +- * work. -- Dan +- */ +-unsigned long +-mm_ptov (unsigned long paddr) +-{ +- unsigned long ret; +-#if 0 +- if (paddr < 16*1024*1024) +- ret = ZTWO_VADDR(paddr); +- else { +- int i; +- +- for (i = 0; i < kmap_chunk_count;){ +- unsigned long phys = kmap_chunks[i++]; +- unsigned long size = kmap_chunks[i++]; +- unsigned long virt = kmap_chunks[i++]; +- if (paddr >= phys +- && paddr < (phys + size)){ +- ret = virt + paddr - phys; +- goto exit; +- } +- } +- +- ret = (unsigned long) __va(paddr); +- } +-exit: +-#ifdef DEBUGPV +- printk ("PTOV(%lx)=%lx\n", paddr, ret); +-#endif +-#else +- ret = (unsigned long)paddr + KERNELBASE; +-#endif +- return ret; +-} +- + #ifdef CONFIG_DEBUG_PAGEALLOC + + static int __change_page_attr(struct page *page, pgprot_t prot) +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c +--- linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/pgtable_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -7,7 +7,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
+ * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +@@ -34,41 +33,27 @@ + #include + #include + #include +-#include +-#include +-#include +-#include +-#include +-#include + + #include + #include + #include +-#include +-#include + #include + #include + #include + #include +-#include + #include + #include + #include +-#include + #include +-#include + #include + #include + #include +-#include + #include +-#include + #include + + #include "mmu_decl.h" + +-unsigned long ioremap_bot = IMALLOC_BASE; +-static unsigned long phbs_io_bot = PHBS_IO_BASE; ++unsigned long ioremap_bot = IOREMAP_BASE; + + /* + * map_io_page currently only called by __ioremap +@@ -102,8 +87,8 @@ + * entry in the hardware page table. + * + */ +- if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags, +- mmu_io_psize)) { ++ if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE, ++ pa, flags, mmu_io_psize)) { + printk(KERN_ERR "Failed to do bolted mapping IO " + "memory at %016lx !\n", pa); + return -ENOMEM; +@@ -113,8 +98,11 @@ + } + + +-static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa, +- unsigned long ea, unsigned long size, ++/** ++ * __ioremap_at - Low level function to establish the page tables ++ * for an IO mapping ++ */ ++void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, + unsigned long flags) + { + unsigned long i; +@@ -122,17 +110,35 @@ + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + ++ WARN_ON(pa & ~PAGE_MASK); ++ WARN_ON(((unsigned long)ea) & ~PAGE_MASK); ++ WARN_ON(size & ~PAGE_MASK); ++ + for (i = 0; i < size; i += PAGE_SIZE) +- if (map_io_page(ea+i, pa+i, flags)) ++ if (map_io_page((unsigned long)ea+i, pa+i, flags)) + return NULL; + +- return (void __iomem *) (ea + (addr & ~PAGE_MASK)); ++ return (void __iomem *)ea; ++} ++ ++/** ++ * __iounmap_from - Low level function to tear down the page tables ++ * for an IO mapping. This is used for mappings that ++ * are manipulated manually, like partial unmapping of ++ * PCI IOs or ISA space. 
++ */ ++void __iounmap_at(void *ea, unsigned long size) ++{ ++ WARN_ON(((unsigned long)ea) & ~PAGE_MASK); ++ WARN_ON(size & ~PAGE_MASK); ++ ++ unmap_kernel_range((unsigned long)ea, size); + } + + void __iomem * __ioremap(phys_addr_t addr, unsigned long size, + unsigned long flags) + { +- unsigned long pa, ea; ++ phys_addr_t paligned; + void __iomem *ret; + + /* +@@ -144,27 +150,30 @@ + * IMALLOC_END + * + */ +- pa = addr & PAGE_MASK; +- size = PAGE_ALIGN(addr + size) - pa; ++ paligned = addr & PAGE_MASK; ++ size = PAGE_ALIGN(addr + size) - paligned; + +- if ((size == 0) || (pa == 0)) ++ if ((size == 0) || (paligned == 0)) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; +- area = im_get_free_area(size); ++ ++ area = __get_vm_area(size, VM_IOREMAP, ++ ioremap_bot, IOREMAP_END); + if (area == NULL) + return NULL; +- ea = (unsigned long)(area->addr); +- ret = __ioremap_com(addr, pa, ea, size, flags); ++ ret = __ioremap_at(paligned, area->addr, size, flags); + if (!ret) +- im_free(area->addr); ++ vunmap(area->addr); + } else { +- ea = ioremap_bot; +- ret = __ioremap_com(addr, pa, ea, size, flags); ++ ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags); + if (ret) + ioremap_bot += size; + } ++ ++ if (ret) ++ ret += addr & ~PAGE_MASK; + return ret; + } + +@@ -187,62 +196,9 @@ + } + + +-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) +- +-int __ioremap_explicit(phys_addr_t pa, unsigned long ea, +- unsigned long size, unsigned long flags) +-{ +- struct vm_struct *area; +- void __iomem *ret; +- +- /* For now, require page-aligned values for pa, ea, and size */ +- if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || +- !IS_PAGE_ALIGNED(size)) { +- printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); +- return 1; +- } +- +- if (!mem_init_done) { +- /* Two things to consider in this case: +- * 1) No records will be kept (imalloc, etc) that the region +- * has been remapped +- * 2) It won't be easy to iounmap() the region later (because +- * of 1) +- */ +- ; +- } else { +- area = im_get_area(ea, size, +- IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); +- if (area == NULL) { +- /* Expected when PHB-dlpar is in play */ +- return 1; +- } +- if (ea != (unsigned long) area->addr) { +- printk(KERN_ERR "unexpected addr return from " +- "im_get_area\n"); +- return 1; +- } +- } +- +- ret = __ioremap_com(pa, pa, ea, size, flags); +- if (ret == NULL) { +- printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); +- return 1; +- } +- if (ret != (void *) ea) { +- printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); +- return 1; +- } +- +- return 0; +-} +- + /* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. 
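Annotation: the reworked __ioremap() above maps whole pages: it rounds the physical base down, rounds the end up, maps that span, then re-adds the sub-page offset to the returned cookie. The arithmetic on its own, with made-up example values:

#include <stdio.h>

#define PAGE_SIZE	4096ul
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PAGE_ALIGN(a)	(((a) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long addr = 0x10000f20ul, size = 0x300ul;

	unsigned long paligned = addr & PAGE_MASK;	/* 0x10000000 */
	unsigned long maplen   = PAGE_ALIGN(addr + size)
				 - paligned;		/* 0x2000: two pages */
	unsigned long offset   = addr & ~PAGE_MASK;	/* 0xf20 */

	printf("map [%#lx, %#lx), hand back base+%#lx\n",
	       paligned, paligned + maplen, offset);
	return 0;
}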
+- * This code is modeled after vmalloc code - unmap_vm_area() +- * +- * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ + void __iounmap(volatile void __iomem *token) + { +@@ -251,9 +207,14 @@ + if (!mem_init_done) + return; + +- addr = (void *) ((unsigned long __force) token & PAGE_MASK); +- +- im_free(addr); ++ addr = (void *) ((unsigned long __force) ++ PCI_FIX_ADDR(token) & PAGE_MASK); ++ if ((unsigned long)addr < ioremap_bot) { ++ printk(KERN_WARNING "Attempt to iounmap early bolted mapping" ++ " at 0x%p\n", addr); ++ return; ++ } ++ vunmap(addr); + } + + void iounmap(volatile void __iomem *token) +@@ -264,77 +225,8 @@ + __iounmap(token); + } + +-static int iounmap_subset_regions(unsigned long addr, unsigned long size) +-{ +- struct vm_struct *area; +- +- /* Check whether subsets of this region exist */ +- area = im_get_area(addr, size, IM_REGION_SUPERSET); +- if (area == NULL) +- return 1; +- +- while (area) { +- iounmap((void __iomem *) area->addr); +- area = im_get_area(addr, size, +- IM_REGION_SUPERSET); +- } +- +- return 0; +-} +- +-int __iounmap_explicit(volatile void __iomem *start, unsigned long size) +-{ +- struct vm_struct *area; +- unsigned long addr; +- int rc; +- +- addr = (unsigned long __force) start & PAGE_MASK; +- +- /* Verify that the region either exists or is a subset of an existing +- * region. In the latter case, split the parent region to create +- * the exact region +- */ +- area = im_get_area(addr, size, +- IM_REGION_EXISTS | IM_REGION_SUBSET); +- if (area == NULL) { +- /* Determine whether subset regions exist. If so, unmap */ +- rc = iounmap_subset_regions(addr, size); +- if (rc) { +- printk(KERN_ERR +- "%s() cannot unmap nonexistent range 0x%lx\n", +- __FUNCTION__, addr); +- return 1; +- } +- } else { +- iounmap((void __iomem *) area->addr); +- } +- /* +- * FIXME! This can't be right: +- iounmap(area->addr); +- * Maybe it should be "iounmap(area);" +- */ +- return 0; +-} +- + EXPORT_SYMBOL(ioremap); + EXPORT_SYMBOL(ioremap_flags); + EXPORT_SYMBOL(__ioremap); + EXPORT_SYMBOL(iounmap); + EXPORT_SYMBOL(__iounmap); +- +-static DEFINE_SPINLOCK(phb_io_lock); +- +-void __iomem * reserve_phb_iospace(unsigned long size) +-{ +- void __iomem *virt_addr; +- +- if (phbs_io_bot >= IMALLOC_BASE) +- panic("reserve_phb_iospace(): phb io space overflow\n"); +- +- spin_lock(&phb_io_lock); +- virt_addr = (void __iomem *) phbs_io_bot; +- phbs_io_bot += size; +- spin_unlock(&phb_io_lock); +- +- return virt_addr; +-} +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c +--- linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/ppc_mmu_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -11,7 +11,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). 
+ * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +@@ -35,12 +34,12 @@ + + #include "mmu_decl.h" + +-PTE *Hash, *Hash_end; ++struct hash_pte *Hash, *Hash_end; + unsigned long Hash_size, Hash_mask; + unsigned long _SDR1; + + union ubat { /* BAT register values to be loaded */ +- BAT bat; ++ struct ppc_bat bat; + u32 word[2]; + } BATS[8][2]; /* 8 pairs of IBAT, DBAT */ + +@@ -245,7 +244,7 @@ + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; + +- Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); ++ Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_32.c linux-2.6.22-591/arch/powerpc/mm/tlb_32.c +--- linux-2.6.22-570/arch/powerpc/mm/tlb_32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/tlb_32.c 2007-12-21 15:36:11.000000000 -0500 +@@ -11,7 +11,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_64.c linux-2.6.22-591/arch/powerpc/mm/tlb_64.c +--- linux-2.6.22-570/arch/powerpc/mm/tlb_64.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/mm/tlb_64.c 2007-12-21 15:36:11.000000000 -0500 +@@ -8,7 +8,6 @@ + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras +- * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds +@@ -239,3 +238,59 @@ + pte_free_submit(*batchp); + *batchp = NULL; + } ++ ++/** ++ * __flush_hash_table_range - Flush all HPTEs for a given address range ++ * from the hash table (and the TLB). But keeps ++ * the linux PTEs intact. ++ * ++ * @mm : mm_struct of the target address space (generally init_mm) ++ * @start : starting address ++ * @end : ending address (not included in the flush) ++ * ++ * This function is mostly to be used by some IO hotplug code in order ++ * to remove all hash entries from a given address range used to map IO ++ * space on a removed PCI-PCI bidge without tearing down the full mapping ++ * since 64K pages may overlap with other bridges when using 64K pages ++ * with 4K HW pages on IO space. ++ * ++ * Because of that usage pattern, it's only available with CONFIG_HOTPLUG ++ * and is implemented for small size rather than speed. ++ */ ++#ifdef CONFIG_HOTPLUG ++ ++void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, ++ unsigned long end) ++{ ++ unsigned long flags; ++ ++ start = _ALIGN_DOWN(start, PAGE_SIZE); ++ end = _ALIGN_UP(end, PAGE_SIZE); ++ ++ BUG_ON(!mm->pgd); ++ ++ /* Note: Normally, we should only ever use a batch within a ++ * PTE locked section. This violates the rule, but will work ++ * since we don't actually modify the PTEs, we just flush the ++ * hash while leaving the PTEs intact (including their reference ++ * to being hashed). This is not the most performance oriented ++ * way to do things but is fine for our needs here. 
++ */ ++ local_irq_save(flags); ++ arch_enter_lazy_mmu_mode(); ++ for (; start < end; start += PAGE_SIZE) { ++ pte_t *ptep = find_linux_pte(mm->pgd, start); ++ unsigned long pte; ++ ++ if (ptep == NULL) ++ continue; ++ pte = pte_val(*ptep); ++ if (!(pte & _PAGE_HASHPTE)) ++ continue; ++ hpte_need_flush(mm, start, ptep, pte, 0); ++ } ++ arch_leave_lazy_mmu_mode(); ++ local_irq_restore(flags); ++} ++ ++#endif /* CONFIG_HOTPLUG */ +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig linux-2.6.22-591/arch/powerpc/platforms/Kconfig +--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -16,13 +16,6 @@ + bool "Embedded 6xx/7xx/7xxx-based board" + depends on PPC32 && (BROKEN||BROKEN_ON_SMP) + +-config APUS +- bool "Amiga-APUS" +- depends on PPC32 && BROKEN +- help +- Select APUS if configuring for a PowerUP Amiga. +- More information is available at: +- . + endchoice + + source "arch/powerpc/platforms/pseries/Kconfig" +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype +--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/platforms/Kconfig.cputype 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,252 @@ ++config PPC64 ++ bool "64-bit kernel" ++ default n ++ help ++ This option selects whether a 32-bit or a 64-bit kernel ++ will be built. ++ ++menu "Processor support" ++choice ++ prompt "Processor Type" ++ depends on PPC32 ++ default 6xx ++ ++config CLASSIC32 ++ bool "52xx/6xx/7xx/74xx" ++ select PPC_FPU ++ select 6xx ++ help ++ There are four families of PowerPC chips supported. The more common ++ types (601, 603, 604, 740, 750, 7400), the Motorola embedded ++ versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC ++ embedded versions (403 and 405) and the high end 64 bit Power ++ processors (POWER 3, POWER4, and IBM PPC970 also known as G5). ++ ++ This option is the catch-all for 6xx types, including some of the ++ embedded versions. Unless there is see an option for the specific ++ chip family you are using, you want this option. ++ ++ You do not want this if you are building a kernel for a 64 bit ++ IBM RS/6000 or an Apple G5, choose 6xx. ++ ++ If unsure, select this option ++ ++ Note that the kernel runs in 32-bit mode even on 64-bit chips. ++ ++config PPC_82xx ++ bool "Freescale 82xx" ++ select 6xx ++ select PPC_FPU ++ ++config PPC_83xx ++ bool "Freescale 83xx" ++ select 6xx ++ select FSL_SOC ++ select 83xx ++ select PPC_FPU ++ select WANT_DEVICE_TREE ++ ++config PPC_85xx ++ bool "Freescale 85xx" ++ select E500 ++ select FSL_SOC ++ select 85xx ++ select WANT_DEVICE_TREE ++ ++config PPC_86xx ++ bool "Freescale 86xx" ++ select 6xx ++ select FSL_SOC ++ select FSL_PCIE ++ select PPC_FPU ++ select ALTIVEC ++ help ++ The Freescale E600 SoCs have 74xx cores. ++ ++config PPC_8xx ++ bool "Freescale 8xx" ++ select FSL_SOC ++ select 8xx ++ ++config 40x ++ bool "AMCC 40x" ++ select PPC_DCR_NATIVE ++ ++config 44x ++ bool "AMCC 44x" ++ select PPC_DCR_NATIVE ++ select WANT_DEVICE_TREE ++ ++config E200 ++ bool "Freescale e200" ++ ++endchoice ++ ++config POWER4_ONLY ++ bool "Optimize for POWER4" ++ depends on PPC64 ++ default n ++ ---help--- ++ Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. ++ The resulting binary will not work on POWER3 or RS64 processors ++ when compiled with binutils 2.15 or later. 
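Annotation: closing out the __flush_hash_table_range() addition a few hunks up, a hypothetical PCI-hotplug-style caller, only to show the intended calling convention (per the kerneldoc, the mm is normally init_mm and the range covers the bridge's IO window; the function name below is invented):

#ifdef CONFIG_HOTPLUG
/* Hypothetical caller: drop the bolted hash entries for a PHB's IO
 * window before the bridge goes away, leaving the Linux PTEs alone. */
static void example_flush_phb_io_window(unsigned long io_start,
					unsigned long io_end)
{
	__flush_hash_table_range(&init_mm, io_start, io_end);
}
#endif /* CONFIG_HOTPLUG */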
++ ++config POWER3 ++ bool ++ depends on PPC64 ++ default y if !POWER4_ONLY ++ ++config POWER4 ++ depends on PPC64 ++ def_bool y ++ ++config 6xx ++ bool ++ ++# this is temp to handle compat with arch=ppc ++config 8xx ++ bool ++ ++# this is temp to handle compat with arch=ppc ++config 83xx ++ bool ++ ++# this is temp to handle compat with arch=ppc ++config 85xx ++ bool ++ ++config E500 ++ bool ++ ++config PPC_FPU ++ bool ++ default y if PPC64 ++ ++config 4xx ++ bool ++ depends on 40x || 44x ++ default y ++ ++config BOOKE ++ bool ++ depends on E200 || E500 || 44x ++ default y ++ ++config FSL_BOOKE ++ bool ++ depends on E200 || E500 ++ default y ++ ++config PTE_64BIT ++ bool ++ depends on 44x || E500 ++ default y if 44x ++ default y if E500 && PHYS_64BIT ++ ++config PHYS_64BIT ++ bool 'Large physical address support' if E500 ++ depends on 44x || E500 ++ select RESOURCES_64BIT ++ default y if 44x ++ ---help--- ++ This option enables kernel support for larger than 32-bit physical ++ addresses. This features is not be available on all e500 cores. ++ ++ If in doubt, say N here. ++ ++config ALTIVEC ++ bool "AltiVec Support" ++ depends on CLASSIC32 || POWER4 ++ ---help--- ++ This option enables kernel support for the Altivec extensions to the ++ PowerPC processor. The kernel currently supports saving and restoring ++ altivec registers, and turning on the 'altivec enable' bit so user ++ processes can execute altivec instructions. ++ ++ This option is only usefully if you have a processor that supports ++ altivec (G4, otherwise known as 74xx series), but does not have ++ any affect on a non-altivec cpu (it does, however add code to the ++ kernel). ++ ++ If in doubt, say Y here. ++ ++config SPE ++ bool "SPE Support" ++ depends on E200 || E500 ++ default y ++ ---help--- ++ This option enables kernel support for the Signal Processing ++ Extensions (SPE) to the PowerPC processor. The kernel currently ++ supports saving and restoring SPE registers, and turning on the ++ 'spe enable' bit so user processes can execute SPE instructions. ++ ++ This option is only useful if you have a processor that supports ++ SPE (e500, otherwise known as 85xx series), but does not have any ++ effect on a non-spe cpu (it does, however add code to the kernel). ++ ++ If in doubt, say Y here. ++ ++config PPC_STD_MMU ++ bool ++ depends on 6xx || POWER3 || POWER4 || PPC64 ++ default y ++ ++config PPC_STD_MMU_32 ++ def_bool y ++ depends on PPC_STD_MMU && PPC32 ++ ++config PPC_MM_SLICES ++ bool ++ default y if HUGETLB_PAGE ++ default n ++ ++config VIRT_CPU_ACCOUNTING ++ bool "Deterministic task and CPU time accounting" ++ depends on PPC64 ++ default y ++ help ++ Select this option to enable more accurate task and CPU time ++ accounting. This is done by reading a CPU counter on each ++ kernel entry and exit and on transitions within the kernel ++ between system, softirq and hardirq state, so there is a ++ small performance impact. This also enables accounting of ++ stolen time on logically-partitioned systems running on ++ IBM POWER5-based machines. ++ ++ If in doubt, say Y here. ++ ++config SMP ++ depends on PPC_STD_MMU ++ bool "Symmetric multi-processing support" ++ ---help--- ++ This enables support for systems with more than one CPU. If you have ++ a system with only one CPU, say N. If you have a system with more ++ than one CPU, say Y. Note that the kernel does not currently ++ support SMP machines with 603/603e/603ev or PPC750 ("G3") processors ++ since they have inadequate hardware support for multiprocessor ++ operation. 
++ ++ If you say N here, the kernel will run on single and multiprocessor ++ machines, but will use only one CPU of a multiprocessor machine. If ++ you say Y here, the kernel will run on single-processor machines. ++ On a single-processor machine, the kernel will run faster if you say ++ N here. ++ ++ If you don't know what to do here, say N. ++ ++config NR_CPUS ++ int "Maximum number of CPUs (2-128)" ++ range 2 128 ++ depends on SMP ++ default "32" if PPC64 ++ default "4" ++ ++config NOT_COHERENT_CACHE ++ bool ++ depends on 4xx || 8xx || E200 ++ default y ++ ++config CONFIG_CHECK_CACHE_COHERENCY ++ bool ++ ++endmenu +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig +--- linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/apus/Kconfig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,130 +0,0 @@ +- +-config AMIGA +- bool +- depends on APUS +- default y +- help +- This option enables support for the Amiga series of computers. +- +-config ZORRO +- bool +- depends on APUS +- default y +- help +- This enables support for the Zorro bus in the Amiga. If you have +- expansion cards in your Amiga that conform to the Amiga +- AutoConfig(tm) specification, say Y, otherwise N. Note that even +- expansion cards that do not fit in the Zorro slots but fit in e.g. +- the CPU slot may fall in this category, so you have to say Y to let +- Linux use these. +- +-config ABSTRACT_CONSOLE +- bool +- depends on APUS +- default y +- +-config APUS_FAST_EXCEPT +- bool +- depends on APUS +- default y +- +-config AMIGA_PCMCIA +- bool "Amiga 1200/600 PCMCIA support" +- depends on APUS && EXPERIMENTAL +- help +- Include support in the kernel for pcmcia on Amiga 1200 and Amiga +- 600. If you intend to use pcmcia cards say Y; otherwise say N. +- +-config AMIGA_BUILTIN_SERIAL +- tristate "Amiga builtin serial support" +- depends on APUS +- help +- If you want to use your Amiga's built-in serial port in Linux, +- answer Y. +- +- To compile this driver as a module, choose M here. +- +-config GVPIOEXT +- tristate "GVP IO-Extender support" +- depends on APUS +- help +- If you want to use a GVP IO-Extender serial card in Linux, say Y. +- Otherwise, say N. +- +-config GVPIOEXT_LP +- tristate "GVP IO-Extender parallel printer support" +- depends on GVPIOEXT +- help +- Say Y to enable driving a printer from the parallel port on your +- GVP IO-Extender card, N otherwise. +- +-config GVPIOEXT_PLIP +- tristate "GVP IO-Extender PLIP support" +- depends on GVPIOEXT +- help +- Say Y to enable doing IP over the parallel port on your GVP +- IO-Extender card, N otherwise. +- +-config MULTIFACE_III_TTY +- tristate "Multiface Card III serial support" +- depends on APUS +- help +- If you want to use a Multiface III card's serial port in Linux, +- answer Y. +- +- To compile this driver as a module, choose M here. +- +-config A2232 +- tristate "Commodore A2232 serial support (EXPERIMENTAL)" +- depends on EXPERIMENTAL && APUS +- ---help--- +- This option supports the 2232 7-port serial card shipped with the +- Amiga 2000 and other Zorro-bus machines, dating from 1989. At +- a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip +- each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The +- ports were connected with 8 pin DIN connectors on the card bracket, +- for which 8 pin to DB25 adapters were supplied. 
The card also had +- jumpers internally to toggle various pinning configurations. +- +- This driver can be built as a module; but then "generic_serial" +- will also be built as a module. This has to be loaded before +- "ser_a2232". If you want to do this, answer M here. +- +-config WHIPPET_SERIAL +- tristate "Hisoft Whippet PCMCIA serial support" +- depends on AMIGA_PCMCIA +- help +- HiSoft has a web page at , but there +- is no listing for the Whippet in their Amiga section. +- +-config APNE +- tristate "PCMCIA NE2000 support" +- depends on AMIGA_PCMCIA +- help +- If you have a PCMCIA NE2000 compatible adapter, say Y. Otherwise, +- say N. +- +- To compile this driver as a module, choose M here: the +- module will be called apne. +- +-config SERIAL_CONSOLE +- bool "Support for serial port console" +- depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y) +- +-config HEARTBEAT +- bool "Use power LED as a heartbeat" +- depends on APUS +- help +- Use the power-on LED on your machine as a load meter. The exact +- behavior is platform-dependent, but normally the flash frequency is +- a hyperbolic function of the 5-minute load average. +- +-config PROC_HARDWARE +- bool "/proc/hardware support" +- depends on APUS +- +-source "drivers/zorro/Kconfig" +- +-config PCI_PERMEDIA +- bool "PCI for Permedia2" +- depends on !4xx && !8xx && APUS +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c +--- linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/cell/io-workarounds.c 2007-12-21 15:36:11.000000000 -0500 +@@ -102,7 +102,7 @@ + vaddr = (unsigned long)PCI_FIX_ADDR(addr); + + /* Check if it's in allowed range for PIO */ +- if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE) ++ if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END) + return; + + /* Try to find a PTE. 
If not, clear the paddr, we'll do +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c +--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/file.c 2007-12-21 15:36:11.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -39,6 +40,7 @@ + + #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + ++ + static int + spufs_mem_open(struct inode *inode, struct file *file) + { +@@ -1797,6 +1799,29 @@ + return 0; + } + ++static int spufs_caps_show(struct seq_file *s, void *private) ++{ ++ struct spu_context *ctx = s->private; ++ ++ if (!(ctx->flags & SPU_CREATE_NOSCHED)) ++ seq_puts(s, "sched\n"); ++ if (!(ctx->flags & SPU_CREATE_ISOLATE)) ++ seq_puts(s, "step\n"); ++ return 0; ++} ++ ++static int spufs_caps_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); ++} ++ ++static const struct file_operations spufs_caps_fops = { ++ .open = spufs_caps_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ + static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) + { +@@ -2015,6 +2040,7 @@ + }; + + struct tree_descr spufs_dir_contents[] = { ++ { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, }, + { "regs", &spufs_regs_fops, 0666, }, + { "mbox", &spufs_mbox_fops, 0444, }, +@@ -2050,6 +2076,7 @@ + }; + + struct tree_descr spufs_dir_nosched_contents[] = { ++ { "capabilities", &spufs_caps_fops, 0444, }, + { "mem", &spufs_mem_fops, 0666, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c +--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/cell/spufs/run.c 2007-12-21 15:36:11.000000000 -0500 +@@ -142,8 +142,12 @@ + runcntl = SPU_RUNCNTL_RUNNABLE; + ctx->ops->runcntl_write(ctx, runcntl); + } else { ++ unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; + spu_start_tick(ctx); + ctx->ops->npc_write(ctx, *npc); ++ if (test_thread_flag(TIF_SINGLESTEP)) ++ mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; ++ out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + } + +@@ -334,7 +338,8 @@ + ret = spu_process_events(ctx); + + } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | +- SPU_STATUS_STOPPED_BY_HALT))); ++ SPU_STATUS_STOPPED_BY_HALT | ++ SPU_STATUS_SINGLE_STEP))); + + ctx->ops->master_stop(ctx); + ret = spu_run_fini(ctx, npc, &status); +@@ -344,10 +349,15 @@ + if ((ret == 0) || + ((ret == -ERESTARTSYS) && + ((status & SPU_STATUS_STOPPED_BY_HALT) || ++ (status & SPU_STATUS_SINGLE_STEP) || + ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) + ret = status; + ++ /* Note: we don't need to force_sig SIGTRAP on single-step ++ * since we have TIF_SINGLESTEP set, thus the kernel will do ++ * it upon return from the syscall anyawy ++ */ + if ((status & SPU_STATUS_STOPPED_BY_STOP) + && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { + force_sig(SIGTRAP, current); +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h +--- 
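Annotation: the spufs changes above widen the run loop's exit condition with SPU_STATUS_SINGLE_STEP and select the privcntl mode from TIF_SINGLESTEP. The widened while-condition, condensed into one predicate (status bit names as used in the hunk):

/* Nonzero once the SPU context should leave the run loop; mirrors the
 * do/while condition in the spufs_run_spu() hunk above. */
static int spu_run_done(int ret, unsigned int status)
{
	return ret || (status & (SPU_STATUS_STOPPED_BY_STOP |
				 SPU_STATUS_STOPPED_BY_HALT |
				 SPU_STATUS_SINGLE_STEP));
}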
linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/call_hpt.h 2007-12-21 15:36:11.000000000 -0500 +@@ -76,24 +76,25 @@ + return compressedStatus; + } + +-static inline u64 HvCallHpt_findValid(hpte_t *hpte, u64 vpn) ++static inline u64 HvCallHpt_findValid(struct hash_pte *hpte, u64 vpn) + { + return HvCall3Ret16(HvCallHptFindValid, hpte, vpn, 0, 0); + } + +-static inline u64 HvCallHpt_findNextValid(hpte_t *hpte, u32 hpteIndex, ++static inline u64 HvCallHpt_findNextValid(struct hash_pte *hpte, u32 hpteIndex, + u8 bitson, u8 bitsoff) + { + return HvCall3Ret16(HvCallHptFindNextValid, hpte, hpteIndex, + bitson, bitsoff); + } + +-static inline void HvCallHpt_get(hpte_t *hpte, u32 hpteIndex) ++static inline void HvCallHpt_get(struct hash_pte *hpte, u32 hpteIndex) + { + HvCall2Ret16(HvCallHptGet, hpte, hpteIndex, 0); + } + +-static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, hpte_t *hpte) ++static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, ++ struct hash_pte *hpte) + { + HvCall4(HvCallHptAddValidate, hpteIndex, hBit, hpte->v, hpte->r); + } +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c +--- linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/htab.c 2007-12-21 15:36:11.000000000 -0500 +@@ -44,7 +44,7 @@ + unsigned long vflags, int psize) + { + long slot; +- hpte_t lhpte; ++ struct hash_pte lhpte; + int secondary = 0; + + BUG_ON(psize != MMU_PAGE_4K); +@@ -99,7 +99,7 @@ + + static unsigned long iSeries_hpte_getword0(unsigned long slot) + { +- hpte_t hpte; ++ struct hash_pte hpte; + + HvCallHpt_get(&hpte, slot); + return hpte.v; +@@ -144,7 +144,7 @@ + static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp, + unsigned long va, int psize, int local) + { +- hpte_t hpte; ++ struct hash_pte hpte; + unsigned long want_v; + + iSeries_hlock(slot); +@@ -176,7 +176,7 @@ + */ + static long iSeries_hpte_find(unsigned long vpn) + { +- hpte_t hpte; ++ struct hash_pte hpte; + long slot; + + /* +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c +--- linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/iseries/pci.c 2007-12-21 15:36:11.000000000 -0500 +@@ -742,6 +742,11 @@ + /* Install IO hooks */ + ppc_pci_io = iseries_pci_io; + ++ /* iSeries has no IO space in the common sense, it needs to set ++ * the IO base to 0 ++ */ ++ pci_io_base = 0; ++ + if (root == NULL) { + printk(KERN_CRIT "iSeries_pcibios_init: can't find root " + "of device tree\n"); +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c +--- linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/maple/pci.c 2007-12-21 15:36:11.000000000 -0500 +@@ -519,23 +519,6 @@ + DBG(" <- maple_pci_irq_fixup\n"); + } + +-static void __init maple_fixup_phb_resources(void) +-{ +- struct pci_controller *hose, *tmp; +- +- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +- unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; +- +- hose->io_resource.start += offset; +- hose->io_resource.end += offset; +- +- printk(KERN_INFO "PCI Host %d, io start: 
%llx; io end: %llx\n", +- hose->global_number, +- (unsigned long long)hose->io_resource.start, +- (unsigned long long)hose->io_resource.end); +- } +-} +- + void __init maple_pci_init(void) + { + struct device_node *np, *root; +@@ -573,24 +556,6 @@ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + +- /* +- * We need to call pci_setup_phb_io for the HT bridge first +- * so it gets the I/O port numbers starting at 0, and we +- * need to call it for the AGP bridge after that so it gets +- * small positive I/O port numbers. +- */ +- if (u3_ht) +- pci_setup_phb_io(u3_ht, 1); +- if (u3_agp) +- pci_setup_phb_io(u3_agp, 0); +- if (u4_pcie) +- pci_setup_phb_io(u4_pcie, 0); +- +- /* Fixup the IO resources on our host bridges as the common code +- * does it only for childs of the host bridges +- */ +- maple_fixup_phb_resources(); +- + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c +--- linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pasemi/pci.c 2007-12-21 15:36:11.000000000 -0500 +@@ -150,29 +150,11 @@ + printk(KERN_INFO "Found PA-PXP PCI host bridge.\n"); + + /* Interpret the "ranges" property */ +- /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, 1); +- pci_setup_phb_io(hose, 1); + + return 0; + } + +- +-static void __init pas_fixup_phb_resources(void) +-{ +- struct pci_controller *hose, *tmp; +- +- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +- unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; +- hose->io_resource.start += offset; +- hose->io_resource.end += offset; +- printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", +- hose->global_number, +- hose->io_resource.start, hose->io_resource.end); +- } +-} +- +- + void __init pas_pci_init(void) + { + struct device_node *np, *root; +@@ -190,8 +172,6 @@ + + of_node_put(root); + +- pas_fixup_phb_resources(); +- + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c +--- linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/powermac/pci.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1006,19 +1006,6 @@ + #endif /* CONFIG_PPC32 */ + } + +-#ifdef CONFIG_PPC64 +-static void __init pmac_fixup_phb_resources(void) +-{ +- struct pci_controller *hose, *tmp; +- +- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { +- printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", +- hose->global_number, +- hose->io_resource.start, hose->io_resource.end); +- } +-} +-#endif +- + void __init pmac_pci_init(void) + { + struct device_node *np, *root; +@@ -1053,25 +1040,6 @@ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + +- /* +- * We need to call pci_setup_phb_io for the HT bridge first +- * so it gets the I/O port numbers starting at 0, and we +- * need to call it for the AGP bridge after that so it gets +- * small positive I/O port numbers. 
+- */ +- if (u3_ht) +- pci_setup_phb_io(u3_ht, 1); +- if (u3_agp) +- pci_setup_phb_io(u3_agp, 0); +- if (u4_pcie) +- pci_setup_phb_io(u4_pcie, 0); +- +- /* +- * On ppc64, fixup the IO resources on our host bridges as +- * the common code does it only for children of the host bridges +- */ +- pmac_fixup_phb_resources(); +- + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c +--- linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/powermac/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -98,8 +98,6 @@ + int sccdbg; + #endif + +-extern void zs_kgdb_hook(int tty_num); +- + sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; + EXPORT_SYMBOL(sys_ctrler); + +@@ -330,10 +328,6 @@ + l2cr_init(); + #endif /* CONFIG_PPC32 */ + +-#ifdef CONFIG_KGDB +- zs_kgdb_hook(0); +-#endif +- + find_via_cuda(); + find_via_pmu(); + smu_init(); +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c +--- linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/ps3/htab.c 2007-12-21 15:36:11.000000000 -0500 +@@ -34,7 +34,7 @@ + #define DBG(fmt...) do{if(0)printk(fmt);}while(0) + #endif + +-static hpte_t *htab; ++static struct hash_pte *htab; + static unsigned long htab_addr; + static unsigned char *bolttab; + static unsigned char *inusetab; +@@ -44,8 +44,8 @@ + #define debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g) \ + _debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__) + static void _debug_dump_hpte(unsigned long pa, unsigned long va, +- unsigned long group, unsigned long bitmap, hpte_t lhpte, int psize, +- unsigned long slot, const char* func, int line) ++ unsigned long group, unsigned long bitmap, struct hash_pte lhpte, ++ int psize, unsigned long slot, const char* func, int line) + { + DBG("%s:%d: pa = %lxh\n", func, line, pa); + DBG("%s:%d: lpar = %lxh\n", func, line, +@@ -63,7 +63,7 @@ + unsigned long pa, unsigned long rflags, unsigned long vflags, int psize) + { + unsigned long slot; +- hpte_t lhpte; ++ struct hash_pte lhpte; + int secondary = 0; + unsigned long result; + unsigned long bitmap; +@@ -255,7 +255,7 @@ + + ppc64_pft_size = __ilog2(htab_size); + +- bitmap_size = htab_size / sizeof(hpte_t) / 8; ++ bitmap_size = htab_size / sizeof(struct hash_pte) / 8; + + bolttab = __va(lmb_alloc(bitmap_size, 1)); + inusetab = __va(lmb_alloc(bitmap_size, 1)); +@@ -273,7 +273,7 @@ + + result = lv1_map_htab(0, &htab_addr); + +- htab = (hpte_t *)__ioremap(htab_addr, htab_size, ++ htab = (struct hash_pte *)__ioremap(htab_addr, htab_size, + pgprot_val(PAGE_READONLY_X)); + + DBG("%s:%d: lpar %016lxh, virt %016lxh\n", __func__, __LINE__, +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -8,7 +8,7 @@ + obj-$(CONFIG_SMP) += smp.o + obj-$(CONFIG_XICS) += xics.o + obj-$(CONFIG_SCANLOG) += scanlog.o +-obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o ++obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o + obj-$(CONFIG_KEXEC) += kexec.o + 
obj-$(CONFIG_PCI) += pci.o pci_dlpar.o + obj-$(CONFIG_PCI_MSI) += msi.o +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1,6 +1,8 @@ + /* + * eeh.c +- * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation ++ * Copyright IBM Corporation 2001, 2005, 2006 ++ * Copyright Dave Engebretsen & Todd Inglett 2001 ++ * Copyright Linas Vepstas 2005, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -15,6 +17,8 @@ + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Please address comments and feedback to Linas Vepstas + */ + + #include +@@ -117,7 +121,6 @@ + static unsigned long ignored_check; + static unsigned long total_mmio_ffs; + static unsigned long false_positives; +-static unsigned long ignored_failures; + static unsigned long slot_resets; + + #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) +@@ -505,6 +508,7 @@ + printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", + ret, dn->full_name); + false_positives++; ++ pdn->eeh_false_positives ++; + rc = 0; + goto dn_unlock; + } +@@ -513,6 +517,7 @@ + * they are empty when they don't have children. */ + if ((rets[0] == 5) && (dn->child == NULL)) { + false_positives++; ++ pdn->eeh_false_positives ++; + rc = 0; + goto dn_unlock; + } +@@ -522,6 +527,7 @@ + printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", + ret, dn->full_name); + false_positives++; ++ pdn->eeh_false_positives ++; + rc = 0; + goto dn_unlock; + } +@@ -529,6 +535,7 @@ + /* If not the kind of error we know about, punt. 
*/ + if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { + false_positives++; ++ pdn->eeh_false_positives ++; + rc = 0; + goto dn_unlock; + } +@@ -921,6 +928,7 @@ + pdn->eeh_mode = 0; + pdn->eeh_check_count = 0; + pdn->eeh_freeze_count = 0; ++ pdn->eeh_false_positives = 0; + + if (status && strcmp(status, "ok") != 0) + return NULL; /* ignore devices with bad status */ +@@ -1139,7 +1147,8 @@ + pdn = PCI_DN(dn); + pdn->pcidev = dev; + +- pci_addr_cache_insert_device (dev); ++ pci_addr_cache_insert_device(dev); ++ eeh_sysfs_add_device(dev); + } + + void eeh_add_device_tree_late(struct pci_bus *bus) +@@ -1178,6 +1187,7 @@ + printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); + #endif + pci_addr_cache_remove_device(dev); ++ eeh_sysfs_remove_device(dev); + + dn = pci_device_to_OF_node(dev); + if (PCI_DN(dn)->pcidev) { +@@ -1214,11 +1224,10 @@ + "check not wanted=%ld\n" + "eeh_total_mmio_ffs=%ld\n" + "eeh_false_positives=%ld\n" +- "eeh_ignored_failures=%ld\n" + "eeh_slot_resets=%ld\n", + no_device, no_dn, no_cfg_addr, + ignored_check, total_mmio_ffs, +- false_positives, ignored_failures, ++ false_positives, + slot_resets); + } + +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_cache.c 2007-12-21 15:36:11.000000000 -0500 +@@ -2,7 +2,8 @@ + * eeh_cache.c + * PCI address cache; allows the lookup of PCI devices based on I/O address + * +- * Copyright (C) 2004 Linas Vepstas IBM Corporation ++ * Copyright IBM Corporation 2004 ++ * Copyright Linas Vepstas 2004 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -295,6 +296,8 @@ + continue; + pci_dev_get (dev); /* matching put is in eeh_remove_device() */ + PCI_DN(dn)->pcidev = dev; ++ ++ eeh_sysfs_add_device(dev); + } + + #ifdef DEBUG +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_driver.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1,6 +1,7 @@ + /* + * PCI Error Recovery Driver for RPA-compliant PPC64 platform. +- * Copyright (C) 2004, 2005 Linas Vepstas ++ * Copyright IBM Corp. 2004 2005 ++ * Copyright Linas Vepstas 2004, 2005 + * + * All rights reserved. + * +@@ -19,8 +20,7 @@ + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * +- * Send feedback to +- * ++ * Send comments and feedback to Linas Vepstas + */ + #include + #include +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/eeh_sysfs.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,87 @@ ++/* ++ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform. ++ * Copyright IBM Corporation 2007 ++ * Copyright Linas Vepstas 2007 ++ * ++ * All rights reserved. 
++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or ++ * NON INFRINGEMENT. See the GNU General Public License for more ++ * details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * Send comments and feedback to Linas Vepstas ++ */ ++#include ++#include ++#include ++#include ++ ++/** ++ * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic ++ * @_name: name of file in sysfs directory ++ * @_memb: name of member in struct pci_dn to access ++ * @_format: printf format for display ++ * ++ * All of the attributes look very similar, so just ++ * auto-gen a cut-n-paste routine to display them. ++ */ ++#define EEH_SHOW_ATTR(_name,_memb,_format) \ ++static ssize_t eeh_show_##_name(struct device *dev, \ ++ struct device_attribute *attr, char *buf) \ ++{ \ ++ struct pci_dev *pdev = to_pci_dev(dev); \ ++ struct device_node *dn = pci_device_to_OF_node(pdev); \ ++ struct pci_dn *pdn; \ ++ \ ++ if (!dn || PCI_DN(dn) == NULL) \ ++ return 0; \ ++ \ ++ pdn = PCI_DN(dn); \ ++ return sprintf(buf, _format "\n", pdn->_memb); \ ++} \ ++static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); ++ ++ ++EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x"); ++EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x"); ++EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x"); ++EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d"); ++EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d"); ++EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d"); ++ ++void eeh_sysfs_add_device(struct pci_dev *pdev) ++{ ++ int rc=0; ++ ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode); ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr); ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count); ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives); ++ rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count); ++ ++ if (rc) ++ printk(KERN_WARNING "EEH: Unable to create sysfs entries\n"); ++} ++ ++void eeh_sysfs_remove_device(struct pci_dev *pdev) ++{ ++ device_remove_file(&pdev->dev, &dev_attr_eeh_mode); ++ device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr); ++ device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr); ++ device_remove_file(&pdev->dev, &dev_attr_eeh_check_count); ++ device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives); ++ device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count); ++} ++ +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pci_dlpar.c 2007-12-21 15:36:11.000000000 -0500 +@@ -110,8 +110,6 @@ + } + } + } +- +- eeh_add_device_tree_late(bus); + } + EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices); + +@@ -139,6 +137,8 @@ + + /* Make the discovered 
devices available */ + pci_bus_add_devices(child_bus); ++ ++ eeh_add_device_tree_late(child_bus); + return 0; + } + +@@ -171,6 +171,7 @@ + if (!list_empty(&bus->devices)) { + pcibios_fixup_new_pci_devices(bus, 0); + pci_bus_add_devices(bus); ++ eeh_add_device_tree_late(bus); + } + } else if (mode == PCI_PROBE_NORMAL) { + /* use legacy probe */ +@@ -179,6 +180,7 @@ + if (num) { + pcibios_fixup_new_pci_devices(bus, 1); + pci_bus_add_devices(bus); ++ eeh_add_device_tree_late(bus); + } + + list_for_each_entry(dev, &bus->devices, bus_list) +@@ -200,8 +202,6 @@ + rtas_setup_phb(phb); + pci_process_bridge_OF_ranges(phb, dn, 0); + +- pci_setup_phb_io_dynamic(phb, primary); +- + pci_devs_phb_init_dynamic(phb); + + if (dn->child) +@@ -210,6 +210,7 @@ + scan_phb(phb); + pcibios_fixup_new_pci_devices(phb->bus, 0); + pci_bus_add_devices(phb->bus); ++ eeh_add_device_tree_late(phb->bus); + + return phb; + } +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/pseries.h 2007-12-21 15:36:11.000000000 -0500 +@@ -33,6 +33,8 @@ + static inline void setup_kexec_cpu_down_mpic(void) { } + #endif + ++extern void pSeries_final_fixup(void); ++ + /* Poweron flag used for enabling auto ups restart */ + extern unsigned long rtas_poweron_auto; + +diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c +--- linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/platforms/pseries/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -399,6 +399,7 @@ + * a good time to find other work to dispatch. 
+ */ + get_lppaca()->idle = 1; ++ get_lppaca()->donate_dedicated_cpu = 1; + + /* + * We come in with interrupts disabled, and need_resched() +@@ -431,6 +432,7 @@ + + out: + HMT_medium(); ++ get_lppaca()->donate_dedicated_cpu = 0; + get_lppaca()->idle = 0; + } + +diff -Nurb linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c +--- linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/powerpc/sysdev/tsi108_dev.c 2007-12-21 15:36:11.000000000 -0500 +@@ -72,12 +72,11 @@ + int ret; + + for (np = NULL, i = 0; +- (np = of_find_compatible_node(np, "network", "tsi-ethernet")) != NULL; ++ (np = of_find_compatible_node(np, "network", "tsi108-ethernet")) != NULL; + i++) { + struct resource r[2]; +- struct device_node *phy; ++ struct device_node *phy, *mdio; + hw_info tsi_eth_data; +- const unsigned int *id; + const unsigned int *phy_id; + const void *mac_addr; + const phandle *ph; +@@ -111,6 +110,13 @@ + if (mac_addr) + memcpy(tsi_eth_data.mac_addr, mac_addr, 6); + ++ ph = of_get_property(np, "mdio-handle", NULL); ++ mdio = of_find_node_by_phandle(*ph); ++ ret = of_address_to_resource(mdio, 0, &res); ++ of_node_put(mdio); ++ if (ret) ++ goto unreg; ++ + ph = of_get_property(np, "phy-handle", NULL); + phy = of_find_node_by_phandle(*ph); + +@@ -119,20 +125,25 @@ + goto unreg; + } + +- id = of_get_property(phy, "reg", NULL); +- phy_id = of_get_property(phy, "phy-id", NULL); +- ret = of_address_to_resource(phy, 0, &res); +- if (ret) { +- of_node_put(phy); +- goto unreg; +- } ++ phy_id = of_get_property(phy, "reg", NULL); ++ + tsi_eth_data.regs = r[0].start; + tsi_eth_data.phyregs = res.start; + tsi_eth_data.phy = *phy_id; + tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0); +- if (of_device_is_compatible(phy, "bcm54xx")) ++ ++ /* Some boards with the TSI108 bridge (e.g. Holly) ++ * have a miswiring of the ethernet PHYs which ++ * requires a workaround. The special ++ * "txc-rxc-delay-disable" property enables this ++ * workaround. 
FIXME: Need to port the tsi108_eth ++ * driver itself to phylib and use a non-misleading ++ * name for the workaround flag - it's not actually to ++ * do with the model of PHY in use */ ++ if (of_get_property(phy, "txc-rxc-delay-disable", NULL)) + tsi_eth_data.phy_type = TSI108_PHY_BCM54XX; + of_node_put(phy); ++ + ret = + platform_device_add_data(tsi_eth_dev, &tsi_eth_data, + sizeof(hw_info)); +diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/enet.c linux-2.6.22-591/arch/ppc/8260_io/enet.c +--- linux-2.6.22-570/arch/ppc/8260_io/enet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/8260_io/enet.c 2007-12-21 15:36:11.000000000 -0500 +@@ -477,9 +477,9 @@ + } + else { + skb_put(skb,pkt_len-4); /* Make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)__va(bdp->cbd_bufaddr), +- pkt_len-4, 0); ++ pkt_len-4); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + } +diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c +--- linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/8260_io/fcc_enet.c 2007-12-21 15:36:11.000000000 -0500 +@@ -734,9 +734,9 @@ + } + else { + skb_put(skb,pkt_len); /* Make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)__va(bdp->cbd_bufaddr), +- pkt_len, 0); ++ pkt_len); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + } +diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/enet.c linux-2.6.22-591/arch/ppc/8xx_io/enet.c +--- linux-2.6.22-570/arch/ppc/8xx_io/enet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/8xx_io/enet.c 2007-12-21 15:36:11.000000000 -0500 +@@ -506,9 +506,9 @@ + } + else { + skb_put(skb,pkt_len-4); /* Make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + cep->rx_vaddr[bdp - cep->rx_bd_base], +- pkt_len-4, 0); ++ pkt_len-4); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + } +diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/fec.c linux-2.6.22-591/arch/ppc/8xx_io/fec.c +--- linux-2.6.22-570/arch/ppc/8xx_io/fec.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/8xx_io/fec.c 2007-12-21 15:36:11.000000000 -0500 +@@ -725,7 +725,7 @@ + fep->stats.rx_dropped++; + } else { + skb_put(skb,pkt_len-4); /* Make room */ +- eth_copy_and_sum(skb, data, pkt_len-4, 0); ++ skb_copy_to_linear_data(skb, data, pkt_len-4); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + } +diff -Nurb linux-2.6.22-570/arch/ppc/Kconfig.debug linux-2.6.22-591/arch/ppc/Kconfig.debug +--- linux-2.6.22-570/arch/ppc/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -2,42 +2,6 @@ + + source "lib/Kconfig.debug" + +-config KGDB +- bool "Include kgdb kernel debugger" +- depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx) +- select DEBUG_INFO +- help +- Include in-kernel hooks for kgdb, the Linux kernel source level +- debugger. See for more information. +- Unless you are intending to debug the kernel, say N here. 
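The compile-time port choice being removed here is superseded by runtime registration: board code now hands each early-mapped UART to the generic 8250 KGDB backend, as the bamboo.c hunks later in this patch do. A minimal sketch of the replacement idiom, assuming kgdb8250_add_port() keeps the two-argument form this patch uses and that <linux/kgdb.h> carries its prototype (BOARD_UART0_ADDR and BASE_BAUD stand in for board-specific values):

	#include <linux/init.h>		/* __init */
	#include <linux/kernel.h>	/* printk() */
	#include <linux/string.h>	/* memset() */
	#include <linux/serial_core.h>	/* struct uart_port, early_serial_setup() */
	#include <linux/kgdb.h>		/* assumed home of kgdb8250_add_port() */
	#include <asm/io.h>		/* ioremap64() on 44x-class boards */

	static void __init board_early_serial_map(void)
	{
		struct uart_port port;

		memset(&port, 0, sizeof(port));
		port.membase = ioremap64(BOARD_UART0_ADDR, 8);	/* hypothetical register base */
		port.irq = 0;
		port.uartclk = BASE_BAUD * 16;
		port.flags = UPF_BOOT_AUTOCONF;
		port.iotype = UPIO_MEM;

		if (early_serial_setup(&port) != 0)
			printk("Early serial init of port 0 failed\n");

	#ifdef CONFIG_KGDB_8250
		kgdb8250_add_port(0, &port);	/* takes over from the removed KGDB_TTYSx choice */
	#endif
	}

The index passed to kgdb8250_add_port() names the line the stub listens on, so the decision moves from Kconfig to per-board boot code.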
+- +-choice +- prompt "Serial Port" +- depends on KGDB +- default KGDB_TTYS1 +- +-config KGDB_TTYS0 +- bool "ttyS0" +- +-config KGDB_TTYS1 +- bool "ttyS1" +- +-config KGDB_TTYS2 +- bool "ttyS2" +- +-config KGDB_TTYS3 +- bool "ttyS3" +- +-endchoice +- +-config KGDB_CONSOLE +- bool "Enable serial console thru kgdb port" +- depends on KGDB && 8xx || CPM2 +- help +- If you enable this, all serial console messages will be sent +- over the gdb stub. +- If unsure, say N. +- + config XMON + bool "Include xmon kernel debugger" + depends on DEBUG_KERNEL +diff -Nurb linux-2.6.22-570/arch/ppc/amiga/config.c linux-2.6.22-591/arch/ppc/amiga/config.c +--- linux-2.6.22-570/arch/ppc/amiga/config.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/amiga/config.c 2007-12-21 15:36:11.000000000 -0500 +@@ -753,17 +753,11 @@ + void amiga_serial_console_write(struct console *co, const char *s, + unsigned int count) + { +-#if 0 /* def CONFIG_KGDB */ +- /* FIXME:APUS GDB doesn't seem to like O-packages before it is +- properly connected with the target. */ +- __gdb_output_string (s, count); +-#else + while (count--) { + if (*s == '\n') + amiga_serial_putc('\r'); + amiga_serial_putc(*s++); + } +-#endif + } + + #ifdef CONFIG_SERIAL_CONSOLE +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/Makefile linux-2.6.22-591/arch/ppc/kernel/Makefile +--- linux-2.6.22-570/arch/ppc/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -14,7 +14,7 @@ + obj-$(CONFIG_MODULES) += ppc_ksyms.o + obj-$(CONFIG_PCI) += pci.o + obj-$(CONFIG_RAPIDIO) += rio.o +-obj-$(CONFIG_KGDB) += ppc-stub.o ++obj-$(CONFIG_KGDB) += kgdb.o kgdb_setjmp32.o + obj-$(CONFIG_SMP) += smp.o smp-tbsync.o + obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o + +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb.c linux-2.6.22-591/arch/ppc/kernel/kgdb.c +--- linux-2.6.22-570/arch/ppc/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/ppc/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,348 @@ ++/* ++ * arch/ppc/kernel/kgdb.c ++ * ++ * PowerPC backend to the KGDB stub. ++ * ++ * Maintainer: Tom Rini ++ * ++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu) ++ * Copyright (C) 2003 Timesys Corporation. ++ * Copyright (C) 2004, 2006 MontaVista Software, Inc. ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program as licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * This table contains the mapping between PowerPC hardware trap types, and ++ * signals, which are primarily what GDB understands. GDB and the kernel ++ * don't always agree on values, so we use constants taken from gdb-6.2. 
++ */ ++static struct hard_trap_info ++{ ++ unsigned int tt; /* Trap type code for powerpc */ ++ unsigned char signo; /* Signal that we map this trap into */ ++} hard_trap_info[] = { ++ { 0x0100, 0x02 /* SIGINT */ }, /* system reset */ ++ { 0x0200, 0x0b /* SIGSEGV */ }, /* machine check */ ++ { 0x0300, 0x0b /* SIGSEGV */ }, /* data access */ ++ { 0x0400, 0x0b /* SIGSEGV */ }, /* instruction access */ ++ { 0x0500, 0x02 /* SIGINT */ }, /* external interrupt */ ++ { 0x0600, 0x0a /* SIGBUS */ }, /* alignment */ ++ { 0x0700, 0x05 /* SIGTRAP */ }, /* program check */ ++ { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ ++ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ ++ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ ++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) ++ { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ ++#if defined(CONFIG_FSL_BOOKE) ++ { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ ++ { 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */ ++ { 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */ ++ { 0x2040, 0x08 /* SIGFPE */ }, /* spe fp data */ ++ { 0x2050, 0x08 /* SIGFPE */ }, /* spe fp round */ ++ { 0x2060, 0x0e /* SIGILL */ }, /* performance monitor */ ++ { 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */ ++ { 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */ ++ { 0x3200, 0x02 /* SIGINT */ }, /* watchdog */ ++#else ++ { 0x1000, 0x0e /* SIGALRM */ }, /* programmable interval timer */ ++ { 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */ ++ { 0x1020, 0x02 /* SIGINT */ }, /* watchdog */ ++ { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ ++ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ ++#endif ++#else ++ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ ++#if defined(CONFIG_8xx) ++ { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ ++#else ++ { 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */ ++ { 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */ ++ { 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */ ++ { 0x1400, 0x02 /* SIGINT */ }, /* SMI */ ++ { 0x1600, 0x08 /* SIGFPE */ }, /* altivec assist */ ++ { 0x1700, 0x04 /* SIGILL */ }, /* TAU */ ++ { 0x2000, 0x05 /* SIGTRAP */ }, /* run mode */ ++#endif ++#endif ++ { 0x0000, 0x00 } /* Must be last */ ++}; ++ ++extern atomic_t cpu_doing_single_step; ++ ++static int computeSignal(unsigned int tt) ++{ ++ struct hard_trap_info *ht; ++ ++ for (ht = hard_trap_info; ht->tt && ht->signo; ht++) ++ if (ht->tt == tt) ++ return ht->signo; ++ ++ return SIGHUP; /* default for things we don't know about */ ++} ++ ++/* KGDB functions to use existing PowerPC hooks. */ ++static void kgdb_debugger(struct pt_regs *regs) ++{ ++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++} ++ ++static int kgdb_breakpoint(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, SIGTRAP, 0, regs); ++ ++ if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr)) ++ regs->nip += 4; ++ ++ return 1; ++} ++ ++static int kgdb_singlestep(struct pt_regs *regs) ++{ ++ struct thread_info *thread_info, *exception_thread_info; ++ ++ if (user_mode(regs)) ++ return 0; ++ /* ++ * On Book E and perhaps other processors, singlestep is handled on ++ * the critical exception stack. This causes current_thread_info() ++ * to fail, since it locates the thread_info by masking off ++ * the low bits of the current stack pointer.
We work around ++ * this issue by copying the thread_info from the kernel stack ++ * before calling kgdb_handle_exception, and copying it back ++ * afterwards. On most processors the copy is avoided since ++ * exception_thread_info == thread_info. ++ */ ++ thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1)); ++ exception_thread_info = current_thread_info(); ++ ++ if (thread_info != exception_thread_info) ++ memcpy(exception_thread_info, thread_info, sizeof *thread_info); ++ ++ kgdb_handle_exception(0, SIGTRAP, 0, regs); ++ ++ if (thread_info != exception_thread_info) ++ memcpy(thread_info, exception_thread_info, sizeof *thread_info); ++ ++ return 1; ++} ++ ++int kgdb_iabr_match(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++ return 1; ++} ++ ++int kgdb_dabr_match(struct pt_regs *regs) ++{ ++ if (user_mode(regs)) ++ return 0; ++ ++ kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs); ++ return 1; ++} ++ ++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ unsigned long *ptr = gdb_regs; ++ int reg; ++ ++ memset(gdb_regs, 0, MAXREG * 4); ++ ++ for (reg = 0; reg < 32; reg++) ++ *(ptr++) = regs->gpr[reg]; ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ *(ptr++) = current->thread.evr[reg]; ++#else ++ ptr += 32; ++#endif ++#else ++ ptr += 64; ++#endif ++ ++ *(ptr++) = regs->nip; ++ *(ptr++) = regs->msr; ++ *(ptr++) = regs->ccr; ++ *(ptr++) = regs->link; ++ *(ptr++) = regs->ctr; ++ *(ptr++) = regs->xer; ++ ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ *(ptr++) = current->thread.acc >> 32; ++ *(ptr++) = current->thread.acc & 0xffffffff; ++ *(ptr++) = current->thread.spefscr; ++#endif ++} ++ ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) ++{ ++ struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp + ++ STACK_FRAME_OVERHEAD); ++ unsigned long *ptr = gdb_regs; ++ int reg; ++ ++ memset(gdb_regs, 0, MAXREG * 4); ++ ++ /* Regs GPR0-2 */ ++ for (reg = 0; reg < 3; reg++) ++ *(ptr++) = regs->gpr[reg]; ++ ++ /* Regs GPR3-13 are not saved */ ++ ptr += 11; ++ ++ /* Regs GPR14-31 */ ++ for (reg = 14; reg < 32; reg++) ++ *(ptr++) = regs->gpr[reg]; ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ *(ptr++) = p->thread.evr[reg]; ++#else ++ ptr += 32; ++#endif ++#else ++ ptr += 64; ++#endif ++ ++ *(ptr++) = regs->nip; ++ *(ptr++) = regs->msr; ++ *(ptr++) = regs->ccr; ++ *(ptr++) = regs->link; ++ *(ptr++) = regs->ctr; ++ *(ptr++) = regs->xer; ++ ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ *(ptr++) = p->thread.acc >> 32; ++ *(ptr++) = p->thread.acc & 0xffffffff; ++ *(ptr++) = p->thread.spefscr; ++#endif ++} ++ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ unsigned long *ptr = gdb_regs; ++ int reg; ++#ifdef CONFIG_SPE ++ union { ++ u32 v32[2]; ++ u64 v64; ++ } acc; ++#endif ++ ++ for (reg = 0; reg < 32; reg++) ++ regs->gpr[reg] = *(ptr++); ++ ++#ifdef CONFIG_FSL_BOOKE ++#ifdef CONFIG_SPE ++ for (reg = 0; reg < 32; reg++) ++ current->thread.evr[reg] = *(ptr++); ++#else ++ ptr += 32; ++#endif ++#else ++ ptr += 64; ++#endif ++ ++ regs->nip = *(ptr++); ++ regs->msr = *(ptr++); ++ regs->ccr = *(ptr++); ++ regs->link = *(ptr++); ++ regs->ctr = *(ptr++); ++ regs->xer = *(ptr++); ++ ++#ifdef CONFIG_SPE ++ /* u64 acc */ ++ acc.v32[0] = *(ptr++); ++ acc.v32[1] = *(ptr++); ++ current->thread.acc = acc.v64; ++ current->thread.spefscr = *(ptr++); ++#endif ++} ++ 
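Before the command handler below, it helps to pin down the buffer layout that regs_to_gdb_regs(), sleeping_thread_to_gdb_regs() and gdb_regs_to_regs() all walk: the fixed 32-bit PowerPC register file of gdb-6.2. Written out as an illustrative C struct (the stub itself works on a bare array of MAXREG unsigned longs; this struct and its name are not part of the patch):

	/* Slot order assumed by the marshalling routines above.  On
	 * CONFIG_FSL_BOOKE the floating-point block is instead 32 words of
	 * SPE evr[] state (gdb's e500 model), and with CONFIG_SPE the
	 * 64-bit accumulator and spefscr trail the xer slot.
	 */
	struct gdb_ppc32_regs {
		unsigned long gpr[32];	/* r0..r31 */
		unsigned long fpr[64];	/* f0..f31, two words per double */
		unsigned long nip;	/* program counter */
		unsigned long msr;
		unsigned long ccr;
		unsigned long link;
		unsigned long ctr;
		unsigned long xer;
	};

Note that sleeping_thread_to_gdb_regs() fills the same layout but leaves GPR3-GPR13 zeroed: those volatile registers are not preserved in the switch frame it reads from.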
++/* ++ * This function does PowerPC specific processing for interfacing to gdb. ++ */ ++int kgdb_arch_handle_exception(int vector, int signo, int err_code, ++ char *remcom_in_buffer, char *remcom_out_buffer, ++ struct pt_regs *linux_regs) ++{ ++ char *ptr = &remcom_in_buffer[1]; ++ unsigned long addr; ++ ++ switch (remcom_in_buffer[0]) ++ { ++ /* ++ * sAA..AA Step one instruction from AA..AA ++ * This will return an error to gdb .. ++ */ ++ case 's': ++ case 'c': ++ /* handle the optional parameter */ ++ if (kgdb_hex2long (&ptr, &addr)) ++ linux_regs->nip = addr; ++ ++ atomic_set(&cpu_doing_single_step, -1); ++ /* set the trace bit if we're stepping */ ++ if (remcom_in_buffer[0] == 's') { ++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) ++ mtspr(SPRN_DBCR0, ++ mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM); ++ linux_regs->msr |= MSR_DE; ++#else ++ linux_regs->msr |= MSR_SE; ++#endif ++ debugger_step = 1; ++ if (kgdb_contthread) ++ atomic_set(&cpu_doing_single_step, ++ smp_processor_id()); ++ } ++ return 0; ++ } ++ ++ return -1; ++} ++ ++/* ++ * Global data ++ */ ++struct kgdb_arch arch_kgdb_ops = { ++ .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08}, ++}; ++ ++int kgdb_arch_init(void) ++{ ++ debugger = kgdb_debugger; ++ debugger_bpt = kgdb_breakpoint; ++ debugger_sstep = kgdb_singlestep; ++ debugger_iabr_match = kgdb_iabr_match; ++ debugger_dabr_match = kgdb_dabr_match; ++ ++ return 0; ++} ++ ++arch_initcall(kgdb_arch_init); +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S +--- linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/ppc/kernel/kgdb_setjmp32.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (C) 1996 Paul Mackerras ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++ ++ .text ++ ++/* ++ * Save/restore state in case a memory access causes a fault.
++ * ++ * int kgdb_fault_setjmp(unsigned long *curr_context); ++ * void kgdb_fault_longjmp(unsigned long *curr_context); ++ */ ++ ++_GLOBAL(kgdb_fault_setjmp) ++ mflr r0 ++ stw r0,0(r3) ++ stw r1,4(r3) ++ stw r2,8(r3) ++ mfcr r0 ++ stw r0,12(r3) ++ stmw r13,16(r3) ++ li r3,0 ++ blr ++ ++_GLOBAL(kgdb_fault_longjmp) ++ lmw r13,16(r3) ++ lwz r0,12(r3) ++ mtcrf 0x38,r0 ++ lwz r0,0(r3) ++ lwz r1,4(r3) ++ lwz r2,8(r3) ++ mtlr r0 ++ mr r3,r1 ++ blr +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/misc.S linux-2.6.22-591/arch/ppc/kernel/misc.S +--- linux-2.6.22-570/arch/ppc/kernel/misc.S 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/ppc/kernel/misc.S 2007-12-21 15:36:11.000000000 -0500 +@@ -328,7 +328,7 @@ + mtspr SPRN_L1CSR0,r3 + isync + blr +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE) + mfspr r3,SPRN_L1CSR1 + ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR + mtspr SPRN_L1CSR1,r3 +@@ -355,7 +355,7 @@ + _GLOBAL(__flush_icache_range) + BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + li r5,L1_CACHE_BYTES-1 + andc r3,r3,r5 + subf r4,r3,r4 +@@ -472,7 +472,7 @@ + _GLOBAL(__flush_dcache_icache) + BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + rlwinm r3,r3,0,0,19 /* Get page base address */ + li r4,4096/L1_CACHE_BYTES /* Number of lines in a page */ + mtctr r4 +@@ -500,7 +500,7 @@ + _GLOBAL(__flush_dcache_icache_phys) + BEGIN_FTR_SECTION + blr /* for 601, do nothing */ +-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE) ++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) + mfmsr r10 + rlwinm r0,r10,0,28,26 /* clear DR */ + mtmsr r0 +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c +--- linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/kernel/ppc-stub.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,866 +0,0 @@ +-/* +- * ppc-stub.c: KGDB support for the Linux kernel. +- * +- * adapted from arch/sparc/kernel/sparc-stub.c for the PowerPC +- * some stuff borrowed from Paul Mackerras' xmon +- * Copyright (C) 1998 Michael AK Tesch (tesch@cs.wisc.edu) +- * +- * Modifications to run under Linux +- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) +- * +- * This file originally came from the gdb sources, and the +- * copyright notices have been retained below. +- */ +- +-/**************************************************************************** +- +- THIS SOFTWARE IS NOT COPYRIGHTED +- +- HP offers the following for use in the public domain. HP makes no +- warranty with regard to the software or its performance and the +- user accepts the software "AS IS" with all faults. +- +- HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD +- TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES +- OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. +- +-****************************************************************************/ +- +-/**************************************************************************** +- * Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $ +- * +- * Module name: remcom.c $ +- * Revision: 1.34 $ +- * Date: 91/03/09 12:29:49 $ +- * Contributor: Lake Stevens Instrument Division$ +- * +- * Description: low level support for gdb debugger. 
$ +- * +- * Considerations: only works on target hardware $ +- * +- * Written by: Glenn Engel $ +- * ModuleState: Experimental $ +- * +- * NOTES: See Below $ +- * +- * Modified for SPARC by Stu Grossman, Cygnus Support. +- * +- * This code has been extensively tested on the Fujitsu SPARClite demo board. +- * +- * To enable debugger support, two things need to happen. One, a +- * call to set_debug_traps() is necessary in order to allow any breakpoints +- * or error conditions to be properly intercepted and reported to gdb. +- * Two, a breakpoint needs to be generated to begin communication. This +- * is most easily accomplished by a call to breakpoint(). Breakpoint() +- * simulates a breakpoint by executing a trap #1. +- * +- ************* +- * +- * The following gdb commands are supported: +- * +- * command function Return value +- * +- * g return the value of the CPU registers hex data or ENN +- * G set the value of the CPU registers OK or ENN +- * qOffsets Get section offsets. Reply is Text=xxx;Data=yyy;Bss=zzz +- * +- * mAA..AA,LLLL Read LLLL bytes at address AA..AA hex data or ENN +- * MAA..AA,LLLL: Write LLLL bytes at address AA.AA OK or ENN +- * +- * c Resume at current address SNN ( signal NN) +- * cAA..AA Continue at address AA..AA SNN +- * +- * s Step one instruction SNN +- * sAA..AA Step one instruction from AA..AA SNN +- * +- * k kill +- * +- * ? What was the last sigval ? SNN (signal NN) +- * +- * bBB..BB Set baud rate to BB..BB OK or BNN, then sets +- * baud rate +- * +- * All commands and responses are sent with a packet which includes a +- * checksum. A packet consists of +- * +- * $<packet info>#<checksum>. +- * +- * where +- * <packet info> :: <characters representing the command or response> +- * <checksum> :: <two hex digits computed as modulo 256 sum of <packet info>> +- * +- * When a packet is received, it is first acknowledged with either '+' or '-'. +- * '+' indicates a successful transfer. '-' indicates a failed transfer.
+- * +- * Example: +- * +- * Host: Reply: +- * $m0,10#2a +$00010203040506070809101112131415#42 +- * +- ****************************************************************************/ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-void breakinst(void); +- +-/* +- * BUFMAX defines the maximum number of characters in inbound/outbound buffers +- * at least NUMREGBYTES*2 are needed for register packets +- */ +-#define BUFMAX 2048 +-static char remcomInBuffer[BUFMAX]; +-static char remcomOutBuffer[BUFMAX]; +- +-static int initialized; +-static int kgdb_active; +-static int kgdb_started; +-static u_int fault_jmp_buf[100]; +-static int kdebug; +- +- +-static const char hexchars[]="0123456789abcdef"; +- +-/* Place where we save old trap entries for restoration - sparc*/ +-/* struct tt_entry kgdb_savettable[256]; */ +-/* typedef void (*trapfunc_t)(void); */ +- +-static void kgdb_fault_handler(struct pt_regs *regs); +-static int handle_exception (struct pt_regs *regs); +- +-#if 0 +-/* Install an exception handler for kgdb */ +-static void exceptionHandler(int tnum, unsigned int *tfunc) +-{ +- /* We are dorking with a live trap table, all irqs off */ +-} +-#endif +- +-int +-kgdb_setjmp(long *buf) +-{ +- asm ("mflr 0; stw 0,0(%0);" +- "stw 1,4(%0); stw 2,8(%0);" +- "mfcr 0; stw 0,12(%0);" +- "stmw 13,16(%0)" +- : : "r" (buf)); +- /* XXX should save fp regs as well */ +- return 0; +-} +-void +-kgdb_longjmp(long *buf, int val) +-{ +- if (val == 0) +- val = 1; +- asm ("lmw 13,16(%0);" +- "lwz 0,12(%0); mtcrf 0x38,0;" +- "lwz 0,0(%0); lwz 1,4(%0); lwz 2,8(%0);" +- "mtlr 0; mr 3,%1" +- : : "r" (buf), "r" (val)); +-} +-/* Convert ch from a hex digit to an int */ +-static int +-hex(unsigned char ch) +-{ +- if (ch >= 'a' && ch <= 'f') +- return ch-'a'+10; +- if (ch >= '0' && ch <= '9') +- return ch-'0'; +- if (ch >= 'A' && ch <= 'F') +- return ch-'A'+10; +- return -1; +-} +- +-/* Convert the memory pointed to by mem into hex, placing result in buf. +- * Return a pointer to the last char put in buf (null), in case of mem fault, +- * return 0. +- */ +-static unsigned char * +-mem2hex(const char *mem, char *buf, int count) +-{ +- unsigned char ch; +- unsigned short tmp_s; +- unsigned long tmp_l; +- +- if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { +- debugger_fault_handler = kgdb_fault_handler; +- +- /* Accessing 16 bit and 32 bit objects in a single +- ** load instruction is required to avoid bad side +- ** effects for some IO registers. 
+- */ +- +- if ((count == 2) && (((long)mem & 1) == 0)) { +- tmp_s = *(unsigned short *)mem; +- mem += 2; +- *buf++ = hexchars[(tmp_s >> 12) & 0xf]; +- *buf++ = hexchars[(tmp_s >> 8) & 0xf]; +- *buf++ = hexchars[(tmp_s >> 4) & 0xf]; +- *buf++ = hexchars[tmp_s & 0xf]; +- +- } else if ((count == 4) && (((long)mem & 3) == 0)) { +- tmp_l = *(unsigned int *)mem; +- mem += 4; +- *buf++ = hexchars[(tmp_l >> 28) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 24) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 20) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 16) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 12) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 8) & 0xf]; +- *buf++ = hexchars[(tmp_l >> 4) & 0xf]; +- *buf++ = hexchars[tmp_l & 0xf]; +- +- } else { +- while (count-- > 0) { +- ch = *mem++; +- *buf++ = hexchars[ch >> 4]; +- *buf++ = hexchars[ch & 0xf]; +- } +- } +- +- } else { +- /* error condition */ +- } +- debugger_fault_handler = NULL; +- *buf = 0; +- return buf; +-} +- +-/* convert the hex array pointed to by buf into binary to be placed in mem +- * return a pointer to the character AFTER the last byte written. +-*/ +-static char * +-hex2mem(char *buf, char *mem, int count) +-{ +- unsigned char ch; +- int i; +- char *orig_mem; +- unsigned short tmp_s; +- unsigned long tmp_l; +- +- orig_mem = mem; +- +- if (kgdb_setjmp((long*)fault_jmp_buf) == 0) { +- debugger_fault_handler = kgdb_fault_handler; +- +- /* Accessing 16 bit and 32 bit objects in a single +- ** store instruction is required to avoid bad side +- ** effects for some IO registers. +- */ +- +- if ((count == 2) && (((long)mem & 1) == 0)) { +- tmp_s = hex(*buf++) << 12; +- tmp_s |= hex(*buf++) << 8; +- tmp_s |= hex(*buf++) << 4; +- tmp_s |= hex(*buf++); +- +- *(unsigned short *)mem = tmp_s; +- mem += 2; +- +- } else if ((count == 4) && (((long)mem & 3) == 0)) { +- tmp_l = hex(*buf++) << 28; +- tmp_l |= hex(*buf++) << 24; +- tmp_l |= hex(*buf++) << 20; +- tmp_l |= hex(*buf++) << 16; +- tmp_l |= hex(*buf++) << 12; +- tmp_l |= hex(*buf++) << 8; +- tmp_l |= hex(*buf++) << 4; +- tmp_l |= hex(*buf++); +- +- *(unsigned long *)mem = tmp_l; +- mem += 4; +- +- } else { +- for (i=0; i# */ +-static void +-getpacket(char *buffer) +-{ +- unsigned char checksum; +- unsigned char xmitcsum; +- int i; +- int count; +- unsigned char ch; +- +- do { +- /* wait around for the start character, ignore all other +- * characters */ +- while ((ch = (getDebugChar() & 0x7f)) != '$') ; +- +- checksum = 0; +- xmitcsum = -1; +- +- count = 0; +- +- /* now, read until a # or end of buffer is found */ +- while (count < BUFMAX) { +- ch = getDebugChar() & 0x7f; +- if (ch == '#') +- break; +- checksum = checksum + ch; +- buffer[count] = ch; +- count = count + 1; +- } +- +- if (count >= BUFMAX) +- continue; +- +- buffer[count] = 0; +- +- if (ch == '#') { +- xmitcsum = hex(getDebugChar() & 0x7f) << 4; +- xmitcsum |= hex(getDebugChar() & 0x7f); +- if (checksum != xmitcsum) +- putDebugChar('-'); /* failed checksum */ +- else { +- putDebugChar('+'); /* successful transfer */ +- /* if a sequence char is present, reply the ID */ +- if (buffer[2] == ':') { +- putDebugChar(buffer[0]); +- putDebugChar(buffer[1]); +- /* remove sequence chars from buffer */ +- count = strlen(buffer); +- for (i=3; i <= count; i++) +- buffer[i-3] = buffer[i]; +- } +- } +- } +- } while (checksum != xmitcsum); +-} +- +-/* send the packet in buffer. */ +-static void putpacket(unsigned char *buffer) +-{ +- unsigned char checksum; +- int count; +- unsigned char ch, recv; +- +- /* $#. 
*/ +- do { +- putDebugChar('$'); +- checksum = 0; +- count = 0; +- +- while ((ch = buffer[count])) { +- putDebugChar(ch); +- checksum += ch; +- count += 1; +- } +- +- putDebugChar('#'); +- putDebugChar(hexchars[checksum >> 4]); +- putDebugChar(hexchars[checksum & 0xf]); +- recv = getDebugChar(); +- } while ((recv & 0x7f) != '+'); +-} +- +-static void kgdb_flush_cache_all(void) +-{ +- flush_instruction_cache(); +-} +- +-/* Set up exception handlers for tracing and breakpoints +- * [could be called kgdb_init()] +- */ +-void set_debug_traps(void) +-{ +-#if 0 +- unsigned char c; +- +- save_and_cli(flags); +- +- /* In case GDB is started before us, ack any packets (presumably +- * "$?#xx") sitting there. +- * +- * I've found this code causes more problems than it solves, +- * so that's why it's commented out. GDB seems to work fine +- * now starting either before or after the kernel -bwb +- */ +- +- while((c = getDebugChar()) != '$'); +- while((c = getDebugChar()) != '#'); +- c = getDebugChar(); /* eat first csum byte */ +- c = getDebugChar(); /* eat second csum byte */ +- putDebugChar('+'); /* ack it */ +-#endif +- debugger = kgdb; +- debugger_bpt = kgdb_bpt; +- debugger_sstep = kgdb_sstep; +- debugger_iabr_match = kgdb_iabr_match; +- debugger_dabr_match = kgdb_dabr_match; +- +- initialized = 1; +-} +- +-static void kgdb_fault_handler(struct pt_regs *regs) +-{ +- kgdb_longjmp((long*)fault_jmp_buf, 1); +-} +- +-int kgdb_bpt(struct pt_regs *regs) +-{ +- return handle_exception(regs); +-} +- +-int kgdb_sstep(struct pt_regs *regs) +-{ +- return handle_exception(regs); +-} +- +-void kgdb(struct pt_regs *regs) +-{ +- handle_exception(regs); +-} +- +-int kgdb_iabr_match(struct pt_regs *regs) +-{ +- printk(KERN_ERR "kgdb doesn't support iabr, what?!?\n"); +- return handle_exception(regs); +-} +- +-int kgdb_dabr_match(struct pt_regs *regs) +-{ +- printk(KERN_ERR "kgdb doesn't support dabr, what?!?\n"); +- return handle_exception(regs); +-} +- +-/* Convert the hardware trap type code to a unix signal number. */ +-/* +- * This table contains the mapping between PowerPC hardware trap types, and +- * signals, which are primarily what GDB understands. 
+- */ +-static struct hard_trap_info +-{ +- unsigned int tt; /* Trap type code for powerpc */ +- unsigned char signo; /* Signal that we map this trap into */ +-} hard_trap_info[] = { +-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +- { 0x100, SIGINT }, /* critical input interrupt */ +- { 0x200, SIGSEGV }, /* machine check */ +- { 0x300, SIGSEGV }, /* data storage */ +- { 0x400, SIGBUS }, /* instruction storage */ +- { 0x500, SIGINT }, /* interrupt */ +- { 0x600, SIGBUS }, /* alignment */ +- { 0x700, SIGILL }, /* program */ +- { 0x800, SIGILL }, /* reserved */ +- { 0x900, SIGILL }, /* reserved */ +- { 0xa00, SIGILL }, /* reserved */ +- { 0xb00, SIGILL }, /* reserved */ +- { 0xc00, SIGCHLD }, /* syscall */ +- { 0xd00, SIGILL }, /* reserved */ +- { 0xe00, SIGILL }, /* reserved */ +- { 0xf00, SIGILL }, /* reserved */ +- /* +- ** 0x1000 PIT +- ** 0x1010 FIT +- ** 0x1020 watchdog +- ** 0x1100 data TLB miss +- ** 0x1200 instruction TLB miss +- */ +- { 0x2002, SIGTRAP}, /* debug */ +-#else +- { 0x200, SIGSEGV }, /* machine check */ +- { 0x300, SIGSEGV }, /* address error (store) */ +- { 0x400, SIGBUS }, /* instruction bus error */ +- { 0x500, SIGINT }, /* interrupt */ +- { 0x600, SIGBUS }, /* alingment */ +- { 0x700, SIGTRAP }, /* breakpoint trap */ +- { 0x800, SIGFPE }, /* fpu unavail */ +- { 0x900, SIGALRM }, /* decrementer */ +- { 0xa00, SIGILL }, /* reserved */ +- { 0xb00, SIGILL }, /* reserved */ +- { 0xc00, SIGCHLD }, /* syscall */ +- { 0xd00, SIGTRAP }, /* single-step/watch */ +- { 0xe00, SIGFPE }, /* fp assist */ +-#endif +- { 0, 0} /* Must be last */ +- +-}; +- +-static int computeSignal(unsigned int tt) +-{ +- struct hard_trap_info *ht; +- +- for (ht = hard_trap_info; ht->tt && ht->signo; ht++) +- if (ht->tt == tt) +- return ht->signo; +- +- return SIGHUP; /* default for things we don't know about */ +-} +- +-#define PC_REGNUM 64 +-#define SP_REGNUM 1 +- +-/* +- * This function does all command processing for interfacing to gdb. +- */ +-static int +-handle_exception (struct pt_regs *regs) +-{ +- int sigval; +- int addr; +- int length; +- char *ptr; +- unsigned int msr; +- +- /* We don't handle user-mode breakpoints. */ +- if (user_mode(regs)) +- return 0; +- +- if (debugger_fault_handler) { +- debugger_fault_handler(regs); +- panic("kgdb longjump failed!\n"); +- } +- if (kgdb_active) { +- printk(KERN_ERR "interrupt while in kgdb, returning\n"); +- return 0; +- } +- +- kgdb_active = 1; +- kgdb_started = 1; +- +-#ifdef KGDB_DEBUG +- printk("kgdb: entering handle_exception; trap [0x%x]\n", +- (unsigned int)regs->trap); +-#endif +- +- kgdb_interruptible(0); +- lock_kernel(); +- msr = mfmsr(); +- mtmsr(msr & ~MSR_EE); /* disable interrupts */ +- +- if (regs->nip == (unsigned long)breakinst) { +- /* Skip over breakpoint trap insn */ +- regs->nip += 4; +- } +- +- /* reply to host that an exception has occurred */ +- sigval = computeSignal(regs->trap); +- ptr = remcomOutBuffer; +- +- *ptr++ = 'T'; +- *ptr++ = hexchars[sigval >> 4]; +- *ptr++ = hexchars[sigval & 0xf]; +- *ptr++ = hexchars[PC_REGNUM >> 4]; +- *ptr++ = hexchars[PC_REGNUM & 0xf]; +- *ptr++ = ':'; +- ptr = mem2hex((char *)®s->nip, ptr, 4); +- *ptr++ = ';'; +- *ptr++ = hexchars[SP_REGNUM >> 4]; +- *ptr++ = hexchars[SP_REGNUM & 0xf]; +- *ptr++ = ':'; +- ptr = mem2hex(((char *)regs) + SP_REGNUM*4, ptr, 4); +- *ptr++ = ';'; +- *ptr++ = 0; +- +- putpacket(remcomOutBuffer); +- if (kdebug) +- printk("remcomOutBuffer: %s\n", remcomOutBuffer); +- +- /* XXX We may want to add some features dealing with poking the +- * XXX page tables, ... 
(look at sparc-stub.c for more info) +- * XXX also required hacking to the gdb sources directly... +- */ +- +- while (1) { +- remcomOutBuffer[0] = 0; +- +- getpacket(remcomInBuffer); +- switch (remcomInBuffer[0]) { +- case '?': /* report most recent signal */ +- remcomOutBuffer[0] = 'S'; +- remcomOutBuffer[1] = hexchars[sigval >> 4]; +- remcomOutBuffer[2] = hexchars[sigval & 0xf]; +- remcomOutBuffer[3] = 0; +- break; +-#if 0 +- case 'q': /* this screws up gdb for some reason...*/ +- { +- extern long _start, sdata, __bss_start; +- +- ptr = &remcomInBuffer[1]; +- if (strncmp(ptr, "Offsets", 7) != 0) +- break; +- +- ptr = remcomOutBuffer; +- sprintf(ptr, "Text=%8.8x;Data=%8.8x;Bss=%8.8x", +- &_start, &sdata, &__bss_start); +- break; +- } +-#endif +- case 'd': +- /* toggle debug flag */ +- kdebug ^= 1; +- break; +- +- case 'g': /* return the value of the CPU registers. +- * some of them are non-PowerPC names :( +- * they are stored in gdb like: +- * struct { +- * u32 gpr[32]; +- * f64 fpr[32]; +- * u32 pc, ps, cnd, lr; (ps=msr) +- * u32 cnt, xer, mq; +- * } +- */ +- { +- int i; +- ptr = remcomOutBuffer; +- /* General Purpose Regs */ +- ptr = mem2hex((char *)regs, ptr, 32 * 4); +- /* Floating Point Regs - FIXME */ +- /*ptr = mem2hex((char *), ptr, 32 * 8);*/ +- for(i=0; i<(32*8*2); i++) { /* 2chars/byte */ +- ptr[i] = '0'; +- } +- ptr += 32*8*2; +- /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ +- ptr = mem2hex((char *)®s->nip, ptr, 4); +- ptr = mem2hex((char *)®s->msr, ptr, 4); +- ptr = mem2hex((char *)®s->ccr, ptr, 4); +- ptr = mem2hex((char *)®s->link, ptr, 4); +- ptr = mem2hex((char *)®s->ctr, ptr, 4); +- ptr = mem2hex((char *)®s->xer, ptr, 4); +- } +- break; +- +- case 'G': /* set the value of the CPU registers */ +- { +- ptr = &remcomInBuffer[1]; +- +- /* +- * If the stack pointer has moved, you should pray. +- * (cause only god can help you). +- */ +- +- /* General Purpose Regs */ +- hex2mem(ptr, (char *)regs, 32 * 4); +- +- /* Floating Point Regs - FIXME?? */ +- /*ptr = hex2mem(ptr, ??, 32 * 8);*/ +- ptr += 32*8*2; +- +- /* pc, msr, cr, lr, ctr, xer, (mq is unused) */ +- ptr = hex2mem(ptr, (char *)®s->nip, 4); +- ptr = hex2mem(ptr, (char *)®s->msr, 4); +- ptr = hex2mem(ptr, (char *)®s->ccr, 4); +- ptr = hex2mem(ptr, (char *)®s->link, 4); +- ptr = hex2mem(ptr, (char *)®s->ctr, 4); +- ptr = hex2mem(ptr, (char *)®s->xer, 4); +- +- strcpy(remcomOutBuffer,"OK"); +- } +- break; +- case 'H': +- /* don't do anything, yet, just acknowledge */ +- hexToInt(&ptr, &addr); +- strcpy(remcomOutBuffer,"OK"); +- break; +- +- case 'm': /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ +- /* Try to read %x,%x. */ +- +- ptr = &remcomInBuffer[1]; +- +- if (hexToInt(&ptr, &addr) && *ptr++ == ',' +- && hexToInt(&ptr, &length)) { +- if (mem2hex((char *)addr, remcomOutBuffer, +- length)) +- break; +- strcpy(remcomOutBuffer, "E03"); +- } else +- strcpy(remcomOutBuffer, "E01"); +- break; +- +- case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */ +- /* Try to read '%x,%x:'. 
*/ +- +- ptr = &remcomInBuffer[1]; +- +- if (hexToInt(&ptr, &addr) && *ptr++ == ',' +- && hexToInt(&ptr, &length) +- && *ptr++ == ':') { +- if (hex2mem(ptr, (char *)addr, length)) +- strcpy(remcomOutBuffer, "OK"); +- else +- strcpy(remcomOutBuffer, "E03"); +- flush_icache_range(addr, addr+length); +- } else +- strcpy(remcomOutBuffer, "E02"); +- break; +- +- +- case 'k': /* kill the program, actually just continue */ +- case 'c': /* cAA..AA Continue; address AA..AA optional */ +- /* try to read optional parameter, pc unchanged if no parm */ +- +- ptr = &remcomInBuffer[1]; +- if (hexToInt(&ptr, &addr)) +- regs->nip = addr; +- +-/* Need to flush the instruction cache here, as we may have deposited a +- * breakpoint, and the icache probably has no way of knowing that a data ref to +- * some location may have changed something that is in the instruction cache. +- */ +- kgdb_flush_cache_all(); +- mtmsr(msr); +- +- kgdb_interruptible(1); +- unlock_kernel(); +- kgdb_active = 0; +- if (kdebug) { +- printk("remcomInBuffer: %s\n", remcomInBuffer); +- printk("remcomOutBuffer: %s\n", remcomOutBuffer); +- } +- return 1; +- +- case 's': +- kgdb_flush_cache_all(); +-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) +- mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC); +- regs->msr |= MSR_DE; +-#else +- regs->msr |= MSR_SE; +-#endif +- unlock_kernel(); +- kgdb_active = 0; +- if (kdebug) { +- printk("remcomInBuffer: %s\n", remcomInBuffer); +- printk("remcomOutBuffer: %s\n", remcomOutBuffer); +- } +- return 1; +- +- case 'r': /* Reset (if user process..exit ???)*/ +- panic("kgdb reset."); +- break; +- } /* switch */ +- if (remcomOutBuffer[0] && kdebug) { +- printk("remcomInBuffer: %s\n", remcomInBuffer); +- printk("remcomOutBuffer: %s\n", remcomOutBuffer); +- } +- /* reply to the request */ +- putpacket(remcomOutBuffer); +- } /* while(1) */ +-} +- +-/* This function will generate a breakpoint exception. It is used at the +- beginning of a program to sync up with a debugger and can be used +- otherwise as a quick means to stop program execution and "break" into +- the debugger. */ +- +-void +-breakpoint(void) +-{ +- if (!initialized) { +- printk("breakpoint() called b4 kgdb init\n"); +- return; +- } +- +- asm(" .globl breakinst \n\ +- breakinst: .long 0x7d821008"); +-} +- +-#ifdef CONFIG_KGDB_CONSOLE +-/* Output string in GDB O-packet format if GDB has connected. If nothing +- output, returns 0 (caller must then handle output). */ +-int +-kgdb_output_string (const char* s, unsigned int count) +-{ +- char buffer[512]; +- +- if (!kgdb_started) +- return 0; +- +- count = (count <= (sizeof(buffer) / 2 - 2)) +- ? 
count : (sizeof(buffer) / 2 - 2); +- +- buffer[0] = 'O'; +- mem2hex (s, &buffer[1], count); +- putpacket(buffer); +- +- return 1; +-} +-#endif +- +-static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs, +- struct tty_struct *tty) +-{ +- printk("Entering GDB stub\n"); +- breakpoint(); +-} +-static struct sysrq_key_op sysrq_gdb_op = { +- .handler = sysrq_handle_gdb, +- .help_msg = "Gdb", +- .action_msg = "GDB", +-}; +- +-static int gdb_register_sysrq(void) +-{ +- printk("Registering GDB sysrq handler\n"); +- register_sysrq_key('g', &sysrq_gdb_op); +- return 0; +-} +-module_init(gdb_register_sysrq); +diff -Nurb linux-2.6.22-570/arch/ppc/kernel/setup.c linux-2.6.22-591/arch/ppc/kernel/setup.c +--- linux-2.6.22-570/arch/ppc/kernel/setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/kernel/setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -48,10 +48,6 @@ + #include + #endif + +-#if defined CONFIG_KGDB +-#include +-#endif +- + extern void platform_init(unsigned long r3, unsigned long r4, + unsigned long r5, unsigned long r6, unsigned long r7); + extern void reloc_got2(unsigned long offset); +@@ -509,24 +505,12 @@ + #endif /* CONFIG_XMON */ + if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab); + +-#if defined(CONFIG_KGDB) +- if (ppc_md.kgdb_map_scc) +- ppc_md.kgdb_map_scc(); +- set_debug_traps(); +- if (strstr(cmd_line, "gdb")) { +- if (ppc_md.progress) +- ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000); +- printk("kgdb breakpoint activated\n"); +- breakpoint(); +- } +-#endif +- + /* + * Set cache line size based on type of cpu as a default. + * Systems with OF can look in the properties on the cpu node(s) + * for a possibly more accurate value. + */ +- if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) { ++ if (! cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) { + dcache_bsize = cur_cpu_spec->dcache_bsize; + icache_bsize = cur_cpu_spec->icache_bsize; + ucache_bsize = 0; +diff -Nurb linux-2.6.22-570/arch/ppc/mm/fault.c linux-2.6.22-591/arch/ppc/mm/fault.c +--- linux-2.6.22-570/arch/ppc/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/ppc/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -330,6 +331,14 @@ + return; + } + ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) { ++ /* Restore our previous state. */ ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Not reached. 
*/ ++ } ++#endif ++ + /* kernel has accessed a bad area */ + #if defined(CONFIG_XMON) || defined(CONFIG_KGDB) + if (debugger_kernel_faults) +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c +--- linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/4xx/bamboo.c 2007-12-21 15:36:11.000000000 -0500 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -337,10 +338,13 @@ + printk("Early serial init of port 0 failed\n"); + } + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + /* Configure debug serial access */ + gen550_init(0, &port); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(0, &port); ++#endif + + port.membase = ioremap64(PPC440EP_UART1_ADDR, 8); + port.irq = 1; +@@ -351,10 +355,13 @@ + printk("Early serial init of port 1 failed\n"); + } + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + /* Configure debug serial access */ + gen550_init(1, &port); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(1, &port); ++#endif + + port.membase = ioremap64(PPC440EP_UART2_ADDR, 8); + port.irq = 3; +@@ -365,10 +372,13 @@ + printk("Early serial init of port 2 failed\n"); + } + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + /* Configure debug serial access */ + gen550_init(2, &port); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(2, &port); ++#endif + + port.membase = ioremap64(PPC440EP_UART3_ADDR, 8); + port.irq = 4; +@@ -378,6 +388,10 @@ + if (early_serial_setup(&port) != 0) { + printk("Early serial init of port 3 failed\n"); + } ++ ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(3, &port); ++#endif + } + + static void __init +@@ -435,8 +449,5 @@ + + ppc_md.nvram_read_val = todc_direct_read_val; + ppc_md.nvram_write_val = todc_direct_write_val; +-#ifdef CONFIG_KGDB +- ppc_md.early_serial_map = bamboo_early_serial_map; +-#endif + } + +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c +--- linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/4xx/bubinga.c 2007-12-21 15:36:11.000000000 -0500 +@@ -4,7 +4,7 @@ + * Author: SAW (IBM), derived from walnut.c. + * Maintained by MontaVista Software + * +- * 2003 (c) MontaVista Softare Inc. This file is licensed under the ++ * 2003-2004 (c) MontaVista Softare Inc. This file is licensed under the + * terms of the GNU General Public License version 2. This program is + * licensed "as is" without any warranty of any kind, whether express + * or implied. 
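The change to arch/ppc/mm/fault.c above is the recovery half of the new KGDB core's guarded memory access: while the debugger is active and kgdb_may_fault is set, a faulting kernel access long-jumps back into the stub instead of oopsing. A minimal sketch of the calling side, assuming the names used elsewhere in this patch (kgdb_may_fault, kgdb_fault_jmp_regs, kgdb_fault_setjmp, kgdb_fault_longjmp); the helper kgdb_read_mem() itself is hypothetical, named here only for illustration:

	/* Sketch only: guard a possibly-bad copy with the fault setjmp hook. */
	static int kgdb_read_mem(char *dst, const char *src, int len)
	{
		kgdb_may_fault = 1;
		if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) == 0) {
			/* First return (0): attempt the access. A fault
			 * takes the do_page_fault() path added above, and
			 * kgdb_fault_longjmp() lands us at the non-zero
			 * return below instead. */
			while (len--)
				*dst++ = *src++;
			kgdb_may_fault = 0;
			return 0;
		}
		kgdb_may_fault = 0;
		return -1;	/* The access faulted; reply with an error packet. */
	}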
+@@ -21,6 +21,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -30,7 +31,6 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+
+@@ -100,17 +100,26 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
++
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
+ port.membase = (void*)ACTING_UART1_IO_BASE;
+ port.irq = ACTING_UART1_INT;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ void __init
+@@ -257,8 +266,4 @@
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+ #endif
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = bubinga_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/ebony.c 2007-12-21 15:36:11.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -226,14 +227,20 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Purge TLB entry added in head_44x.S for early serial access */
+ _tlbie(UART0_IO_BASE);
+ #endif
+@@ -243,14 +250,18 @@
+ port.uartclk = clocks.uart1;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ static void __init
+@@ -327,8 +338,4 @@
+
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = ebony_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/luan.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -283,6 +284,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 0 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
+ port.membase = ioremap64(PPC440SP_UART1_ADDR, 8);
+ port.irq = UART1_INT;
+@@ -292,6 +296,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 1 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+
+ port.membase = ioremap64(PPC440SP_UART2_ADDR, 8);
+ port.irq = UART2_INT;
+@@ -301,6 +308,9 @@
+ if (early_serial_setup(&port) != 0) {
+ printk("Early serial init of port 2 failed\n");
+ }
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(2, &port);
++#endif
+ }
+
+ static void __init
+@@ -360,7 +370,4 @@
+ ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */
+
+ ppc_md.calibrate_decr = luan_calibrate_decr;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = luan_early_serial_map;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/ocotea.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -249,14 +250,20 @@
+ port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ port.line = 0;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &port);
++#endif
+
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ /* Purge TLB entry added in head_44x.S for early serial access */
+ _tlbie(UART0_IO_BASE);
+ #endif
+@@ -266,14 +273,18 @@
+ port.uartclk = clocks.uart1;
+ port.line = 1;
+
+- if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+- }
++#endif
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &port);
++#endif
+ }
+
+ static void __init
+@@ -343,8 +354,5 @@
+
+ ppc_md.nvram_read_val = todc_direct_read_val;
+ ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = ocotea_early_serial_map;
+-#endif
+ ppc_md.init = ocotea_init;
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/4xx/taishan.c 2007-12-21 15:36:11.000000000 -0500
+@@ -310,7 +310,7 @@
+ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 0 failed\n");
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(0, &port);
+
+@@ -326,7 +326,7 @@
+ if (early_serial_setup(&port) != 0)
+ printk("Early serial init of port 1 failed\n");
+
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Configure debug serial access */
+ gen550_init(1, &port);
+ #endif
+@@ -387,9 +387,6 @@
+
+ ppc_md.calibrate_decr = taishan_calibrate_decr;
+
+-#ifdef CONFIG_KGDB
+- ppc_md.early_serial_map = taishan_early_serial_map;
+-#endif
+ ppc_md.init = taishan_init;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c
linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c +--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml300.c 2007-12-21 15:36:11.000000000 -0500 +@@ -16,6 +16,8 @@ + #include + #include + #include ++#include ++ + #include + #include + +@@ -41,9 +43,6 @@ + * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c + * start_kernel init/main.c + * setup_arch arch/ppc/kernel/setup.c +- * #if defined(CONFIG_KGDB) +- * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc +- * #endif + * *ppc_md.setup_arch == ml300_setup_arch this file + * ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c + * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c +--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/4xx/xilinx_ml403.c 2007-12-21 15:36:11.000000000 -0500 +@@ -43,9 +43,6 @@ + * ppc4xx_map_io arch/ppc/syslib/ppc4xx_setup.c + * start_kernel init/main.c + * setup_arch arch/ppc/kernel/setup.c +- * #if defined(CONFIG_KGDB) +- * *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc +- * #endif + * *ppc_md.setup_arch == ml403_setup_arch this file + * ppc4xx_setup_arch arch/ppc/syslib/ppc4xx_setup.c + * ppc4xx_find_bridges arch/ppc/syslib/ppc405_pci.c +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c +--- linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/4xx/yucca.c 2007-12-21 15:36:11.000000000 -0500 +@@ -386,7 +386,4 @@ + ppc_md.get_irq = NULL; /* Set in ppc4xx_pic_init() */ + + ppc_md.calibrate_decr = yucca_calibrate_decr; +-#ifdef CONFIG_KGDB +- ppc_md.early_serial_map = yucca_early_serial_map; +-#endif + } +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c +--- linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/83xx/mpc834x_sys.c 2007-12-21 15:36:11.000000000 -0500 +@@ -42,11 +42,11 @@ + #include + #include + #include +-#include + #include + #include + + #include ++#include + + #ifndef CONFIG_PCI + unsigned long isa_io_base = 0; +@@ -114,7 +114,9 @@ + /* setup PCI host bridges */ + mpc83xx_setup_hose(); + #endif ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) + mpc83xx_early_serial_map(); ++#endif + + /* setup the board related info for the MDIO bus */ + mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC83xx_MDIO); +@@ -334,7 +336,6 @@ + ppc_md.get_rtc_time = NULL; + ppc_md.calibrate_decr = mpc83xx_calibrate_decr; + +- ppc_md.early_serial_map = mpc83xx_early_serial_map; + #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) + ppc_md.progress = gen550_progress; + #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c +--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8540_ads.c 2007-12-21 15:36:11.000000000 -0500 +@@ -43,11 +43,11 @@ + #include + #include + #include +-#include + #include + #include + + #include ++#include + + /* 
************************************************************************ + * +@@ -77,7 +77,7 @@ + mpc85xx_setup_hose(); + #endif + +-#ifdef CONFIG_SERIAL_8250 ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) + mpc85xx_early_serial_map(); + #endif + +@@ -215,9 +215,6 @@ + #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) + ppc_md.progress = gen550_progress; + #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) +- ppc_md.early_serial_map = mpc85xx_early_serial_map; +-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ + + if (ppc_md.progress) + ppc_md.progress("mpc8540ads_init(): exit", 0); +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c +--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc8560_ads.c 2007-12-21 15:36:11.000000000 -0500 +@@ -44,7 +44,6 @@ + #include + #include + #include +-#include + #include + #include + #include +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c +--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/85xx/mpc85xx_cds_common.c 2007-12-21 15:36:11.000000000 -0500 +@@ -47,12 +47,12 @@ + #include + #include + #include +-#include + + #include + #include + #include + #include ++#include + + + #ifndef CONFIG_PCI +@@ -436,7 +436,7 @@ + mpc85xx_setup_hose(); + #endif + +-#ifdef CONFIG_SERIAL_8250 ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) + mpc85xx_early_serial_map(); + #endif + +@@ -590,9 +590,6 @@ + #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG) + ppc_md.progress = gen550_progress; + #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */ +-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB) +- ppc_md.early_serial_map = mpc85xx_early_serial_map; +-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */ + + if (ppc_md.progress) + ppc_md.progress("mpc85xx_cds_init(): exit", 0); +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c +--- linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/85xx/sbc8560.c 2007-12-21 15:36:11.000000000 -0500 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -43,14 +44,13 @@ + #include + #include + #include +-#include + #include + #include + + #include + #include ++#include + +-#ifdef CONFIG_SERIAL_8250 + static void __init + sbc8560_early_serial_map(void) + { +@@ -66,12 +66,16 @@ + uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART0_SIZE); + uart_req.type = PORT_16650; + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) +- gen550_init(0, &uart_req); +-#endif +- ++#ifdef CONFIG_SERIAL_8250 + if (early_serial_setup(&uart_req) != 0) + printk("Early serial init of port 0 failed\n"); ++#endif ++#ifdef CONFIG_SERIAL_TEXT_DEBUG ++ gen550_init(0, &uart_req); ++#endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(0, &uart_req); ++#endif + + /* Assume early_serial_setup() doesn't modify uart_req */ + uart_req.line = 1; +@@ -79,14 +83,17 @@ + uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART1_SIZE); + uart_req.irq = MPC85xx_IRQ_EXT10; + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || 
defined(CONFIG_KGDB)
+- gen550_init(1, &uart_req);
+-#endif
+-
++#ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&uart_req) != 0)
+- printk("Early serial init of port 1 failed\n");
+-}
++ printk("Early serial init of port 1 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(1, &uart_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &uart_req);
++#endif
++}
+
+ /* ************************************************************************
+ *
+@@ -115,9 +122,7 @@
+ /* setup PCI host bridges */
+ mpc85xx_setup_hose();
+ #endif
+-#ifdef CONFIG_SERIAL_8250
+ sbc8560_early_serial_map();
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ /* Invalidate the entry we stole earlier the serial ports
+ * should be properly mapped */
+@@ -224,9 +229,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = sbc8560_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+
+ if (ppc_md.progress)
+ ppc_md.progress("sbc8560_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/85xx/tqm85xx.c 2007-12-21 15:36:11.000000000 -0500
+@@ -46,7 +46,6 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+ #include
+@@ -55,6 +54,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #ifndef CONFIG_PCI
+ unsigned long isa_io_base = 0;
+@@ -121,7 +121,7 @@
+ #endif
+
+ #ifndef CONFIG_MPC8560
+-#if defined(CONFIG_SERIAL_8250)
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ mpc85xx_early_serial_map();
+ #endif
+
+@@ -400,9 +400,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+- ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif /* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ #endif /* CONFIG_MPC8560 */
+
+ if (ppc_md.progress)
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/apus_setup.c linux-2.6.22-591/arch/ppc/platforms/apus_setup.c
+--- linux-2.6.22-570/arch/ppc/platforms/apus_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/apus_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -598,12 +598,6 @@
+ ciab.ddra |= (SER_DTR | SER_RTS); /* outputs */
+ ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR); /* inputs */
+
+-#ifdef CONFIG_KGDB
+- /* turn Rx interrupts on for GDB */
+- amiga_custom.intena = IF_SETCLR | IF_RBF;
+- ser_RTSon();
+-#endif
+-
+ return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/chestnut.c linux-2.6.22-591/arch/ppc/platforms/chestnut.c
+--- linux-2.6.22-570/arch/ppc/platforms/chestnut.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/chestnut.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,9 +34,9 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
++#include
+ #include
+
+ static void __iomem *sram_base; /* Virtual addr of Internal SRAM */
+@@ -492,7 +492,7 @@
+ static void __init
+ chestnut_map_io(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+
io_block_mapping(CHESTNUT_UART_BASE, CHESTNUT_UART_BASE, 0x100000, + _PAGE_IO); + #endif +@@ -566,9 +566,6 @@ + #if defined(CONFIG_SERIAL_TEXT_DEBUG) + ppc_md.progress = gen550_progress; + #endif +-#if defined(CONFIG_KGDB) +- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; +-#endif + + if (ppc_md.progress) + ppc_md.progress("chestnut_init(): exit", 0); +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/ev64260.c linux-2.6.22-591/arch/ppc/platforms/ev64260.c +--- linux-2.6.22-570/arch/ppc/platforms/ev64260.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/ev64260.c 2007-12-21 15:36:11.000000000 -0500 +@@ -330,7 +330,7 @@ + port.iotype = UPIO_MEM; + port.flags = STD_COM_FLAGS; + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(0, &port); + #endif + +@@ -568,7 +568,7 @@ + return; + } + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + static void __init + ev64260_map_io(void) + { +@@ -624,20 +624,12 @@ + ppc_md.setup_io_mappings = ev64260_map_io; + ppc_md.progress = gen550_progress; + #endif +-#if defined(CONFIG_KGDB) +- ppc_md.setup_io_mappings = ev64260_map_io; +- ppc_md.early_serial_map = ev64260_early_serial_map; +-#endif + #elif defined(CONFIG_SERIAL_MPSC_CONSOLE) + #ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.setup_io_mappings = ev64260_map_io; + ppc_md.progress = mv64x60_mpsc_progress; + mv64x60_progress_init(CONFIG_MV64X60_NEW_BASE); + #endif /* CONFIG_SERIAL_TEXT_DEBUG */ +-#ifdef CONFIG_KGDB +- ppc_md.setup_io_mappings = ev64260_map_io; +- ppc_md.early_serial_map = ev64260_early_serial_map; +-#endif /* CONFIG_KGDB */ + + #endif + +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/hdpu.c linux-2.6.22-591/arch/ppc/platforms/hdpu.c +--- linux-2.6.22-570/arch/ppc/platforms/hdpu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/hdpu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -281,25 +281,6 @@ + #if defined(CONFIG_SERIAL_MPSC_CONSOLE) + static void __init hdpu_early_serial_map(void) + { +-#ifdef CONFIG_KGDB +- static char first_time = 1; +- +-#if defined(CONFIG_KGDB_TTYS0) +-#define KGDB_PORT 0 +-#elif defined(CONFIG_KGDB_TTYS1) +-#define KGDB_PORT 1 +-#else +-#error "Invalid kgdb_tty port" +-#endif +- +- if (first_time) { +- gt_early_mpsc_init(KGDB_PORT, +- B9600 | CS8 | CREAD | HUPCL | CLOCAL); +- first_time = 0; +- } +- +- return; +-#endif + } + #endif + +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/lopec.c linux-2.6.22-591/arch/ppc/platforms/lopec.c +--- linux-2.6.22-570/arch/ppc/platforms/lopec.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/lopec.c 2007-12-21 15:36:11.000000000 -0500 +@@ -32,7 +32,8 @@ + #include + #include + #include +-#include ++ ++#include + + /* + * Define all of the IRQ senses and polarities. 
Taken from the +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/pplus.c linux-2.6.22-591/arch/ppc/platforms/pplus.c +--- linux-2.6.22-570/arch/ppc/platforms/pplus.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/pplus.c 2007-12-21 15:36:11.000000000 -0500 +@@ -35,9 +35,9 @@ + #include + #include + #include +-#include + #include + ++#include + #include "pplus.h" + + #undef DUMP_DBATS +@@ -893,9 +893,6 @@ + #ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.progress = gen550_progress; + #endif /* CONFIG_SERIAL_TEXT_DEBUG */ +-#ifdef CONFIG_KGDB +- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; +-#endif + #ifdef CONFIG_SMP + smp_ops = &pplus_smp_ops; + #endif /* CONFIG_SMP */ +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c +--- linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/radstone_ppc7d.c 2007-12-21 15:36:11.000000000 -0500 +@@ -84,7 +84,7 @@ + * Serial port code + *****************************************************************************/ + +-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + static void __init ppc7d_early_serial_map(void) + { + #if defined(CONFIG_SERIAL_MPSC_CONSOLE) +@@ -113,10 +113,10 @@ + if (early_serial_setup(&serial_req) != 0) + printk(KERN_ERR "Early serial init of port 1 failed\n"); + #else +-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX ++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX + #endif + } +-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */ ++#endif /* CONFIG_SERIAL_TEXT_DEBUG */ + + /***************************************************************************** + * Low-level board support code +@@ -1459,18 +1459,16 @@ + PPC7D_CPLD_COMS_COM4_TXEN, PPC7D_CPLD_COMS); + #endif /* CONFIG_SERIAL_MPSC */ + +-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG) +- ppc7d_early_serial_map(); + #ifdef CONFIG_SERIAL_TEXT_DEBUG ++ ppc7d_early_serial_map(); + #if defined(CONFIG_SERIAL_MPSC_CONSOLE) + ppc_md.progress = mv64x60_mpsc_progress; + #elif defined(CONFIG_SERIAL_8250) + ppc_md.progress = gen550_progress; + #else +-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX ++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX + #endif /* CONFIG_SERIAL_8250 */ + #endif /* CONFIG_SERIAL_TEXT_DEBUG */ +-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */ + + /* Enable write access to user flash. This is necessary for + * flash probe. +diff -Nurb linux-2.6.22-570/arch/ppc/platforms/sandpoint.c linux-2.6.22-591/arch/ppc/platforms/sandpoint.c +--- linux-2.6.22-570/arch/ppc/platforms/sandpoint.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/platforms/sandpoint.c 2007-12-21 15:36:11.000000000 -0500 +@@ -95,9 +95,9 @@ + #include + #include + #include +-#include + #include + ++#include + #include "sandpoint.h" + + /* Set non-zero if an X2 Sandpoint detected. 
*/
+@@ -730,9 +730,6 @@
+ ppc_md.nvram_read_val = todc_mc146818_read_val;
+ ppc_md.nvram_write_val = todc_mc146818_write_val;
+
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/spruce.c linux-2.6.22-591/arch/ppc/platforms/spruce.c
+--- linux-2.6.22-570/arch/ppc/platforms/spruce.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/platforms/spruce.c 2007-12-21 15:36:11.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -37,9 +38,9 @@
+ #include
+ #include
+ #include
+-#include
+
+ #include
++#include
+
+ #include "spruce.h"
+
+@@ -178,26 +179,32 @@
+ serial_req.membase = (u_char *)UART0_IO_BASE;
+ serial_req.regshift = 0;
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+- gen550_init(0, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&serial_req) != 0)
+ printk("Early serial init of port 0 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(0, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(0, &serial_req);
++#endif
+
+ /* Assume early_serial_setup() doesn't modify serial_req */
+ serial_req.line = 1;
+ serial_req.irq = UART1_INT;
+ serial_req.membase = (u_char *)UART1_IO_BASE;
+
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+- gen550_init(1, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ if (early_serial_setup(&serial_req) != 0)
+ printk("Early serial init of port 1 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++ gen550_init(1, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++ kgdb8250_add_port(1, &serial_req);
++#endif
+ }
+
+ TODC_ALLOC();
+@@ -316,7 +323,4 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/Makefile linux-2.6.22-591/arch/ppc/syslib/Makefile
+--- linux-2.6.22-570/arch/ppc/syslib/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -77,7 +77,6 @@
+ obj-$(CONFIG_8260_PCI9) += m8260_pci_erratum9.o
+ obj-$(CONFIG_CPM2) += cpm2_common.o cpm2_pic.o
+ ifeq ($(CONFIG_PPC_GEN550),y)
+-obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o
+ obj-$(CONFIG_SERIAL_TEXT_DEBUG) += gen550_dbg.o
+ endif
+ ifeq ($(CONFIG_SERIAL_MPSC_CONSOLE),y)
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550.h linux-2.6.22-591/arch/ppc/syslib/gen550.h
+--- linux-2.6.22-570/arch/ppc/syslib/gen550.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/gen550.h 2007-12-21 15:36:11.000000000 -0500
+@@ -11,4 +11,3 @@
+
+ extern void gen550_progress(char *, unsigned short);
+ extern void gen550_init(int, struct uart_port *);
+-extern void gen550_kgdb_map_scc(void);
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c
+--- linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/gen550_kgdb.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,83 +0,0 @@
+-/*
+- * Generic 16550 kgdb support intended to be useful on a variety
+- * of platforms. To enable this support, it is necessary to set
+- * the CONFIG_GEN550 option.
Any virtual mapping of the serial +- * port(s) to be used can be accomplished by setting +- * ppc_md.early_serial_map to a platform-specific mapping function. +- * +- * Adapted from ppc4xx_kgdb.c. +- * +- * Author: Matt Porter +- * +- * 2002-2004 (c) MontaVista Software, Inc. This file is licensed under +- * the terms of the GNU General Public License version 2. This program +- * is licensed "as is" without any warranty of any kind, whether express +- * or implied. +- */ +- +-#include +-#include +- +-#include +- +-extern unsigned long serial_init(int, void *); +-extern unsigned long serial_getc(unsigned long); +-extern unsigned long serial_putc(unsigned long, unsigned char); +- +-#if defined(CONFIG_KGDB_TTYS0) +-#define KGDB_PORT 0 +-#elif defined(CONFIG_KGDB_TTYS1) +-#define KGDB_PORT 1 +-#elif defined(CONFIG_KGDB_TTYS2) +-#define KGDB_PORT 2 +-#elif defined(CONFIG_KGDB_TTYS3) +-#define KGDB_PORT 3 +-#else +-#error "invalid kgdb_tty port" +-#endif +- +-static volatile unsigned int kgdb_debugport; +- +-void putDebugChar(unsigned char c) +-{ +- if (kgdb_debugport == 0) +- kgdb_debugport = serial_init(KGDB_PORT, NULL); +- +- serial_putc(kgdb_debugport, c); +-} +- +-int getDebugChar(void) +-{ +- if (kgdb_debugport == 0) +- kgdb_debugport = serial_init(KGDB_PORT, NULL); +- +- return(serial_getc(kgdb_debugport)); +-} +- +-void kgdb_interruptible(int enable) +-{ +- return; +-} +- +-void putDebugString(char* str) +-{ +- while (*str != '\0') { +- putDebugChar(*str); +- str++; +- } +- putDebugChar('\r'); +- return; +-} +- +-/* +- * Note: gen550_init() must be called already on the port we are going +- * to use. +- */ +-void +-gen550_kgdb_map_scc(void) +-{ +- printk(KERN_DEBUG "kgdb init\n"); +- if (ppc_md.early_serial_map) +- ppc_md.early_serial_map(); +- kgdb_debugport = serial_init(KGDB_PORT, NULL); +-} +diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c +--- linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/syslib/ibm44x_common.c 2007-12-21 15:36:11.000000000 -0500 +@@ -192,9 +192,6 @@ + #ifdef CONFIG_SERIAL_TEXT_DEBUG + ppc_md.progress = gen550_progress; + #endif /* CONFIG_SERIAL_TEXT_DEBUG */ +-#ifdef CONFIG_KGDB +- ppc_md.kgdb_map_scc = gen550_kgdb_map_scc; +-#endif + + /* + * The Abatron BDI JTAG debugger does not tolerate others +diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60.c linux-2.6.22-591/arch/ppc/syslib/mv64x60.c +--- linux-2.6.22-570/arch/ppc/syslib/mv64x60.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/syslib/mv64x60.c 2007-12-21 15:36:11.000000000 -0500 +@@ -241,6 +241,12 @@ + .end = MV64x60_IRQ_SDMA_0, + .flags = IORESOURCE_IRQ, + }, ++ [4] = { ++ .name = "mpsc 0 irq", ++ .start = MV64x60_IRQ_MPSC_0, ++ .end = MV64x60_IRQ_MPSC_0, ++ .flags = IORESOURCE_IRQ, ++ }, + }; + + static struct platform_device mpsc0_device = { +@@ -298,6 +304,12 @@ + .end = MV64360_IRQ_SDMA_1, + .flags = IORESOURCE_IRQ, + }, ++ [4] = { ++ .name = "mpsc 1 irq", ++ .start = MV64360_IRQ_MPSC_1, ++ .end = MV64360_IRQ_MPSC_1, ++ .flags = IORESOURCE_IRQ, ++ }, + }; + + static struct platform_device mpsc1_device = { +@@ -1432,12 +1444,46 @@ + static int __init + mv64x60_add_pds(void) + { +- return platform_add_devices(mv64x60_pd_devs, +- ARRAY_SIZE(mv64x60_pd_devs)); ++ int i, ret = 0; ++ ++ for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) { ++ if (mv64x60_pd_devs[i]) { ++ ret = platform_device_register(mv64x60_pd_devs[i]); ++ } ++ if (ret) { ++ while (--i >= 0) 
++ platform_device_unregister(mv64x60_pd_devs[i]);
++ break;
++ }
++ }
++ return ret;
+ }
+ arch_initcall(mv64x60_add_pds);
+
+ /*
++ * mv64x60_early_get_pdev_data()
++ *
++ * Get the data associated with a platform device by name and number.
++ */
++struct platform_device * __init
++mv64x60_early_get_pdev_data(const char *name, int id, int remove)
++{
++ int i;
++ struct platform_device *pdev;
++
++ for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) {
++ pdev = mv64x60_pd_devs[i];
++ if (pdev && pdev->id == id &&
++ !strcmp(pdev->name, name)) {
++ if (remove)
++ mv64x60_pd_devs[i] = NULL;
++ return pdev;
++ }
++ }
++ return NULL;
++}
++
++/*
+ *****************************************************************************
+ *
+ * GT64260-Specific Routines
+@@ -1770,6 +1816,11 @@
+ r->start = MV64x60_IRQ_SDMA_0;
+ r->end = MV64x60_IRQ_SDMA_0;
+ }
++ if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 1))
++ != NULL) {
++ r->start = GT64260_IRQ_MPSC_1;
++ r->end = GT64260_IRQ_MPSC_1;
++ }
+ #endif
+ }
+
+@@ -2415,7 +2466,6 @@
+ .attr = {
+ .name = "hs_reg",
+ .mode = S_IRUGO | S_IWUSR,
+- .owner = THIS_MODULE,
+ },
+ .size = VAL_LEN_MAX,
+ .read = mv64xxx_hs_reg_read,
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c
+--- linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/mv64x60_dbg.c 2007-12-21 15:36:11.000000000 -0500
+@@ -34,7 +34,7 @@
+ void
+ mv64x60_progress_init(u32 base)
+ {
+- mv64x60_dbg_bh.v_base = base;
++ mv64x60_dbg_bh.v_base = (void*)base;
+ return;
+ }
+
+@@ -69,53 +69,3 @@
+ return;
+ }
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-
+-
+-#if defined(CONFIG_KGDB)
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#else
+-#error "Invalid kgdb_tty port"
+-#endif
+-
+-void
+-putDebugChar(unsigned char c)
+-{
+- mv64x60_polled_putc(KGDB_PORT, (char)c);
+-}
+-
+-int
+-getDebugChar(void)
+-{
+- unsigned char c;
+-
+- while (!mv64x60_polled_getc(KGDB_PORT, &c));
+- return (int)c;
+-}
+-
+-void
+-putDebugString(char* str)
+-{
+- while (*str != '\0') {
+- putDebugChar(*str);
+- str++;
+- }
+- putDebugChar('\r');
+- return;
+-}
+-
+-void
+-kgdb_interruptible(int enable)
+-{
+-}
+-
+-void
+-kgdb_map_scc(void)
+-{
+- if (ppc_md.early_serial_map)
+- ppc_md.early_serial_map();
+-}
+-#endif /* CONFIG_KGDB */
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ppc4xx_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -32,7 +32,6 @@
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+ #include
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/ppc/syslib/ppc83xx_setup.c 2007-12-21 15:36:11.000000000 -0500
+@@ -30,12 +30,12 @@
+ #include /* for linux/serial_core.h */
+ #include
+ #include
++#include
+
+ #include
+ #include
+ #include
+ #include
+-#include
+ #include
+ #include
+
+@@ -44,6 +44,7 @@
+ #include
+ #include
+ #endif
++#include
+
+ phys_addr_t immrbar;
+
+@@ -87,11 +88,11 @@
+ tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ }
+
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ void __init
+ mpc83xx_early_serial_map(void)
+ {
+-#if
defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + struct uart_port serial_req; + #endif + struct plat_serial8250_port *pdata; +@@ -103,27 +104,40 @@ + pdata[0].mapbase += binfo->bi_immr_base; + pdata[0].membase = ioremap(pdata[0].mapbase, 0x100); + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + memset(&serial_req, 0, sizeof (serial_req)); + serial_req.iotype = UPIO_MEM; + serial_req.mapbase = pdata[0].mapbase; + serial_req.membase = pdata[0].membase; + serial_req.regshift = 0; ++ serial_req.irq = pdata[0].irq; ++ serial_req.flags = pdata[0].flags; ++ serial_req.uartclk = pdata[0].uartclk; + ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(0, &serial_req); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(0, &serial_req); ++#endif ++#endif + + pdata[1].uartclk = binfo->bi_busfreq; + pdata[1].mapbase += binfo->bi_immr_base; + pdata[1].membase = ioremap(pdata[1].mapbase, 0x100); + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + /* Assume gen550_init() doesn't modify serial_req */ + serial_req.mapbase = pdata[1].mapbase; + serial_req.membase = pdata[1].membase; + ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(1, &serial_req); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(1, &serial_req); ++#endif ++#endif + } + #endif + +diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c +--- linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/ppc/syslib/ppc85xx_setup.c 2007-12-21 15:36:11.000000000 -0500 +@@ -19,16 +19,17 @@ + #include /* for linux/serial_core.h */ + #include + #include ++#include + + #include + #include + #include + #include + #include +-#include + #include + + #include ++#include + + extern void abort(void); + +@@ -69,11 +70,11 @@ + mtspr(SPRN_TCR, TCR_DIE); + } + +-#ifdef CONFIG_SERIAL_8250 ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250) + void __init + mpc85xx_early_serial_map(void) + { +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + struct uart_port serial_req; + #endif + struct plat_serial8250_port *pdata; +@@ -85,27 +86,40 @@ + pdata[0].mapbase += binfo->bi_immr_base; + pdata[0].membase = ioremap(pdata[0].mapbase, MPC85xx_UART0_SIZE); + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + memset(&serial_req, 0, sizeof (serial_req)); + serial_req.iotype = UPIO_MEM; + serial_req.mapbase = pdata[0].mapbase; + serial_req.membase = pdata[0].membase; + serial_req.regshift = 0; ++ serial_req.irq = pdata[0].irq; ++ serial_req.flags = pdata[0].flags; ++ serial_req.uartclk = pdata[0].uartclk; + ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(0, &serial_req); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(0, &serial_req); ++#endif ++#endif + + pdata[1].uartclk = binfo->bi_busfreq; + pdata[1].mapbase += binfo->bi_immr_base; + pdata[1].membase = ioremap(pdata[1].mapbase, MPC85xx_UART0_SIZE); + +-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB) ++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250) + /* Assume gen550_init() doesn't modify serial_req */ + serial_req.mapbase = pdata[1].mapbase; + 
serial_req.membase = pdata[1].membase; + ++#ifdef CONFIG_SERIAL_TEXT_DEBUG + gen550_init(1, &serial_req); + #endif ++#ifdef CONFIG_KGDB_8250 ++ kgdb8250_add_port(1, &serial_req); ++#endif ++#endif + } + #endif + +@@ -363,5 +377,3 @@ + return; + } + #endif /* CONFIG_PCI */ +- +- +diff -Nurb linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c +--- linux-2.6.22-570/arch/s390/appldata/appldata_net_sum.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/s390/appldata/appldata_net_sum.c 2007-12-21 15:36:14.000000000 -0500 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include "appldata.h" + +@@ -107,7 +108,7 @@ + tx_dropped = 0; + collisions = 0; + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + stats = dev->get_stats(dev); + rx_packets += stats->rx_packets; + tx_packets += stats->tx_packets; +diff -Nurb linux-2.6.22-570/arch/s390/kernel/ipl.c linux-2.6.22-591/arch/s390/kernel/ipl.c +--- linux-2.6.22-570/arch/s390/kernel/ipl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/s390/kernel/ipl.c 2007-12-21 15:36:11.000000000 -0500 +@@ -314,7 +314,6 @@ + .attr = { + .name = "binary_parameter", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = PAGE_SIZE, + .read = &ipl_parameter_read, +@@ -338,7 +337,6 @@ + .attr = { + .name = "scp_data", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = PAGE_SIZE, + .read = &ipl_scp_data_read, +diff -Nurb linux-2.6.22-570/arch/sh/Kconfig.debug linux-2.6.22-591/arch/sh/Kconfig.debug +--- linux-2.6.22-570/arch/sh/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -78,82 +78,4 @@ + on the VM subsystem for higher order allocations. This option + will also use IRQ stacks to compensate for the reduced stackspace. + +-config SH_KGDB +- bool "Include KGDB kernel debugger" +- select FRAME_POINTER +- select DEBUG_INFO +- help +- Include in-kernel hooks for kgdb, the Linux kernel source level +- debugger. See for more information. +- Unless you are intending to debug the kernel, say N here. +- +-menu "KGDB configuration options" +- depends on SH_KGDB +- +-config MORE_COMPILE_OPTIONS +- bool "Add any additional compile options" +- help +- If you want to add additional CFLAGS to the kernel build, enable this +- option and then enter what you would like to add in the next question. +- Note however that -g is already appended with the selection of KGDB. 
+-
+-config COMPILE_OPTIONS
+- string "Additional compile arguments"
+- depends on MORE_COMPILE_OPTIONS
+-
+-config KGDB_NMI
+- bool "Enter KGDB on NMI"
+- default n
+-
+-config SH_KGDB_CONSOLE
+- bool "Console messages through GDB"
+- depends on !SERIAL_SH_SCI_CONSOLE
+- select SERIAL_CORE_CONSOLE
+- default n
+-
+-config KGDB_SYSRQ
+- bool "Allow SysRq 'G' to enter KGDB"
+- default y
+-
+-comment "Serial port setup"
+-
+-config KGDB_DEFPORT
+- int "Port number (ttySCn)"
+- default "1"
+-
+-config KGDB_DEFBAUD
+- int "Baud rate"
+- default "115200"
+-
+-choice
+- prompt "Parity"
+- depends on SH_KGDB
+- default KGDB_DEFPARITY_N
+-
+-config KGDB_DEFPARITY_N
+- bool "None"
+-
+-config KGDB_DEFPARITY_E
+- bool "Even"
+-
+-config KGDB_DEFPARITY_O
+- bool "Odd"
+-
+-endchoice
+-
+-choice
+- prompt "Data bits"
+- depends on SH_KGDB
+- default KGDB_DEFBITS_8
+-
+-config KGDB_DEFBITS_8
+- bool "8"
+-
+-config KGDB_DEFBITS_7
+- bool "7"
+-
+-endchoice
+-
+-endmenu
+-
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/Makefile linux-2.6.22-591/arch/sh/kernel/Makefile
+--- linux-2.6.22-570/arch/sh/kernel/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500
+@@ -15,7 +15,7 @@
+ obj-$(CONFIG_SMP) += smp.o
+ obj-$(CONFIG_CF_ENABLER) += cf-enabler.o
+ obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
+-obj-$(CONFIG_SH_KGDB) += kgdb_stub.o kgdb_jmp.o
++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
+ obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S
+--- linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/arch/sh/kernel/cpu/sh3/ex.S 2007-12-21 15:36:11.000000000 -0500
+@@ -45,7 +45,7 @@
+ .long exception_error ! reserved_instruction (filled by trap_init) /* 180 */
+ .long exception_error ! illegal_slot_instruction (filled by trap_init) /*1A0*/
+ ENTRY(nmi_slot)
+-#if defined (CONFIG_KGDB_NMI)
++#if defined (CONFIG_KGDB)
+ .long debug_enter /* 1C0 */ ! Allow trap to debugger
+ #else
+ .long exception_none /* 1C0 */ ! Not implemented yet
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/sh/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,32 @@
++#include
++
++ENTRY(kgdb_fault_setjmp)
++ add #(9*4), r4
++ sts.l pr, @-r4
++ mov.l r15, @-r4
++ mov.l r14, @-r4
++ mov.l r13, @-r4
++ mov.l r12, @-r4
++ mov.l r11, @-r4
++ mov.l r10, @-r4
++ mov.l r9, @-r4
++ mov.l r8, @-r4
++ rts
++ mov #0, r0
++
++ENTRY(kgdb_fault_longjmp)
++ mov.l @r4+, r8
++ mov.l @r4+, r9
++ mov.l @r4+, r10
++ mov.l @r4+, r11
++ mov.l @r4+, r12
++ mov.l @r4+, r13
++ mov.l @r4+, r14
++ mov.l @r4+, r15
++ lds.l @r4+, pr
++ mov r5, r0
++ tst r0, r0
++ bf 1f
++ mov #1, r0
++1: rts
++ nop
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb.c linux-2.6.22-591/arch/sh/kernel/kgdb.c
+--- linux-2.6.22-570/arch/sh/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/sh/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,363 @@
++/*
++ * arch/sh/kernel/kgdb.c
++ *
++ * Contains SH-specific low-level support for KGDB.
++ *
++ * Contains extracts from code by Glenn Engel, Jim Kingdon,
++ * David Grothe , Tigran Aivazian ,
++ * Amit S.
Kale , William Gatliff , ++ * Ben Lee, Steve Chamberlain and Benoit Miller , ++ * Henry Bell and Jeremy Siegel ++ * ++ * Maintainer: Tom Rini ++ * ++ * 2004 (c) MontaVista Software, Inc. This file is licensed under ++ * the terms of the GNU General Public License version 2. This program ++ * is licensed "as is" without any warranty of any kind, whether express ++ * or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++extern void per_cpu_trap_init(void); ++extern atomic_t cpu_doing_single_step; ++ ++/* Function pointers for linkage */ ++static struct kgdb_regs trap_registers; ++ ++/* Globals. */ ++char in_nmi; /* Set during NMI to prevent reentry */ ++ ++/* TRA differs sh3/4 */ ++#if defined(CONFIG_CPU_SH3) ++#define TRA 0xffffffd0 ++#elif defined(CONFIG_CPU_SH4) ++#define TRA 0xff000020 ++#endif ++ ++/* Macros for single step instruction identification */ ++#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) ++#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00) ++#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \ ++ (((op) & 0x7f ) << 1)) ++#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00) ++#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00) ++#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000) ++#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ ++ (((op) & 0x7ff) << 1)) ++#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023) ++#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8) ++#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000) ++#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ ++ (((op) & 0x7ff) << 1)) ++#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003) ++#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf) ++#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b) ++#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf) ++#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b) ++#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf) ++#define OPCODE_RTS(op) ((op) == 0xb) ++#define OPCODE_RTE(op) ((op) == 0x2b) ++ ++#define SR_T_BIT_MASK 0x1 ++#define STEP_OPCODE 0xc320 ++#define BIOS_CALL_TRAP 0x3f ++ ++/* Exception codes as per SH-4 core manual */ ++#define ADDRESS_ERROR_LOAD_VEC 7 ++#define ADDRESS_ERROR_STORE_VEC 8 ++#define TRAP_VEC 11 ++#define INVALID_INSN_VEC 12 ++#define INVALID_SLOT_VEC 13 ++#define NMI_VEC 14 ++#define SERIAL_BREAK_VEC 58 ++ ++/* Misc static */ ++static int stepped_address; ++static short stepped_opcode; ++ ++/* Translate SH-3/4 exception numbers to unix-like signal values */ ++static int compute_signal(const int excep_code) ++{ ++ switch (excep_code) { ++ case INVALID_INSN_VEC: ++ case INVALID_SLOT_VEC: ++ return SIGILL; ++ case ADDRESS_ERROR_LOAD_VEC: ++ case ADDRESS_ERROR_STORE_VEC: ++ return SIGSEGV; ++ case SERIAL_BREAK_VEC: ++ case NMI_VEC: ++ return SIGINT; ++ default: ++ /* Act like it was a break/trap. */ ++ return SIGTRAP; ++ } ++} ++ ++/* ++ * Translate the registers of the system into the format that GDB wants. Since ++ * we use a local structure to store things, instead of getting them out ++ * of pt_regs, we can just do a memcpy. 
++ */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++ memcpy(gdb_regs, &trap_registers, sizeof(trap_registers));
++}
++
++/*
++ * On SH we save: r1 (prev->thread.sp) r2 (prev->thread.pc) r4 (prev) r5 (next)
++ * r6 (next->thread.sp) r7 (next->thread.pc)
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++ int count;
++
++ for (count = 0; count < 16; count++)
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = p->thread.pc;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++ *(gdb_regs++) = 0;
++}
++
++/*
++ * Translate the register values that GDB has given us back into the
++ * format of the system. See the comment above about memcpy.
++ */
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++ memcpy(&trap_registers, gdb_regs, sizeof(trap_registers));
++}
++
++/* Calculate the new address for after a step */
++static short *get_step_address(void)
++{
++ short op = *(short *)trap_registers.pc;
++ long addr;
++
++ /* BT */
++ if (OPCODE_BT(op)) {
++ if (trap_registers.sr & SR_T_BIT_MASK)
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 2;
++ }
++
++ /* BTS */
++ else if (OPCODE_BTS(op)) {
++ if (trap_registers.sr & SR_T_BIT_MASK)
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 4; /* Not in delay slot */
++ }
++
++ /* BF */
++ else if (OPCODE_BF(op)) {
++ if (!(trap_registers.sr & SR_T_BIT_MASK))
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 2;
++ }
++
++ /* BFS */
++ else if (OPCODE_BFS(op)) {
++ if (!(trap_registers.sr & SR_T_BIT_MASK))
++ addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++ else
++ addr = trap_registers.pc + 4; /* Not in delay slot */
++ }
++
++ /* BRA */
++ else if (OPCODE_BRA(op))
++ addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op);
++
++ /* BRAF */
++ else if (OPCODE_BRAF(op))
++ addr = trap_registers.pc + 4
++ + trap_registers.regs[OPCODE_BRAF_REG(op)];
++
++ /* BSR */
++ else if (OPCODE_BSR(op))
++ addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op);
++
++ /* BSRF */
++ else if (OPCODE_BSRF(op))
++ addr = trap_registers.pc + 4
++ + trap_registers.regs[OPCODE_BSRF_REG(op)];
++
++ /* JMP */
++ else if (OPCODE_JMP(op))
++ addr = trap_registers.regs[OPCODE_JMP_REG(op)];
++
++ /* JSR */
++ else if (OPCODE_JSR(op))
++ addr = trap_registers.regs[OPCODE_JSR_REG(op)];
++
++ /* RTS */
++ else if (OPCODE_RTS(op))
++ addr = trap_registers.pr;
++
++ /* RTE */
++ else if (OPCODE_RTE(op))
++ addr = trap_registers.regs[15];
++
++ /* Other */
++ else
++ addr = trap_registers.pc + 2;
++
++ kgdb_flush_icache_range(addr, addr + 2);
++ return (short *)addr;
++}
++
++/* The command loop, read and act on requests */
++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
++ char *remcom_in_buffer, char *remcom_out_buffer,
++ struct pt_regs *ign)
++{
++ unsigned long addr;
++ char *ptr = &remcom_in_buffer[1];
++
++ /* Examine first char of buffer to see what we need to do */
++ switch (remcom_in_buffer[0]) {
++ case 'c': /* Continue at address AA..AA (optional) */
++ case 's': /* Step one instruction from AA..AA */
++ /* Try to read optional parameter, PC unchanged if none */
++ if (kgdb_hex2long(&ptr, &addr))
++ trap_registers.pc = addr;
++
++ atomic_set(&cpu_doing_single_step, -1);
++ if (remcom_in_buffer[0] == 's') {
++ /* Replace the instruction immediately after the
++ * current instruction (i.e. next in the expected
++ * flow of control) with a trap instruction, so that
++ * returning will cause only a single instruction to
++ * be executed. Note that this model is slightly
++ * broken for instructions with delay slots
++ * (e.g. B[TF]S, BSR, BRA etc), where both the branch
++ * and the instruction in the delay slot will be
++ * executed.
++ */
++ /* Determine where the target instruction will send
++ * us to */
++ unsigned short *next_addr = get_step_address();
++ stepped_address = (int)next_addr;
++
++ /* Replace it */
++ stepped_opcode = *(short *)next_addr;
++ *next_addr = STEP_OPCODE;
++
++ /* Flush and return */
++ kgdb_flush_icache_range((long)next_addr,
++ (long)next_addr + 2);
++ if (kgdb_contthread)
++ atomic_set(&cpu_doing_single_step,
++ smp_processor_id());
++ }
++ return 0;
++ }
++ return -1;
++}
++
++/*
++ * When an exception has occurred, we are called. We need to set things
++ * up so that we can call kgdb_handle_exception to handle requests from
++ * the remote GDB.
++ */
++void kgdb_exception_handler(struct pt_regs *regs)
++{
++ int excep_code, vbr_val;
++ int count;
++
++ /* Copy kernel regs (from stack) */
++ for (count = 0; count < 16; count++)
++ trap_registers.regs[count] = regs->regs[count];
++ trap_registers.pc = regs->pc;
++ trap_registers.pr = regs->pr;
++ trap_registers.sr = regs->sr;
++ trap_registers.gbr = regs->gbr;
++ trap_registers.mach = regs->mach;
++ trap_registers.macl = regs->macl;
++
++ __asm__ __volatile__("stc vbr, %0":"=r"(vbr_val));
++ trap_registers.vbr = vbr_val;
++
++ /* Get the exception code. */
++ __asm__ __volatile__("stc r2_bank, %0":"=r"(excep_code));
++
++ excep_code >>= 5;
++
++ /* If we got an NMI, and KGDB is not yet initialized, call
++ * breakpoint() to try and initialize everything for us. */
++ if (excep_code == NMI_VEC && !kgdb_initialized) {
++ breakpoint();
++ return;
++ }
++
++ /* TRAP_VEC exception indicates a software trap inserted in place of
++ * code by GDB so back up PC by one instruction, as this instruction
++ * will later be replaced by its original one. Do NOT do this for
++ * trap 0xff, since that indicates a compiled-in breakpoint which
++ * will not be replaced (and we would retake the trap forever) */
++ if (excep_code == TRAP_VEC &&
++ (*(volatile unsigned long *)TRA != (0xff << 2)))
++ trap_registers.pc -= 2;
++
++ /* If we have been single-stepping, put back the old instruction.
++ * We use stepped_address in case we have stopped more than one
++ * instruction away. */
++ if (stepped_opcode != 0) {
++ *(short *)stepped_address = stepped_opcode;
++ kgdb_flush_icache_range(stepped_address, stepped_address + 2);
++ }
++ stepped_opcode = 0;
++
++ /* Call the stub to do the processing. Note that not everything we
++ * need to send back and forth lives in pt_regs.
*/ ++ kgdb_handle_exception(excep_code, compute_signal(excep_code), 0, regs); ++ ++ /* Copy back the (maybe modified) registers */ ++ for (count = 0; count < 16; count++) ++ regs->regs[count] = trap_registers.regs[count]; ++ regs->pc = trap_registers.pc; ++ regs->pr = trap_registers.pr; ++ regs->sr = trap_registers.sr; ++ regs->gbr = trap_registers.gbr; ++ regs->mach = trap_registers.mach; ++ regs->macl = trap_registers.macl; ++ ++ vbr_val = trap_registers.vbr; ++ __asm__ __volatile__("ldc %0, vbr": :"r"(vbr_val)); ++} ++ ++int __init kgdb_arch_init(void) ++{ ++ per_cpu_trap_init(); ++ ++ return 0; ++} ++ ++struct kgdb_arch arch_kgdb_ops = { ++#ifdef CONFIG_CPU_LITTLE_ENDIAN ++ .gdb_bpt_instr = {0xff, 0xc3}, ++#else ++ .gdb_bpt_instr = {0xc3, 0xff}, ++#endif ++}; +diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S +--- linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/kernel/kgdb_jmp.S 1969-12-31 19:00:00.000000000 -0500 +@@ -1,33 +0,0 @@ +-#include +- +-ENTRY(setjmp) +- add #(9*4), r4 +- sts.l pr, @-r4 +- mov.l r15, @-r4 +- mov.l r14, @-r4 +- mov.l r13, @-r4 +- mov.l r12, @-r4 +- mov.l r11, @-r4 +- mov.l r10, @-r4 +- mov.l r9, @-r4 +- mov.l r8, @-r4 +- rts +- mov #0, r0 +- +-ENTRY(longjmp) +- mov.l @r4+, r8 +- mov.l @r4+, r9 +- mov.l @r4+, r10 +- mov.l @r4+, r11 +- mov.l @r4+, r12 +- mov.l @r4+, r13 +- mov.l @r4+, r14 +- mov.l @r4+, r15 +- lds.l @r4+, pr +- mov r5, r0 +- tst r0, r0 +- bf 1f +- mov #1, r0 ! in case val==0 +-1: rts +- nop +- +diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c +--- linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/kernel/kgdb_stub.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1093 +0,0 @@ +-/* +- * May be copied or modified under the terms of the GNU General Public +- * License. See linux/COPYING for more information. +- * +- * Contains extracts from code by Glenn Engel, Jim Kingdon, +- * David Grothe , Tigran Aivazian , +- * Amit S. Kale , William Gatliff , +- * Ben Lee, Steve Chamberlain and Benoit Miller . +- * +- * This version by Henry Bell +- * Minor modifications by Jeremy Siegel +- * +- * Contains low-level support for remote debug using GDB. +- * +- * To enable debugger support, two things need to happen. A call to +- * set_debug_traps() is necessary in order to allow any breakpoints +- * or error conditions to be properly intercepted and reported to gdb. +- * A breakpoint also needs to be generated to begin communication. This +- * is most easily accomplished by a call to breakpoint() which does +- * a trapa if the initialisation phase has been successfully completed. +- * +- * In this case, set_debug_traps() is not used to "take over" exceptions; +- * other kernel code is modified instead to enter the kgdb functions here +- * when appropriate (see entry.S for breakpoint traps and NMI interrupts, +- * see traps.c for kernel error exceptions). 
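+- *
+- * As a rough bring-up sketch (hedged; the exact call sites vary by
+- * board and kernel version), the sequence described above amounts to:
+- *
+- *	kgdb_init();	hooks handle_exception, sets up the serial port
+- *	breakpoint();	issues a trapa so GDB can attach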
+- *
+- * The following gdb commands are supported:
+- *
+- * Command       Function                                Return value
+- *
+- * g             return the value of the CPU registers   hex data or ENN
+- * G             set the value of the CPU registers      OK or ENN
+- *
+- * mAA..AA,LLLL  Read LLLL bytes at address AA..AA       hex data or ENN
+- * MAA..AA,LLLL: Write LLLL bytes at address AA..AA      OK or ENN
+- * XAA..AA,LLLL: Same, but data is binary (not hex)      OK or ENN
+- *
+- * c             Resume at current address               SNN (signal NN)
+- * cAA..AA       Continue at address AA..AA              SNN
+- * CNN;          Resume at current address with signal   SNN
+- * CNN;AA..AA    Resume at address AA..AA with signal    SNN
+- *
+- * s             Step one instruction                    SNN
+- * sAA..AA       Step one instruction from AA..AA        SNN
+- * SNN;          Step one instruction with signal        SNN
+- * SNNAA..AA     Step one instruction from AA..AA w/NN   SNN
+- *
+- * k             kill (Detach GDB)
+- *
+- * d             Toggle debug flag
+- * D             Detach GDB
+- *
+- * Hct           Set thread t for operations,            OK or ENN
+- *               c = 'c' (step, cont), c = 'g' (other
+- *               operations)
+- *
+- * qC            Query current thread ID                 QCpid
+- * qfThreadInfo  Get list of current threads (first)     m
+- * qsThreadInfo  " " " " " (subsequent)
+- * qOffsets      Get section offsets                     Text=x;Data=y;Bss=z
+- *
+- * TXX           Find if thread XX is alive              OK or ENN
+- * ?             What was the last sigval ?              SNN (signal NN)
+- * O             Output to GDB console
+- *
+- * Remote communication protocol.
+- *
+- * A debug packet whose contents are <data> is encapsulated for
+- * transmission in the form:
+- *
+- * $ <data> # CSUM1 CSUM2
+- *
+- * <data> must be ASCII alphanumeric and cannot include characters
+- * '$' or '#'. If <data> starts with two characters followed by
+- * ':', then the existing stubs interpret this as a sequence number.
+- *
+- * CSUM1 and CSUM2 are ascii hex representation of an 8-bit
+- * checksum of <data>, the most significant nibble is sent first.
+- * The hex digits 0-9,a-f are used.
+- *
+- * Receiver responds with:
+- *
+- * + - if CSUM is correct and ready for next packet
+- * - - if CSUM is incorrect
+- *
+- * Responses can be run-length encoded to save space. A '*' means that
+- * the next character is an ASCII encoding giving a repeat count which
+- * stands for that many repetitions of the character preceding the '*'.
+- * The encoding is n+29, yielding a printable character where n >=3
+- * (which is where RLE starts to win). Don't use an n > 126.
+- *
+- * So "0* " means the same as "0000".
+- */
+-
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-#include
+-
+-/* Function pointers for linkage */
+-kgdb_debug_hook_t *kgdb_debug_hook;
+-kgdb_bus_error_hook_t *kgdb_bus_err_hook;
+-
+-int (*kgdb_getchar)(void);
+-void (*kgdb_putchar)(int);
+-
+-static void put_debug_char(int c)
+-{
+- if (!kgdb_putchar)
+- return;
+- (*kgdb_putchar)(c);
+-}
+-static int get_debug_char(void)
+-{
+- if (!kgdb_getchar)
+- return -1;
+- return (*kgdb_getchar)();
+-}
+-
+-/* Num chars in in/out bound buffers, register packets need NUMREGBYTES * 2 */
+-#define BUFMAX 1024
+-#define NUMREGBYTES (MAXREG*4)
+-#define OUTBUFMAX (NUMREGBYTES*2+512)
+-
+-enum regs {
+- R0 = 0, R1, R2, R3, R4, R5, R6, R7,
+- R8, R9, R10, R11, R12, R13, R14, R15,
+- PC, PR, GBR, VBR, MACH, MACL, SR,
+- /* */
+- MAXREG
+-};
+-
+-static unsigned int registers[MAXREG];
+-struct kgdb_regs trap_registers;
+-
+-char kgdb_in_gdb_mode;
+-char in_nmi; /* Set during NMI to prevent reentry */
+-int kgdb_nofault; /* Boolean to ignore bus errs (i.e.
in GDB) */ +-int kgdb_enabled = 1; /* Default to enabled, cmdline can disable */ +- +-/* Exposed for user access */ +-struct task_struct *kgdb_current; +-unsigned int kgdb_g_imask; +-int kgdb_trapa_val; +-int kgdb_excode; +- +-/* Default values for SCI (can override via kernel args in setup.c) */ +-#ifndef CONFIG_KGDB_DEFPORT +-#define CONFIG_KGDB_DEFPORT 1 +-#endif +- +-#ifndef CONFIG_KGDB_DEFBAUD +-#define CONFIG_KGDB_DEFBAUD 115200 +-#endif +- +-#if defined(CONFIG_KGDB_DEFPARITY_E) +-#define CONFIG_KGDB_DEFPARITY 'E' +-#elif defined(CONFIG_KGDB_DEFPARITY_O) +-#define CONFIG_KGDB_DEFPARITY 'O' +-#else /* CONFIG_KGDB_DEFPARITY_N */ +-#define CONFIG_KGDB_DEFPARITY 'N' +-#endif +- +-#ifdef CONFIG_KGDB_DEFBITS_7 +-#define CONFIG_KGDB_DEFBITS '7' +-#else /* CONFIG_KGDB_DEFBITS_8 */ +-#define CONFIG_KGDB_DEFBITS '8' +-#endif +- +-/* SCI/UART settings, used in kgdb_console_setup() */ +-int kgdb_portnum = CONFIG_KGDB_DEFPORT; +-int kgdb_baud = CONFIG_KGDB_DEFBAUD; +-char kgdb_parity = CONFIG_KGDB_DEFPARITY; +-char kgdb_bits = CONFIG_KGDB_DEFBITS; +- +-/* Jump buffer for setjmp/longjmp */ +-static jmp_buf rem_com_env; +- +-/* TRA differs sh3/4 */ +-#if defined(CONFIG_CPU_SH3) +-#define TRA 0xffffffd0 +-#elif defined(CONFIG_CPU_SH4) +-#define TRA 0xff000020 +-#endif +- +-/* Macros for single step instruction identification */ +-#define OPCODE_BT(op) (((op) & 0xff00) == 0x8900) +-#define OPCODE_BF(op) (((op) & 0xff00) == 0x8b00) +-#define OPCODE_BTF_DISP(op) (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \ +- (((op) & 0x7f ) << 1)) +-#define OPCODE_BFS(op) (((op) & 0xff00) == 0x8f00) +-#define OPCODE_BTS(op) (((op) & 0xff00) == 0x8d00) +-#define OPCODE_BRA(op) (((op) & 0xf000) == 0xa000) +-#define OPCODE_BRA_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ +- (((op) & 0x7ff) << 1)) +-#define OPCODE_BRAF(op) (((op) & 0xf0ff) == 0x0023) +-#define OPCODE_BRAF_REG(op) (((op) & 0x0f00) >> 8) +-#define OPCODE_BSR(op) (((op) & 0xf000) == 0xb000) +-#define OPCODE_BSR_DISP(op) (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \ +- (((op) & 0x7ff) << 1)) +-#define OPCODE_BSRF(op) (((op) & 0xf0ff) == 0x0003) +-#define OPCODE_BSRF_REG(op) (((op) >> 8) & 0xf) +-#define OPCODE_JMP(op) (((op) & 0xf0ff) == 0x402b) +-#define OPCODE_JMP_REG(op) (((op) >> 8) & 0xf) +-#define OPCODE_JSR(op) (((op) & 0xf0ff) == 0x400b) +-#define OPCODE_JSR_REG(op) (((op) >> 8) & 0xf) +-#define OPCODE_RTS(op) ((op) == 0xb) +-#define OPCODE_RTE(op) ((op) == 0x2b) +- +-#define SR_T_BIT_MASK 0x1 +-#define STEP_OPCODE 0xc320 +-#define BIOS_CALL_TRAP 0x3f +- +-/* Exception codes as per SH-4 core manual */ +-#define ADDRESS_ERROR_LOAD_VEC 7 +-#define ADDRESS_ERROR_STORE_VEC 8 +-#define TRAP_VEC 11 +-#define INVALID_INSN_VEC 12 +-#define INVALID_SLOT_VEC 13 +-#define NMI_VEC 14 +-#define USER_BREAK_VEC 15 +-#define SERIAL_BREAK_VEC 58 +- +-/* Misc static */ +-static int stepped_address; +-static short stepped_opcode; +-static char in_buffer[BUFMAX]; +-static char out_buffer[OUTBUFMAX]; +- +-static void kgdb_to_gdb(const char *s); +- +-/* Convert ch to hex */ +-static int hex(const char ch) +-{ +- if ((ch >= 'a') && (ch <= 'f')) +- return (ch - 'a' + 10); +- if ((ch >= '0') && (ch <= '9')) +- return (ch - '0'); +- if ((ch >= 'A') && (ch <= 'F')) +- return (ch - 'A' + 10); +- return (-1); +-} +- +-/* Convert the memory pointed to by mem into hex, placing result in buf. 
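+-   (For count == 2 or 4 with aligned mem, the value is first fetched
+-   with a single 16- or 32-bit load, see the s_val/l_val cases below,
+-   presumably so that memory-mapped registers are not read bytewise.)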
+- Returns a pointer to the last char put in buf (null) */ +-static char *mem_to_hex(const char *mem, char *buf, const int count) +-{ +- int i; +- int ch; +- unsigned short s_val; +- unsigned long l_val; +- +- /* Check for 16 or 32 */ +- if (count == 2 && ((long) mem & 1) == 0) { +- s_val = *(unsigned short *) mem; +- mem = (char *) &s_val; +- } else if (count == 4 && ((long) mem & 3) == 0) { +- l_val = *(unsigned long *) mem; +- mem = (char *) &l_val; +- } +- for (i = 0; i < count; i++) { +- ch = *mem++; +- *buf++ = highhex(ch); +- *buf++ = lowhex(ch); +- } +- *buf = 0; +- return (buf); +-} +- +-/* Convert the hex array pointed to by buf into binary, to be placed in mem. +- Return a pointer to the character after the last byte written */ +-static char *hex_to_mem(const char *buf, char *mem, const int count) +-{ +- int i; +- unsigned char ch; +- +- for (i = 0; i < count; i++) { +- ch = hex(*buf++) << 4; +- ch = ch + hex(*buf++); +- *mem++ = ch; +- } +- return (mem); +-} +- +-/* While finding valid hex chars, convert to an integer, then return it */ +-static int hex_to_int(char **ptr, int *int_value) +-{ +- int num_chars = 0; +- int hex_value; +- +- *int_value = 0; +- +- while (**ptr) { +- hex_value = hex(**ptr); +- if (hex_value >= 0) { +- *int_value = (*int_value << 4) | hex_value; +- num_chars++; +- } else +- break; +- (*ptr)++; +- } +- return num_chars; +-} +- +-/* Copy the binary array pointed to by buf into mem. Fix $, #, +- and 0x7d escaped with 0x7d. Return a pointer to the character +- after the last byte written. */ +-static char *ebin_to_mem(const char *buf, char *mem, int count) +-{ +- for (; count > 0; count--, buf++) { +- if (*buf == 0x7d) +- *mem++ = *(++buf) ^ 0x20; +- else +- *mem++ = *buf; +- } +- return mem; +-} +- +-/* Pack a hex byte */ +-static char *pack_hex_byte(char *pkt, int byte) +-{ +- *pkt++ = hexchars[(byte >> 4) & 0xf]; +- *pkt++ = hexchars[(byte & 0xf)]; +- return pkt; +-} +- +-/* Scan for the start char '$', read the packet and check the checksum */ +-static void get_packet(char *buffer, int buflen) +-{ +- unsigned char checksum; +- unsigned char xmitcsum; +- int i; +- int count; +- char ch; +- +- do { +- /* Ignore everything until the start character */ +- while ((ch = get_debug_char()) != '$'); +- +- checksum = 0; +- xmitcsum = -1; +- count = 0; +- +- /* Now, read until a # or end of buffer is found */ +- while (count < (buflen - 1)) { +- ch = get_debug_char(); +- +- if (ch == '#') +- break; +- +- checksum = checksum + ch; +- buffer[count] = ch; +- count = count + 1; +- } +- +- buffer[count] = 0; +- +- /* Continue to read checksum following # */ +- if (ch == '#') { +- xmitcsum = hex(get_debug_char()) << 4; +- xmitcsum += hex(get_debug_char()); +- +- /* Checksum */ +- if (checksum != xmitcsum) +- put_debug_char('-'); /* Failed checksum */ +- else { +- /* Ack successful transfer */ +- put_debug_char('+'); +- +- /* If a sequence char is present, reply +- the sequence ID */ +- if (buffer[2] == ':') { +- put_debug_char(buffer[0]); +- put_debug_char(buffer[1]); +- +- /* Remove sequence chars from buffer */ +- count = strlen(buffer); +- for (i = 3; i <= count; i++) +- buffer[i - 3] = buffer[i]; +- } +- } +- } +- } +- while (checksum != xmitcsum); /* Keep trying while we fail */ +-} +- +-/* Send the packet in the buffer with run-length encoding */ +-static void put_packet(char *buffer) +-{ +- int checksum; +- char *src; +- int runlen; +- int encode; +- +- do { +- src = buffer; +- put_debug_char('$'); +- checksum = 0; +- +- /* Continue while we still have chars 
left */ +- while (*src) { +- /* Check for runs up to 99 chars long */ +- for (runlen = 1; runlen < 99; runlen++) { +- if (src[0] != src[runlen]) +- break; +- } +- +- if (runlen > 3) { +- /* Got a useful amount, send encoding */ +- encode = runlen + ' ' - 4; +- put_debug_char(*src); checksum += *src; +- put_debug_char('*'); checksum += '*'; +- put_debug_char(encode); checksum += encode; +- src += runlen; +- } else { +- /* Otherwise just send the current char */ +- put_debug_char(*src); checksum += *src; +- src += 1; +- } +- } +- +- /* '#' Separator, put high and low components of checksum */ +- put_debug_char('#'); +- put_debug_char(highhex(checksum)); +- put_debug_char(lowhex(checksum)); +- } +- while ((get_debug_char()) != '+'); /* While no ack */ +-} +- +-/* A bus error has occurred - perform a longjmp to return execution and +- allow handling of the error */ +-static void kgdb_handle_bus_error(void) +-{ +- longjmp(rem_com_env, 1); +-} +- +-/* Translate SH-3/4 exception numbers to unix-like signal values */ +-static int compute_signal(const int excep_code) +-{ +- int sigval; +- +- switch (excep_code) { +- +- case INVALID_INSN_VEC: +- case INVALID_SLOT_VEC: +- sigval = SIGILL; +- break; +- case ADDRESS_ERROR_LOAD_VEC: +- case ADDRESS_ERROR_STORE_VEC: +- sigval = SIGSEGV; +- break; +- +- case SERIAL_BREAK_VEC: +- case NMI_VEC: +- sigval = SIGINT; +- break; +- +- case USER_BREAK_VEC: +- case TRAP_VEC: +- sigval = SIGTRAP; +- break; +- +- default: +- sigval = SIGBUS; /* "software generated" */ +- break; +- } +- +- return (sigval); +-} +- +-/* Make a local copy of the registers passed into the handler (bletch) */ +-static void kgdb_regs_to_gdb_regs(const struct kgdb_regs *regs, +- int *gdb_regs) +-{ +- gdb_regs[R0] = regs->regs[R0]; +- gdb_regs[R1] = regs->regs[R1]; +- gdb_regs[R2] = regs->regs[R2]; +- gdb_regs[R3] = regs->regs[R3]; +- gdb_regs[R4] = regs->regs[R4]; +- gdb_regs[R5] = regs->regs[R5]; +- gdb_regs[R6] = regs->regs[R6]; +- gdb_regs[R7] = regs->regs[R7]; +- gdb_regs[R8] = regs->regs[R8]; +- gdb_regs[R9] = regs->regs[R9]; +- gdb_regs[R10] = regs->regs[R10]; +- gdb_regs[R11] = regs->regs[R11]; +- gdb_regs[R12] = regs->regs[R12]; +- gdb_regs[R13] = regs->regs[R13]; +- gdb_regs[R14] = regs->regs[R14]; +- gdb_regs[R15] = regs->regs[R15]; +- gdb_regs[PC] = regs->pc; +- gdb_regs[PR] = regs->pr; +- gdb_regs[GBR] = regs->gbr; +- gdb_regs[MACH] = regs->mach; +- gdb_regs[MACL] = regs->macl; +- gdb_regs[SR] = regs->sr; +- gdb_regs[VBR] = regs->vbr; +-} +- +-/* Copy local gdb registers back to kgdb regs, for later copy to kernel */ +-static void gdb_regs_to_kgdb_regs(const int *gdb_regs, +- struct kgdb_regs *regs) +-{ +- regs->regs[R0] = gdb_regs[R0]; +- regs->regs[R1] = gdb_regs[R1]; +- regs->regs[R2] = gdb_regs[R2]; +- regs->regs[R3] = gdb_regs[R3]; +- regs->regs[R4] = gdb_regs[R4]; +- regs->regs[R5] = gdb_regs[R5]; +- regs->regs[R6] = gdb_regs[R6]; +- regs->regs[R7] = gdb_regs[R7]; +- regs->regs[R8] = gdb_regs[R8]; +- regs->regs[R9] = gdb_regs[R9]; +- regs->regs[R10] = gdb_regs[R10]; +- regs->regs[R11] = gdb_regs[R11]; +- regs->regs[R12] = gdb_regs[R12]; +- regs->regs[R13] = gdb_regs[R13]; +- regs->regs[R14] = gdb_regs[R14]; +- regs->regs[R15] = gdb_regs[R15]; +- regs->pc = gdb_regs[PC]; +- regs->pr = gdb_regs[PR]; +- regs->gbr = gdb_regs[GBR]; +- regs->mach = gdb_regs[MACH]; +- regs->macl = gdb_regs[MACL]; +- regs->sr = gdb_regs[SR]; +- regs->vbr = gdb_regs[VBR]; +-} +- +-/* Calculate the new address for after a step */ +-static short *get_step_address(void) +-{ +- short op = *(short *) 
trap_registers.pc; +- long addr; +- +- /* BT */ +- if (OPCODE_BT(op)) { +- if (trap_registers.sr & SR_T_BIT_MASK) +- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); +- else +- addr = trap_registers.pc + 2; +- } +- +- /* BTS */ +- else if (OPCODE_BTS(op)) { +- if (trap_registers.sr & SR_T_BIT_MASK) +- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); +- else +- addr = trap_registers.pc + 4; /* Not in delay slot */ +- } +- +- /* BF */ +- else if (OPCODE_BF(op)) { +- if (!(trap_registers.sr & SR_T_BIT_MASK)) +- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); +- else +- addr = trap_registers.pc + 2; +- } +- +- /* BFS */ +- else if (OPCODE_BFS(op)) { +- if (!(trap_registers.sr & SR_T_BIT_MASK)) +- addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op); +- else +- addr = trap_registers.pc + 4; /* Not in delay slot */ +- } +- +- /* BRA */ +- else if (OPCODE_BRA(op)) +- addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op); +- +- /* BRAF */ +- else if (OPCODE_BRAF(op)) +- addr = trap_registers.pc + 4 +- + trap_registers.regs[OPCODE_BRAF_REG(op)]; +- +- /* BSR */ +- else if (OPCODE_BSR(op)) +- addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op); +- +- /* BSRF */ +- else if (OPCODE_BSRF(op)) +- addr = trap_registers.pc + 4 +- + trap_registers.regs[OPCODE_BSRF_REG(op)]; +- +- /* JMP */ +- else if (OPCODE_JMP(op)) +- addr = trap_registers.regs[OPCODE_JMP_REG(op)]; +- +- /* JSR */ +- else if (OPCODE_JSR(op)) +- addr = trap_registers.regs[OPCODE_JSR_REG(op)]; +- +- /* RTS */ +- else if (OPCODE_RTS(op)) +- addr = trap_registers.pr; +- +- /* RTE */ +- else if (OPCODE_RTE(op)) +- addr = trap_registers.regs[15]; +- +- /* Other */ +- else +- addr = trap_registers.pc + 2; +- +- kgdb_flush_icache_range(addr, addr + 2); +- return (short *) addr; +-} +- +-/* Set up a single-step. Replace the instruction immediately after the +- current instruction (i.e. next in the expected flow of control) with a +- trap instruction, so that returning will cause only a single instruction +- to be executed. Note that this model is slightly broken for instructions +- with delay slots (e.g. B[TF]S, BSR, BRA etc), where both the branch +- and the instruction in the delay slot will be executed. 
*/ +-static void do_single_step(void) +-{ +- unsigned short *addr = 0; +- +- /* Determine where the target instruction will send us to */ +- addr = get_step_address(); +- stepped_address = (int)addr; +- +- /* Replace it */ +- stepped_opcode = *(short *)addr; +- *addr = STEP_OPCODE; +- +- /* Flush and return */ +- kgdb_flush_icache_range((long) addr, (long) addr + 2); +- return; +-} +- +-/* Undo a single step */ +-static void undo_single_step(void) +-{ +- /* If we have stepped, put back the old instruction */ +- /* Use stepped_address in case we stopped elsewhere */ +- if (stepped_opcode != 0) { +- *(short*)stepped_address = stepped_opcode; +- kgdb_flush_icache_range(stepped_address, stepped_address + 2); +- } +- stepped_opcode = 0; +-} +- +-/* Send a signal message */ +-static void send_signal_msg(const int signum) +-{ +- out_buffer[0] = 'S'; +- out_buffer[1] = highhex(signum); +- out_buffer[2] = lowhex(signum); +- out_buffer[3] = 0; +- put_packet(out_buffer); +-} +- +-/* Reply that all was well */ +-static void send_ok_msg(void) +-{ +- strcpy(out_buffer, "OK"); +- put_packet(out_buffer); +-} +- +-/* Reply that an error occurred */ +-static void send_err_msg(void) +-{ +- strcpy(out_buffer, "E01"); +- put_packet(out_buffer); +-} +- +-/* Empty message indicates unrecognised command */ +-static void send_empty_msg(void) +-{ +- put_packet(""); +-} +- +-/* Read memory due to 'm' message */ +-static void read_mem_msg(void) +-{ +- char *ptr; +- int addr; +- int length; +- +- /* Jmp, disable bus error handler */ +- if (setjmp(rem_com_env) == 0) { +- +- kgdb_nofault = 1; +- +- /* Walk through, have m, */ +- ptr = &in_buffer[1]; +- if (hex_to_int(&ptr, &addr) && (*ptr++ == ',')) +- if (hex_to_int(&ptr, &length)) { +- ptr = 0; +- if (length * 2 > OUTBUFMAX) +- length = OUTBUFMAX / 2; +- mem_to_hex((char *) addr, out_buffer, length); +- } +- if (ptr) +- send_err_msg(); +- else +- put_packet(out_buffer); +- } else +- send_err_msg(); +- +- /* Restore bus error handler */ +- kgdb_nofault = 0; +-} +- +-/* Write memory due to 'M' or 'X' message */ +-static void write_mem_msg(int binary) +-{ +- char *ptr; +- int addr; +- int length; +- +- if (setjmp(rem_com_env) == 0) { +- +- kgdb_nofault = 1; +- +- /* Walk through, have M,: */ +- ptr = &in_buffer[1]; +- if (hex_to_int(&ptr, &addr) && (*ptr++ == ',')) +- if (hex_to_int(&ptr, &length) && (*ptr++ == ':')) { +- if (binary) +- ebin_to_mem(ptr, (char*)addr, length); +- else +- hex_to_mem(ptr, (char*)addr, length); +- kgdb_flush_icache_range(addr, addr + length); +- ptr = 0; +- send_ok_msg(); +- } +- if (ptr) +- send_err_msg(); +- } else +- send_err_msg(); +- +- /* Restore bus error handler */ +- kgdb_nofault = 0; +-} +- +-/* Continue message */ +-static void continue_msg(void) +-{ +- /* Try to read optional parameter, PC unchanged if none */ +- char *ptr = &in_buffer[1]; +- int addr; +- +- if (hex_to_int(&ptr, &addr)) +- trap_registers.pc = addr; +-} +- +-/* Continue message with signal */ +-static void continue_with_sig_msg(void) +-{ +- int signal; +- char *ptr = &in_buffer[1]; +- int addr; +- +- /* Report limitation */ +- kgdb_to_gdb("Cannot force signal in kgdb, continuing anyway.\n"); +- +- /* Signal */ +- hex_to_int(&ptr, &signal); +- if (*ptr == ';') +- ptr++; +- +- /* Optional address */ +- if (hex_to_int(&ptr, &addr)) +- trap_registers.pc = addr; +-} +- +-/* Step message */ +-static void step_msg(void) +-{ +- continue_msg(); +- do_single_step(); +-} +- +-/* Step message with signal */ +-static void step_with_sig_msg(void) +-{ +- continue_with_sig_msg(); 
+- do_single_step(); +-} +- +-/* Send register contents */ +-static void send_regs_msg(void) +-{ +- kgdb_regs_to_gdb_regs(&trap_registers, registers); +- mem_to_hex((char *) registers, out_buffer, NUMREGBYTES); +- put_packet(out_buffer); +-} +- +-/* Set register contents - currently can't set other thread's registers */ +-static void set_regs_msg(void) +-{ +- kgdb_regs_to_gdb_regs(&trap_registers, registers); +- hex_to_mem(&in_buffer[1], (char *) registers, NUMREGBYTES); +- gdb_regs_to_kgdb_regs(registers, &trap_registers); +- send_ok_msg(); +-} +- +-#ifdef CONFIG_SH_KGDB_CONSOLE +-/* +- * Bring up the ports.. +- */ +-static int kgdb_serial_setup(void) +-{ +- extern int kgdb_console_setup(struct console *co, char *options); +- struct console dummy; +- +- kgdb_console_setup(&dummy, 0); +- +- return 0; +-} +-#else +-#define kgdb_serial_setup() 0 +-#endif +- +-/* The command loop, read and act on requests */ +-static void kgdb_command_loop(const int excep_code, const int trapa_value) +-{ +- int sigval; +- +- if (excep_code == NMI_VEC) { +-#ifndef CONFIG_KGDB_NMI +- printk(KERN_NOTICE "KGDB: Ignoring unexpected NMI?\n"); +- return; +-#else /* CONFIG_KGDB_NMI */ +- if (!kgdb_enabled) { +- kgdb_enabled = 1; +- kgdb_init(); +- } +-#endif /* CONFIG_KGDB_NMI */ +- } +- +- /* Ignore if we're disabled */ +- if (!kgdb_enabled) +- return; +- +- /* Enter GDB mode (e.g. after detach) */ +- if (!kgdb_in_gdb_mode) { +- /* Do serial setup, notify user, issue preemptive ack */ +- printk(KERN_NOTICE "KGDB: Waiting for GDB\n"); +- kgdb_in_gdb_mode = 1; +- put_debug_char('+'); +- } +- +- /* Reply to host that an exception has occurred */ +- sigval = compute_signal(excep_code); +- send_signal_msg(sigval); +- +- /* TRAP_VEC exception indicates a software trap inserted in place of +- code by GDB so back up PC by one instruction, as this instruction +- will later be replaced by its original one. Do NOT do this for +- trap 0xff, since that indicates a compiled-in breakpoint which +- will not be replaced (and we would retake the trap forever) */ +- if ((excep_code == TRAP_VEC) && (trapa_value != (0x3c << 2))) +- trap_registers.pc -= 2; +- +- /* Undo any stepping we may have done */ +- undo_single_step(); +- +- while (1) { +- out_buffer[0] = 0; +- get_packet(in_buffer, BUFMAX); +- +- /* Examine first char of buffer to see what we need to do */ +- switch (in_buffer[0]) { +- case '?': /* Send which signal we've received */ +- send_signal_msg(sigval); +- break; +- +- case 'g': /* Return the values of the CPU registers */ +- send_regs_msg(); +- break; +- +- case 'G': /* Set the value of the CPU registers */ +- set_regs_msg(); +- break; +- +- case 'm': /* Read LLLL bytes address AA..AA */ +- read_mem_msg(); +- break; +- +- case 'M': /* Write LLLL bytes address AA..AA, ret OK */ +- write_mem_msg(0); /* 0 = data in hex */ +- break; +- +- case 'X': /* Write LLLL bytes esc bin address AA..AA */ +- if (kgdb_bits == '8') +- write_mem_msg(1); /* 1 = data in binary */ +- else +- send_empty_msg(); +- break; +- +- case 'C': /* Continue, signum included, we ignore it */ +- continue_with_sig_msg(); +- return; +- +- case 'c': /* Continue at address AA..AA (optional) */ +- continue_msg(); +- return; +- +- case 'S': /* Step, signum included, we ignore it */ +- step_with_sig_msg(); +- return; +- +- case 's': /* Step one instruction from AA..AA */ +- step_msg(); +- return; +- +- case 'k': /* 'Kill the program' with a kernel ? 
*/ +- break; +- +- case 'D': /* Detach from program, send reply OK */ +- kgdb_in_gdb_mode = 0; +- send_ok_msg(); +- get_debug_char(); +- return; +- +- default: +- send_empty_msg(); +- break; +- } +- } +-} +- +-/* There has been an exception, most likely a breakpoint. */ +-static void handle_exception(struct pt_regs *regs) +-{ +- int excep_code, vbr_val; +- int count; +- int trapa_value = ctrl_inl(TRA); +- +- /* Copy kernel regs (from stack) */ +- for (count = 0; count < 16; count++) +- trap_registers.regs[count] = regs->regs[count]; +- trap_registers.pc = regs->pc; +- trap_registers.pr = regs->pr; +- trap_registers.sr = regs->sr; +- trap_registers.gbr = regs->gbr; +- trap_registers.mach = regs->mach; +- trap_registers.macl = regs->macl; +- +- asm("stc vbr, %0":"=r"(vbr_val)); +- trap_registers.vbr = vbr_val; +- +- /* Get excode for command loop call, user access */ +- asm("stc r2_bank, %0":"=r"(excep_code)); +- kgdb_excode = excep_code; +- +- /* Other interesting environment items for reference */ +- asm("stc r6_bank, %0":"=r"(kgdb_g_imask)); +- kgdb_current = current; +- kgdb_trapa_val = trapa_value; +- +- /* Act on the exception */ +- kgdb_command_loop(excep_code, trapa_value); +- +- kgdb_current = NULL; +- +- /* Copy back the (maybe modified) registers */ +- for (count = 0; count < 16; count++) +- regs->regs[count] = trap_registers.regs[count]; +- regs->pc = trap_registers.pc; +- regs->pr = trap_registers.pr; +- regs->sr = trap_registers.sr; +- regs->gbr = trap_registers.gbr; +- regs->mach = trap_registers.mach; +- regs->macl = trap_registers.macl; +- +- vbr_val = trap_registers.vbr; +- asm("ldc %0, vbr": :"r"(vbr_val)); +-} +- +-asmlinkage void kgdb_handle_exception(unsigned long r4, unsigned long r5, +- unsigned long r6, unsigned long r7, +- struct pt_regs __regs) +-{ +- struct pt_regs *regs = RELOC_HIDE(&__regs, 0); +- handle_exception(regs); +-} +- +-/* Initialise the KGDB data structures and serial configuration */ +-int kgdb_init(void) +-{ +- if (!kgdb_enabled) +- return 1; +- +- in_nmi = 0; +- kgdb_nofault = 0; +- stepped_opcode = 0; +- kgdb_in_gdb_mode = 0; +- +- if (kgdb_serial_setup() != 0) { +- printk(KERN_NOTICE "KGDB: serial setup error\n"); +- return -1; +- } +- +- /* Init ptr to exception handler */ +- kgdb_debug_hook = handle_exception; +- kgdb_bus_err_hook = kgdb_handle_bus_error; +- +- /* Enter kgdb now if requested, or just report init done */ +- printk(KERN_NOTICE "KGDB: stub is initialized.\n"); +- +- return 0; +-} +- +-/* Make function available for "user messages"; console will use it too. */ +- +-char gdbmsgbuf[BUFMAX]; +-#define MAXOUT ((BUFMAX-2)/2) +- +-static void kgdb_msg_write(const char *s, unsigned count) +-{ +- int i; +- int wcount; +- char *bufptr; +- +- /* 'O'utput */ +- gdbmsgbuf[0] = 'O'; +- +- /* Fill and send buffers... */ +- while (count > 0) { +- bufptr = gdbmsgbuf + 1; +- +- /* Calculate how many this time */ +- wcount = (count > MAXOUT) ? 
MAXOUT : count; +- +- /* Pack in hex chars */ +- for (i = 0; i < wcount; i++) +- bufptr = pack_hex_byte(bufptr, s[i]); +- *bufptr = '\0'; +- +- /* Move up */ +- s += wcount; +- count -= wcount; +- +- /* Write packet */ +- put_packet(gdbmsgbuf); +- } +-} +- +-static void kgdb_to_gdb(const char *s) +-{ +- kgdb_msg_write(s, strlen(s)); +-} +- +-#ifdef CONFIG_SH_KGDB_CONSOLE +-void kgdb_console_write(struct console *co, const char *s, unsigned count) +-{ +- /* Bail if we're not talking to GDB */ +- if (!kgdb_in_gdb_mode) +- return; +- +- kgdb_msg_write(s, count); +-} +-#endif +- +-#ifdef CONFIG_KGDB_SYSRQ +-static void sysrq_handle_gdb(int key, struct tty_struct *tty) +-{ +- printk("Entering GDB stub\n"); +- breakpoint(); +-} +- +-static struct sysrq_key_op sysrq_gdb_op = { +- .handler = sysrq_handle_gdb, +- .help_msg = "Gdb", +- .action_msg = "GDB", +-}; +- +-static int gdb_register_sysrq(void) +-{ +- printk("Registering GDB sysrq handler\n"); +- register_sysrq_key('g', &sysrq_gdb_op); +- return 0; +-} +-module_init(gdb_register_sysrq); +-#endif +diff -Nurb linux-2.6.22-570/arch/sh/kernel/time.c linux-2.6.22-591/arch/sh/kernel/time.c +--- linux-2.6.22-570/arch/sh/kernel/time.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/kernel/time.c 2007-12-21 15:36:11.000000000 -0500 +@@ -259,11 +259,4 @@ + ((sh_hpt_frequency + 500) / 1000) / 1000, + ((sh_hpt_frequency + 500) / 1000) % 1000); + +-#if defined(CONFIG_SH_KGDB) +- /* +- * Set up kgdb as requested. We do it here because the serial +- * init uses the timer vars we just set up for figuring baud. +- */ +- kgdb_init(); +-#endif + } +diff -Nurb linux-2.6.22-570/arch/sh/kernel/traps.c linux-2.6.22-591/arch/sh/kernel/traps.c +--- linux-2.6.22-570/arch/sh/kernel/traps.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 +@@ -25,16 +25,10 @@ + #include + #include + #include ++#include + +-#ifdef CONFIG_SH_KGDB +-#include +-#define CHK_REMOTE_DEBUG(regs) \ +-{ \ +- if (kgdb_debug_hook && !user_mode(regs))\ +- (*kgdb_debug_hook)(regs); \ +-} +-#else +-#define CHK_REMOTE_DEBUG(regs) ++#ifndef CONFIG_KGDB ++#define kgdb_handle_exception(t, s, e, r) + #endif + + #ifdef CONFIG_CPU_SH2 +@@ -91,7 +85,9 @@ + + printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + +- CHK_REMOTE_DEBUG(regs); ++#ifdef CONFIG_KGDB ++ kgdb_handle_exception(1, SIGTRAP, err, regs); ++#endif + print_modules(); + show_regs(regs); + +@@ -700,7 +696,9 @@ + lookup_exception_vector(error_code); + + local_irq_enable(); +- CHK_REMOTE_DEBUG(regs); ++#ifdef CONFIG_KGDB ++ kgdb_handle_exception(1, SIGILL, err, regs); ++#endif + force_sig(SIGILL, tsk); + die_if_no_fixup("reserved instruction", regs, error_code); + } +@@ -771,7 +769,9 @@ + lookup_exception_vector(error_code); + + local_irq_enable(); +- CHK_REMOTE_DEBUG(regs); ++#ifdef CONFIG_KGDB ++ kgdb_handle_exception(1, SIGILL, err, regs); ++#endif + force_sig(SIGILL, tsk); + die_if_no_fixup("illegal slot instruction", regs, error_code); + } +diff -Nurb linux-2.6.22-570/arch/sh/mm/extable.c linux-2.6.22-591/arch/sh/mm/extable.c +--- linux-2.6.22-570/arch/sh/mm/extable.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/mm/extable.c 2007-12-21 15:36:11.000000000 -0500 +@@ -5,6 +5,7 @@ + */ + + #include ++#include + #include + + int fixup_exception(struct pt_regs *regs) +@@ -16,6 +17,12 @@ + regs->pc = fixup->fixup; + return 1; + } ++#ifdef CONFIG_KGDB ++ if (atomic_read(&debugger_active) && kgdb_may_fault) ++ /* Restore our previous 
state. */ ++ kgdb_fault_longjmp(kgdb_fault_jmp_regs); ++ /* Never reached. */ ++#endif + + return 0; + } +diff -Nurb linux-2.6.22-570/arch/sh/mm/fault-nommu.c linux-2.6.22-591/arch/sh/mm/fault-nommu.c +--- linux-2.6.22-570/arch/sh/mm/fault-nommu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sh/mm/fault-nommu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -28,10 +28,6 @@ + #include + #include + +-#if defined(CONFIG_SH_KGDB) +-#include +-#endif +- + extern void die(const char *,struct pt_regs *,long); + + /* +@@ -42,11 +38,6 @@ + asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess, + unsigned long address) + { +-#if defined(CONFIG_SH_KGDB) +- if (kgdb_nofault && kgdb_bus_err_hook) +- kgdb_bus_err_hook(); +-#endif +- + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. +@@ -68,11 +59,6 @@ + asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess, + unsigned long address) + { +-#if defined(CONFIG_SH_KGDB) +- if (kgdb_nofault && kgdb_bus_err_hook) +- kgdb_bus_err_hook(); +-#endif +- + if (address >= TASK_SIZE) + return 1; + +diff -Nurb linux-2.6.22-570/arch/sh/mm/fault.c linux-2.6.22-591/arch/sh/mm/fault.c +--- linux-2.6.22-570/arch/sh/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/sh/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 +@@ -18,7 +18,6 @@ + #include + #include + #include +-#include + + /* + * This routine handles page faults. It determines the address, +@@ -39,11 +38,6 @@ + trace_hardirqs_on(); + local_irq_enable(); + +-#ifdef CONFIG_SH_KGDB +- if (kgdb_nofault && kgdb_bus_err_hook) +- kgdb_bus_err_hook(); +-#endif +- + tsk = current; + mm = tsk->mm; + si_code = SEGV_MAPERR; +@@ -189,6 +183,7 @@ + } + die("Oops", regs, writeaccess); + do_exit(SIGKILL); ++ dump_stack(); + + /* + * We ran out of memory, or some other thing happened to us that made +@@ -252,11 +247,6 @@ + spinlock_t *ptl = NULL; + int ret = 1; + +-#ifdef CONFIG_SH_KGDB +- if (kgdb_nofault && kgdb_bus_err_hook) +- kgdb_bus_err_hook(); +-#endif +- + /* + * We don't take page faults for P1, P2, and parts of P4, these + * are always mapped, whether it be due to legacy behaviour in +diff -Nurb linux-2.6.22-570/arch/sparc64/kernel/power.c linux-2.6.22-591/arch/sparc64/kernel/power.c +--- linux-2.6.22-570/arch/sparc64/kernel/power.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sparc64/kernel/power.c 2007-12-21 15:36:11.000000000 -0500 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -33,14 +34,13 @@ + #include + static void __iomem *power_reg; + +-static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); + static int button_pressed; + + static irqreturn_t power_handler(int irq, void *dev_id) + { + if (button_pressed == 0) { + button_pressed = 1; +- wake_up(&powerd_wait); ++ orderly_poweroff(true); + } + + /* FIXME: Check registers for status... 
*/ +@@ -77,36 +77,6 @@ + EXPORT_SYMBOL(pm_power_off); + + #ifdef CONFIG_PCI +-static int powerd(void *__unused) +-{ +- static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; +- char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; +- DECLARE_WAITQUEUE(wait, current); +- +- daemonize("powerd"); +- +- add_wait_queue(&powerd_wait, &wait); +-again: +- for (;;) { +- set_task_state(current, TASK_INTERRUPTIBLE); +- if (button_pressed) +- break; +- flush_signals(current); +- schedule(); +- } +- __set_current_state(TASK_RUNNING); +- remove_wait_queue(&powerd_wait, &wait); +- +- /* Ok, down we go... */ +- button_pressed = 0; +- if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { +- printk("powerd: shutdown execution failed\n"); +- add_wait_queue(&powerd_wait, &wait); +- goto again; +- } +- return 0; +-} +- + static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) + { + if (irq == PCI_IRQ_NONE) +@@ -130,12 +100,6 @@ + poweroff_method = machine_halt; /* able to use the standard halt */ + + if (has_button_interrupt(irq, op->node)) { +- if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { +- printk("Failed to start power daemon.\n"); +- return 0; +- } +- printk("powerd running.\n"); +- + if (request_irq(irq, + power_handler, 0, "power", NULL) < 0) + printk("power: Error, cannot register IRQ handler.\n"); +diff -Nurb linux-2.6.22-570/arch/sparc64/solaris/ioctl.c linux-2.6.22-591/arch/sparc64/solaris/ioctl.c +--- linux-2.6.22-570/arch/sparc64/solaris/ioctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/sparc64/solaris/ioctl.c 2007-12-21 15:36:14.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + + #include ++#include + + #include + #include +@@ -686,7 +687,7 @@ + int i = 0; + + read_lock_bh(&dev_base_lock); +- for_each_netdev(d) ++ for_each_netdev(&init_net, d) + i++; + read_unlock_bh(&dev_base_lock); + +diff -Nurb linux-2.6.22-570/arch/um/Kconfig.debug linux-2.6.22-591/arch/um/Kconfig.debug +--- linux-2.6.22-570/arch/um/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/um/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -47,4 +47,13 @@ + If you're involved in UML kernel development and want to use gcov, + say Y. If you're unsure, say N. + ++config DEBUG_STACK_USAGE ++ bool "Stack utilization instrumentation" ++ default N ++ help ++ Track the maximum kernel stack usage - this will look at each ++ kernel stack at process exit and log it if it's the deepest ++ stack seen so far. ++ ++ This option will slow down process creation and destruction somewhat. 
+ endmenu +diff -Nurb linux-2.6.22-570/arch/um/defconfig linux-2.6.22-591/arch/um/defconfig +--- linux-2.6.22-570/arch/um/defconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/um/defconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -527,3 +527,4 @@ + # CONFIG_RCU_TORTURE_TEST is not set + # CONFIG_GPROF is not set + # CONFIG_GCOV is not set ++# CONFIG_DEBUG_STACK_USAGE is not set +diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig linux-2.6.22-591/arch/x86_64/Kconfig +--- linux-2.6.22-570/arch/x86_64/Kconfig 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -698,6 +698,8 @@ + + source "arch/x86_64/kernel/cpufreq/Kconfig" + ++source "drivers/cpuidle/Kconfig" ++ + endmenu + + menu "Bus options (PCI etc.)" +diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig.debug linux-2.6.22-591/arch/x86_64/Kconfig.debug +--- linux-2.6.22-570/arch/x86_64/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/x86_64/Kconfig.debug 2007-12-21 15:36:11.000000000 -0500 +@@ -55,7 +55,4 @@ + + This option will slow down process creation somewhat. + +-#config X86_REMOTE_DEBUG +-# bool "kgdb debugging stub" +- + endmenu +diff -Nurb linux-2.6.22-570/arch/x86_64/Makefile linux-2.6.22-591/arch/x86_64/Makefile +--- linux-2.6.22-570/arch/x86_64/Makefile 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -41,7 +41,9 @@ + cflags-y += -mcmodel=kernel + cflags-y += -pipe + cflags-y += -Wno-sign-compare ++ifneq ($(CONFIG_UNWIND_INFO),y) + cflags-y += -fno-asynchronous-unwind-tables ++endif + ifneq ($(CONFIG_DEBUG_INFO),y) + # -fweb shrinks the kernel a bit, but the difference is very small + # it also messes up debugging, so don't use it for now. +diff -Nurb linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S +--- linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/ia32/ia32entry.S 2007-12-21 15:36:11.000000000 -0500 +@@ -731,4 +731,7 @@ + .quad compat_sys_signalfd + .quad compat_sys_timerfd + .quad sys_eventfd ++ .quad sys_revokeat ++ .quad sys_frevoke /* 325 */ ++ .quad sys_fallocate + ia32_syscall_end: +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/Makefile linux-2.6.22-591/arch/x86_64/kernel/Makefile +--- linux-2.6.22-570/arch/x86_64/kernel/Makefile 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/kernel/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -33,10 +33,12 @@ + obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary.o tce.o + obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o + obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o + obj-$(CONFIG_X86_VSMP) += vsmp.o + obj-$(CONFIG_K8_NB) += k8.o + obj-$(CONFIG_AUDIT) += audit.o ++obj-$(CONFIG_STACK_UNWIND) += unwind.o + + obj-$(CONFIG_MODULES) += module.o + obj-$(CONFIG_PCI) += early-quirks.o +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S +--- linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/kernel/kgdb-jmp.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,65 @@ ++/* ++ * arch/x86_64/kernel/kgdb-jmp.S ++ * ++ * Save and restore system registers so that within a limited frame we ++ * may have a fault and "jump back" to a known safe location. 
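++ *
++ * A rough usage sketch, with the kgdb_may_fault / kgdb_fault_jmp_regs
++ * names used elsewhere in this patch (the wrapper below is
++ * illustrative only, not part of the patch):
++ *
++ *	if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) == 0) {
++ *		kgdb_may_fault = 1;
++ *		val = *(unsigned long *)addr;	<- may fault
++ *		kgdb_may_fault = 0;
++ *	} else {
++ *		<- the fault fixup longjmp'ed us back here, rc is 1
++ *	}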
++ *
++ * Author: Tom Rini
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 2001, 2003, 2004 Free Software Foundation, Inc.
++ * Copyright (C) 2005 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include
++
++#define JB_RBX 0
++#define JB_RBP 1
++#define JB_R12 2
++#define JB_R13 3
++#define JB_R14 4
++#define JB_R15 5
++#define JB_RSP 6
++#define JB_PC 7
++
++ .code64
++
++/* This must be called prior to kgdb_fault_longjmp and
++ * kgdb_fault_longjmp must not be called outside of the context of the
++ * last call to kgdb_fault_setjmp.
++ */
++ENTRY(kgdb_fault_setjmp)
++ /* Save registers. */
++ movq %rbx, (JB_RBX*8)(%rdi)
++ movq %rbp, (JB_RBP*8)(%rdi)
++ movq %r12, (JB_R12*8)(%rdi)
++ movq %r13, (JB_R13*8)(%rdi)
++ movq %r14, (JB_R14*8)(%rdi)
++ movq %r15, (JB_R15*8)(%rdi)
++ leaq 8(%rsp), %rdx /* Save SP as it will be after we return. */
++ movq %rdx, (JB_RSP*8)(%rdi)
++ movq (%rsp), %rax /* Save PC we are returning to now. */
++ movq %rax, (JB_PC*8)(%rdi)
++ /* Set return value for setjmp. */
++ mov $0,%eax
++ movq (JB_PC*8)(%rdi),%rdx
++ movq (JB_RSP*8)(%rdi),%rsp
++ jmpq *%rdx
++
++ENTRY(kgdb_fault_longjmp)
++ /* Restore registers. */
++ movq (JB_RBX*8)(%rdi),%rbx
++ movq (JB_RBP*8)(%rdi),%rbp
++ movq (JB_R12*8)(%rdi),%r12
++ movq (JB_R13*8)(%rdi),%r13
++ movq (JB_R14*8)(%rdi),%r14
++ movq (JB_R15*8)(%rdi),%r15
++ /* Set return value for setjmp. */
++ movq (JB_PC*8)(%rdi),%rdx
++ movq (JB_RSP*8)(%rdi),%rsp
++ mov $1,%eax
++ jmpq *%rdx
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb.c linux-2.6.22-591/arch/x86_64/kernel/kgdb.c
+--- linux-2.6.22-570/arch/x86_64/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/arch/x86_64/kernel/kgdb.c 2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,461 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2004 Amit S. Kale
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2002 Andi Kleen, SuSE Labs
++ * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd.
++ * Copyright (C) 2007 Jason Wessel, Wind River Systems, Inc.
++ */
++/****************************************************************************
++ * Contributor: Lake Stevens Instrument Division$
++ * Written by: Glenn Engel $
++ * Updated by: Amit Kale
++ * Modified for 386 by Jim Kingdon, Cygnus Support.
++ * Original kgdb, compatibility with 2.1.xx kernel by
++ * David Grothe
++ * Integrated into 2.2.5 kernel by Tigran Aivazian
++ * X86_64 changes from Andi Kleen's patch merged by Jim Houston
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include /* for linux pt_regs struct */
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++/* Put the error code here just in case the user cares.
*/ ++int gdb_x86_64errcode; ++/* Likewise, the vector number here (since GDB only gets the signal ++ number through the usual means, and that's not very specific). */ ++int gdb_x86_64vector = -1; ++ ++extern atomic_t cpu_doing_single_step; ++ ++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ gdb_regs[_RAX] = regs->rax; ++ gdb_regs[_RBX] = regs->rbx; ++ gdb_regs[_RCX] = regs->rcx; ++ gdb_regs[_RDX] = regs->rdx; ++ gdb_regs[_RSI] = regs->rsi; ++ gdb_regs[_RDI] = regs->rdi; ++ gdb_regs[_RBP] = regs->rbp; ++ gdb_regs[_PS] = regs->eflags; ++ gdb_regs[_PC] = regs->rip; ++ gdb_regs[_R8] = regs->r8; ++ gdb_regs[_R9] = regs->r9; ++ gdb_regs[_R10] = regs->r10; ++ gdb_regs[_R11] = regs->r11; ++ gdb_regs[_R12] = regs->r12; ++ gdb_regs[_R13] = regs->r13; ++ gdb_regs[_R14] = regs->r14; ++ gdb_regs[_R15] = regs->r15; ++ gdb_regs[_RSP] = regs->rsp; ++} ++ ++extern void thread_return(void); ++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) ++{ ++ gdb_regs[_RAX] = 0; ++ gdb_regs[_RBX] = 0; ++ gdb_regs[_RCX] = 0; ++ gdb_regs[_RDX] = 0; ++ gdb_regs[_RSI] = 0; ++ gdb_regs[_RDI] = 0; ++ gdb_regs[_RBP] = *(unsigned long *)p->thread.rsp; ++ gdb_regs[_PS] = *(unsigned long *)(p->thread.rsp + 8); ++ gdb_regs[_PC] = (unsigned long)&thread_return; ++ gdb_regs[_R8] = 0; ++ gdb_regs[_R9] = 0; ++ gdb_regs[_R10] = 0; ++ gdb_regs[_R11] = 0; ++ gdb_regs[_R12] = 0; ++ gdb_regs[_R13] = 0; ++ gdb_regs[_R14] = 0; ++ gdb_regs[_R15] = 0; ++ gdb_regs[_RSP] = p->thread.rsp; ++} ++ ++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs) ++{ ++ regs->rax = gdb_regs[_RAX]; ++ regs->rbx = gdb_regs[_RBX]; ++ regs->rcx = gdb_regs[_RCX]; ++ regs->rdx = gdb_regs[_RDX]; ++ regs->rsi = gdb_regs[_RSI]; ++ regs->rdi = gdb_regs[_RDI]; ++ regs->rbp = gdb_regs[_RBP]; ++ regs->eflags = gdb_regs[_PS]; ++ regs->rip = gdb_regs[_PC]; ++ regs->r8 = gdb_regs[_R8]; ++ regs->r9 = gdb_regs[_R9]; ++ regs->r10 = gdb_regs[_R10]; ++ regs->r11 = gdb_regs[_R11]; ++ regs->r12 = gdb_regs[_R12]; ++ regs->r13 = gdb_regs[_R13]; ++ regs->r14 = gdb_regs[_R14]; ++ regs->r15 = gdb_regs[_R15]; ++#if 0 /* can't change these */ ++ regs->rsp = gdb_regs[_RSP]; ++ regs->ss = gdb_regs[_SS]; ++ regs->fs = gdb_regs[_FS]; ++ regs->gs = gdb_regs[_GS]; ++#endif ++ ++} /* gdb_regs_to_regs */ ++ ++struct hw_breakpoint { ++ unsigned enabled; ++ unsigned type; ++ unsigned len; ++ unsigned long addr; ++} breakinfo[4] = { { ++enabled:0}, { ++enabled:0}, { ++enabled:0}, { ++enabled:0}}; ++ ++static void kgdb_correct_hw_break(void) ++{ ++ int breakno; ++ int correctit; ++ int breakbit; ++ unsigned long dr7; ++ ++ get_debugreg(dr7, 7); ++ correctit = 0; ++ for (breakno = 0; breakno < 3; breakno++) { ++ breakbit = 2 << (breakno << 1); ++ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 |= breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ dr7 |= (((breakinfo[breakno].len << 2) | ++ breakinfo[breakno].type) << 16) << ++ (breakno << 2); ++ switch (breakno) { ++ case 0: ++ set_debugreg(breakinfo[breakno].addr, 0); ++ break; ++ ++ case 1: ++ set_debugreg(breakinfo[breakno].addr, 1); ++ break; ++ ++ case 2: ++ set_debugreg(breakinfo[breakno].addr, 2); ++ break; ++ ++ case 3: ++ set_debugreg(breakinfo[breakno].addr, 3); ++ break; ++ } ++ } else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { ++ correctit = 1; ++ dr7 &= ~breakbit; ++ dr7 &= ~(0xf0000 << (breakno << 2)); ++ } ++ } ++ if (correctit) ++ set_debugreg(dr7, 7); ++} ++ ++static int kgdb_remove_hw_break(unsigned long addr, 
int len, ++ enum kgdb_bptype bptype) ++{ ++ int i, idx = -1; ++ for (i = 0; i < 4; i++) { ++ if (breakinfo[i].addr == addr && breakinfo[i].enabled) { ++ idx = i; ++ break; ++ } ++ } ++ if (idx == -1) ++ return -1; ++ ++ breakinfo[idx].enabled = 0; ++ return 0; ++} ++ ++static void kgdb_remove_all_hw_break(void) ++{ ++ int i; ++ ++ for (i = 0; i < 4; i++) { ++ memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); ++ } ++} ++ ++static int kgdb_set_hw_break(unsigned long addr, int len, ++ enum kgdb_bptype bptype) ++{ ++ int i, idx = -1; ++ for (i = 0; i < 4; i++) { ++ if (!breakinfo[i].enabled) { ++ idx = i; ++ break; ++ } ++ } ++ if (idx == -1) ++ return -1; ++ if (bptype == bp_hardware_breakpoint) { ++ breakinfo[idx].type = 0; ++ breakinfo[idx].len = 0; ++ } else if (bptype == bp_write_watchpoint) { ++ breakinfo[idx].type = 1; ++ if (len == 1 || len == 2 || len == 4) ++ breakinfo[idx].len = len - 1; ++ else ++ return -1; ++ } else if (bptype == bp_access_watchpoint) { ++ breakinfo[idx].type = 3; ++ if (len == 1 || len == 2 || len == 4) ++ breakinfo[idx].len = len - 1; ++ else ++ return -1; ++ } else ++ return -1; ++ breakinfo[idx].enabled = 1; ++ breakinfo[idx].addr = addr; ++ return 0; ++} ++ ++void kgdb_disable_hw_debug(struct pt_regs *regs) ++{ ++ /* Disable hardware debugging while we are in kgdb */ ++ set_debugreg(0UL, 7); ++} ++ ++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code) ++{ ++ /* Master processor is completely in the debugger */ ++ gdb_x86_64vector = e_vector; ++ gdb_x86_64errcode = err_code; ++} ++ ++void kgdb_roundup_cpus(unsigned long flags) ++{ ++ send_IPI_allbutself(APIC_DM_NMI); ++} ++ ++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ++ char *remcomInBuffer, char *remcomOutBuffer, ++ struct pt_regs *linux_regs) ++{ ++ unsigned long addr; ++ unsigned long breakno; ++ char *ptr; ++ int newPC; ++ unsigned long dr6; ++ ++ switch (remcomInBuffer[0]) { ++ case 'c': ++ case 's': ++ /* try to read optional parameter, pc unchanged if no parm */ ++ ptr = &remcomInBuffer[1]; ++ if (kgdb_hex2long(&ptr, &addr)) ++ linux_regs->rip = addr; ++ newPC = linux_regs->rip; ++ ++ /* clear the trace bit */ ++ linux_regs->eflags &= ~TF_MASK; ++ ++ atomic_set(&cpu_doing_single_step, -1); ++ /* set the trace bit if we're stepping */ ++ if (remcomInBuffer[0] == 's') { ++ linux_regs->eflags |= TF_MASK; ++ debugger_step = 1; ++ if (kgdb_contthread) ++ atomic_set(&cpu_doing_single_step, ++ raw_smp_processor_id()); ++ ++ } ++ ++ get_debugreg(dr6, 6); ++ if (!(dr6 & 0x4000)) { ++ for (breakno = 0; breakno < 4; ++breakno) { ++ if (dr6 & (1 << breakno)) { ++ if (breakinfo[breakno].type == 0) { ++ /* Set restore flag */ ++ linux_regs->eflags |= ++ X86_EFLAGS_RF; ++ break; ++ } ++ } ++ } ++ } ++ set_debugreg(0UL, 6); ++ kgdb_correct_hw_break(); ++ ++ return (0); ++ } /* switch */ ++ return -1; ++} ++ ++static struct pt_regs *in_interrupt_stack(unsigned long rsp, int cpu) ++{ ++ struct pt_regs *regs; ++ unsigned long end = (unsigned long)cpu_pda(cpu)->irqstackptr; ++ if (rsp <= end && rsp >= end - IRQSTACKSIZE + 8) { ++ regs = *(((struct pt_regs **)end) - 1); ++ return regs; ++ } ++ return NULL; ++} ++ ++static struct pt_regs *in_exception_stack(unsigned long rsp, int cpu) ++{ ++ int i; ++ struct tss_struct *init_tss = &__get_cpu_var(init_tss); ++ for (i = 0; i < N_EXCEPTION_STACKS; i++) ++ if (rsp >= init_tss[cpu].ist[i] && ++ rsp <= init_tss[cpu].ist[i] + EXCEPTION_STKSZ) { ++ struct pt_regs *r = ++ (void *)init_tss[cpu].ist[i] + EXCEPTION_STKSZ; ++ 
return r - 1;
++ }
++ return NULL;
++}
++
++void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, unsigned threadid)
++{
++ static char intr_desc[] = "Stack at interrupt entrypoint";
++ static char exc_desc[] = "Stack at exception entrypoint";
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ kgdb_mem2hex(intr_desc, buffer, strlen(intr_desc));
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ kgdb_mem2hex(exc_desc, buffer, strlen(exc_desc));
++}
++
++struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, int threadid)
++{
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ return current;
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ return current;
++
++ return NULL;
++}
++
++struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid)
++{
++ struct pt_regs *stregs;
++ int cpu = raw_smp_processor_id();
++
++ if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++ return stregs;
++ else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++ return stregs;
++
++ return NULL;
++}
++
++/* Register KGDB with the die_chain so that we hook into all of the right
++ * spots. */
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++ void *ptr)
++{
++ struct die_args *args = ptr;
++ struct pt_regs *regs = args->regs;
++
++ if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active)
++ && kgdb_may_fault) {
++ kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++ return NOTIFY_STOP;
++ /* CPU roundup? */
++ } else if (atomic_read(&debugger_active) && cmd == DIE_NMI_IPI) {
++ kgdb_nmihook(raw_smp_processor_id(), regs);
++ return NOTIFY_STOP;
++ /* See if KGDB is interested. */
++ } else if (cmd == DIE_DEBUG
++ && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id()
++ && user_mode(regs)) {
++ /* single step exception from kernel space to user space so
++ * eat the exception and continue the process
++ */
++ printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n");
++ kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs);
++ return NOTIFY_STOP;
++ } else if (cmd == DIE_PAGE_FAULT || user_mode(regs) ||
++ cmd == DIE_NMI_IPI || (cmd == DIE_DEBUG &&
++ atomic_read(&debugger_active)))
++ /* Userspace events, normal watchdog event, or spurious
++ * debug exception. Ignore. */
++ return NOTIFY_DONE;
++
++ kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++
++ return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++ .notifier_call = kgdb_notify,
++ .priority = 0x7fffffff, /* we need to be notified first */
++};
++
++int kgdb_arch_init(void)
++{
++ register_die_notifier(&kgdb_notifier);
++ return 0;
++}
++/*
++ * Skip an int3 exception when it occurs after a breakpoint has been
++ * removed. Backtrack eip by 1 since the int3 would have caused it to
++ * increment by 1.
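++ * Concretely: an int3 planted at address A traps with regs->rip == A + 1,
++ * so kgdb_isremovedbreak(regs->rip - 1) identifies A, and rip is rewound
++ * by one byte so the restored original instruction gets re-executed.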
++ */ ++ ++int kgdb_skipexception(int exception, struct pt_regs *regs) ++{ ++ if (exception == 3 && kgdb_isremovedbreak(regs->rip - 1)) { ++ regs->rip -= 1; ++ return 1; ++ } ++ return 0; ++} ++ ++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs) ++{ ++ if (exception == 3) { ++ return instruction_pointer(regs) - 1; ++ } ++ return instruction_pointer(regs); ++} ++ ++struct kgdb_arch arch_kgdb_ops = { ++ .gdb_bpt_instr = {0xcc}, ++ .flags = KGDB_HW_BREAKPOINT, ++ .shadowth = 1, ++ .set_hw_breakpoint = kgdb_set_hw_break, ++ .remove_hw_breakpoint = kgdb_remove_hw_break, ++ .remove_all_hw_break = kgdb_remove_all_hw_break, ++ .correct_hw_break = kgdb_correct_hw_break, ++}; +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/mce.c linux-2.6.22-591/arch/x86_64/kernel/mce.c +--- linux-2.6.22-570/arch/x86_64/kernel/mce.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/x86_64/kernel/mce.c 2007-12-21 15:36:11.000000000 -0500 +@@ -174,7 +174,7 @@ + if (events != atomic_read(&mce_logged) && trigger[0]) { + /* Small race window, but should be harmless. */ + atomic_set(&mce_logged, events); +- call_usermodehelper(trigger, trigger_argv, NULL, -1); ++ call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + } + } + +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/signal.c linux-2.6.22-591/arch/x86_64/kernel/signal.c +--- linux-2.6.22-570/arch/x86_64/kernel/signal.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/x86_64/kernel/signal.c 2007-12-21 15:36:11.000000000 -0500 +@@ -480,7 +480,7 @@ + void signal_fault(struct pt_regs *regs, void __user *frame, char *where) + { + struct task_struct *me = current; +- if (exception_trace) ++ if (show_unhandled_signals && printk_ratelimit()) + printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n", + me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax); + +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/traps.c linux-2.6.22-591/arch/x86_64/kernel/traps.c +--- linux-2.6.22-570/arch/x86_64/kernel/traps.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/kernel/traps.c 2007-12-21 15:36:11.000000000 -0500 +@@ -96,6 +96,11 @@ + } + + int kstack_depth_to_print = 12; ++#ifdef CONFIG_STACK_UNWIND ++static int call_trace = 1; ++#else ++#define call_trace (-1) ++#endif + + #ifdef CONFIG_KALLSYMS + void printk_address(unsigned long address) +@@ -198,6 +203,33 @@ + return NULL; + } + ++struct ops_and_data { ++ struct stacktrace_ops *ops; ++ void *data; ++}; ++ ++static int dump_trace_unwind(struct unwind_frame_info *info, void *context) ++{ ++ struct ops_and_data *oad = (struct ops_and_data *)context; ++ int n = 0; ++ unsigned long sp = UNW_SP(info); ++ ++ if (arch_unw_user_mode(info)) ++ return -1; ++ while (unwind(info) == 0 && UNW_PC(info)) { ++ n++; ++ oad->ops->address(oad->data, UNW_PC(info)); ++ if (arch_unw_user_mode(info)) ++ break; ++ if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1)) ++ && sp > UNW_SP(info)) ++ break; ++ sp = UNW_SP(info); ++ touch_nmi_watchdog(); ++ } ++ return n; ++} ++ + #define MSG(txt) ops->warning(data, txt) + + /* +@@ -225,6 +257,40 @@ + if (!tsk) + tsk = current; + ++ if (call_trace >= 0) { ++ int unw_ret = 0; ++ struct unwind_frame_info info; ++ struct ops_and_data oad = { .ops = ops, .data = data }; ++ ++ if (regs) { ++ if (unwind_init_frame_info(&info, tsk, regs) == 0) ++ unw_ret = dump_trace_unwind(&info, &oad); ++ } else if (tsk == current) ++ unw_ret = unwind_init_running(&info, dump_trace_unwind, ++ &oad); ++ else { ++ if 
(unwind_init_blocked(&info, tsk) == 0) ++ unw_ret = dump_trace_unwind(&info, &oad); ++ } ++ if (unw_ret > 0) { ++ if (call_trace == 1 && !arch_unw_user_mode(&info)) { ++ ops->warning_symbol(data, ++ "DWARF2 unwinder stuck at %s", ++ UNW_PC(&info)); ++ if ((long)UNW_SP(&info) < 0) { ++ MSG("Leftover inexact backtrace:"); ++ stack = (unsigned long *)UNW_SP(&info); ++ if (!stack) ++ goto out; ++ } else ++ MSG("Full inexact backtrace again:"); ++ } else if (call_trace >= 1) ++ goto out; ++ else ++ MSG("Full inexact backtrace again:"); ++ } else ++ MSG("Inexact backtrace:"); ++ } + if (!stack) { + unsigned long dummy; + stack = &dummy; +@@ -308,6 +374,7 @@ + tinfo = task_thread_info(tsk); + HANDLE_STACK (valid_stack_ptr(tinfo, stack)); + #undef HANDLE_STACK ++out: + put_cpu(); + } + EXPORT_SYMBOL(dump_trace); +@@ -585,7 +652,8 @@ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = trapnr; + +- if (exception_trace && unhandled_signal(tsk, signr)) ++ if (show_unhandled_signals && unhandled_signal(tsk, signr) && ++ printk_ratelimit()) + printk(KERN_INFO + "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, str, +@@ -689,7 +757,8 @@ + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 13; + +- if (exception_trace && unhandled_signal(tsk, SIGSEGV)) ++ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && ++ printk_ratelimit()) + printk(KERN_INFO + "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n", + tsk->comm, tsk->pid, tsk->xid, +@@ -1128,3 +1197,21 @@ + return 0; + } + early_param("kstack", kstack_setup); ++ ++#ifdef CONFIG_STACK_UNWIND ++static int __init call_trace_setup(char *s) ++{ ++ if (!s) ++ return -EINVAL; ++ if (strcmp(s, "old") == 0) ++ call_trace = -1; ++ else if (strcmp(s, "both") == 0) ++ call_trace = 0; ++ else if (strcmp(s, "newfallback") == 0) ++ call_trace = 1; ++ else if (strcmp(s, "new") == 0) ++ call_trace = 2; ++ return 0; ++} ++early_param("call_trace", call_trace_setup); ++#endif +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/unwind.S linux-2.6.22-591/arch/x86_64/kernel/unwind.S +--- linux-2.6.22-570/arch/x86_64/kernel/unwind.S 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/kernel/unwind.S 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,38 @@ ++/* Assembler support for dwarf2 unwinder */ ++#include ++#include ++#include ++#include ++#include ++ ++ENTRY(arch_unwind_init_running) ++ CFI_STARTPROC ++ movq %r15, R15(%rdi) ++ movq %r14, R14(%rdi) ++ xchgq %rsi, %rdx ++ movq %r13, R13(%rdi) ++ movq %r12, R12(%rdi) ++ xorl %eax, %eax ++ movq %rbp, RBP(%rdi) ++ movq %rbx, RBX(%rdi) ++ movq (%rsp), %rcx ++ movq %rax, R11(%rdi) ++ movq %rax, R10(%rdi) ++ movq %rax, R9(%rdi) ++ movq %rax, R8(%rdi) ++ movq %rax, RAX(%rdi) ++ movq %rax, RCX(%rdi) ++ movq %rax, RDX(%rdi) ++ movq %rax, RSI(%rdi) ++ movq %rax, RDI(%rdi) ++ movq %rax, ORIG_RAX(%rdi) ++ movq %rcx, RIP(%rdi) ++ leaq 8(%rsp), %rcx ++ movq $__KERNEL_CS, CS(%rdi) ++ movq %rax, EFLAGS(%rdi) ++ movq %rcx, RSP(%rdi) ++ movq $__KERNEL_DS, SS(%rdi) ++ jmpq *%rdx ++ CFI_ENDPROC ++ENDPROC(arch_unwind_init_running) ++ +diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S +--- linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/arch/x86_64/kernel/vmlinux.lds.S 2007-12-21 15:36:11.000000000 -0500 +@@ -219,7 +219,9 @@ + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) ++#ifndef CONFIG_UNWIND_INFO + *(.eh_frame) 
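++/* when CONFIG_UNWIND_INFO is set, .eh_frame is kept so the dwarf2
++ * unwinder wired up above has call-frame data to walk at run time */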
++#endif + } + + STABS_DEBUG +diff -Nurb linux-2.6.22-570/arch/x86_64/mm/fault.c linux-2.6.22-591/arch/x86_64/mm/fault.c +--- linux-2.6.22-570/arch/x86_64/mm/fault.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/mm/fault.c 2007-12-21 15:36:11.000000000 -0500 +@@ -221,16 +221,6 @@ + return 0; + } + +-int unhandled_signal(struct task_struct *tsk, int sig) +-{ +- if (is_init(tsk)) +- return 1; +- if (tsk->ptrace & PT_PTRACED) +- return 0; +- return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || +- (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); +-} +- + static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, + unsigned long error_code) + { +@@ -302,7 +292,7 @@ + } + + int page_fault_trace = 0; +-int exception_trace = 1; ++int show_unhandled_signals = 1; + + /* + * This routine handles page faults. It determines the address, +@@ -534,6 +524,10 @@ + if (is_errata93(regs, address)) + return; + ++ if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs, ++ error_code, 14, SIGSEGV) == NOTIFY_STOP) ++ return; ++ + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. +diff -Nurb linux-2.6.22-570/arch/x86_64/mm/init.c linux-2.6.22-591/arch/x86_64/mm/init.c +--- linux-2.6.22-570/arch/x86_64/mm/init.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/arch/x86_64/mm/init.c 2007-12-21 15:36:11.000000000 -0500 +@@ -697,41 +697,6 @@ + return pfn_valid(pte_pfn(*pte)); + } + +-#ifdef CONFIG_SYSCTL +-#include +- +-extern int exception_trace, page_fault_trace; +- +-static ctl_table debug_table2[] = { +- { +- .ctl_name = 99, +- .procname = "exception-trace", +- .data = &exception_trace, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec +- }, +- {} +-}; +- +-static ctl_table debug_root_table2[] = { +- { +- .ctl_name = CTL_DEBUG, +- .procname = "debug", +- .mode = 0555, +- .child = debug_table2 +- }, +- {} +-}; +- +-static __init int x8664_sysctl_init(void) +-{ +- register_sysctl_table(debug_root_table2); +- return 0; +-} +-__initcall(x8664_sysctl_init); +-#endif +- + /* A pseudo VMA to allow ptrace access for the vsyscall page. This only + covers the 64bit vsyscall page now. 32bit has a real VMA now and does + not need special handling anymore. 
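+ (Concretely: the gate-VMA helpers below hand this pseudo VMA to ptrace
+ and core-dump code so reads of the fixed vsyscall mapping at
+ 0xffffffffff600000 resolve; the address is the x86-64 vsyscall ABI one.)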
*/ +diff -Nurb linux-2.6.22-570/creatinst.sh linux-2.6.22-591/creatinst.sh +--- linux-2.6.22-570/creatinst.sh 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/creatinst.sh 2007-12-23 02:56:35.000000000 -0500 +@@ -0,0 +1,12 @@ ++rm -fR inst ++mkdir inst ++make install INSTALL_PATH=inst ++make modules_install INSTALL_MOD_PATH=inst ++tar cfz inst.tar.gz inst ++scp -i ~/newvici inst.tar.gz root@vici-03:/tmp ++ssh -i ~/newvici root@vici-03 "cd /tmp;tar xvfz inst.tar.gz" ++ssh -i ~/newvici root@vici-03 "wget www/~sapanb/vgup;sh vgup" ++ssh -i ~/newvici root@vici-03 "cp -R /tmp/inst/lib/* /mnt/lib/" ++ssh -i ~/newvici root@vici-03 "rm -fR /tmp/inst/lib; mv /tmp/inst/* /mnt/boot" ++sleep 5 ++ssh -i ~/newvici root@vici-03 reboot +diff -Nurb linux-2.6.22-570/creatinst.sh.orig linux-2.6.22-591/creatinst.sh.orig +--- linux-2.6.22-570/creatinst.sh.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/creatinst.sh.orig 2007-12-22 19:17:36.000000000 -0500 +@@ -0,0 +1,5 @@ ++rm -fR inst ++mkdir inst ++make install INSTALL_PATH=inst ++make modules_install INSTALL_MOD_PATH=inst ++tar cfz inst.tar.gz inst +diff -Nurb linux-2.6.22-570/crypto/Kconfig linux-2.6.22-591/crypto/Kconfig +--- linux-2.6.22-570/crypto/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/crypto/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -1,7 +1,17 @@ + # +-# Cryptographic API Configuration ++# Generic algorithms support ++# ++config XOR_BLOCKS ++ tristate ++ + # ++# async_tx api: hardware offloaded memory transfer/transform support ++# ++source "crypto/async_tx/Kconfig" + ++# ++# Cryptographic API Configuration ++# + menu "Cryptographic options" + + config CRYPTO +diff -Nurb linux-2.6.22-570/crypto/Makefile linux-2.6.22-591/crypto/Makefile +--- linux-2.6.22-570/crypto/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/crypto/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -50,3 +50,9 @@ + obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o + + obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o ++ ++# ++# generic algorithms and the async_tx api ++# ++obj-$(CONFIG_XOR_BLOCKS) += xor.o ++obj-$(CONFIG_ASYNC_CORE) += async_tx/ +diff -Nurb linux-2.6.22-570/crypto/async_tx/Kconfig linux-2.6.22-591/crypto/async_tx/Kconfig +--- linux-2.6.22-570/crypto/async_tx/Kconfig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,16 @@ ++config ASYNC_CORE ++ tristate ++ ++config ASYNC_MEMCPY ++ tristate ++ select ASYNC_CORE ++ ++config ASYNC_XOR ++ tristate ++ select ASYNC_CORE ++ select XOR_BLOCKS ++ ++config ASYNC_MEMSET ++ tristate ++ select ASYNC_CORE ++ +diff -Nurb linux-2.6.22-570/crypto/async_tx/Makefile linux-2.6.22-591/crypto/async_tx/Makefile +--- linux-2.6.22-570/crypto/async_tx/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,4 @@ ++obj-$(CONFIG_ASYNC_CORE) += async_tx.o ++obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o ++obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o ++obj-$(CONFIG_ASYNC_XOR) += async_xor.o +diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memcpy.c linux-2.6.22-591/crypto/async_tx/async_memcpy.c +--- linux-2.6.22-570/crypto/async_tx/async_memcpy.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/async_memcpy.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,131 @@ ++/* ++ * copy offload engine support ++ * ++ * Copyright © 2006, Intel Corporation. 
++ * ++ * Dan Williams ++ * ++ * with architecture considerations by: ++ * Neil Brown ++ * Jeff Garzik ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * async_memcpy - attempt to copy memory with a dma engine. ++ * @dest: destination page ++ * @src: src page ++ * @offset: offset in pages to start transaction ++ * @len: length in bytes ++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, ++ * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST ++ * @depend_tx: memcpy depends on the result of this transaction ++ * @cb_fn: function to call when the memcpy completes ++ * @cb_param: parameter to pass to the callback routine ++ */ ++struct dma_async_tx_descriptor * ++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, ++ unsigned int src_offset, size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY); ++ struct dma_device *device = chan ? chan->device : NULL; ++ int int_en = cb_fn ? 1 : 0; ++ struct dma_async_tx_descriptor *tx = device ? ++ device->device_prep_dma_memcpy(chan, len, ++ int_en) : NULL; ++ ++ if (tx) { /* run the memcpy asynchronously */ ++ dma_addr_t addr; ++ enum dma_data_direction dir; ++ ++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); ++ ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? ++ DMA_NONE : DMA_FROM_DEVICE; ++ ++ addr = dma_map_page(device->dev, dest, dest_offset, len, dir); ++ tx->tx_set_dest(addr, tx, 0); ++ ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
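++ /* ASYNC_TX_ASSUME_COHERENT means the caller vouches the buffer is
++ * already visible to the engine (DMA_NONE skips cache maintenance);
++ * otherwise the source is mapped for memory-to-device traffic: */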
++ DMA_NONE : DMA_TO_DEVICE; ++ ++ addr = dma_map_page(device->dev, src, src_offset, len, dir); ++ tx->tx_set_src(addr, tx, 0); ++ ++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); ++ } else { /* run the memcpy synchronously */ ++ void *dest_buf, *src_buf; ++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); ++ ++ /* wait for any prerequisite operations */ ++ if (depend_tx) { ++ /* if ack is already set then we cannot be sure ++ * we are referring to the correct operation ++ */ ++ BUG_ON(depend_tx->ack); ++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) ++ panic("%s: DMA_ERROR waiting for depend_tx\n", ++ __FUNCTION__); ++ } ++ ++ if (flags & ASYNC_TX_KMAP_DST) ++ dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; ++ else ++ dest_buf = page_address(dest) + dest_offset; ++ ++ if (flags & ASYNC_TX_KMAP_SRC) ++ src_buf = kmap_atomic(src, KM_USER0) + src_offset; ++ else ++ src_buf = page_address(src) + src_offset; ++ ++ memcpy(dest_buf, src_buf, len); ++ ++ if (flags & ASYNC_TX_KMAP_DST) ++ kunmap_atomic(dest_buf, KM_USER0); ++ ++ if (flags & ASYNC_TX_KMAP_SRC) ++ kunmap_atomic(src_buf, KM_USER0); ++ ++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); ++ } ++ ++ return tx; ++} ++EXPORT_SYMBOL_GPL(async_memcpy); ++ ++static int __init async_memcpy_init(void) ++{ ++ return 0; ++} ++ ++static void __exit async_memcpy_exit(void) ++{ ++ do { } while (0); ++} ++ ++module_init(async_memcpy_init); ++module_exit(async_memcpy_exit); ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_DESCRIPTION("asynchronous memcpy api"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memset.c linux-2.6.22-591/crypto/async_tx/async_memset.c +--- linux-2.6.22-570/crypto/async_tx/async_memset.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/async_memset.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,109 @@ ++/* ++ * memory fill offload engine support ++ * ++ * Copyright © 2006, Intel Corporation. ++ * ++ * Dan Williams ++ * ++ * with architecture considerations by: ++ * Neil Brown ++ * Jeff Garzik ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * async_memset - attempt to fill memory with a dma engine. 
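++ * (A typical call, purely illustrative: clearing one page of parity with
++ * async_memset(page, 0, 0, PAGE_SIZE, ASYNC_TX_ACK, NULL, NULL, NULL);)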
++ * @dest: destination page ++ * @val: fill value ++ * @offset: offset in pages to start transaction ++ * @len: length in bytes ++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK ++ * @depend_tx: memset depends on the result of this transaction ++ * @cb_fn: function to call when the memcpy completes ++ * @cb_param: parameter to pass to the callback routine ++ */ ++struct dma_async_tx_descriptor * ++async_memset(struct page *dest, int val, unsigned int offset, ++ size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET); ++ struct dma_device *device = chan ? chan->device : NULL; ++ int int_en = cb_fn ? 1 : 0; ++ struct dma_async_tx_descriptor *tx = device ? ++ device->device_prep_dma_memset(chan, val, len, ++ int_en) : NULL; ++ ++ if (tx) { /* run the memset asynchronously */ ++ dma_addr_t dma_addr; ++ enum dma_data_direction dir; ++ ++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? ++ DMA_NONE : DMA_FROM_DEVICE; ++ ++ dma_addr = dma_map_page(device->dev, dest, offset, len, dir); ++ tx->tx_set_dest(dma_addr, tx, 0); ++ ++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); ++ } else { /* run the memset synchronously */ ++ void *dest_buf; ++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); ++ ++ dest_buf = (void *) (((char *) page_address(dest)) + offset); ++ ++ /* wait for any prerequisite operations */ ++ if (depend_tx) { ++ /* if ack is already set then we cannot be sure ++ * we are referring to the correct operation ++ */ ++ BUG_ON(depend_tx->ack); ++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) ++ panic("%s: DMA_ERROR waiting for depend_tx\n", ++ __FUNCTION__); ++ } ++ ++ memset(dest_buf, val, len); ++ ++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); ++ } ++ ++ return tx; ++} ++EXPORT_SYMBOL_GPL(async_memset); ++ ++static int __init async_memset_init(void) ++{ ++ return 0; ++} ++ ++static void __exit async_memset_exit(void) ++{ ++ do { } while (0); ++} ++ ++module_init(async_memset_init); ++module_exit(async_memset_exit); ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_DESCRIPTION("asynchronous memset api"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/crypto/async_tx/async_tx.c linux-2.6.22-591/crypto/async_tx/async_tx.c +--- linux-2.6.22-570/crypto/async_tx/async_tx.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/async_tx.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,497 @@ ++/* ++ * core routines for the asynchronous memory transfer/transform api ++ * ++ * Copyright © 2006, Intel Corporation. ++ * ++ * Dan Williams ++ * ++ * with architecture considerations by: ++ * Neil Brown ++ * Jeff Garzik ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ */ ++#include ++#include ++ ++#ifdef CONFIG_DMA_ENGINE ++static enum dma_state_client ++dma_channel_add_remove(struct dma_client *client, ++ struct dma_chan *chan, enum dma_state state); ++ ++static struct dma_client async_tx_dma = { ++ .event_callback = dma_channel_add_remove, ++ /* .cap_mask == 0 defaults to all channels */ ++}; ++ ++/** ++ * dma_cap_mask_all - enable iteration over all operation types ++ */ ++static dma_cap_mask_t dma_cap_mask_all; ++ ++/** ++ * chan_ref_percpu - tracks channel allocations per core/operation ++ */ ++struct chan_ref_percpu { ++ struct dma_chan_ref *ref; ++}; ++ ++static int channel_table_initialized; ++static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END]; ++ ++/** ++ * async_tx_lock - protect modification of async_tx_master_list and serialize ++ * rebalance operations ++ */ ++static spinlock_t async_tx_lock; ++ ++static struct list_head ++async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list); ++ ++/* async_tx_issue_pending_all - start all transactions on all channels */ ++void async_tx_issue_pending_all(void) ++{ ++ struct dma_chan_ref *ref; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(ref, &async_tx_master_list, node) ++ ref->chan->device->device_issue_pending(ref->chan); ++ rcu_read_unlock(); ++} ++EXPORT_SYMBOL_GPL(async_tx_issue_pending_all); ++ ++/* dma_wait_for_async_tx - spin wait for a transaction to complete ++ * @tx: transaction to wait on ++ */ ++enum dma_status ++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) ++{ ++ enum dma_status status; ++ struct dma_async_tx_descriptor *iter; ++ ++ if (!tx) ++ return DMA_SUCCESS; ++ ++ /* poll through the dependency chain, return when tx is complete */ ++ do { ++ iter = tx; ++ while (iter->cookie == -EBUSY) ++ iter = iter->parent; ++ ++ status = dma_sync_wait(iter->chan, iter->cookie); ++ } while (status == DMA_IN_PROGRESS || (iter != tx)); ++ ++ return status; ++} ++EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); ++ ++/* async_tx_run_dependencies - helper routine for dma drivers to process ++ * (start) dependent operations on their target channel ++ * @tx: transaction with dependencies ++ */ ++void ++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx) ++{ ++ struct dma_async_tx_descriptor *dep_tx, *_dep_tx; ++ struct dma_device *dev; ++ struct dma_chan *chan; ++ ++ list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list, ++ depend_node) { ++ chan = dep_tx->chan; ++ dev = chan->device; ++ /* we can't depend on ourselves */ ++ BUG_ON(chan == tx->chan); ++ list_del(&dep_tx->depend_node); ++ tx->tx_submit(dep_tx); ++ ++ /* we need to poke the engine as client code does not ++ * know about dependency submission events ++ */ ++ dev->device_issue_pending(chan); ++ } ++} ++EXPORT_SYMBOL_GPL(async_tx_run_dependencies); ++ ++static void ++free_dma_chan_ref(struct rcu_head *rcu) ++{ ++ struct dma_chan_ref *ref; ++ ref = container_of(rcu, struct dma_chan_ref, rcu); ++ kfree(ref); ++} ++ ++static void ++init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan) ++{ ++ INIT_LIST_HEAD(&ref->node); ++ INIT_RCU_HEAD(&ref->rcu); ++ ref->chan = chan; ++ atomic_set(&ref->count, 0); ++} ++ ++/** ++ * get_chan_ref_by_cap - returns the nth channel of the given capability ++ * defaults to returning the channel with the desired capability and the ++ * lowest reference count if the index cannot be satisfied ++ * @cap: capability to match ++ * @index: nth channel desired, passing -1 has the effect of forcing the ++ * default return value ++ */ ++static struct dma_chan_ref *
++get_chan_ref_by_cap(enum dma_transaction_type cap, int index) ++{ ++ struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(ref, &async_tx_master_list, node) ++ if (dma_has_cap(cap, ref->chan->device->cap_mask)) { ++ if (!min_ref) ++ min_ref = ref; ++ else if (atomic_read(&ref->count) < ++ atomic_read(&min_ref->count)) ++ min_ref = ref; ++ ++ if (index-- == 0) { ++ ret_ref = ref; ++ break; ++ } ++ } ++ rcu_read_unlock(); ++ ++ if (!ret_ref) ++ ret_ref = min_ref; ++ ++ if (ret_ref) ++ atomic_inc(&ret_ref->count); ++ ++ return ret_ref; ++} ++ ++/** ++ * async_tx_rebalance - redistribute the available channels, optimize ++ * for cpu isolation in the SMP case, and operation isolation in the ++ * uniprocessor case ++ */ ++static void async_tx_rebalance(void) ++{ ++ int cpu, cap, cpu_idx = 0; ++ unsigned long flags; ++ ++ if (!channel_table_initialized) ++ return; ++ ++ spin_lock_irqsave(&async_tx_lock, flags); ++ ++ /* undo the last distribution */ ++ for_each_dma_cap_mask(cap, dma_cap_mask_all) ++ for_each_possible_cpu(cpu) { ++ struct dma_chan_ref *ref = ++ per_cpu_ptr(channel_table[cap], cpu)->ref; ++ if (ref) { ++ atomic_set(&ref->count, 0); ++ per_cpu_ptr(channel_table[cap], cpu)->ref = ++ NULL; ++ } ++ } ++ ++ for_each_dma_cap_mask(cap, dma_cap_mask_all) ++ for_each_online_cpu(cpu) { ++ struct dma_chan_ref *new; ++ if (NR_CPUS > 1) ++ new = get_chan_ref_by_cap(cap, cpu_idx++); ++ else ++ new = get_chan_ref_by_cap(cap, -1); ++ ++ per_cpu_ptr(channel_table[cap], cpu)->ref = new; ++ } ++ ++ spin_unlock_irqrestore(&async_tx_lock, flags); ++} ++ ++static enum dma_state_client ++dma_channel_add_remove(struct dma_client *client, ++ struct dma_chan *chan, enum dma_state state) ++{ ++ unsigned long found, flags; ++ struct dma_chan_ref *master_ref, *ref; ++ enum dma_state_client ack = DMA_DUP; /* default: take no action */ ++ ++ switch (state) { ++ case DMA_RESOURCE_AVAILABLE: ++ found = 0; ++ rcu_read_lock(); ++ list_for_each_entry_rcu(ref, &async_tx_master_list, node) ++ if (ref->chan == chan) { ++ found = 1; ++ break; ++ } ++ rcu_read_unlock(); ++ ++ pr_debug("async_tx: dma resource available [%s]\n", ++ found ? "old" : "new"); ++ ++ if (!found) ++ ack = DMA_ACK; ++ else ++ break; ++ ++ /* add the channel to the generic management list */ ++ master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL); ++ if (master_ref) { ++ /* keep a reference until async_tx is unloaded */ ++ dma_chan_get(chan); ++ init_dma_chan_ref(master_ref, chan); ++ spin_lock_irqsave(&async_tx_lock, flags); ++ list_add_tail_rcu(&master_ref->node, ++ &async_tx_master_list); ++ spin_unlock_irqrestore(&async_tx_lock, ++ flags); ++ } else { ++ printk(KERN_WARNING "async_tx: unable to create" ++ " new master entry in response to" ++ " a DMA_RESOURCE_ADDED event" ++ " (-ENOMEM)\n"); ++ return 0; ++ } ++ ++ async_tx_rebalance(); ++ break; ++ case DMA_RESOURCE_REMOVED: ++ found = 0; ++ spin_lock_irqsave(&async_tx_lock, flags); ++ list_for_each_entry_rcu(ref, &async_tx_master_list, node) ++ if (ref->chan == chan) { ++ /* permit backing devices to go away */ ++ dma_chan_put(ref->chan); ++ list_del_rcu(&ref->node); ++ call_rcu(&ref->rcu, free_dma_chan_ref); ++ found = 1; ++ break; ++ } ++ spin_unlock_irqrestore(&async_tx_lock, flags); ++ ++ pr_debug("async_tx: dma resource removed [%s]\n", ++ found ?
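++ /* "ours" means the channel was on async_tx_master_list,
++ * so its removal is acked and a rebalance follows: */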
"ours" : "not ours"); ++ ++ if (found) ++ ack = DMA_ACK; ++ else ++ break; ++ ++ async_tx_rebalance(); ++ break; ++ case DMA_RESOURCE_SUSPEND: ++ case DMA_RESOURCE_RESUME: ++ printk(KERN_WARNING "async_tx: does not support dma channel" ++ " suspend/resume\n"); ++ break; ++ default: ++ BUG(); ++ } ++ ++ return ack; ++} ++ ++static int __init ++async_tx_init(void) ++{ ++ enum dma_transaction_type cap; ++ ++ spin_lock_init(&async_tx_lock); ++ bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); ++ ++ /* an interrupt will never be an explicit operation type. ++ * clearing this bit prevents allocation to a slot in 'channel_table' ++ */ ++ clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); ++ ++ for_each_dma_cap_mask(cap, dma_cap_mask_all) { ++ channel_table[cap] = alloc_percpu(struct chan_ref_percpu); ++ if (!channel_table[cap]) ++ goto err; ++ } ++ ++ channel_table_initialized = 1; ++ dma_async_client_register(&async_tx_dma); ++ dma_async_client_chan_request(&async_tx_dma); ++ ++ printk(KERN_INFO "async_tx: api initialized (async)\n"); ++ ++ return 0; ++err: ++ printk(KERN_ERR "async_tx: initialization failure\n"); ++ ++ while (--cap >= 0) ++ free_percpu(channel_table[cap]); ++ ++ return 1; ++} ++ ++static void __exit async_tx_exit(void) ++{ ++ enum dma_transaction_type cap; ++ ++ channel_table_initialized = 0; ++ ++ for_each_dma_cap_mask(cap, dma_cap_mask_all) ++ if (channel_table[cap]) ++ free_percpu(channel_table[cap]); ++ ++ dma_async_client_unregister(&async_tx_dma); ++} ++ ++/** ++ * async_tx_find_channel - find a channel to carry out the operation or let ++ * the transaction execute synchronously ++ * @depend_tx: transaction dependency ++ * @tx_type: transaction type ++ */ ++struct dma_chan * ++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, ++ enum dma_transaction_type tx_type) ++{ ++ /* see if we can keep the chain on one channel */ ++ if (depend_tx && ++ dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) ++ return depend_tx->chan; ++ else if (likely(channel_table_initialized)) { ++ struct dma_chan_ref *ref; ++ int cpu = get_cpu(); ++ ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref; ++ put_cpu(); ++ return ref ? 
ref->chan : NULL; ++ } else ++ return NULL; ++} ++EXPORT_SYMBOL_GPL(async_tx_find_channel); ++#else ++static int __init async_tx_init(void) ++{ ++ printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); ++ return 0; ++} ++ ++static void __exit async_tx_exit(void) ++{ ++ do { } while (0); ++} ++#endif ++ ++void ++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, ++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ tx->callback = cb_fn; ++ tx->callback_param = cb_param; ++ ++ /* set this new tx to run after depend_tx if: ++ * 1/ a dependency exists (depend_tx is !NULL) ++ * 2/ the tx can not be submitted to the current channel ++ */ ++ if (depend_tx && depend_tx->chan != chan) { ++ /* if ack is already set then we cannot be sure ++ * we are referring to the correct operation ++ */ ++ BUG_ON(depend_tx->ack); ++ ++ tx->parent = depend_tx; ++ spin_lock_bh(&depend_tx->lock); ++ list_add_tail(&tx->depend_node, &depend_tx->depend_list); ++ if (depend_tx->cookie == 0) { ++ struct dma_chan *dep_chan = depend_tx->chan; ++ struct dma_device *dep_dev = dep_chan->device; ++ dep_dev->device_dependency_added(dep_chan); ++ } ++ spin_unlock_bh(&depend_tx->lock); ++ ++ /* schedule an interrupt to trigger the channel switch */ ++ async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL); ++ } else { ++ tx->parent = NULL; ++ tx->tx_submit(tx); ++ } ++ ++ if (flags & ASYNC_TX_ACK) ++ async_tx_ack(tx); ++ ++ if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) ++ async_tx_ack(depend_tx); ++} ++EXPORT_SYMBOL_GPL(async_tx_submit); ++ ++/** ++ * async_trigger_callback - schedules the callback function to be run after ++ * any dependent operations have been completed. ++ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK ++ * @depend_tx: 'callback' requires the completion of this transaction ++ * @cb_fn: function to call after depend_tx completes ++ * @cb_param: parameter to pass to the callback routine ++ */ ++struct dma_async_tx_descriptor * ++async_trigger_callback(enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ struct dma_chan *chan; ++ struct dma_device *device; ++ struct dma_async_tx_descriptor *tx; ++ ++ if (depend_tx) { ++ chan = depend_tx->chan; ++ device = chan->device; ++ ++ /* see if we can schedule an interrupt ++ * otherwise poll for completion ++ */ ++ if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask)) ++ device = NULL; ++ ++ tx = device ? 
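++ /* an interrupt descriptor is the cheapest op that yields a
++ * completion callback; without it we fall back to polling: */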
device->device_prep_dma_interrupt(chan) : NULL; ++ } else ++ tx = NULL; ++ ++ if (tx) { ++ pr_debug("%s: (async)\n", __FUNCTION__); ++ ++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); ++ } else { ++ pr_debug("%s: (sync)\n", __FUNCTION__); ++ ++ /* wait for any prerequisite operations */ ++ if (depend_tx) { ++ /* if ack is already set then we cannot be sure ++ * we are referring to the correct operation ++ */ ++ BUG_ON(depend_tx->ack); ++ if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) ++ panic("%s: DMA_ERROR waiting for depend_tx\n", ++ __FUNCTION__); ++ } ++ ++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); ++ } ++ ++ return tx; ++} ++EXPORT_SYMBOL_GPL(async_trigger_callback); ++ ++module_init(async_tx_init); ++module_exit(async_tx_exit); ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/crypto/async_tx/async_xor.c linux-2.6.22-591/crypto/async_tx/async_xor.c +--- linux-2.6.22-570/crypto/async_tx/async_xor.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/async_tx/async_xor.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,327 @@ ++/* ++ * xor offload engine api ++ * ++ * Copyright © 2006, Intel Corporation. ++ * ++ * Dan Williams ++ * ++ * with architecture considerations by: ++ * Neil Brown ++ * Jeff Garzik ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static void ++do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device, ++ struct dma_chan *chan, struct page *dest, struct page **src_list, ++ unsigned int offset, unsigned int src_cnt, size_t len, ++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ dma_addr_t dma_addr; ++ enum dma_data_direction dir; ++ int i; ++ ++ pr_debug("%s: len: %zu\n", __FUNCTION__, len); ++ ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? ++ DMA_NONE : DMA_FROM_DEVICE; ++ ++ dma_addr = dma_map_page(device->dev, dest, offset, len, dir); ++ tx->tx_set_dest(dma_addr, tx, 0); ++ ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
++ DMA_NONE : DMA_TO_DEVICE; ++ ++ for (i = 0; i < src_cnt; i++) { ++ dma_addr = dma_map_page(device->dev, src_list[i], ++ offset, len, dir); ++ tx->tx_set_src(dma_addr, tx, i); ++ } ++ ++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); ++} ++ ++static void ++do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, ++ unsigned int src_cnt, size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ void *_dest; ++ int i; ++ ++ pr_debug("%s: len: %zu\n", __FUNCTION__, len); ++ ++ /* reuse the 'src_list' array to convert to buffer pointers */ ++ for (i = 0; i < src_cnt; i++) ++ src_list[i] = (struct page *) ++ (page_address(src_list[i]) + offset); ++ ++ /* set destination address */ ++ _dest = page_address(dest) + offset; ++ ++ if (flags & ASYNC_TX_XOR_ZERO_DST) ++ memset(_dest, 0, len); ++ ++ xor_blocks(src_cnt, len, _dest, ++ (void **) src_list); ++ ++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); ++} ++ ++/** ++ * async_xor - attempt to xor a set of blocks with a dma engine. ++ * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST ++ * flag must be set to not include dest data in the calculation. The ++ * assumption with dma engines is that they only use the destination ++ * buffer as a source when it is explicitly specified in the source list. ++ * @dest: destination page ++ * @src_list: array of source pages (if the dest is also a source it must be ++ * at index zero). The contents of this array may be overwritten. ++ * @offset: offset in pages to start transaction ++ * @src_cnt: number of source pages ++ * @len: length in bytes ++ * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ++ * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK ++ * @depend_tx: xor depends on the result of this transaction. ++ * @cb_fn: function to call when the xor completes ++ * @cb_param: parameter to pass to the callback routine ++ */ ++struct dma_async_tx_descriptor * ++async_xor(struct page *dest, struct page **src_list, unsigned int offset, ++ int src_cnt, size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR); ++ struct dma_device *device = chan ? chan->device : NULL; ++ struct dma_async_tx_descriptor *tx = NULL; ++ dma_async_tx_callback _cb_fn; ++ void *_cb_param; ++ unsigned long local_flags; ++ int xor_src_cnt; ++ int i = 0, src_off = 0, int_en; ++ ++ BUG_ON(src_cnt <= 1); ++ ++ while (src_cnt) { ++ local_flags = flags; ++ if (device) { /* run the xor asynchronously */ ++ xor_src_cnt = min(src_cnt, device->max_xor); ++ /* if we are submitting additional xors ++ * only set the callback on the last transaction ++ */ ++ if (src_cnt > xor_src_cnt) { ++ local_flags &= ~ASYNC_TX_ACK; ++ _cb_fn = NULL; ++ _cb_param = NULL; ++ } else { ++ _cb_fn = cb_fn; ++ _cb_param = cb_param; ++ } ++ ++ int_en = _cb_fn ?
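++ /* only the final transaction of a chain carries a callback,
++ * so only it needs a completion interrupt: */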
1 : 0; ++ ++ tx = device->device_prep_dma_xor( ++ chan, xor_src_cnt, len, int_en); ++ ++ if (tx) { ++ do_async_xor(tx, device, chan, dest, ++ &src_list[src_off], offset, xor_src_cnt, len, ++ local_flags, depend_tx, _cb_fn, ++ _cb_param); ++ } else /* fall through */ ++ goto xor_sync; ++ } else { /* run the xor synchronously */ ++xor_sync: ++ /* in the sync case the dest is an implied source ++ * (assumes the dest is at the src_off index) ++ */ ++ if (flags & ASYNC_TX_XOR_DROP_DST) { ++ src_cnt--; ++ src_off++; ++ } ++ ++ /* process up to 'MAX_XOR_BLOCKS' sources */ ++ xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); ++ ++ /* if we are submitting additional xors ++ * only set the callback on the last transaction ++ */ ++ if (src_cnt > xor_src_cnt) { ++ local_flags &= ~ASYNC_TX_ACK; ++ _cb_fn = NULL; ++ _cb_param = NULL; ++ } else { ++ _cb_fn = cb_fn; ++ _cb_param = cb_param; ++ } ++ ++ /* wait for any prerequisite operations */ ++ if (depend_tx) { ++ /* if ack is already set then we cannot be sure ++ * we are referring to the correct operation ++ */ ++ BUG_ON(depend_tx->ack); ++ if (dma_wait_for_async_tx(depend_tx) == ++ DMA_ERROR) ++ panic("%s: DMA_ERROR waiting for " ++ "depend_tx\n", ++ __FUNCTION__); ++ } ++ ++ do_sync_xor(dest, &src_list[src_off], offset, ++ xor_src_cnt, len, local_flags, depend_tx, ++ _cb_fn, _cb_param); ++ } ++ ++ /* the previous tx is hidden from the client, ++ * so ack it ++ */ ++ if (i && depend_tx) ++ async_tx_ack(depend_tx); ++ ++ depend_tx = tx; ++ ++ if (src_cnt > xor_src_cnt) { ++ /* drop completed sources */ ++ src_cnt -= xor_src_cnt; ++ src_off += xor_src_cnt; ++ ++ /* unconditionally preserve the destination */ ++ flags &= ~ASYNC_TX_XOR_ZERO_DST; ++ ++ /* use the intermediate result a source, but remember ++ * it's dropped, because it's implied, in the sync case ++ */ ++ src_list[--src_off] = dest; ++ src_cnt++; ++ flags |= ASYNC_TX_XOR_DROP_DST; ++ } else ++ src_cnt = 0; ++ i++; ++ } ++ ++ return tx; ++} ++EXPORT_SYMBOL_GPL(async_xor); ++ ++static int page_is_zero(struct page *p, unsigned int offset, size_t len) ++{ ++ char *a = page_address(p) + offset; ++ return ((*(u32 *) a) == 0 && ++ memcmp(a, a + 4, len - 4) == 0); ++} ++ ++/** ++ * async_xor_zero_sum - attempt a xor parity check with a dma engine. ++ * @dest: destination page used if the xor is performed synchronously ++ * @src_list: array of source pages. The dest page must be listed as a source ++ * at index zero. The contents of this array may be overwritten. ++ * @offset: offset in pages to start transaction ++ * @src_cnt: number of source pages ++ * @len: length in bytes ++ * @result: 0 if sum == 0 else non-zero ++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK ++ * @depend_tx: xor depends on the result of this transaction. ++ * @cb_fn: function to call when the xor completes ++ * @cb_param: parameter to pass to the callback routine ++ */ ++struct dma_async_tx_descriptor * ++async_xor_zero_sum(struct page *dest, struct page **src_list, ++ unsigned int offset, int src_cnt, size_t len, ++ u32 *result, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_param) ++{ ++ struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM); ++ struct dma_device *device = chan ? chan->device : NULL; ++ int int_en = cb_fn ? 1 : 0; ++ struct dma_async_tx_descriptor *tx = device ? 
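++ /* a zero-sum engine checks parity without writing a result
++ * page; with no such engine tx stays NULL and the check is
++ * redone below as a synchronous xor plus page_is_zero(): */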
++ device->device_prep_dma_zero_sum(chan, src_cnt, len, result, ++ int_en) : NULL; ++ int i; ++ ++ BUG_ON(src_cnt <= 1); ++ ++ if (tx) { ++ dma_addr_t dma_addr; ++ enum dma_data_direction dir; ++ ++ pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); ++ ++ dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? ++ DMA_NONE : DMA_TO_DEVICE; ++ ++ for (i = 0; i < src_cnt; i++) { ++ dma_addr = dma_map_page(device->dev, src_list[i], ++ offset, len, dir); ++ tx->tx_set_src(dma_addr, tx, i); ++ } ++ ++ async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); ++ } else { ++ unsigned long xor_flags = flags; ++ ++ pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); ++ ++ xor_flags |= ASYNC_TX_XOR_DROP_DST; ++ xor_flags &= ~ASYNC_TX_ACK; ++ ++ tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, ++ depend_tx, NULL, NULL); ++ ++ if (tx) { ++ if (dma_wait_for_async_tx(tx) == DMA_ERROR) ++ panic("%s: DMA_ERROR waiting for tx\n", ++ __FUNCTION__); ++ async_tx_ack(tx); ++ } ++ ++ *result = page_is_zero(dest, offset, len) ? 0 : 1; ++ ++ tx = NULL; ++ ++ async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); ++ } ++ ++ return tx; ++} ++EXPORT_SYMBOL_GPL(async_xor_zero_sum); ++ ++static int __init async_xor_init(void) ++{ ++ return 0; ++} ++ ++static void __exit async_xor_exit(void) ++{ ++ do { } while (0); ++} ++ ++module_init(async_xor_init); ++module_exit(async_xor_exit); ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/crypto/xor.c linux-2.6.22-591/crypto/xor.c +--- linux-2.6.22-570/crypto/xor.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/crypto/xor.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,155 @@ ++/* ++ * xor.c : Multiple Devices driver for Linux ++ * ++ * Copyright (C) 1996, 1997, 1998, 1999, 2000, ++ * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson. ++ * ++ * Dispatch optimized RAID-5 checksumming functions. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2, or (at your option) ++ * any later version. ++ * ++ * You should have received a copy of the GNU General Public License ++ * (for example /usr/src/linux/COPYING); if not, write to the Free ++ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++#define BH_TRACE 0 ++#include ++#include ++#include ++#include ++ ++/* The xor routines to use. */ ++static struct xor_block_template *active_template; ++ ++void ++xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) ++{ ++ unsigned long *p1, *p2, *p3, *p4; ++ ++ p1 = (unsigned long *) srcs[0]; ++ if (src_count == 1) { ++ active_template->do_2(bytes, dest, p1); ++ return; ++ } ++ ++ p2 = (unsigned long *) srcs[1]; ++ if (src_count == 2) { ++ active_template->do_3(bytes, dest, p1, p2); ++ return; ++ } ++ ++ p3 = (unsigned long *) srcs[2]; ++ if (src_count == 3) { ++ active_template->do_4(bytes, dest, p1, p2, p3); ++ return; ++ } ++ ++ p4 = (unsigned long *) srcs[3]; ++ active_template->do_5(bytes, dest, p1, p2, p3, p4); ++} ++EXPORT_SYMBOL(xor_blocks); ++ ++/* Set of all registered templates. 
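++ * Each arch contributes candidates through XOR_TRY_TEMPLATES; they are
++ * benchmarked at init below and the fastest becomes active_template,
++ * e.g. (boot-log line in the format printed below, numbers illustrative):
++ * xor: using function: generic_sse (7805.000 MB/sec)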
*/ ++static struct xor_block_template *template_list; ++ ++#define BENCH_SIZE (PAGE_SIZE) ++ ++static void ++do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) ++{ ++ int speed; ++ unsigned long now; ++ int i, count, max; ++ ++ tmpl->next = template_list; ++ template_list = tmpl; ++ ++ /* ++ * Count the number of XORs done during a whole jiffy, and use ++ * this to calculate the speed of checksumming. We use a 2-page ++ * allocation to have guaranteed color L1-cache layout. ++ */ ++ max = 0; ++ for (i = 0; i < 5; i++) { ++ now = jiffies; ++ count = 0; ++ while (jiffies == now) { ++ mb(); /* prevent loop optimization */ ++ tmpl->do_2(BENCH_SIZE, b1, b2); ++ mb(); ++ count++; ++ mb(); ++ } ++ if (count > max) ++ max = count; ++ } ++ ++ speed = max * (HZ * BENCH_SIZE / 1024); ++ tmpl->speed = speed; ++ ++ printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name, ++ speed / 1000, speed % 1000); ++} ++ ++static int __init ++calibrate_xor_blocks(void) ++{ ++ void *b1, *b2; ++ struct xor_block_template *f, *fastest; ++ ++ b1 = (void *) __get_free_pages(GFP_KERNEL, 2); ++ if (!b1) { ++ printk(KERN_WARNING "xor: Yikes! No memory available.\n"); ++ return -ENOMEM; ++ } ++ b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; ++ ++ /* ++ * If this arch/cpu has a short-circuited selection, don't loop through ++ * all the possible functions, just test the best one ++ */ ++ ++ fastest = NULL; ++ ++#ifdef XOR_SELECT_TEMPLATE ++ fastest = XOR_SELECT_TEMPLATE(fastest); ++#endif ++ ++#define xor_speed(templ) do_xor_speed((templ), b1, b2) ++ ++ if (fastest) { ++ printk(KERN_INFO "xor: automatically using best " ++ "checksumming function: %s\n", ++ fastest->name); ++ xor_speed(fastest); ++ } else { ++ printk(KERN_INFO "xor: measuring software checksum speed\n"); ++ XOR_TRY_TEMPLATES; ++ fastest = template_list; ++ for (f = fastest; f; f = f->next) ++ if (f->speed > fastest->speed) ++ fastest = f; ++ } ++ ++ printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n", ++ fastest->name, fastest->speed / 1000, fastest->speed % 1000); ++ ++#undef xor_speed ++ ++ free_pages((unsigned long)b1, 2); ++ ++ active_template = fastest; ++ return 0; ++} ++ ++static __exit void xor_exit(void) { } ++ ++MODULE_LICENSE("GPL"); ++ ++/* when built-in xor.o must initialize before drivers/md/md.o */ ++core_initcall(calibrate_xor_blocks); ++module_exit(xor_exit); +diff -Nurb linux-2.6.22-570/drivers/Makefile linux-2.6.22-591/drivers/Makefile +--- linux-2.6.22-570/drivers/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -70,6 +70,7 @@ + obj-$(CONFIG_MCA) += mca/ + obj-$(CONFIG_EISA) += eisa/ + obj-$(CONFIG_CPU_FREQ) += cpufreq/ ++obj-$(CONFIG_CPU_IDLE) += cpuidle/ + obj-$(CONFIG_MMC) += mmc/ + obj-$(CONFIG_NEW_LEDS) += leds/ + obj-$(CONFIG_INFINIBAND) += infiniband/ +diff -Nurb linux-2.6.22-570/drivers/acpi/Kconfig linux-2.6.22-591/drivers/acpi/Kconfig +--- linux-2.6.22-570/drivers/acpi/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -124,7 +124,7 @@ + + config ACPI_VIDEO + tristate "Video" +- depends on X86 && BACKLIGHT_CLASS_DEVICE ++ depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL + help + This driver implement the ACPI Extensions For Display Adapters + for integrated graphics devices on motherboard, as specified in +@@ -280,6 +280,14 @@ + of verbosity. Saying Y enables these statements. This will increase + your kernel size by around 50K.
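++# The cost comes from the debug macros expanding to real calls only under
++# CONFIG_ACPI_DEBUG; a simplified sketch (not the exact ACPICA expansion):
++#
++#	#ifdef CONFIG_ACPI_DEBUG
++#	#define ACPI_DEBUG_PRINT(plist)	acpi_ut_debug_print plist
++#	#else
++#	#define ACPI_DEBUG_PRINT(plist)	/* compiled out */
++#	#endif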
+ ++config ACPI_DEBUG_FUNC_TRACE ++ bool "Additionally enable ACPI function tracing" ++ default n ++ depends on ACPI_DEBUG ++ help ++ ACPI Debug Statements slow down ACPI processing. Function trace ++ is about half of the penalty and is rarely useful. ++ + config ACPI_EC + bool + default y +diff -Nurb linux-2.6.22-570/drivers/acpi/battery.c linux-2.6.22-591/drivers/acpi/battery.c +--- linux-2.6.22-570/drivers/acpi/battery.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/battery.c 2007-12-21 15:36:11.000000000 -0500 +@@ -43,21 +43,30 @@ + #define ACPI_BATTERY_CLASS "battery" + #define ACPI_BATTERY_HID "PNP0C0A" + #define ACPI_BATTERY_DEVICE_NAME "Battery" +-#define ACPI_BATTERY_FILE_INFO "info" +-#define ACPI_BATTERY_FILE_STATUS "state" +-#define ACPI_BATTERY_FILE_ALARM "alarm" + #define ACPI_BATTERY_NOTIFY_STATUS 0x80 + #define ACPI_BATTERY_NOTIFY_INFO 0x81 + #define ACPI_BATTERY_UNITS_WATTS "mW" + #define ACPI_BATTERY_UNITS_AMPS "mA" + + #define _COMPONENT ACPI_BATTERY_COMPONENT ++ ++#define ACPI_BATTERY_UPDATE_TIME 0 ++ ++#define ACPI_BATTERY_NONE_UPDATE 0 ++#define ACPI_BATTERY_EASY_UPDATE 1 ++#define ACPI_BATTERY_INIT_UPDATE 2 ++ + ACPI_MODULE_NAME("battery"); + + MODULE_AUTHOR("Paul Diefenbaugh"); + MODULE_DESCRIPTION("ACPI Battery Driver"); + MODULE_LICENSE("GPL"); + ++static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME; ++ ++/* 0 - every time, > 0 - by update_time */ ++module_param(update_time, uint, 0644); ++ + extern struct proc_dir_entry *acpi_lock_battery_dir(void); + extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir); + +@@ -76,7 +85,7 @@ + }, + }; + +-struct acpi_battery_status { ++struct acpi_battery_state { + acpi_integer state; + acpi_integer present_rate; + acpi_integer remaining_capacity; +@@ -99,33 +108,111 @@ + acpi_string oem_info; + }; + +-struct acpi_battery_flags { +- u8 present:1; /* Bay occupied? */ +- u8 power_unit:1; /* 0=watts, 1=apms */ +- u8 alarm:1; /* _BTP present? 
*/ +- u8 reserved:5; ++enum acpi_battery_files{ ++ ACPI_BATTERY_INFO = 0, ++ ACPI_BATTERY_STATE, ++ ACPI_BATTERY_ALARM, ++ ACPI_BATTERY_NUMFILES, + }; + +-struct acpi_battery_trips { +- unsigned long warning; +- unsigned long low; ++struct acpi_battery_flags { ++ u8 battery_present_prev; ++ u8 alarm_present; ++ u8 init_update; ++ u8 update[ACPI_BATTERY_NUMFILES]; ++ u8 power_unit; + }; + + struct acpi_battery { +- struct acpi_device * device; ++ struct mutex mutex; ++ struct acpi_device *device; + struct acpi_battery_flags flags; +- struct acpi_battery_trips trips; ++ struct acpi_buffer bif_data; ++ struct acpi_buffer bst_data; + unsigned long alarm; +- struct acpi_battery_info *info; ++ unsigned long update_time[ACPI_BATTERY_NUMFILES]; + }; + ++inline int acpi_battery_present(struct acpi_battery *battery) ++{ ++ return battery->device->status.battery_present; ++} ++inline char *acpi_battery_power_units(struct acpi_battery *battery) ++{ ++ if (battery->flags.power_unit) ++ return ACPI_BATTERY_UNITS_AMPS; ++ else ++ return ACPI_BATTERY_UNITS_WATTS; ++} ++ ++inline acpi_handle acpi_battery_handle(struct acpi_battery *battery) ++{ ++ return battery->device->handle; ++} ++ + /* -------------------------------------------------------------------------- + Battery Management + -------------------------------------------------------------------------- */ + +-static int +-acpi_battery_get_info(struct acpi_battery *battery, +- struct acpi_battery_info **bif) ++static void acpi_battery_check_result(struct acpi_battery *battery, int result) ++{ ++ if (!battery) ++ return; ++ ++ if (result) { ++ battery->flags.init_update = 1; ++ } ++} ++ ++static int acpi_battery_extract_package(struct acpi_battery *battery, ++ union acpi_object *package, ++ struct acpi_buffer *format, ++ struct acpi_buffer *data, ++ char *package_name) ++{ ++ acpi_status status = AE_OK; ++ struct acpi_buffer data_null = { 0, NULL }; ++ ++ status = acpi_extract_package(package, format, &data_null); ++ if (status != AE_BUFFER_OVERFLOW) { ++ ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s", ++ package_name)); ++ return -ENODEV; ++ } ++ ++ if (data_null.length != data->length) { ++ kfree(data->pointer); ++ data->pointer = kzalloc(data_null.length, GFP_KERNEL); ++ if (!data->pointer) { ++ ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()")); ++ return -ENOMEM; ++ } ++ data->length = data_null.length; ++ } ++ ++ status = acpi_extract_package(package, format, data); ++ if (ACPI_FAILURE(status)) { ++ ACPI_EXCEPTION((AE_INFO, status, "Extracting %s", ++ package_name)); ++ return -ENODEV; ++ } ++ ++ return 0; ++} ++ ++static int acpi_battery_get_status(struct acpi_battery *battery) ++{ ++ int result = 0; ++ ++ result = acpi_bus_get_status(battery->device); ++ if (result) { ++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA")); ++ return -ENODEV; ++ } ++ return result; ++} ++ ++static int acpi_battery_get_info(struct acpi_battery *battery) + { + int result = 0; + acpi_status status = 0; +@@ -133,16 +220,20 @@ + struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF), + ACPI_BATTERY_FORMAT_BIF + }; +- struct acpi_buffer data = { 0, NULL }; + union acpi_object *package = NULL; ++ struct acpi_buffer *data = NULL; ++ struct acpi_battery_info *bif = NULL; + ++ battery->update_time[ACPI_BATTERY_INFO] = get_seconds(); + +- if (!battery || !bif) +- return -EINVAL; ++ if (!acpi_battery_present(battery)) ++ return 0; + +- /* Evalute _BIF */ ++ /* Evaluate _BIF */ + +- status = acpi_evaluate_object(battery->device->handle, "_BIF", NULL, 
&buffer); ++ status = ++ acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL, ++ &buffer); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF")); + return -ENODEV; +@@ -150,41 +241,29 @@ + + package = buffer.pointer; + +- /* Extract Package Data */ +- +- status = acpi_extract_package(package, &format, &data); +- if (status != AE_BUFFER_OVERFLOW) { +- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF")); +- result = -ENODEV; +- goto end; +- } ++ data = &battery->bif_data; + +- data.pointer = kzalloc(data.length, GFP_KERNEL); +- if (!data.pointer) { +- result = -ENOMEM; +- goto end; +- } ++ /* Extract Package Data */ + +- status = acpi_extract_package(package, &format, &data); +- if (ACPI_FAILURE(status)) { +- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF")); +- kfree(data.pointer); +- result = -ENODEV; ++ result = ++ acpi_battery_extract_package(battery, package, &format, data, ++ "_BIF"); ++ if (result) + goto end; +- } + + end: ++ + kfree(buffer.pointer); + +- if (!result) +- (*bif) = data.pointer; ++ if (!result) { ++ bif = data->pointer; ++ battery->flags.power_unit = bif->power_unit; ++ } + + return result; + } + +-static int +-acpi_battery_get_status(struct acpi_battery *battery, +- struct acpi_battery_status **bst) ++static int acpi_battery_get_state(struct acpi_battery *battery) + { + int result = 0; + acpi_status status = 0; +@@ -192,16 +271,19 @@ + struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST), + ACPI_BATTERY_FORMAT_BST + }; +- struct acpi_buffer data = { 0, NULL }; + union acpi_object *package = NULL; ++ struct acpi_buffer *data = NULL; + ++ battery->update_time[ACPI_BATTERY_STATE] = get_seconds(); + +- if (!battery || !bst) +- return -EINVAL; ++ if (!acpi_battery_present(battery)) ++ return 0; + +- /* Evalute _BST */ ++ /* Evaluate _BST */ + +- status = acpi_evaluate_object(battery->device->handle, "_BST", NULL, &buffer); ++ status = ++ acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL, ++ &buffer); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST")); + return -ENODEV; +@@ -209,55 +291,49 @@ + + package = buffer.pointer; + +- /* Extract Package Data */ ++ data = &battery->bst_data; + +- status = acpi_extract_package(package, &format, &data); +- if (status != AE_BUFFER_OVERFLOW) { +- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST")); +- result = -ENODEV; +- goto end; +- } +- +- data.pointer = kzalloc(data.length, GFP_KERNEL); +- if (!data.pointer) { +- result = -ENOMEM; +- goto end; +- } ++ /* Extract Package Data */ + +- status = acpi_extract_package(package, &format, &data); +- if (ACPI_FAILURE(status)) { +- ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST")); +- kfree(data.pointer); +- result = -ENODEV; ++ result = ++ acpi_battery_extract_package(battery, package, &format, data, ++ "_BST"); ++ if (result) + goto end; +- } + + end: + kfree(buffer.pointer); + +- if (!result) +- (*bst) = data.pointer; +- + return result; + } + +-static int +-acpi_battery_set_alarm(struct acpi_battery *battery, unsigned long alarm) ++static int acpi_battery_get_alarm(struct acpi_battery *battery) ++{ ++ battery->update_time[ACPI_BATTERY_ALARM] = get_seconds(); ++ ++ return 0; ++} ++ ++static int acpi_battery_set_alarm(struct acpi_battery *battery, ++ unsigned long alarm) + { + acpi_status status = 0; + union acpi_object arg0 = { ACPI_TYPE_INTEGER }; + struct acpi_object_list arg_list = { 1, &arg0 }; + ++ battery->update_time[ACPI_BATTERY_ALARM] = get_seconds(); + +- if (!battery) +- 
return -EINVAL; ++ if (!acpi_battery_present(battery)) ++ return -ENODEV; + +- if (!battery->flags.alarm) ++ if (!battery->flags.alarm_present) + return -ENODEV; + + arg0.integer.value = alarm; + +- status = acpi_evaluate_object(battery->device->handle, "_BTP", &arg_list, NULL); ++ status = ++ acpi_evaluate_object(acpi_battery_handle(battery), "_BTP", ++ &arg_list, NULL); + if (ACPI_FAILURE(status)) + return -ENODEV; + +@@ -268,65 +344,114 @@ + return 0; + } + +-static int acpi_battery_check(struct acpi_battery *battery) ++static int acpi_battery_init_alarm(struct acpi_battery *battery) + { + int result = 0; + acpi_status status = AE_OK; + acpi_handle handle = NULL; +- struct acpi_device *device = NULL; +- struct acpi_battery_info *bif = NULL; ++ struct acpi_battery_info *bif = battery->bif_data.pointer; ++ unsigned long alarm = battery->alarm; + ++ /* See if alarms are supported, and if so, set default */ + +- if (!battery) +- return -EINVAL; ++ status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle); ++ if (ACPI_SUCCESS(status)) { ++ battery->flags.alarm_present = 1; ++ if (!alarm && bif) { ++ alarm = bif->design_capacity_warning; ++ } ++ result = acpi_battery_set_alarm(battery, alarm); ++ if (result) ++ goto end; ++ } else { ++ battery->flags.alarm_present = 0; ++ } + +- device = battery->device; ++ end: + +- result = acpi_bus_get_status(device); +- if (result) + return result; ++} + +- /* Insertion? */ +- +- if (!battery->flags.present && device->status.battery_present) { ++static int acpi_battery_init_update(struct acpi_battery *battery) ++{ ++ int result = 0; + +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery inserted\n")); ++ result = acpi_battery_get_status(battery); ++ if (result) ++ return result; + +- /* Evalute _BIF to get certain static information */ ++ battery->flags.battery_present_prev = acpi_battery_present(battery); + +- result = acpi_battery_get_info(battery, &bif); ++ if (acpi_battery_present(battery)) { ++ result = acpi_battery_get_info(battery); ++ if (result) ++ return result; ++ result = acpi_battery_get_state(battery); + if (result) + return result; + +- battery->flags.power_unit = bif->power_unit; +- battery->trips.warning = bif->design_capacity_warning; +- battery->trips.low = bif->design_capacity_low; +- kfree(bif); ++ acpi_battery_init_alarm(battery); ++ } + +- /* See if alarms are supported, and if so, set default */ ++ return result; ++} + +- status = acpi_get_handle(battery->device->handle, "_BTP", &handle); +- if (ACPI_SUCCESS(status)) { +- battery->flags.alarm = 1; +- acpi_battery_set_alarm(battery, battery->trips.warning); ++static int acpi_battery_update(struct acpi_battery *battery, ++ int update, int *update_result_ptr) ++{ ++ int result = 0; ++ int update_result = ACPI_BATTERY_NONE_UPDATE; ++ ++ if (!acpi_battery_present(battery)) { ++ update = 1; ++ } ++ ++ if (battery->flags.init_update) { ++ result = acpi_battery_init_update(battery); ++ if (result) ++ goto end; ++ update_result = ACPI_BATTERY_INIT_UPDATE; ++ } else if (update) { ++ result = acpi_battery_get_status(battery); ++ if (result) ++ goto end; ++ if ((!battery->flags.battery_present_prev & acpi_battery_present(battery)) ++ || (battery->flags.battery_present_prev & !acpi_battery_present(battery))) { ++ result = acpi_battery_init_update(battery); ++ if (result) ++ goto end; ++ update_result = ACPI_BATTERY_INIT_UPDATE; ++ } else { ++ update_result = ACPI_BATTERY_EASY_UPDATE; + } + } + +- /* Removal? 
*/ ++ end: + +- else if (battery->flags.present && !device->status.battery_present) { +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery removed\n")); +- } ++ battery->flags.init_update = (result != 0); + +- battery->flags.present = device->status.battery_present; ++ *update_result_ptr = update_result; + + return result; + } + +-static void acpi_battery_check_present(struct acpi_battery *battery) ++static void acpi_battery_notify_update(struct acpi_battery *battery) + { +- if (!battery->flags.present) { +- acpi_battery_check(battery); ++ acpi_battery_get_status(battery); ++ ++ if (battery->flags.init_update) { ++ return; ++ } ++ ++ if ((!battery->flags.battery_present_prev & ++ acpi_battery_present(battery)) || ++ (battery->flags.battery_present_prev & ++ !acpi_battery_present(battery))) { ++ battery->flags.init_update = 1; ++ } else { ++ battery->flags.update[ACPI_BATTERY_INFO] = 1; ++ battery->flags.update[ACPI_BATTERY_STATE] = 1; ++ battery->flags.update[ACPI_BATTERY_ALARM] = 1; + } + } + +@@ -335,37 +460,33 @@ + -------------------------------------------------------------------------- */ + + static struct proc_dir_entry *acpi_battery_dir; +-static int acpi_battery_read_info(struct seq_file *seq, void *offset) ++ ++static int acpi_battery_print_info(struct seq_file *seq, int result) + { +- int result = 0; + struct acpi_battery *battery = seq->private; + struct acpi_battery_info *bif = NULL; + char *units = "?"; + +- +- if (!battery) ++ if (result) + goto end; + +- acpi_battery_check_present(battery); +- +- if (battery->flags.present) ++ if (acpi_battery_present(battery)) + seq_printf(seq, "present: yes\n"); + else { + seq_printf(seq, "present: no\n"); + goto end; + } + +- /* Battery Info (_BIF) */ +- +- result = acpi_battery_get_info(battery, &bif); +- if (result || !bif) { +- seq_printf(seq, "ERROR: Unable to read battery information\n"); ++ bif = battery->bif_data.pointer; ++ if (!bif) { ++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL")); ++ result = -ENODEV; + goto end; + } + +- units = +- bif-> +- power_unit ? 
ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; ++ /* Battery Units */ ++ ++ units = acpi_battery_power_units(battery); + + if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN) + seq_printf(seq, "design capacity: unknown\n"); +@@ -396,7 +517,6 @@ + else + seq_printf(seq, "design voltage: %d mV\n", + (u32) bif->design_voltage); +- + seq_printf(seq, "design capacity warning: %d %sh\n", + (u32) bif->design_capacity_warning, units); + seq_printf(seq, "design capacity low: %d %sh\n", +@@ -411,50 +531,40 @@ + seq_printf(seq, "OEM info: %s\n", bif->oem_info); + + end: +- kfree(bif); + +- return 0; +-} ++ if (result) ++ seq_printf(seq, "ERROR: Unable to read battery info\n"); + +-static int acpi_battery_info_open_fs(struct inode *inode, struct file *file) +-{ +- return single_open(file, acpi_battery_read_info, PDE(inode)->data); ++ return result; + } + +-static int acpi_battery_read_state(struct seq_file *seq, void *offset) ++static int acpi_battery_print_state(struct seq_file *seq, int result) + { +- int result = 0; + struct acpi_battery *battery = seq->private; +- struct acpi_battery_status *bst = NULL; ++ struct acpi_battery_state *bst = NULL; + char *units = "?"; + +- +- if (!battery) ++ if (result) + goto end; + +- acpi_battery_check_present(battery); +- +- if (battery->flags.present) ++ if (acpi_battery_present(battery)) + seq_printf(seq, "present: yes\n"); + else { + seq_printf(seq, "present: no\n"); + goto end; + } + +- /* Battery Units */ +- +- units = +- battery->flags. +- power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; +- +- /* Battery Status (_BST) */ +- +- result = acpi_battery_get_status(battery, &bst); +- if (result || !bst) { +- seq_printf(seq, "ERROR: Unable to read battery status\n"); ++ bst = battery->bst_data.pointer; ++ if (!bst) { ++ ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL")); ++ result = -ENODEV; + goto end; + } + ++ /* Battery Units */ ++ ++ units = acpi_battery_power_units(battery); ++ + if (!(bst->state & 0x04)) + seq_printf(seq, "capacity state: ok\n"); + else +@@ -490,48 +600,43 @@ + (u32) bst->present_voltage); + + end: +- kfree(bst); + +- return 0; +-} ++ if (result) { ++ seq_printf(seq, "ERROR: Unable to read battery state\n"); ++ } + +-static int acpi_battery_state_open_fs(struct inode *inode, struct file *file) +-{ +- return single_open(file, acpi_battery_read_state, PDE(inode)->data); ++ return result; + } + +-static int acpi_battery_read_alarm(struct seq_file *seq, void *offset) ++static int acpi_battery_print_alarm(struct seq_file *seq, int result) + { + struct acpi_battery *battery = seq->private; + char *units = "?"; + +- +- if (!battery) ++ if (result) + goto end; + +- acpi_battery_check_present(battery); +- +- if (!battery->flags.present) { ++ if (!acpi_battery_present(battery)) { + seq_printf(seq, "present: no\n"); + goto end; + } + + /* Battery Units */ + +- units = +- battery->flags. +- power_unit ? 
ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS; +- +- /* Battery Alarm */ ++ units = acpi_battery_power_units(battery); + + seq_printf(seq, "alarm: "); + if (!battery->alarm) + seq_printf(seq, "unsupported\n"); + else +- seq_printf(seq, "%d %sh\n", (u32) battery->alarm, units); ++ seq_printf(seq, "%lu %sh\n", battery->alarm, units); + + end: +- return 0; ++ ++ if (result) ++ seq_printf(seq, "ERROR: Unable to read battery alarm\n"); ++ ++ return result; + } + + static ssize_t +@@ -543,27 +648,113 @@ + char alarm_string[12] = { '\0' }; + struct seq_file *m = file->private_data; + struct acpi_battery *battery = m->private; +- ++ int update_result = ACPI_BATTERY_NONE_UPDATE; + + if (!battery || (count > sizeof(alarm_string) - 1)) + return -EINVAL; + +- acpi_battery_check_present(battery); ++ mutex_lock(&battery->mutex); + +- if (!battery->flags.present) +- return -ENODEV; ++ result = acpi_battery_update(battery, 1, &update_result); ++ if (result) { ++ result = -ENODEV; ++ goto end; ++ } + +- if (copy_from_user(alarm_string, buffer, count)) +- return -EFAULT; ++ if (!acpi_battery_present(battery)) { ++ result = -ENODEV; ++ goto end; ++ } ++ ++ if (copy_from_user(alarm_string, buffer, count)) { ++ result = -EFAULT; ++ goto end; ++ } + + alarm_string[count] = '\0'; + + result = acpi_battery_set_alarm(battery, + simple_strtoul(alarm_string, NULL, 0)); + if (result) ++ goto end; ++ ++ end: ++ ++ acpi_battery_check_result(battery, result); ++ ++ if (!result) ++ result = count; ++ ++ mutex_unlock(&battery->mutex); ++ + return result; ++} ++ ++typedef int(*print_func)(struct seq_file *seq, int result); ++typedef int(*get_func)(struct acpi_battery *battery); ++ ++static struct acpi_read_mux { ++ print_func print; ++ get_func get; ++} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = { ++ {.get = acpi_battery_get_info, .print = acpi_battery_print_info}, ++ {.get = acpi_battery_get_state, .print = acpi_battery_print_state}, ++ {.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm}, ++}; ++ ++static int acpi_battery_read(int fid, struct seq_file *seq) ++{ ++ struct acpi_battery *battery = seq->private; ++ int result = 0; ++ int update_result = ACPI_BATTERY_NONE_UPDATE; ++ int update = 0; ++ ++ mutex_lock(&battery->mutex); ++ ++ update = (get_seconds() - battery->update_time[fid] >= update_time); ++ update = (update | battery->flags.update[fid]); ++ ++ result = acpi_battery_update(battery, update, &update_result); ++ if (result) ++ goto end; ++ ++ if (update_result == ACPI_BATTERY_EASY_UPDATE) { ++ result = acpi_read_funcs[fid].get(battery); ++ if (result) ++ goto end; ++ } + +- return count; ++ end: ++ result = acpi_read_funcs[fid].print(seq, result); ++ acpi_battery_check_result(battery, result); ++ battery->flags.update[fid] = result; ++ mutex_unlock(&battery->mutex); ++ return result; ++} ++ ++static int acpi_battery_read_info(struct seq_file *seq, void *offset) ++{ ++ return acpi_battery_read(ACPI_BATTERY_INFO, seq); ++} ++ ++static int acpi_battery_read_state(struct seq_file *seq, void *offset) ++{ ++ return acpi_battery_read(ACPI_BATTERY_STATE, seq); ++} ++ ++static int acpi_battery_read_alarm(struct seq_file *seq, void *offset) ++{ ++ return acpi_battery_read(ACPI_BATTERY_ALARM, seq); ++} ++ ++static int acpi_battery_info_open_fs(struct inode *inode, struct file *file) ++{ ++ return single_open(file, acpi_battery_read_info, PDE(inode)->data); ++} ++ ++static int acpi_battery_state_open_fs(struct inode *inode, struct file *file) ++{ ++ return single_open(file, acpi_battery_read_state, 
PDE(inode)->data); + } + + static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file) +@@ -571,35 +762,51 @@ + return single_open(file, acpi_battery_read_alarm, PDE(inode)->data); + } + +-static const struct file_operations acpi_battery_info_ops = { ++static struct battery_file { ++ struct file_operations ops; ++ mode_t mode; ++ char *name; ++} acpi_battery_file[] = { ++ { ++ .name = "info", ++ .mode = S_IRUGO, ++ .ops = { + .open = acpi_battery_info_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +-}; +- +-static const struct file_operations acpi_battery_state_ops = { ++ }, ++ }, ++ { ++ .name = "state", ++ .mode = S_IRUGO, ++ .ops = { + .open = acpi_battery_state_open_fs, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +-}; +- +-static const struct file_operations acpi_battery_alarm_ops = { ++ }, ++ }, ++ { ++ .name = "alarm", ++ .mode = S_IFREG | S_IRUGO | S_IWUSR, ++ .ops = { + .open = acpi_battery_alarm_open_fs, + .read = seq_read, + .write = acpi_battery_write_alarm, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, ++ }, ++ }, + }; + + static int acpi_battery_add_fs(struct acpi_device *device) + { + struct proc_dir_entry *entry = NULL; +- ++ int i; + + if (!acpi_device_dir(device)) { + acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device), +@@ -609,38 +816,16 @@ + acpi_device_dir(device)->owner = THIS_MODULE; + } + +- /* 'info' [R] */ +- entry = create_proc_entry(ACPI_BATTERY_FILE_INFO, +- S_IRUGO, acpi_device_dir(device)); ++ for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { ++ entry = create_proc_entry(acpi_battery_file[i].name, ++ acpi_battery_file[i].mode, acpi_device_dir(device)); + if (!entry) + return -ENODEV; + else { +- entry->proc_fops = &acpi_battery_info_ops; ++ entry->proc_fops = &acpi_battery_file[i].ops; + entry->data = acpi_driver_data(device); + entry->owner = THIS_MODULE; + } +- +- /* 'status' [R] */ +- entry = create_proc_entry(ACPI_BATTERY_FILE_STATUS, +- S_IRUGO, acpi_device_dir(device)); +- if (!entry) +- return -ENODEV; +- else { +- entry->proc_fops = &acpi_battery_state_ops; +- entry->data = acpi_driver_data(device); +- entry->owner = THIS_MODULE; +- } +- +- /* 'alarm' [R/W] */ +- entry = create_proc_entry(ACPI_BATTERY_FILE_ALARM, +- S_IFREG | S_IRUGO | S_IWUSR, +- acpi_device_dir(device)); +- if (!entry) +- return -ENODEV; +- else { +- entry->proc_fops = &acpi_battery_alarm_ops; +- entry->data = acpi_driver_data(device); +- entry->owner = THIS_MODULE; + } + + return 0; +@@ -648,15 +833,12 @@ + + static int acpi_battery_remove_fs(struct acpi_device *device) + { +- ++ int i; + if (acpi_device_dir(device)) { +- remove_proc_entry(ACPI_BATTERY_FILE_ALARM, ++ for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) { ++ remove_proc_entry(acpi_battery_file[i].name, + acpi_device_dir(device)); +- remove_proc_entry(ACPI_BATTERY_FILE_STATUS, +- acpi_device_dir(device)); +- remove_proc_entry(ACPI_BATTERY_FILE_INFO, +- acpi_device_dir(device)); +- ++ } + remove_proc_entry(acpi_device_bid(device), acpi_battery_dir); + acpi_device_dir(device) = NULL; + } +@@ -673,7 +855,6 @@ + struct acpi_battery *battery = data; + struct acpi_device *device = NULL; + +- + if (!battery) + return; + +@@ -684,8 +865,10 @@ + case ACPI_BATTERY_NOTIFY_INFO: + case ACPI_NOTIFY_BUS_CHECK: + case ACPI_NOTIFY_DEVICE_CHECK: +- acpi_battery_check(battery); +- acpi_bus_generate_event(device, event, battery->flags.present); ++ device = battery->device; ++ 
acpi_battery_notify_update(battery); ++ acpi_bus_generate_event(device, event, ++ acpi_battery_present(battery)); + break; + default: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, +@@ -702,7 +885,6 @@ + acpi_status status = 0; + struct acpi_battery *battery = NULL; + +- + if (!device) + return -EINVAL; + +@@ -710,15 +892,21 @@ + if (!battery) + return -ENOMEM; + ++ mutex_init(&battery->mutex); ++ ++ mutex_lock(&battery->mutex); ++ + battery->device = device; + strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME); + strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS); + acpi_driver_data(device) = battery; + +- result = acpi_battery_check(battery); ++ result = acpi_battery_get_status(battery); + if (result) + goto end; + ++ battery->flags.init_update = 1; ++ + result = acpi_battery_add_fs(device); + if (result) + goto end; +@@ -727,6 +915,7 @@ + ACPI_ALL_NOTIFY, + acpi_battery_notify, battery); + if (ACPI_FAILURE(status)) { ++ ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler")); + result = -ENODEV; + goto end; + } +@@ -736,11 +925,14 @@ + device->status.battery_present ? "present" : "absent"); + + end: ++ + if (result) { + acpi_battery_remove_fs(device); + kfree(battery); + } + ++ mutex_unlock(&battery->mutex); ++ + return result; + } + +@@ -749,18 +941,27 @@ + acpi_status status = 0; + struct acpi_battery *battery = NULL; + +- + if (!device || !acpi_driver_data(device)) + return -EINVAL; + + battery = acpi_driver_data(device); + ++ mutex_lock(&battery->mutex); ++ + status = acpi_remove_notify_handler(device->handle, + ACPI_ALL_NOTIFY, + acpi_battery_notify); + + acpi_battery_remove_fs(device); + ++ kfree(battery->bif_data.pointer); ++ ++ kfree(battery->bst_data.pointer); ++ ++ mutex_unlock(&battery->mutex); ++ ++ mutex_destroy(&battery->mutex); ++ + kfree(battery); + + return 0; +@@ -775,7 +976,10 @@ + return -EINVAL; + + battery = device->driver_data; +- return acpi_battery_check(battery); ++ ++ battery->flags.init_update = 1; ++ ++ return 0; + } + + static int __init acpi_battery_init(void) +@@ -800,7 +1004,6 @@ + + static void __exit acpi_battery_exit(void) + { +- + acpi_bus_unregister_driver(&acpi_battery_driver); + + acpi_unlock_battery_dir(acpi_battery_dir); +diff -Nurb linux-2.6.22-570/drivers/acpi/bay.c linux-2.6.22-591/drivers/acpi/bay.c +--- linux-2.6.22-570/drivers/acpi/bay.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/bay.c 2007-12-21 15:36:11.000000000 -0500 +@@ -288,6 +288,11 @@ + new_bay->pdev = pdev; + platform_set_drvdata(pdev, new_bay); + ++ /* ++ * we want the bay driver to be able to send uevents ++ */ ++ pdev->dev.uevent_suppress = 0; ++ + if (acpi_bay_add_fs(new_bay)) { + platform_device_unregister(new_bay->pdev); + goto bay_add_err; +@@ -328,18 +333,12 @@ + { + struct bay *bay_dev = (struct bay *)data; + struct device *dev = &bay_dev->pdev->dev; ++ char event_string[12]; ++ char *envp[] = { event_string, NULL }; + + bay_dprintk(handle, "Bay event"); +- +- switch(event) { +- case ACPI_NOTIFY_BUS_CHECK: +- case ACPI_NOTIFY_DEVICE_CHECK: +- case ACPI_NOTIFY_EJECT_REQUEST: +- kobject_uevent(&dev->kobj, KOBJ_CHANGE); +- break; +- default: +- printk(KERN_ERR PREFIX "Bay: unknown event %d\n", event); +- } ++ sprintf(event_string, "BAY_EVENT=%d\n", event); ++ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); + } + + static acpi_status +diff -Nurb linux-2.6.22-570/drivers/acpi/dock.c linux-2.6.22-591/drivers/acpi/dock.c +--- linux-2.6.22-570/drivers/acpi/dock.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/acpi/dock.c 
2007-12-21 15:36:11.000000000 -0500 +@@ -40,8 +40,15 @@ + MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION); + MODULE_LICENSE("GPL"); + ++static int immediate_undock = 1; ++module_param(immediate_undock, bool, 0644); ++MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to " ++ "undock immediately when the undock button is pressed, 0 will cause" ++ " the driver to wait for userspace to write the undock sysfs file " ++ " before undocking"); ++ + static struct atomic_notifier_head dock_notifier_list; +-static struct platform_device dock_device; ++static struct platform_device *dock_device; + static char dock_device_name[] = "dock"; + + struct dock_station { +@@ -63,6 +70,7 @@ + }; + + #define DOCK_DOCKING 0x00000001 ++#define DOCK_UNDOCKING 0x00000002 + #define DOCK_EVENT 3 + #define UNDOCK_EVENT 2 + +@@ -327,12 +335,20 @@ + + static void dock_event(struct dock_station *ds, u32 event, int num) + { +- struct device *dev = &dock_device.dev; ++ struct device *dev = &dock_device->dev; ++ char event_string[7]; ++ char *envp[] = { event_string, NULL }; ++ ++ if (num == UNDOCK_EVENT) ++ sprintf(event_string, "UNDOCK"); ++ else ++ sprintf(event_string, "DOCK"); ++ + /* + * Indicate that the status of the dock station has + * changed. + */ +- kobject_uevent(&dev->kobj, KOBJ_CHANGE); ++ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp); + } + + /** +@@ -420,6 +436,16 @@ + ds->last_dock_time = jiffies; + } + ++static inline void begin_undock(struct dock_station *ds) ++{ ++ ds->flags |= DOCK_UNDOCKING; ++} ++ ++static inline void complete_undock(struct dock_station *ds) ++{ ++ ds->flags &= ~(DOCK_UNDOCKING); ++} ++ + /** + * dock_in_progress - see if we are in the middle of handling a dock event + * @ds: the dock station +@@ -550,7 +576,7 @@ + printk(KERN_ERR PREFIX "Unable to undock!\n"); + return -EBUSY; + } +- ++ complete_undock(ds); + return 0; + } + +@@ -594,7 +620,11 @@ + * to the driver who wish to hotplug. 
+ */ + case ACPI_NOTIFY_EJECT_REQUEST: ++ begin_undock(ds); ++ if (immediate_undock) + handle_eject_request(ds, event); ++ else ++ dock_event(ds, event, UNDOCK_EVENT); + break; + default: + printk(KERN_ERR PREFIX "Unknown dock event %d\n", event); +@@ -653,6 +683,17 @@ + DEVICE_ATTR(docked, S_IRUGO, show_docked, NULL); + + /* ++ * show_flags - read method for flags file in sysfs ++ */ ++static ssize_t show_flags(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return snprintf(buf, PAGE_SIZE, "%d\n", dock_station->flags); ++ ++} ++DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL); ++ ++/* + * write_undock - write method for "undock" file in sysfs + */ + static ssize_t write_undock(struct device *dev, struct device_attribute *attr, +@@ -675,16 +716,15 @@ + struct device_attribute *attr, char *buf) + { + unsigned long lbuf; +- acpi_status status = acpi_evaluate_integer(dock_station->handle, "_UID", NULL, &lbuf); +- if(ACPI_FAILURE(status)) { ++ acpi_status status = acpi_evaluate_integer(dock_station->handle, ++ "_UID", NULL, &lbuf); ++ if (ACPI_FAILURE(status)) + return 0; +- } ++ + return snprintf(buf, PAGE_SIZE, "%lx\n", lbuf); + } + DEVICE_ATTR(uid, S_IRUGO, show_dock_uid, NULL); + +- +- + /** + * dock_add - add a new dock station + * @handle: the dock station handle +diff -Nurb linux-2.6.22-570/drivers/acpi/ec.c linux-2.6.22-591/drivers/acpi/ec.c +--- linux-2.6.22-570/drivers/acpi/ec.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/ec.c 2007-12-21 15:36:11.000000000 -0500 +@@ -34,25 +34,26 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + +-#define _COMPONENT ACPI_EC_COMPONENT +-ACPI_MODULE_NAME("ec"); +-#define ACPI_EC_COMPONENT 0x00100000 + #define ACPI_EC_CLASS "embedded_controller" + #define ACPI_EC_HID "PNP0C09" + #define ACPI_EC_DEVICE_NAME "Embedded Controller" + #define ACPI_EC_FILE_INFO "info" ++ + #undef PREFIX + #define PREFIX "ACPI: EC: " ++ + /* EC status register */ + #define ACPI_EC_FLAG_OBF 0x01 /* Output buffer full */ + #define ACPI_EC_FLAG_IBF 0x02 /* Input buffer full */ + #define ACPI_EC_FLAG_BURST 0x10 /* burst mode */ + #define ACPI_EC_FLAG_SCI 0x20 /* EC-SCI occurred */ ++ + /* EC commands */ + enum ec_command { + ACPI_EC_COMMAND_READ = 0x80, +@@ -61,6 +62,7 @@ + ACPI_EC_BURST_DISABLE = 0x83, + ACPI_EC_COMMAND_QUERY = 0x84, + }; ++ + /* EC events */ + enum ec_event { + ACPI_EC_EVENT_OBF_1 = 1, /* Output buffer full */ +@@ -94,6 +96,16 @@ + + /* If we find an EC via the ECDT, we need to keep a ptr to its context */ + /* External interfaces use first EC only, so remember */ ++typedef int (*acpi_ec_query_func) (void *data); ++ ++struct acpi_ec_query_handler { ++ struct list_head node; ++ acpi_ec_query_func func; ++ acpi_handle handle; ++ void *data; ++ u8 query_bit; ++}; ++ + static struct acpi_ec { + acpi_handle handle; + unsigned long gpe; +@@ -104,6 +116,7 @@ + atomic_t query_pending; + atomic_t event_count; + wait_queue_head_t wait; ++ struct list_head list; + } *boot_ec, *first_ec; + + /* -------------------------------------------------------------------------- +@@ -245,7 +258,7 @@ + + status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0, 0, 0); + if (status) { +- printk(KERN_DEBUG PREFIX ++ printk(KERN_ERR PREFIX + "input buffer is not empty, aborting transaction\n"); + goto end; + } +@@ -394,21 +407,67 @@ + /* -------------------------------------------------------------------------- + Event Management + -------------------------------------------------------------------------- */ 
++int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
++			      acpi_handle handle, acpi_ec_query_func func,
++			      void *data)
++{
++	struct acpi_ec_query_handler *handler =
++	    kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL);
++	if (!handler)
++		return -ENOMEM;
++
++	handler->query_bit = query_bit;
++	handler->handle = handle;
++	handler->func = func;
++	handler->data = data;
++	mutex_lock(&ec->lock);
++	list_add_tail(&handler->node, &ec->list);
++	mutex_unlock(&ec->lock);
++	return 0;
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler);
++
++void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
++{
++	struct acpi_ec_query_handler *handler;
++	mutex_lock(&ec->lock);
++	list_for_each_entry(handler, &ec->list, node) {
++		if (query_bit == handler->query_bit) {
++			list_del(&handler->node);
++			kfree(handler);
++			break;
++		}
++	}
++	mutex_unlock(&ec->lock);
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
+
+ static void acpi_ec_gpe_query(void *ec_cxt)
+ {
+ 	struct acpi_ec *ec = ec_cxt;
+ 	u8 value = 0;
+-	char object_name[8];
++	struct acpi_ec_query_handler *handler, copy;
+
+ 	if (!ec || acpi_ec_query(ec, &value))
+ 		return;
+-
+-	snprintf(object_name, 8, "_Q%2.2X", value);
+-
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name));
+-
+-	acpi_evaluate_object(ec->handle, object_name, NULL, NULL);
++	mutex_lock(&ec->lock);
++	list_for_each_entry(handler, &ec->list, node) {
++		if (value == handler->query_bit) {
++			/* have custom handler for this bit */
++			memcpy(&copy, handler, sizeof(copy));
++			mutex_unlock(&ec->lock);
++			if (copy.func) {
++				copy.func(copy.data);
++			} else if (copy.handle) {
++				acpi_evaluate_object(copy.handle, NULL, NULL, NULL);
++			}
++			return;
++		}
++	}
++	mutex_unlock(&ec->lock);
++	printk(KERN_ERR PREFIX "Handler for query 0x%x is not found!\n", value);
+ }
+
+ static u32 acpi_ec_gpe_handler(void *data)
+@@ -427,8 +486,7 @@
+ 	if ((value & ACPI_EC_FLAG_SCI) && !atomic_read(&ec->query_pending)) {
+ 		atomic_set(&ec->query_pending, 1);
+ 		status =
+-		    acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query,
+-				    ec);
++		    acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query, ec);
+ 	}
+
+ 	return status == AE_OK ? ACPI_INTERRUPT_HANDLED : ACPI_INTERRUPT_NOT_HANDLED;
+@@ -454,57 +512,35 @@ + } + + static acpi_status +-acpi_ec_space_handler(u32 function, +- acpi_physical_address address, +- u32 bit_width, +- acpi_integer * value, ++acpi_ec_space_handler(u32 function, acpi_physical_address address, ++ u32 bits, acpi_integer *value, + void *handler_context, void *region_context) + { +- int result = 0; + struct acpi_ec *ec = handler_context; +- u64 temp = *value; +- acpi_integer f_v = 0; +- int i = 0; ++ int result = 0, i = 0; ++ u8 temp = 0; + + if ((address > 0xFF) || !value || !handler_context) + return AE_BAD_PARAMETER; + +- if (bit_width != 8 && acpi_strict) { ++ if (function != ACPI_READ && function != ACPI_WRITE) + return AE_BAD_PARAMETER; +- } + +- next_byte: +- switch (function) { +- case ACPI_READ: +- temp = 0; +- result = acpi_ec_read(ec, (u8) address, (u8 *) & temp); +- break; +- case ACPI_WRITE: +- result = acpi_ec_write(ec, (u8) address, (u8) temp); +- break; +- default: +- result = -EINVAL; +- goto out; +- break; +- } +- +- bit_width -= 8; +- if (bit_width) { +- if (function == ACPI_READ) +- f_v |= temp << 8 * i; +- if (function == ACPI_WRITE) +- temp >>= 8; +- i++; +- address++; +- goto next_byte; +- } ++ if (bits != 8 && acpi_strict) ++ return AE_BAD_PARAMETER; + ++ while (bits - i > 0) { + if (function == ACPI_READ) { +- f_v |= temp << 8 * i; +- *value = f_v; ++ result = acpi_ec_read(ec, address, &temp); ++ (*value) |= ((acpi_integer)temp) << i; ++ } else { ++ temp = 0xff & ((*value) >> i); ++ result = acpi_ec_write(ec, address, temp); ++ } ++ i += 8; ++ ++address; + } + +- out: + switch (result) { + case -EINVAL: + return AE_BAD_PARAMETER; +@@ -597,9 +633,6 @@ + static acpi_status + ec_parse_io_ports(struct acpi_resource *resource, void *context); + +-static acpi_status +-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval); +- + static struct acpi_ec *make_acpi_ec(void) + { + struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL); +@@ -610,13 +643,52 @@ + atomic_set(&ec->event_count, 1); + mutex_init(&ec->lock); + init_waitqueue_head(&ec->wait); ++ INIT_LIST_HEAD(&ec->list); + + return ec; + } + ++static acpi_status ++acpi_ec_register_query_methods(acpi_handle handle, u32 level, ++ void *context, void **return_value) ++{ ++ struct acpi_namespace_node *node = handle; ++ struct acpi_ec *ec = context; ++ int value = 0; ++ if (sscanf(node->name.ascii, "_Q%x", &value) == 1) { ++ acpi_ec_add_query_handler(ec, value, handle, NULL, NULL); ++ } ++ return AE_OK; ++} ++ ++static int ec_parse_device(struct acpi_ec *ec, acpi_handle handle) ++{ ++ if (ACPI_FAILURE(acpi_walk_resources(handle, METHOD_NAME__CRS, ++ ec_parse_io_ports, ec))) ++ return -EINVAL; ++ ++ /* Get GPE bit assignment (EC events). */ ++ /* TODO: Add support for _GPE returning a package */ ++ if (ACPI_FAILURE(acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe))) ++ return -EINVAL; ++ ++ /* Use the global lock for all EC transactions? 
*/ ++ acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock); ++ ++ /* Find and register all query methods */ ++ acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1, ++ acpi_ec_register_query_methods, ec, NULL); ++ ++ ec->handle = handle; ++ ++ printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx", ++ ec->gpe, ec->command_addr, ec->data_addr); ++ ++ return 0; ++} ++ + static int acpi_ec_add(struct acpi_device *device) + { +- acpi_status status = AE_OK; + struct acpi_ec *ec = NULL; + + if (!device) +@@ -629,8 +701,7 @@ + if (!ec) + return -ENOMEM; + +- status = ec_parse_device(device->handle, 0, ec, NULL); +- if (status != AE_CTRL_TERMINATE) { ++ if (ec_parse_device(ec, device->handle)) { + kfree(ec); + return -EINVAL; + } +@@ -641,6 +712,8 @@ + /* We might have incorrect info for GL at boot time */ + mutex_lock(&boot_ec->lock); + boot_ec->global_lock = ec->global_lock; ++ /* Copy handlers from new ec into boot ec */ ++ list_splice(&ec->list, &boot_ec->list); + mutex_unlock(&boot_ec->lock); + kfree(ec); + ec = boot_ec; +@@ -651,22 +724,24 @@ + acpi_driver_data(device) = ec; + + acpi_ec_add_fs(device); +- +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s [%s] (gpe %d) interrupt mode.", +- acpi_device_name(device), acpi_device_bid(device), +- (u32) ec->gpe)); +- + return 0; + } + + static int acpi_ec_remove(struct acpi_device *device, int type) + { + struct acpi_ec *ec; ++ struct acpi_ec_query_handler *handler; + + if (!device) + return -EINVAL; + + ec = acpi_driver_data(device); ++ mutex_lock(&ec->lock); ++ list_for_each_entry(handler, &ec->list, node) { ++ list_del(&handler->node); ++ kfree(handler); ++ } ++ mutex_unlock(&ec->lock); + acpi_ec_remove_fs(device); + acpi_driver_data(device) = NULL; + if (ec == first_ec) +@@ -722,15 +797,13 @@ + return -ENODEV; + } + +- /* EC is fully operational, allow queries */ +- atomic_set(&ec->query_pending, 0); +- + return 0; + } + + static int acpi_ec_start(struct acpi_device *device) + { + struct acpi_ec *ec; ++ int ret = 0; + + if (!device) + return -EINVAL; +@@ -740,14 +813,14 @@ + if (!ec) + return -EINVAL; + +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02lx, ports=0x%2lx,0x%2lx", +- ec->gpe, ec->command_addr, ec->data_addr)); +- + /* Boot EC is already working */ +- if (ec == boot_ec) +- return 0; ++ if (ec != boot_ec) ++ ret = ec_install_handlers(ec); + +- return ec_install_handlers(ec); ++ /* EC is fully operational, allow queries */ ++ atomic_set(&ec->query_pending, 0); ++ ++ return ret; + } + + static int acpi_ec_stop(struct acpi_device *device, int type) +@@ -779,34 +852,6 @@ + return 0; + } + +-static acpi_status +-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval) +-{ +- acpi_status status; +- +- struct acpi_ec *ec = context; +- status = acpi_walk_resources(handle, METHOD_NAME__CRS, +- ec_parse_io_ports, ec); +- if (ACPI_FAILURE(status)) +- return status; +- +- /* Get GPE bit assignment (EC events). */ +- /* TODO: Add support for _GPE returning a package */ +- status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe); +- if (ACPI_FAILURE(status)) +- return status; +- +- /* Use the global lock for all EC transactions? 
*/ +- acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock); +- +- ec->handle = handle; +- +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx", +- ec->gpe, ec->command_addr, ec->data_addr)); +- +- return AE_CTRL_TERMINATE; +-} +- + int __init acpi_ec_ecdt_probe(void) + { + int ret; +@@ -825,7 +870,7 @@ + if (ACPI_FAILURE(status)) + goto error; + +- ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT")); ++ printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n"); + + boot_ec->command_addr = ecdt_ptr->control.address; + boot_ec->data_addr = ecdt_ptr->data.address; +diff -Nurb linux-2.6.22-570/drivers/acpi/osl.c linux-2.6.22-591/drivers/acpi/osl.c +--- linux-2.6.22-570/drivers/acpi/osl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/osl.c 2007-12-21 15:36:11.000000000 -0500 +@@ -77,13 +77,7 @@ + #define OSI_STRING_LENGTH_MAX 64 /* arbitrary */ + static char osi_additional_string[OSI_STRING_LENGTH_MAX]; + +-#define OSI_LINUX_ENABLED +-#ifdef OSI_LINUX_ENABLED +-int osi_linux = 1; /* enable _OSI(Linux) by default */ +-#else + int osi_linux; /* disable _OSI(Linux) by default */ +-#endif +- + + #ifdef CONFIG_DMI + static struct __initdata dmi_system_id acpi_osl_dmi_table[]; +@@ -1056,6 +1050,17 @@ + + EXPORT_SYMBOL(max_cstate); + ++void (*acpi_do_set_cstate_limit)(void); ++EXPORT_SYMBOL(acpi_do_set_cstate_limit); ++ ++void acpi_set_cstate_limit(unsigned int new_limit) ++{ ++ max_cstate = new_limit; ++ if (acpi_do_set_cstate_limit) ++ acpi_do_set_cstate_limit(); ++} ++EXPORT_SYMBOL(acpi_set_cstate_limit); ++ + /* + * Acquire a spinlock. + * +@@ -1183,17 +1188,10 @@ + if (!strcmp("Linux", interface)) { + printk(KERN_WARNING PREFIX + "System BIOS is requesting _OSI(Linux)\n"); +-#ifdef OSI_LINUX_ENABLED +- printk(KERN_WARNING PREFIX +- "Please test with \"acpi_osi=!Linux\"\n" +- "Please send dmidecode " +- "to linux-acpi@vger.kernel.org\n"); +-#else + printk(KERN_WARNING PREFIX + "If \"acpi_osi=Linux\" works better,\n" + "Please send dmidecode " + "to linux-acpi@vger.kernel.org\n"); +-#endif + if(osi_linux) + return AE_OK; + } +@@ -1227,36 +1225,14 @@ + } + + #ifdef CONFIG_DMI +-#ifdef OSI_LINUX_ENABLED +-static int dmi_osi_not_linux(struct dmi_system_id *d) +-{ +- printk(KERN_NOTICE "%s detected: requires not _OSI(Linux)\n", d->ident); +- enable_osi_linux(0); +- return 0; +-} +-#else + static int dmi_osi_linux(struct dmi_system_id *d) + { +- printk(KERN_NOTICE "%s detected: requires _OSI(Linux)\n", d->ident); ++ printk(KERN_NOTICE "%s detected: enabling _OSI(Linux)\n", d->ident); + enable_osi_linux(1); + return 0; + } +-#endif + + static struct dmi_system_id acpi_osl_dmi_table[] __initdata = { +-#ifdef OSI_LINUX_ENABLED +- /* +- * Boxes that need NOT _OSI(Linux) +- */ +- { +- .callback = dmi_osi_not_linux, +- .ident = "Toshiba Satellite P100", +- .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "TOSHIBA"), +- DMI_MATCH(DMI_BOARD_NAME, "Satellite P100"), +- }, +- }, +-#else + /* + * Boxes that need _OSI(Linux) + */ +@@ -1268,7 +1244,6 @@ + DMI_MATCH(DMI_BOARD_NAME, "MPAD-MSAE Customer Reference Boards"), + }, + }, +-#endif + {} + }; + #endif /* CONFIG_DMI */ +diff -Nurb linux-2.6.22-570/drivers/acpi/processor_core.c linux-2.6.22-591/drivers/acpi/processor_core.c +--- linux-2.6.22-570/drivers/acpi/processor_core.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/acpi/processor_core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -66,6 +67,7 
@@ + #define ACPI_PROCESSOR_FILE_LIMIT "limit" + #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 + #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 ++#define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 + + #define ACPI_PROCESSOR_LIMIT_USER 0 + #define ACPI_PROCESSOR_LIMIT_THERMAL 1 +@@ -84,6 +86,8 @@ + static void acpi_processor_notify(acpi_handle handle, u32 event, void *data); + static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu); + static int acpi_processor_handle_eject(struct acpi_processor *pr); ++extern int acpi_processor_tstate_has_changed(struct acpi_processor *pr); ++ + + static struct acpi_driver acpi_processor_driver = { + .name = "processor", +@@ -701,6 +705,9 @@ + acpi_processor_cst_has_changed(pr); + acpi_bus_generate_event(device, event, 0); + break; ++ case ACPI_PROCESSOR_NOTIFY_THROTTLING: ++ acpi_processor_tstate_has_changed(pr); ++ acpi_bus_generate_event(device, event, 0); + default: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Unsupported event [0x%x]\n", event)); +@@ -1024,11 +1031,15 @@ + + acpi_processor_ppc_init(); + ++ cpuidle_register_driver(&acpi_idle_driver); ++ acpi_do_set_cstate_limit = acpi_max_cstate_changed; + return 0; + } + + static void __exit acpi_processor_exit(void) + { ++ acpi_do_set_cstate_limit = NULL; ++ cpuidle_unregister_driver(&acpi_idle_driver); + + acpi_processor_ppc_exit(); + +diff -Nurb linux-2.6.22-570/drivers/acpi/processor_idle.c linux-2.6.22-591/drivers/acpi/processor_idle.c +--- linux-2.6.22-570/drivers/acpi/processor_idle.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/acpi/processor_idle.c 2007-12-21 15:36:11.000000000 -0500 +@@ -40,6 +40,7 @@ + #include /* need_resched() */ + #include + #include ++#include + + /* + * Include the apic definitions for x86 to have the APIC timer related defines +@@ -62,25 +63,34 @@ + #define _COMPONENT ACPI_PROCESSOR_COMPONENT + ACPI_MODULE_NAME("processor_idle"); + #define ACPI_PROCESSOR_FILE_POWER "power" +-#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) +-#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ +-#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ +-static void (*pm_idle_save) (void) __read_mostly; +-module_param(max_cstate, uint, 0644); ++#define PM_TIMER_TICKS_TO_US(p) (((p) * 1000)/(PM_TIMER_FREQUENCY/1000)) ++#define C2_OVERHEAD 1 /* 1us */ ++#define C3_OVERHEAD 1 /* 1us */ ++ ++void acpi_max_cstate_changed(void) ++{ ++ /* Driver will reset devices' max cstate limit */ ++ cpuidle_force_redetect_devices(&acpi_idle_driver); ++} ++ ++static int change_max_cstate(const char *val, struct kernel_param *kp) ++{ ++ int max; ++ ++ max = simple_strtol(val, NULL, 0); ++ if (!max) ++ return -EINVAL; ++ max_cstate = max; ++ if (acpi_do_set_cstate_limit) ++ acpi_do_set_cstate_limit(); ++ return 0; ++} ++ ++module_param_call(max_cstate, change_max_cstate, param_get_uint, &max_cstate, 0644); + + static unsigned int nocst __read_mostly; + module_param(nocst, uint, 0000); + +-/* +- * bm_history -- bit-mask with a bit per jiffy of bus-master activity +- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms +- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms +- * 100 HZ: 0x0000000F: 4 jiffies = 40ms +- * reduce history for more aggressive entry into C3 +- */ +-static unsigned int bm_history __read_mostly = +- (HZ >= 800 ? 
0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+-module_param(bm_history, uint, 0644);
+ /* --------------------------------------------------------------------------
+                               Power Management
+    -------------------------------------------------------------------------- */
+@@ -166,88 +176,6 @@
+ 	{},
+ };
+
+-static inline u32 ticks_elapsed(u32 t1, u32 t2)
+-{
+-	if (t2 >= t1)
+-		return (t2 - t1);
+-	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+-		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+-	else
+-		return ((0xFFFFFFFF - t1) + t2);
+-}
+-
+-static void
+-acpi_processor_power_activate(struct acpi_processor *pr,
+-			      struct acpi_processor_cx *new)
+-{
+-	struct acpi_processor_cx *old;
+-
+-	if (!pr || !new)
+-		return;
+-
+-	old = pr->power.state;
+-
+-	if (old)
+-		old->promotion.count = 0;
+-	new->demotion.count = 0;
+-
+-	/* Cleanup from old state. */
+-	if (old) {
+-		switch (old->type) {
+-		case ACPI_STATE_C3:
+-			/* Disable bus master reload */
+-			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
+-				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+-			break;
+-		}
+-	}
+-
+-	/* Prepare to use new state. */
+-	switch (new->type) {
+-	case ACPI_STATE_C3:
+-		/* Enable bus master reload */
+-		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
+-			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+-		break;
+-	}
+-
+-	pr->power.state = new;
+-
+-	return;
+-}
+-
+-static void acpi_safe_halt(void)
+-{
+-	current_thread_info()->status &= ~TS_POLLING;
+-	/*
+-	 * TS_POLLING-cleared state must be visible before we
+-	 * test NEED_RESCHED:
+-	 */
+-	smp_mb();
+-	if (!need_resched())
+-		safe_halt();
+-	current_thread_info()->status |= TS_POLLING;
+-}
+-
+-static atomic_t c3_cpu_count;
+-
+-/* Common C-state entry for C2, C3, .. */
+-static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
+-{
+-	if (cstate->space_id == ACPI_CSTATE_FFH) {
+-		/* Call into architectural FFH based C-state */
+-		acpi_processor_ffh_cstate_enter(cstate);
+-	} else {
+-		int unused;
+-		/* IO port based C-state */
+-		inb(cstate->address);
+-		/* Dummy wait op - must do something useless after P_LVL2 read
+-		   because chipsets cannot guarantee that STPCLK# signal
+-		   gets asserted in time to freeze execution properly. */
+-		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+-	}
+-}
+-
+ #ifdef ARCH_APICTIMER_STOPS_ON_C3
+
+ /*
+@@ -341,6 +269,6 @@
+ 	return 0;
+ }
+
+ static void acpi_processor_idle(void)
+ {
+ 	struct acpi_processor *pr = NULL;
+@@ -712,6 +641,6 @@
+ 	return 0;
+ }
+
+ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
+ {
+
+@@ -929,7 +860,7 @@
+ 	 * Normalize the C2 latency to expidite policy
+ 	 */
+ 	cx->valid = 1;
+-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++	cx->latency_ticks = cx->latency;
+
+ 	return;
+ }
+@@ -1003,7 +934,7 @@
+ 	 * use this in our C3 policy
+ 	 */
+ 	cx->valid = 1;
+-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++	cx->latency_ticks = cx->latency;
+
+ 	return;
+ }
+@@ -1069,18 +1000,6 @@
+ 	pr->power.count = acpi_processor_power_verify(pr);
+
+ 	/*
+-	 * Set Default Policy
+-	 * ------------------
+-	 * Now that we know which states are supported, set the default
+-	 * policy.  Note that this policy can be changed dynamically
+-	 * (e.g. encourage deeper sleeps to conserve battery life when
+-	 * not on AC).
+- */ +- result = acpi_processor_set_power_policy(pr); +- if (result) +- return result; +- +- /* + * if one state of type C2 or C3 is available, mark this + * CPU as being "idle manageable" + */ +@@ -1097,9 +1016,6 @@ + + int acpi_processor_cst_has_changed(struct acpi_processor *pr) + { +- int result = 0; +- +- + if (!pr) + return -EINVAL; + +@@ -1110,16 +1026,9 @@ + if (!pr->flags.power_setup_done) + return -ENODEV; + +- /* Fall back to the default idle loop */ +- pm_idle = pm_idle_save; +- synchronize_sched(); /* Relies on interrupts forcing exit from idle. */ +- +- pr->flags.power = 0; +- result = acpi_processor_get_power_info(pr); +- if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) +- pm_idle = acpi_processor_idle; +- +- return result; ++ acpi_processor_get_power_info(pr); ++ return cpuidle_force_redetect(per_cpu(cpuidle_devices, pr->id), ++ &acpi_idle_driver); + } + + /* proc interface */ +@@ -1205,30 +1114,6 @@ + .release = single_release, + }; + +-#ifdef CONFIG_SMP +-static void smp_callback(void *v) +-{ +- /* we already woke the CPU up, nothing more to do */ +-} +- +-/* +- * This function gets called when a part of the kernel has a new latency +- * requirement. This means we need to get all processors out of their C-state, +- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that +- * wakes them all right up. +- */ +-static int acpi_processor_latency_notify(struct notifier_block *b, +- unsigned long l, void *v) +-{ +- smp_call_function(smp_callback, NULL, 0, 1); +- return NOTIFY_OK; +-} +- +-static struct notifier_block acpi_processor_latency_notifier = { +- .notifier_call = acpi_processor_latency_notify, +-}; +-#endif +- + int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, + struct acpi_device *device) + { +@@ -1245,9 +1130,6 @@ + "ACPI: processor limited to max C-state %d\n", + max_cstate); + first_run++; +-#ifdef CONFIG_SMP +- register_latency_notifier(&acpi_processor_latency_notifier); +-#endif + } + + if (!pr) +@@ -1264,6 +1146,7 @@ + + acpi_processor_get_power_info(pr); + ++ + /* + * Install the idle handler if processor power management is supported. + * Note that we use previously set idle handler will be used on +@@ -1276,11 +1159,6 @@ + printk(" C%d[C%d]", i, + pr->power.states[i].type); + printk(")\n"); +- +- if (pr->id == 0) { +- pm_idle_save = pm_idle; +- pm_idle = acpi_processor_idle; +- } + } + + /* 'power' [R] */ +@@ -1308,21 +1186,332 @@ + if (acpi_device_dir(device)) + remove_proc_entry(ACPI_PROCESSOR_FILE_POWER, + acpi_device_dir(device)); ++ return 0; ++} ++ ++/** ++ * ticks_elapsed - a helper function that determines how many ticks (in US) ++ * have elapsed between two PM Timer timestamps ++ * @t1: the start time ++ * @t2: the end time ++ */ ++static inline u32 ticks_elapsed(u32 t1, u32 t2) ++{ ++ if (t2 >= t1) ++ return PM_TIMER_TICKS_TO_US(t2 - t1); ++ else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER)) ++ return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF); ++ else ++ return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2); ++} + +- /* Unregister the idle handler when processor #0 is removed. 
*/ +- if (pr->id == 0) { +- pm_idle = pm_idle_save; ++/** ++ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state ++ * @pr: the processor ++ * @target: the new target state ++ */ ++static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, ++ struct acpi_processor_cx *target) ++{ ++ if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) { ++ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); ++ pr->flags.bm_rld_set = 0; ++ } + ++ if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) { ++ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); ++ pr->flags.bm_rld_set = 1; ++ } ++} ++ ++/** ++ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry ++ * @cx: cstate data ++ */ ++static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) ++{ ++ if (cx->space_id == ACPI_CSTATE_FFH) { ++ /* Call into architectural FFH based C-state */ ++ acpi_processor_ffh_cstate_enter(cx); ++ } else { ++ int unused; ++ /* IO port based C-state */ ++ inb(cx->address); ++ /* Dummy wait op - must do something useless after P_LVL2 read ++ because chipsets cannot guarantee that STPCLK# signal ++ gets asserted in time to freeze execution properly. */ ++ unused = inl(acpi_gbl_FADT.xpm_timer_block.address); ++ } ++} ++ ++/** ++ * acpi_idle_enter_c1 - enters an ACPI C1 state-type ++ * @dev: the target CPU ++ * @state: the state data ++ * ++ * This is equivalent to the HALT instruction. ++ */ ++static int acpi_idle_enter_c1(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ struct acpi_processor *pr; ++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state); ++ pr = processors[smp_processor_id()]; ++ ++ if (unlikely(!pr)) ++ return 0; ++ ++ if (pr->flags.bm_check) ++ acpi_idle_update_bm_rld(pr, cx); ++ ++ current_thread_info()->status &= ~TS_POLLING; + /* +- * We are about to unload the current idle thread pm callback +- * (pm_idle), Wait for all processors to update cached/local +- * copies of pm_idle before proceeding. 
+- */ +- cpu_idle_wait(); +-#ifdef CONFIG_SMP +- unregister_latency_notifier(&acpi_processor_latency_notifier); ++ * TS_POLLING-cleared state must be visible before we test ++ * NEED_RESCHED: ++ */ ++ smp_mb(); ++ if (!need_resched()) ++ safe_halt(); ++ current_thread_info()->status |= TS_POLLING; ++ ++ cx->usage++; ++ ++ return 0; ++} ++ ++/** ++ * acpi_idle_enter_c2 - enters an ACPI C2 state-type ++ * @dev: the target CPU ++ * @state: the state data ++ */ ++static int acpi_idle_enter_c2(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ struct acpi_processor *pr; ++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state); ++ u32 t1, t2; ++ pr = processors[smp_processor_id()]; ++ ++ if (unlikely(!pr)) ++ return 0; ++ ++ if (pr->flags.bm_check) ++ acpi_idle_update_bm_rld(pr, cx); ++ ++ local_irq_disable(); ++ current_thread_info()->status &= ~TS_POLLING; ++ /* ++ * TS_POLLING-cleared state must be visible before we test ++ * NEED_RESCHED: ++ */ ++ smp_mb(); ++ ++ if (unlikely(need_resched())) { ++ current_thread_info()->status |= TS_POLLING; ++ local_irq_enable(); ++ return 0; ++ } ++ ++ t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); ++ acpi_state_timer_broadcast(pr, cx, 1); ++ acpi_idle_do_entry(cx); ++ t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); ++ ++#ifdef CONFIG_GENERIC_TIME ++ /* TSC halts in C2, so notify users */ ++ mark_tsc_unstable("possible TSC halt in C2"); + #endif ++ ++ local_irq_enable(); ++ current_thread_info()->status |= TS_POLLING; ++ ++ cx->usage++; ++ ++ acpi_state_timer_broadcast(pr, cx, 0); ++ return ticks_elapsed(t1, t2); ++} ++ ++static int c3_cpu_count; ++static DEFINE_SPINLOCK(c3_lock); ++ ++/** ++ * acpi_idle_enter_c3 - enters an ACPI C3 state-type ++ * @dev: the target CPU ++ * @state: the state data ++ * ++ * Similar to C2 entry, except special bus master handling is needed. ++ */ ++static int acpi_idle_enter_c3(struct cpuidle_device *dev, ++ struct cpuidle_state *state) ++{ ++ struct acpi_processor *pr; ++ struct acpi_processor_cx *cx = cpuidle_get_statedata(state); ++ u32 t1, t2; ++ pr = processors[smp_processor_id()]; ++ ++ if (unlikely(!pr)) ++ return 0; ++ ++ if (pr->flags.bm_check) ++ acpi_idle_update_bm_rld(pr, cx); ++ ++ local_irq_disable(); ++ current_thread_info()->status &= ~TS_POLLING; ++ /* ++ * TS_POLLING-cleared state must be visible before we test ++ * NEED_RESCHED: ++ */ ++ smp_mb(); ++ ++ if (unlikely(need_resched())) { ++ current_thread_info()->status |= TS_POLLING; ++ local_irq_enable(); ++ return 0; ++ } ++ ++ /* disable bus master */ ++ if (pr->flags.bm_check) { ++ spin_lock(&c3_lock); ++ c3_cpu_count++; ++ if (c3_cpu_count == num_online_cpus()) { ++ /* ++ * All CPUs are trying to go to C3 ++ * Disable bus master arbitration ++ */ ++ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); ++ } ++ spin_unlock(&c3_lock); ++ } else { ++ /* SMP with no shared cache... 
Invalidate cache */ ++ ACPI_FLUSH_CPU_CACHE(); ++ } ++ ++ /* Get start time (ticks) */ ++ t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); ++ acpi_state_timer_broadcast(pr, cx, 1); ++ acpi_idle_do_entry(cx); ++ t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); ++ ++ if (pr->flags.bm_check) { ++ spin_lock(&c3_lock); ++ /* Enable bus master arbitration */ ++ if (c3_cpu_count == num_online_cpus()) ++ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); ++ c3_cpu_count--; ++ spin_unlock(&c3_lock); + } + ++#ifdef CONFIG_GENERIC_TIME ++ /* TSC halts in C3, so notify users */ ++ mark_tsc_unstable("TSC halts in C3"); ++#endif ++ ++ local_irq_enable(); ++ current_thread_info()->status |= TS_POLLING; ++ ++ cx->usage++; ++ ++ acpi_state_timer_broadcast(pr, cx, 0); ++ return ticks_elapsed(t1, t2); ++} ++ ++/** ++ * acpi_idle_bm_check - checks if bus master activity was detected ++ */ ++static int acpi_idle_bm_check(void) ++{ ++ u32 bm_status = 0; ++ ++ acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); ++ if (bm_status) ++ acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); ++ /* ++ * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect ++ * the true state of bus mastering activity; forcing us to ++ * manually check the BMIDEA bit of each IDE channel. ++ */ ++ else if (errata.piix4.bmisx) { ++ if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01) ++ || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01)) ++ bm_status = 1; ++ } ++ return bm_status; ++} ++ ++/** ++ * acpi_idle_init - attaches the driver to a CPU ++ * @dev: the CPU ++ */ ++static int acpi_idle_init(struct cpuidle_device *dev) ++{ ++ int cpu = dev->cpu; ++ int i, count = 0; ++ struct acpi_processor_cx *cx; ++ struct cpuidle_state *state; ++ ++ struct acpi_processor *pr = processors[cpu]; ++ ++ if (!pr->flags.power_setup_done) ++ return -EINVAL; ++ ++ if (pr->flags.power == 0) { ++ return -EINVAL; ++ } ++ ++ for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) { ++ cx = &pr->power.states[i]; ++ state = &dev->states[count]; ++ ++ if (!cx->valid) ++ continue; ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && ++ !pr->flags.has_cst && ++ !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) ++ continue; ++#endif ++ cpuidle_set_statedata(state, cx); ++ ++ state->exit_latency = cx->latency; ++ state->target_residency = cx->latency * 6; ++ state->power_usage = cx->power; ++ ++ state->flags = 0; ++ switch (cx->type) { ++ case ACPI_STATE_C1: ++ state->flags |= CPUIDLE_FLAG_SHALLOW; ++ state->enter = acpi_idle_enter_c1; ++ break; ++ ++ case ACPI_STATE_C2: ++ state->flags |= CPUIDLE_FLAG_BALANCED; ++ state->flags |= CPUIDLE_FLAG_TIME_VALID; ++ state->enter = acpi_idle_enter_c2; ++ break; ++ ++ case ACPI_STATE_C3: ++ state->flags |= CPUIDLE_FLAG_DEEP; ++ state->flags |= CPUIDLE_FLAG_TIME_VALID; ++ state->flags |= CPUIDLE_FLAG_CHECK_BM; ++ state->enter = acpi_idle_enter_c3; ++ break; ++ } ++ ++ count++; ++ } ++ ++ if (!count) ++ return -EINVAL; ++ ++ dev->state_count = count; + return 0; + } ++ ++struct cpuidle_driver acpi_idle_driver = { ++ .name = "acpi_idle", ++ .init = acpi_idle_init, ++ .redetect = acpi_idle_init, ++ .bm_check = acpi_idle_bm_check, ++ .owner = THIS_MODULE, ++}; +diff -Nurb linux-2.6.22-570/drivers/acpi/processor_throttling.c linux-2.6.22-591/drivers/acpi/processor_throttling.c +--- linux-2.6.22-570/drivers/acpi/processor_throttling.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/processor_throttling.c 2007-12-21 15:36:11.000000000 -0500 +@@ -44,17 +44,231 
@@ + #define _COMPONENT ACPI_PROCESSOR_COMPONENT + ACPI_MODULE_NAME("processor_throttling"); + ++static int acpi_processor_get_throttling(struct acpi_processor *pr); ++int acpi_processor_set_throttling(struct acpi_processor *pr, int state); ++ ++static int acpi_processor_get_platform_limit(struct acpi_processor *pr) ++{ ++ acpi_status status = 0; ++ unsigned long tpc = 0; ++ ++ if (!pr) ++ return -EINVAL; ++ status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc); ++ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { ++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC")); ++ return -ENODEV; ++ } ++ pr->throttling_platform_limit = (int)tpc; ++ return 0; ++} ++ ++int acpi_processor_tstate_has_changed(struct acpi_processor *pr) ++{ ++ return acpi_processor_get_platform_limit(pr); ++} ++ ++/* -------------------------------------------------------------------------- ++ _PTC, _TSS, _TSD support ++ -------------------------------------------------------------------------- */ ++static int acpi_processor_get_throttling_control(struct acpi_processor *pr) ++{ ++ int result = 0; ++ acpi_status status = 0; ++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; ++ union acpi_object *ptc = NULL; ++ union acpi_object obj = { 0 }; ++ ++ status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer); ++ if (ACPI_FAILURE(status)) { ++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC")); ++ return -ENODEV; ++ } ++ ++ ptc = (union acpi_object *)buffer.pointer; ++ if (!ptc || (ptc->type != ACPI_TYPE_PACKAGE) ++ || (ptc->package.count != 2)) { ++ printk(KERN_ERR PREFIX "Invalid _PTC data\n"); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ /* ++ * control_register ++ */ ++ ++ obj = ptc->package.elements[0]; ++ ++ if ((obj.type != ACPI_TYPE_BUFFER) ++ || (obj.buffer.length < sizeof(struct acpi_ptc_register)) ++ || (obj.buffer.pointer == NULL)) { ++ printk(KERN_ERR PREFIX ++ "Invalid _PTC data (control_register)\n"); ++ result = -EFAULT; ++ goto end; ++ } ++ memcpy(&pr->throttling.control_register, obj.buffer.pointer, ++ sizeof(struct acpi_ptc_register)); ++ ++ /* ++ * status_register ++ */ ++ ++ obj = ptc->package.elements[1]; ++ ++ if ((obj.type != ACPI_TYPE_BUFFER) ++ || (obj.buffer.length < sizeof(struct acpi_ptc_register)) ++ || (obj.buffer.pointer == NULL)) { ++ printk(KERN_ERR PREFIX "Invalid _PTC data (status_register)\n"); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ memcpy(&pr->throttling.status_register, obj.buffer.pointer, ++ sizeof(struct acpi_ptc_register)); ++ ++ end: ++ kfree(buffer.pointer); ++ ++ return result; ++} ++static int acpi_processor_get_throttling_states(struct acpi_processor *pr) ++{ ++ int result = 0; ++ acpi_status status = AE_OK; ++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; ++ struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" }; ++ struct acpi_buffer state = { 0, NULL }; ++ union acpi_object *tss = NULL; ++ int i; ++ ++ status = acpi_evaluate_object(pr->handle, "_TSS", NULL, &buffer); ++ if (ACPI_FAILURE(status)) { ++ ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS")); ++ return -ENODEV; ++ } ++ ++ tss = buffer.pointer; ++ if (!tss || (tss->type != ACPI_TYPE_PACKAGE)) { ++ printk(KERN_ERR PREFIX "Invalid _TSS data\n"); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d throttling states\n", ++ tss->package.count)); ++ ++ pr->throttling.state_count = tss->package.count; ++ pr->throttling.states_tss = ++ kmalloc(sizeof(struct acpi_processor_tx_tss) * tss->package.count, ++ GFP_KERNEL); ++ if 
(!pr->throttling.states_tss) { ++ result = -ENOMEM; ++ goto end; ++ } ++ ++ for (i = 0; i < pr->throttling.state_count; i++) { ++ ++ struct acpi_processor_tx_tss *tx = ++ (struct acpi_processor_tx_tss *)&(pr->throttling. ++ states_tss[i]); ++ ++ state.length = sizeof(struct acpi_processor_tx_tss); ++ state.pointer = tx; ++ ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Extracting state %d\n", i)); ++ ++ status = acpi_extract_package(&(tss->package.elements[i]), ++ &format, &state); ++ if (ACPI_FAILURE(status)) { ++ ACPI_EXCEPTION((AE_INFO, status, "Invalid _TSS data")); ++ result = -EFAULT; ++ kfree(pr->throttling.states_tss); ++ goto end; ++ } ++ ++ if (!tx->freqpercentage) { ++ printk(KERN_ERR PREFIX ++ "Invalid _TSS data: freq is zero\n"); ++ result = -EFAULT; ++ kfree(pr->throttling.states_tss); ++ goto end; ++ } ++ } ++ ++ end: ++ kfree(buffer.pointer); ++ ++ return result; ++} ++static int acpi_processor_get_tsd(struct acpi_processor *pr) ++{ ++ int result = 0; ++ acpi_status status = AE_OK; ++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; ++ struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" }; ++ struct acpi_buffer state = { 0, NULL }; ++ union acpi_object *tsd = NULL; ++ struct acpi_tsd_package *pdomain; ++ ++ status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer); ++ if (ACPI_FAILURE(status)) { ++ return -ENODEV; ++ } ++ ++ tsd = buffer.pointer; ++ if (!tsd || (tsd->type != ACPI_TYPE_PACKAGE)) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ if (tsd->package.count != 1) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ pdomain = &(pr->throttling.domain_info); ++ ++ state.length = sizeof(struct acpi_tsd_package); ++ state.pointer = pdomain; ++ ++ status = acpi_extract_package(&(tsd->package.elements[0]), ++ &format, &state); ++ if (ACPI_FAILURE(status)) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n")); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ if (pdomain->num_entries != ACPI_TSD_REV0_ENTRIES) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:num_entries\n")); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ if (pdomain->revision != ACPI_TSD_REV0_REVISION) { ++ ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:revision\n")); ++ result = -EFAULT; ++ goto end; ++ } ++ ++ end: ++ kfree(buffer.pointer); ++ return result; ++} ++ + /* -------------------------------------------------------------------------- + Throttling Control + -------------------------------------------------------------------------- */ +-static int acpi_processor_get_throttling(struct acpi_processor *pr) ++static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr) + { + int state = 0; + u32 value = 0; + u32 duty_mask = 0; + u32 duty_value = 0; + +- + if (!pr) + return -EINVAL; + +@@ -94,13 +308,114 @@ + return 0; + } + +-int acpi_processor_set_throttling(struct acpi_processor *pr, int state) ++static int acpi_read_throttling_status(struct acpi_processor_throttling ++ *throttling) ++{ ++ int value = -1; ++ switch (throttling->status_register.space_id) { ++ case ACPI_ADR_SPACE_SYSTEM_IO: ++ acpi_os_read_port((acpi_io_address) throttling->status_register. 
++			  address, &value,
++			  (u32) throttling->status_register.bit_width *
++			  8);
++		break;
++	case ACPI_ADR_SPACE_FIXED_HARDWARE:
++		printk(KERN_ERR PREFIX
++		       "HARDWARE addr space, NOT supported yet\n");
++		break;
++	default:
++		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++		       (u32) (throttling->status_register.space_id));
++	}
++	return value;
++}
++
++static int acpi_write_throttling_state(struct acpi_processor_throttling
++				       *throttling, int value)
++{
++	int ret = -1;
++
++	switch (throttling->control_register.space_id) {
++	case ACPI_ADR_SPACE_SYSTEM_IO:
++		acpi_os_write_port((acpi_io_address) throttling->
++				   control_register.address, value,
++				   (u32) throttling->control_register.
++				   bit_width * 8);
++		ret = 0;
++		break;
++	case ACPI_ADR_SPACE_FIXED_HARDWARE:
++		printk(KERN_ERR PREFIX
++		       "HARDWARE addr space, NOT supported yet\n");
++		break;
++	default:
++		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++		       (u32) (throttling->control_register.space_id));
++	}
++	return ret;
++}
++
++static int acpi_get_throttling_state(struct acpi_processor *pr, int value)
++{
++	int i;
++
++	for (i = 0; i < pr->throttling.state_count; i++) {
++		struct acpi_processor_tx_tss *tx =
++		    (struct acpi_processor_tx_tss *)&(pr->throttling.
++						      states_tss[i]);
++		if (tx->control == value)
++			break;
++	}
++	if (i >= pr->throttling.state_count)
++		i = -1;
++	return i;
++}
++
++static int acpi_get_throttling_value(struct acpi_processor *pr, int state)
++{
++	int value = -1;
++	if (state >= 0 && state < pr->throttling.state_count) {
++		struct acpi_processor_tx_tss *tx =
++		    (struct acpi_processor_tx_tss *)&(pr->throttling.
++						      states_tss[state]);
++		value = tx->control;
++	}
++	return value;
++}
++
++static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
++{
++	int state = 0;
++	int value = 0;
++
++	if (!pr)
++		return -EINVAL;
++
++	if (!pr->flags.throttling)
++		return -ENODEV;
++
++	pr->throttling.state = 0;
++	local_irq_disable();
++	value = acpi_read_throttling_status(&pr->throttling);
++	if (value >= 0) {
++		state = acpi_get_throttling_state(pr, value);
++		pr->throttling.state = state;
++	}
++	local_irq_enable();
++
++	return 0;
++}
++
++static int acpi_processor_get_throttling(struct acpi_processor *pr)
++{
++	return pr->throttling.acpi_processor_get_throttling(pr);
++}
++
++int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, int state)
+ {
+ 	u32 value = 0;
+ 	u32 duty_mask = 0;
+ 	u32 duty_value = 0;
+ 
+-
+ 	if (!pr)
+ 		return -EINVAL;
+ 
+@@ -113,6 +428,8 @@
+ 	if (state == pr->throttling.state)
+ 		return 0;
+ 
++	if (state < pr->throttling_platform_limit)
++		return -EPERM;
+ 	/*
+ 	 * Calculate the duty_value and duty_mask.
+	 */
+@@ -165,12 +482,50 @@
+ 	return 0;
+ }
+ 
++int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state)
++{
++	int value = 0;
++
++	if (!pr)
++		return -EINVAL;
++
++	if ((state < 0) || (state > (pr->throttling.state_count - 1)))
++		return -EINVAL;
++
++	if (!pr->flags.throttling)
++		return -ENODEV;
++
++	if (state == pr->throttling.state)
++		return 0;
++
++	if (state < pr->throttling_platform_limit)
++		return -EPERM;
++
++	local_irq_disable();
++
++	value = acpi_get_throttling_value(pr, state);
++	if (value >= 0) {
++		acpi_write_throttling_state(&pr->throttling, value);
++		pr->throttling.state = state;
++	}
++	local_irq_enable();
++
++	return 0;
++}
++
++int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
++{
++	return pr->throttling.acpi_processor_set_throttling(pr, state);
++}
++
+ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
+ {
+ 	int result = 0;
+ 	int step = 0;
+ 	int i = 0;
+-
++	int no_ptc = 0;
++	int no_tss = 0;
++	int no_tsd = 0;
+ 
+ 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ 			  "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n",
+@@ -182,6 +537,21 @@
+ 		return -EINVAL;
+ 
+ 	/* TBD: Support ACPI 2.0 objects */
++	no_ptc = acpi_processor_get_throttling_control(pr);
++	no_tss = acpi_processor_get_throttling_states(pr);
++	no_tsd = acpi_processor_get_tsd(pr);
++
++	if (no_ptc || no_tss) {
++		pr->throttling.acpi_processor_get_throttling =
++		    &acpi_processor_get_throttling_fadt;
++		pr->throttling.acpi_processor_set_throttling =
++		    &acpi_processor_set_throttling_fadt;
++	} else {
++		pr->throttling.acpi_processor_get_throttling =
++		    &acpi_processor_get_throttling_ptc;
++		pr->throttling.acpi_processor_set_throttling =
++		    &acpi_processor_set_throttling_ptc;
++	}
+ 
+ 	if (!pr->throttling.address) {
+ 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No throttling register\n"));
+@@ -262,7 +632,6 @@
+ 	int i = 0;
+ 	int result = 0;
+ 
+-
+ 	if (!pr)
+ 		goto end;
+ 
+@@ -280,15 +649,27 @@
+ 	}
+ 
+ 	seq_printf(seq, "state count: %d\n"
+-		   "active state: T%d\n",
+-		   pr->throttling.state_count, pr->throttling.state);
++		   "active state: T%d\n"
++		   "states available: T%d to T%d\n",
++		   pr->throttling.state_count, pr->throttling.state,
++		   pr->throttling_platform_limit,
++		   pr->throttling.state_count - 1);
+ 
+ 	seq_puts(seq, "states:\n");
++	if (pr->throttling.acpi_processor_get_throttling ==
++		acpi_processor_get_throttling_fadt) {
+ 	for (i = 0; i < pr->throttling.state_count; i++)
+ 		seq_printf(seq, " %cT%d: %02d%%\n",
+ 			   (i == pr->throttling.state ? '*' : ' '), i,
+ 			   (pr->throttling.states[i].performance ? pr->
+ 			    throttling.states[i].performance / 10 : 0));
++	} else {
++		for (i = 0; i < pr->throttling.state_count; i++)
++			seq_printf(seq, " %cT%d: %02d%%\n",
++				   (i == pr->throttling.state ? '*' : ' '), i,
++				   (int)pr->throttling.states_tss[i].
++ freqpercentage); ++ } + + end: + return 0; +@@ -301,7 +682,7 @@ + PDE(inode)->data); + } + +-static ssize_t acpi_processor_write_throttling(struct file * file, ++static ssize_t acpi_processor_write_throttling(struct file *file, + const char __user * buffer, + size_t count, loff_t * data) + { +@@ -310,7 +691,6 @@ + struct acpi_processor *pr = m->private; + char state_string[12] = { '\0' }; + +- + if (!pr || (count > sizeof(state_string) - 1)) + return -EINVAL; + +diff -Nurb linux-2.6.22-570/drivers/acpi/sbs.c linux-2.6.22-591/drivers/acpi/sbs.c +--- linux-2.6.22-570/drivers/acpi/sbs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/sbs.c 2007-12-21 15:36:11.000000000 -0500 +@@ -127,7 +127,7 @@ + static struct acpi_driver acpi_sbs_driver = { + .name = "sbs", + .class = ACPI_SBS_CLASS, +- .ids = ACPI_SBS_HID, ++ .ids = "ACPI0001,ACPI0005", + .ops = { + .add = acpi_sbs_add, + .remove = acpi_sbs_remove, +@@ -176,10 +176,8 @@ + }; + + struct acpi_sbs { +- acpi_handle handle; + int base; + struct acpi_device *device; +- struct acpi_ec_smbus *smbus; + struct mutex mutex; + int sbsm_present; + int sbsm_batteries_supported; +@@ -511,7 +509,7 @@ + "acpi_sbs_read_word() failed")); + goto end; + } +- ++ sbs->sbsm_present = 1; + sbs->sbsm_batteries_supported = battery_system_info & 0x000f; + + end: +@@ -1630,13 +1628,12 @@ + { + struct acpi_sbs *sbs = NULL; + int result = 0, remove_result = 0; +- unsigned long sbs_obj; + int id; + acpi_status status = AE_OK; + unsigned long val; + + status = +- acpi_evaluate_integer(device->parent->handle, "_EC", NULL, &val); ++ acpi_evaluate_integer(device->handle, "_EC", NULL, &val); + if (ACPI_FAILURE(status)) { + ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC")); + return -EIO; +@@ -1653,7 +1650,7 @@ + + sbs_mutex_lock(sbs); + +- sbs->base = (val & 0xff00ull) >> 8; ++ sbs->base = 0xff & (val >> 8); + sbs->device = device; + + strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME); +@@ -1665,24 +1662,10 @@ + ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed")); + goto end; + } +- status = acpi_evaluate_integer(device->handle, "_SBS", NULL, &sbs_obj); +- if (status) { +- ACPI_EXCEPTION((AE_INFO, status, +- "acpi_evaluate_integer() failed")); +- result = -EIO; +- goto end; +- } +- if (sbs_obj > 0) { +- result = acpi_sbsm_get_info(sbs); +- if (result) { +- ACPI_EXCEPTION((AE_INFO, AE_ERROR, +- "acpi_sbsm_get_info() failed")); +- goto end; +- } +- sbs->sbsm_present = 1; +- } + +- if (sbs->sbsm_present == 0) { ++ acpi_sbsm_get_info(sbs); ++ ++ if (!sbs->sbsm_present) { + result = acpi_battery_add(sbs, 0); + if (result) { + ACPI_EXCEPTION((AE_INFO, AE_ERROR, +@@ -1702,8 +1685,6 @@ + } + } + +- sbs->handle = device->handle; +- + init_timer(&sbs->update_timer); + result = acpi_check_update_proc(sbs); + if (result) +diff -Nurb linux-2.6.22-570/drivers/acpi/system.c linux-2.6.22-591/drivers/acpi/system.c +--- linux-2.6.22-570/drivers/acpi/system.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/system.c 2007-12-21 15:36:11.000000000 -0500 +@@ -39,15 +39,12 @@ + + #define ACPI_SYSTEM_CLASS "system" + #define ACPI_SYSTEM_DEVICE_NAME "System" +-#define ACPI_SYSTEM_FILE_INFO "info" +-#define ACPI_SYSTEM_FILE_EVENT "event" +-#define ACPI_SYSTEM_FILE_DSDT "dsdt" +-#define ACPI_SYSTEM_FILE_FADT "fadt" + + /* + * Make ACPICA version work as module param + */ +-static int param_get_acpica_version(char *buffer, struct kernel_param *kp) { ++static int param_get_acpica_version(char *buffer, struct kernel_param *kp) ++{ + int 
result; + + result = sprintf(buffer, "%x", ACPI_CA_VERSION); +@@ -58,9 +55,126 @@ + module_param_call(acpica_version, NULL, param_get_acpica_version, NULL, 0444); + + /* -------------------------------------------------------------------------- ++ FS Interface (/sys) ++ -------------------------------------------------------------------------- */ ++static LIST_HEAD(acpi_table_attr_list); ++static struct kobject tables_kobj; ++ ++struct acpi_table_attr { ++ struct bin_attribute attr; ++ char name[8]; ++ int instance; ++ struct list_head node; ++}; ++ ++static ssize_t acpi_table_show(struct kobject *kobj, ++ struct bin_attribute *bin_attr, char *buf, ++ loff_t offset, size_t count) ++{ ++ struct acpi_table_attr *table_attr = ++ container_of(bin_attr, struct acpi_table_attr, attr); ++ struct acpi_table_header *table_header = NULL; ++ acpi_status status; ++ ssize_t ret_count = count; ++ ++ status = ++ acpi_get_table(table_attr->name, table_attr->instance, ++ &table_header); ++ if (ACPI_FAILURE(status)) ++ return -ENODEV; ++ ++ if (offset >= table_header->length) { ++ ret_count = 0; ++ goto end; ++ } ++ ++ if (offset + ret_count > table_header->length) ++ ret_count = table_header->length - offset; ++ ++ memcpy(buf, ((char *)table_header) + offset, ret_count); ++ ++ end: ++ return ret_count; ++} ++ ++static void acpi_table_attr_init(struct acpi_table_attr *table_attr, ++ struct acpi_table_header *table_header) ++{ ++ struct acpi_table_header *header = NULL; ++ struct acpi_table_attr *attr = NULL; ++ ++ memcpy(table_attr->name, table_header->signature, ACPI_NAME_SIZE); ++ ++ list_for_each_entry(attr, &acpi_table_attr_list, node) { ++ if (!memcmp(table_header->signature, attr->name, ++ ACPI_NAME_SIZE)) ++ if (table_attr->instance < attr->instance) ++ table_attr->instance = attr->instance; ++ } ++ table_attr->instance++; ++ ++ if (table_attr->instance > 1 || (table_attr->instance == 1 && ++ !acpi_get_table(table_header-> ++ signature, 2, ++ &header))) ++ sprintf(table_attr->name + 4, "%d", table_attr->instance); ++ ++ table_attr->attr.size = 0; ++ table_attr->attr.read = acpi_table_show; ++ table_attr->attr.attr.name = table_attr->name; ++ table_attr->attr.attr.mode = 0444; ++ table_attr->attr.attr.owner = THIS_MODULE; ++ ++ return; ++} ++ ++static int acpi_system_sysfs_init(void) ++{ ++ struct acpi_table_attr *table_attr; ++ struct acpi_table_header *table_header = NULL; ++ int table_index = 0; ++ int result; ++ ++ tables_kobj.parent = &acpi_subsys.kobj; ++ kobject_set_name(&tables_kobj, "tables"); ++ result = kobject_register(&tables_kobj); ++ if (result) ++ return result; ++ ++ do { ++ result = acpi_get_table_by_index(table_index, &table_header); ++ if (!result) { ++ table_index++; ++ table_attr = NULL; ++ table_attr = ++ kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL); ++ if (!table_attr) ++ return -ENOMEM; ++ ++ acpi_table_attr_init(table_attr, table_header); ++ result = ++ sysfs_create_bin_file(&tables_kobj, ++ &table_attr->attr); ++ if (result) { ++ kfree(table_attr); ++ return result; ++ } else ++ list_add_tail(&table_attr->node, ++ &acpi_table_attr_list); ++ } ++ } while (!result); ++ ++ return 0; ++} ++ ++/* -------------------------------------------------------------------------- + FS Interface (/proc) + -------------------------------------------------------------------------- */ + #ifdef CONFIG_ACPI_PROCFS ++#define ACPI_SYSTEM_FILE_INFO "info" ++#define ACPI_SYSTEM_FILE_EVENT "event" ++#define ACPI_SYSTEM_FILE_DSDT "dsdt" ++#define ACPI_SYSTEM_FILE_FADT "fadt" + + static int 
acpi_system_read_info(struct seq_file *seq, void *offset) + { +@@ -80,7 +194,6 @@ + .llseek = seq_lseek, + .release = single_release, + }; +-#endif + + static ssize_t acpi_system_read_dsdt(struct file *, char __user *, size_t, + loff_t *); +@@ -97,13 +210,11 @@ + struct acpi_table_header *dsdt = NULL; + ssize_t res; + +- + status = acpi_get_table(ACPI_SIG_DSDT, 1, &dsdt); + if (ACPI_FAILURE(status)) + return -ENODEV; + +- res = simple_read_from_buffer(buffer, count, ppos, +- dsdt, dsdt->length); ++ res = simple_read_from_buffer(buffer, count, ppos, dsdt, dsdt->length); + + return res; + } +@@ -123,28 +234,21 @@ + struct acpi_table_header *fadt = NULL; + ssize_t res; + +- + status = acpi_get_table(ACPI_SIG_FADT, 1, &fadt); + if (ACPI_FAILURE(status)) + return -ENODEV; + +- res = simple_read_from_buffer(buffer, count, ppos, +- fadt, fadt->length); ++ res = simple_read_from_buffer(buffer, count, ppos, fadt, fadt->length); + + return res; + } + +-static int __init acpi_system_init(void) ++static int acpi_system_procfs_init(void) + { + struct proc_dir_entry *entry; + int error = 0; + char *name; + +- +- if (acpi_disabled) +- return 0; +- +-#ifdef CONFIG_ACPI_PROCFS + /* 'info' [R] */ + name = ACPI_SYSTEM_FILE_INFO; + entry = create_proc_entry(name, S_IRUGO, acpi_root_dir); +@@ -153,7 +257,6 @@ + else { + entry->proc_fops = &acpi_system_info_ops; + } +-#endif + + /* 'dsdt' [R] */ + name = ACPI_SYSTEM_FILE_DSDT; +@@ -177,12 +280,32 @@ + Error: + remove_proc_entry(ACPI_SYSTEM_FILE_FADT, acpi_root_dir); + remove_proc_entry(ACPI_SYSTEM_FILE_DSDT, acpi_root_dir); +-#ifdef CONFIG_ACPI_PROCFS + remove_proc_entry(ACPI_SYSTEM_FILE_INFO, acpi_root_dir); +-#endif + + error = -EFAULT; + goto Done; + } ++#else ++static int acpi_system_procfs_init(void) ++{ ++ return 0; ++} ++#endif ++ ++static int __init acpi_system_init(void) ++{ ++ int result = 0; ++ ++ if (acpi_disabled) ++ return 0; ++ ++ result = acpi_system_procfs_init(); ++ if (result) ++ return result; ++ ++ result = acpi_system_sysfs_init(); ++ ++ return result; ++} + + subsys_initcall(acpi_system_init); +diff -Nurb linux-2.6.22-570/drivers/acpi/thermal.c linux-2.6.22-591/drivers/acpi/thermal.c +--- linux-2.6.22-570/drivers/acpi/thermal.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/thermal.c 2007-12-21 15:36:11.000000000 -0500 +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -59,7 +60,6 @@ + #define ACPI_THERMAL_NOTIFY_CRITICAL 0xF0 + #define ACPI_THERMAL_NOTIFY_HOT 0xF1 + #define ACPI_THERMAL_MODE_ACTIVE 0x00 +-#define ACPI_THERMAL_PATH_POWEROFF "/sbin/poweroff" + + #define ACPI_THERMAL_MAX_ACTIVE 10 + #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65 +@@ -419,26 +419,6 @@ + return 0; + } + +-static int acpi_thermal_call_usermode(char *path) +-{ +- char *argv[2] = { NULL, NULL }; +- char *envp[3] = { NULL, NULL, NULL }; +- +- +- if (!path) +- return -EINVAL; +- +- argv[0] = path; +- +- /* minimal command environment */ +- envp[0] = "HOME=/"; +- envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; +- +- call_usermodehelper(argv[0], argv, envp, 0); +- +- return 0; +-} +- + static int acpi_thermal_critical(struct acpi_thermal *tz) + { + if (!tz || !tz->trips.critical.flags.valid) +@@ -456,7 +436,7 @@ + acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL, + tz->trips.critical.flags.enabled); + +- acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF); ++ orderly_poweroff(true); + + return 0; + } +@@ -1114,7 +1094,6 @@ + break; + case ACPI_THERMAL_NOTIFY_THRESHOLDS: + 
acpi_thermal_get_trip_points(tz); +- acpi_thermal_check(tz); + acpi_bus_generate_event(device, event, 0); + break; + case ACPI_THERMAL_NOTIFY_DEVICES: +diff -Nurb linux-2.6.22-570/drivers/acpi/utilities/uteval.c linux-2.6.22-591/drivers/acpi/utilities/uteval.c +--- linux-2.6.22-570/drivers/acpi/utilities/uteval.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/utilities/uteval.c 2007-12-21 15:36:11.000000000 -0500 +@@ -62,16 +62,13 @@ + static char *acpi_interfaces_supported[] = { + /* Operating System Vendor Strings */ + +- "Windows 2000", +- "Windows 2001", +- "Windows 2001 SP0", +- "Windows 2001 SP1", +- "Windows 2001 SP2", +- "Windows 2001 SP3", +- "Windows 2001 SP4", +- "Windows 2001.1", +- "Windows 2001.1 SP1", /* Added 03/2006 */ +- "Windows 2006", /* Added 03/2006 */ ++ "Windows 2000", /* Windows 2000 */ ++ "Windows 2001", /* Windows XP */ ++ "Windows 2001 SP1", /* Windows XP SP1 */ ++ "Windows 2001 SP2", /* Windows XP SP2 */ ++ "Windows 2001.1", /* Windows Server 2003 */ ++ "Windows 2001.1 SP1", /* Windows Server 2003 SP1 - Added 03/2006 */ ++ "Windows 2006", /* Windows Vista - Added 03/2006 */ + + /* Feature Group Strings */ + +diff -Nurb linux-2.6.22-570/drivers/acpi/video.c linux-2.6.22-591/drivers/acpi/video.c +--- linux-2.6.22-570/drivers/acpi/video.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/acpi/video.c 2007-12-21 15:36:14.000000000 -0500 +@@ -33,6 +33,7 @@ + #include + + #include ++#include + #include + + #include +@@ -169,6 +170,7 @@ + struct acpi_device *dev; + struct acpi_video_device_brightness *brightness; + struct backlight_device *backlight; ++ struct output_device *output_dev; + }; + + /* bus */ +@@ -272,6 +274,10 @@ + u32 level_current, u32 event); + static void acpi_video_switch_brightness(struct acpi_video_device *device, + int event); ++static int acpi_video_device_get_state(struct acpi_video_device *device, ++ unsigned long *state); ++static int acpi_video_output_get(struct output_device *od); ++static int acpi_video_device_set_state(struct acpi_video_device *device, int state); + + /*backlight device sysfs support*/ + static int acpi_video_get_brightness(struct backlight_device *bd) +@@ -297,6 +303,28 @@ + .update_status = acpi_video_set_brightness, + }; + ++/*video output device sysfs support*/ ++static int acpi_video_output_get(struct output_device *od) ++{ ++ unsigned long state; ++ struct acpi_video_device *vd = ++ (struct acpi_video_device *)class_get_devdata(&od->class_dev); ++ acpi_video_device_get_state(vd, &state); ++ return (int)state; ++} ++ ++static int acpi_video_output_set(struct output_device *od) ++{ ++ unsigned long state = od->request_state; ++ struct acpi_video_device *vd= ++ (struct acpi_video_device *)class_get_devdata(&od->class_dev); ++ return acpi_video_device_set_state(vd, state); ++} ++ ++static struct output_properties acpi_output_properties = { ++ .set_state = acpi_video_output_set, ++ .get_status = acpi_video_output_get, ++}; + /* -------------------------------------------------------------------------- + Video Management + -------------------------------------------------------------------------- */ +@@ -531,7 +559,6 @@ + + static void acpi_video_device_find_cap(struct acpi_video_device *device) + { +- acpi_integer status; + acpi_handle h_dummy1; + int i; + u32 max_level = 0; +@@ -565,9 +592,9 @@ + device->cap._DSS = 1; + } + +- status = acpi_video_device_lcd_query_levels(device, &obj); ++ if (ACPI_SUCCESS(acpi_video_device_lcd_query_levels(device, &obj))) { + +- if (obj && 
obj->type == ACPI_TYPE_PACKAGE && obj->package.count >= 2) { ++ if (obj->package.count >= 2) { + int count = 0; + union acpi_object *o; + +@@ -588,6 +615,7 @@ + continue; + } + br->levels[count] = (u32) o->integer.value; ++ + if (br->levels[count] > max_level) + max_level = br->levels[count]; + count++; +@@ -606,9 +634,13 @@ + } + } + ++ } else { ++ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Could not query available LCD brightness level\n")); ++ } ++ + kfree(obj); + +- if (device->cap._BCL && device->cap._BCM && device->cap._BQC){ ++ if (device->cap._BCL && device->cap._BCM && device->cap._BQC && max_level > 0){ + unsigned long tmp; + static int count = 0; + char *name; +@@ -626,6 +658,17 @@ + + kfree(name); + } ++ if (device->cap._DCS && device->cap._DSS){ ++ static int count = 0; ++ char *name; ++ name = kzalloc(MAX_NAME_LEN, GFP_KERNEL); ++ if (!name) ++ return; ++ sprintf(name, "acpi_video%d", count++); ++ device->output_dev = video_output_register(name, ++ NULL, device, &acpi_output_properties); ++ kfree(name); ++ } + return; + } + +@@ -1669,6 +1712,7 @@ + ACPI_DEVICE_NOTIFY, + acpi_video_device_notify); + backlight_device_unregister(device->backlight); ++ video_output_unregister(device->output_dev); + return 0; + } + +diff -Nurb linux-2.6.22-570/drivers/atm/idt77252.c linux-2.6.22-591/drivers/atm/idt77252.c +--- linux-2.6.22-570/drivers/atm/idt77252.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/atm/idt77252.c 2007-12-21 15:36:14.000000000 -0500 +@@ -3576,7 +3576,7 @@ + * XXX: + */ + sprintf(tname, "eth%d", card->index); +- tmp = dev_get_by_name(tname); /* jhs: was "tmp = dev_get(tname);" */ ++ tmp = dev_get_by_name(&init_net, tname); /* jhs: was "tmp = dev_get(tname);" */ + if (tmp) { + memcpy(card->atmdev->esi, tmp->dev_addr, 6); + +diff -Nurb linux-2.6.22-570/drivers/base/bus.c linux-2.6.22-591/drivers/base/bus.c +--- linux-2.6.22-570/drivers/base/bus.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/base/bus.c 2007-12-21 15:36:11.000000000 -0500 +@@ -562,7 +562,6 @@ + + bus->drivers_probe_attr.attr.name = "drivers_probe"; + bus->drivers_probe_attr.attr.mode = S_IWUSR; +- bus->drivers_probe_attr.attr.owner = bus->owner; + bus->drivers_probe_attr.store = store_drivers_probe; + retval = bus_create_file(bus, &bus->drivers_probe_attr); + if (retval) +@@ -570,7 +569,6 @@ + + bus->drivers_autoprobe_attr.attr.name = "drivers_autoprobe"; + bus->drivers_autoprobe_attr.attr.mode = S_IWUSR | S_IRUGO; +- bus->drivers_autoprobe_attr.attr.owner = bus->owner; + bus->drivers_autoprobe_attr.show = show_drivers_autoprobe; + bus->drivers_autoprobe_attr.store = store_drivers_autoprobe; + retval = bus_create_file(bus, &bus->drivers_autoprobe_attr); +diff -Nurb linux-2.6.22-570/drivers/base/class.c linux-2.6.22-591/drivers/base/class.c +--- linux-2.6.22-570/drivers/base/class.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/base/class.c 2007-12-21 15:36:14.000000000 -0500 +@@ -134,6 +134,17 @@ + } + } + ++static int class_setup_shadowing(struct class *cls) ++{ ++ const struct shadow_dir_operations *shadow_ops; ++ ++ shadow_ops = cls->shadow_ops; ++ if (!shadow_ops) ++ return 0; ++ ++ return sysfs_enable_shadowing(&cls->subsys.kobj, shadow_ops); ++} ++ + int class_register(struct class * cls) + { + int error; +@@ -152,11 +163,22 @@ + subsys_set_kset(cls, class_subsys); + + error = subsystem_register(&cls->subsys); +- if (!error) { +- error = add_class_attrs(class_get(cls)); +- class_put(cls); +- } ++ if (error) ++ goto out; ++ ++ error = 
class_setup_shadowing(cls); ++ if (error) ++ goto out_unregister; ++ ++ error = add_class_attrs(cls); ++ if (error) ++ goto out_unregister; ++ ++out: + return error; ++out_unregister: ++ subsystem_unregister(&cls->subsys); ++ goto out; + } + + void class_unregister(struct class * cls) +@@ -312,9 +334,6 @@ + + pr_debug("device class '%s': release.\n", cd->class_id); + +- kfree(cd->devt_attr); +- cd->devt_attr = NULL; +- + if (cd->release) + cd->release(cd); + else if (cls->release) +@@ -547,6 +566,9 @@ + return print_dev_t(buf, class_dev->devt); + } + ++static struct class_device_attribute class_devt_attr = ++ __ATTR(dev, S_IRUGO, show_dev, NULL); ++ + static ssize_t store_uevent(struct class_device *class_dev, + const char *buf, size_t count) + { +@@ -554,6 +576,9 @@ + return count; + } + ++static struct class_device_attribute class_uevent_attr = ++ __ATTR(uevent, S_IWUSR, NULL, store_uevent); ++ + void class_device_initialize(struct class_device *class_dev) + { + kobj_set_kset_s(class_dev, class_obj_subsys); +@@ -603,34 +628,17 @@ + &parent_class->subsys.kobj, "subsystem"); + if (error) + goto out3; +- class_dev->uevent_attr.attr.name = "uevent"; +- class_dev->uevent_attr.attr.mode = S_IWUSR; +- class_dev->uevent_attr.attr.owner = parent_class->owner; +- class_dev->uevent_attr.store = store_uevent; +- error = class_device_create_file(class_dev, &class_dev->uevent_attr); ++ ++ error = class_device_create_file(class_dev, &class_uevent_attr); + if (error) + goto out3; + + if (MAJOR(class_dev->devt)) { +- struct class_device_attribute *attr; +- attr = kzalloc(sizeof(*attr), GFP_KERNEL); +- if (!attr) { +- error = -ENOMEM; +- goto out4; +- } +- attr->attr.name = "dev"; +- attr->attr.mode = S_IRUGO; +- attr->attr.owner = parent_class->owner; +- attr->show = show_dev; +- error = class_device_create_file(class_dev, attr); +- if (error) { +- kfree(attr); ++ error = class_device_create_file(class_dev, &class_devt_attr); ++ if (error) + goto out4; + } + +- class_dev->devt_attr = attr; +- } +- + error = class_device_add_attrs(class_dev); + if (error) + goto out5; +@@ -671,10 +679,10 @@ + out6: + class_device_remove_attrs(class_dev); + out5: +- if (class_dev->devt_attr) +- class_device_remove_file(class_dev, class_dev->devt_attr); ++ if (MAJOR(class_dev->devt)) ++ class_device_remove_file(class_dev, &class_devt_attr); + out4: +- class_device_remove_file(class_dev, &class_dev->uevent_attr); ++ class_device_remove_file(class_dev, &class_uevent_attr); + out3: + kobject_del(&class_dev->kobj); + out2: +@@ -774,9 +782,9 @@ + sysfs_remove_link(&class_dev->kobj, "device"); + } + sysfs_remove_link(&class_dev->kobj, "subsystem"); +- class_device_remove_file(class_dev, &class_dev->uevent_attr); +- if (class_dev->devt_attr) +- class_device_remove_file(class_dev, class_dev->devt_attr); ++ class_device_remove_file(class_dev, &class_uevent_attr); ++ if (MAJOR(class_dev->devt)) ++ class_device_remove_file(class_dev, &class_devt_attr); + class_device_remove_attrs(class_dev); + class_device_remove_groups(class_dev); + +diff -Nurb linux-2.6.22-570/drivers/base/core.c linux-2.6.22-591/drivers/base/core.c +--- linux-2.6.22-570/drivers/base/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/base/core.c 2007-12-21 15:36:14.000000000 -0500 +@@ -310,6 +310,9 @@ + return count; + } + ++static struct device_attribute uevent_attr = ++ __ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent); ++ + static int device_add_attributes(struct device *dev, + struct device_attribute *attrs) + { +@@ -423,6 +426,9 
@@ + return print_dev_t(buf, dev->devt); + } + ++static struct device_attribute devt_attr = ++ __ATTR(dev, S_IRUGO, show_dev, NULL); ++ + /* + * devices_subsys - structure to be registered with kobject core. + */ +@@ -616,8 +622,14 @@ + return kobj; + + /* or create a new class-directory at the parent device */ +- return kobject_kset_add_dir(&dev->class->class_dirs, ++ kobj = kobject_kset_add_dir(&dev->class->class_dirs, + parent_kobj, dev->class->name); ++ ++ /* If we created a new class-directory setup shadowing */ ++ if (kobj && dev->class->shadow_ops) ++ sysfs_enable_shadowing(kobj, dev->class->shadow_ops); ++ ++ return kobj; + } + + if (parent) +@@ -637,6 +649,82 @@ + return 0; + } + ++static int device_add_class_symlinks(struct device *dev) ++{ ++ int error; ++ ++ if (!dev->class) ++ return 0; ++ error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, ++ "subsystem"); ++ if (error) ++ goto out; ++ /* ++ * If this is not a "fake" compatible device, then create the ++ * symlink from the class to the device. ++ */ ++ if (dev->kobj.parent != &dev->class->subsys.kobj) { ++ error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, ++ dev->bus_id); ++ if (error) ++ goto out_subsys; ++ } ++ /* only bus-device parents get a "device"-link */ ++ if (dev->parent && dev->parent->bus) { ++ error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, ++ "device"); ++ if (error) ++ goto out_busid; ++#ifdef CONFIG_SYSFS_DEPRECATED ++ { ++ char * class_name = make_class_name(dev->class->name, ++ &dev->kobj); ++ if (class_name) ++ error = sysfs_create_link(&dev->parent->kobj, ++ &dev->kobj, class_name); ++ kfree(class_name); ++ if (error) ++ goto out_device; ++ } ++#endif ++ } ++ return 0; ++ ++#ifdef CONFIG_SYSFS_DEPRECATED ++out_device: ++ if (dev->parent) ++ sysfs_remove_link(&dev->kobj, "device"); ++#endif ++out_busid: ++ if (dev->kobj.parent != &dev->class->subsys.kobj) ++ sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); ++out_subsys: ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++out: ++ return error; ++} ++ ++static void device_remove_class_symlinks(struct device *dev) ++{ ++ if (!dev->class) ++ return; ++ if (dev->parent) { ++#ifdef CONFIG_SYSFS_DEPRECATED ++ char *class_name; ++ ++ class_name = make_class_name(dev->class->name, &dev->kobj); ++ if (class_name) { ++ sysfs_remove_link(&dev->parent->kobj, class_name); ++ kfree(class_name); ++ } ++#endif ++ sysfs_remove_link(&dev->kobj, "device"); ++ } ++ if (dev->kobj.parent != &dev->class->subsys.kobj) ++ sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id); ++ sysfs_remove_link(&dev->kobj, "subsystem"); ++} ++ + /** + * device_add - add device to device hierarchy. + * @dev: device. 
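The device_add_class_symlinks()/device_remove_class_symlinks() pair added above follows the kernel's staged-unwind error-handling idiom: every sysfs_create_link() call gets a matching cleanup label, and a failure at step N tears down steps N-1..1 in reverse order before returning the error. Below is a minimal standalone sketch of that idiom only; it is not kernel code, the link names are hypothetical, and plain libc stands in for sysfs:

/*
 * Standalone illustration of the goto-unwind pattern used by
 * device_add_class_symlinks() above. Each setup step has a label;
 * failing at step N removes exactly the links created before it.
 */
#include <stdio.h>

static int create_link(const char *name, int fail)
{
	if (fail) {
		printf("creating '%s' failed\n", name);
		return -1;
	}
	printf("created '%s'\n", name);
	return 0;
}

static void remove_link(const char *name)
{
	printf("removed '%s'\n", name);
}

static int add_class_symlinks(int fail_at)
{
	int error;

	error = create_link("subsystem", fail_at == 1);
	if (error)
		goto out;
	error = create_link("bus_id", fail_at == 2);
	if (error)
		goto out_subsys;
	error = create_link("device", fail_at == 3);
	if (error)
		goto out_busid;
	return 0;

out_busid:
	remove_link("bus_id");
out_subsys:
	remove_link("subsystem");
out:
	return error;
}

int main(void)
{
	/* Fail at the third step: the first two links are unwound. */
	return add_class_symlinks(3) ? 1 : 0;
}

Because the labels run in reverse creation order, exactly the links that were successfully created get removed; that is what lets the device_add() hunk below collapse its open-coded symlink cleanup into the single SymlinkError target.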
+@@ -651,7 +739,6 @@ + int device_add(struct device *dev) + { + struct device *parent = NULL; +- char *class_name = NULL; + struct class_interface *class_intf; + int error = -EINVAL; + +@@ -681,58 +768,17 @@ + blocking_notifier_call_chain(&dev->bus->bus_notifier, + BUS_NOTIFY_ADD_DEVICE, dev); + +- dev->uevent_attr.attr.name = "uevent"; +- dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR; +- if (dev->driver) +- dev->uevent_attr.attr.owner = dev->driver->owner; +- dev->uevent_attr.store = store_uevent; +- dev->uevent_attr.show = show_uevent; +- error = device_create_file(dev, &dev->uevent_attr); ++ error = device_create_file(dev, &uevent_attr); + if (error) + goto attrError; + + if (MAJOR(dev->devt)) { +- struct device_attribute *attr; +- attr = kzalloc(sizeof(*attr), GFP_KERNEL); +- if (!attr) { +- error = -ENOMEM; +- goto ueventattrError; +- } +- attr->attr.name = "dev"; +- attr->attr.mode = S_IRUGO; +- if (dev->driver) +- attr->attr.owner = dev->driver->owner; +- attr->show = show_dev; +- error = device_create_file(dev, attr); +- if (error) { +- kfree(attr); ++ error = device_create_file(dev, &devt_attr); ++ if (error) + goto ueventattrError; + } +- +- dev->devt_attr = attr; +- } +- +- if (dev->class) { +- sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj, +- "subsystem"); +- /* If this is not a "fake" compatible device, then create the +- * symlink from the class to the device. */ +- if (dev->kobj.parent != &dev->class->subsys.kobj) +- sysfs_create_link(&dev->class->subsys.kobj, +- &dev->kobj, dev->bus_id); +- if (parent) { +- sysfs_create_link(&dev->kobj, &dev->parent->kobj, +- "device"); +-#ifdef CONFIG_SYSFS_DEPRECATED +- class_name = make_class_name(dev->class->name, +- &dev->kobj); +- if (class_name) +- sysfs_create_link(&dev->parent->kobj, +- &dev->kobj, class_name); +-#endif +- } +- } +- ++ if ((error = device_add_class_symlinks(dev))) ++ goto SymlinkError; + if ((error = device_add_attrs(dev))) + goto AttrsError; + if ((error = device_pm_add(dev))) +@@ -756,7 +802,6 @@ + up(&dev->class->sem); + } + Done: +- kfree(class_name); + put_device(dev); + return error; + BusError: +@@ -767,10 +812,10 @@ + BUS_NOTIFY_DEL_DEVICE, dev); + device_remove_attrs(dev); + AttrsError: +- if (dev->devt_attr) { +- device_remove_file(dev, dev->devt_attr); +- kfree(dev->devt_attr); +- } ++ device_remove_class_symlinks(dev); ++ SymlinkError: ++ if (MAJOR(dev->devt)) ++ device_remove_file(dev, &devt_attr); + + if (dev->class) { + sysfs_remove_link(&dev->kobj, "subsystem"); +@@ -792,7 +837,7 @@ + } + } + ueventattrError: +- device_remove_file(dev, &dev->uevent_attr); ++ device_remove_file(dev, &uevent_attr); + attrError: + kobject_uevent(&dev->kobj, KOBJ_REMOVE); + kobject_del(&dev->kobj); +@@ -869,17 +914,15 @@ + + if (parent) + klist_del(&dev->knode_parent); +- if (dev->devt_attr) { +- device_remove_file(dev, dev->devt_attr); +- kfree(dev->devt_attr); +- } ++ if (MAJOR(dev->devt)) ++ device_remove_file(dev, &devt_attr); + if (dev->class) { + sysfs_remove_link(&dev->kobj, "subsystem"); + /* If this is not a "fake" compatible device, remove the + * symlink from the class to the device. 
*/ + if (dev->kobj.parent != &dev->class->subsys.kobj) +- sysfs_remove_link(&dev->class->subsys.kobj, +- dev->bus_id); ++ sysfs_delete_link(&dev->class->subsys.kobj, ++ &dev->kobj, dev->bus_id); + if (parent) { + #ifdef CONFIG_SYSFS_DEPRECATED + char *class_name = make_class_name(dev->class->name, +@@ -926,7 +969,7 @@ + up(&dev->class->sem); + } + } +- device_remove_file(dev, &dev->uevent_attr); ++ device_remove_file(dev, &uevent_attr); + device_remove_attrs(dev); + bus_remove_device(dev); + +@@ -1155,7 +1198,7 @@ + { + char *old_class_name = NULL; + char *new_class_name = NULL; +- char *old_symlink_name = NULL; ++ char *old_device_name = NULL; + int error; + + dev = get_device(dev); +@@ -1169,42 +1212,46 @@ + old_class_name = make_class_name(dev->class->name, &dev->kobj); + #endif + +- if (dev->class) { +- old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); +- if (!old_symlink_name) { ++ old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL); ++ if (!old_device_name) { + error = -ENOMEM; +- goto out_free_old_class; +- } +- strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE); ++ goto out; + } +- ++ strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE); + strlcpy(dev->bus_id, new_name, BUS_ID_SIZE); + ++ if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) { ++ error = sysfs_rename_link(&dev->class->subsys.kobj, ++ &dev->kobj, old_device_name, new_name); ++ if (error) ++ goto out; ++ } ++ + error = kobject_rename(&dev->kobj, new_name); ++ if (error) { ++ strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE); ++ goto out; ++ } + + #ifdef CONFIG_SYSFS_DEPRECATED + if (old_class_name) { ++ error = -ENOMEM; + new_class_name = make_class_name(dev->class->name, &dev->kobj); +- if (new_class_name) { +- sysfs_create_link(&dev->parent->kobj, &dev->kobj, +- new_class_name); +- sysfs_remove_link(&dev->parent->kobj, old_class_name); +- } +- } +-#endif ++ if (!new_class_name) ++ goto out; + +- if (dev->class) { +- sysfs_remove_link(&dev->class->subsys.kobj, +- old_symlink_name); +- sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj, +- dev->bus_id); ++ error = sysfs_rename_link(&dev->parent->kobj, &dev->kobj, ++ old_class_name, new_class_name); ++ if (error) ++ goto out; + } ++#endif ++out: + put_device(dev); + + kfree(new_class_name); +- kfree(old_symlink_name); +- out_free_old_class: + kfree(old_class_name); ++ kfree(old_device_name); + + return error; + } +diff -Nurb linux-2.6.22-570/drivers/base/dd.c linux-2.6.22-591/drivers/base/dd.c +--- linux-2.6.22-570/drivers/base/dd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/base/dd.c 2007-12-23 01:39:35.000000000 -0500 +@@ -296,9 +296,8 @@ + { + struct device_driver * drv; + +- drv = dev->driver; ++ drv = get_driver(dev->driver); + if (drv) { +- get_driver(drv); + driver_sysfs_remove(dev); + sysfs_remove_link(&dev->kobj, "driver"); + klist_remove(&dev->knode_driver); +diff -Nurb linux-2.6.22-570/drivers/base/dd.c.orig linux-2.6.22-591/drivers/base/dd.c.orig +--- linux-2.6.22-570/drivers/base/dd.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/base/dd.c.orig 2007-12-22 21:18:39.000000000 -0500 +@@ -0,0 +1,369 @@ ++/* ++ * drivers/base/dd.c - The core device/driver interactions. ++ * ++ * This file contains the (sometimes tricky) code that controls the ++ * interactions between devices and drivers, which primarily includes ++ * driver binding and unbinding. 
++ * ++ * All of this code used to exist in drivers/base/bus.c, but was ++ * relocated to here in the name of compartmentalization (since it wasn't ++ * strictly code just for the 'struct bus_type'. ++ * ++ * Copyright (c) 2002-5 Patrick Mochel ++ * Copyright (c) 2002-3 Open Source Development Labs ++ * ++ * This file is released under the GPLv2 ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "base.h" ++#include "power/power.h" ++ ++#define to_drv(node) container_of(node, struct device_driver, kobj.entry) ++ ++ ++static void driver_bound(struct device *dev) ++{ ++ if (klist_node_attached(&dev->knode_driver)) { ++ printk(KERN_WARNING "%s: device %s already bound\n", ++ __FUNCTION__, kobject_name(&dev->kobj)); ++ return; ++ } ++ ++ pr_debug("bound device '%s' to driver '%s'\n", ++ dev->bus_id, dev->driver->name); ++ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->bus_notifier, ++ BUS_NOTIFY_BOUND_DRIVER, dev); ++ ++ klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); ++} ++ ++static int driver_sysfs_add(struct device *dev) ++{ ++ int ret; ++ ++ ret = sysfs_create_link(&dev->driver->kobj, &dev->kobj, ++ kobject_name(&dev->kobj)); ++ if (ret == 0) { ++ ret = sysfs_create_link(&dev->kobj, &dev->driver->kobj, ++ "driver"); ++ if (ret) ++ sysfs_remove_link(&dev->driver->kobj, ++ kobject_name(&dev->kobj)); ++ } ++ return ret; ++} ++ ++static void driver_sysfs_remove(struct device *dev) ++{ ++ struct device_driver *drv = dev->driver; ++ ++ if (drv) { ++ sysfs_remove_link(&drv->kobj, kobject_name(&dev->kobj)); ++ sysfs_remove_link(&dev->kobj, "driver"); ++ } ++} ++ ++/** ++ * device_bind_driver - bind a driver to one device. ++ * @dev: device. ++ * ++ * Allow manual attachment of a driver to a device. ++ * Caller must have already set @dev->driver. ++ * ++ * Note that this does not modify the bus reference count ++ * nor take the bus's rwsem. Please verify those are accounted ++ * for before calling this. (It is ok to call with no other effort ++ * from a driver's probe() method.) ++ * ++ * This function must be called with @dev->sem held. ++ */ ++int device_bind_driver(struct device *dev) ++{ ++ int ret; ++ ++ ret = driver_sysfs_add(dev); ++ if (!ret) ++ driver_bound(dev); ++ return ret; ++} ++ ++static atomic_t probe_count = ATOMIC_INIT(0); ++static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); ++ ++static int really_probe(struct device *dev, struct device_driver *drv) ++{ ++ int ret = 0; ++ ++ atomic_inc(&probe_count); ++ pr_debug("%s: Probing driver %s with device %s\n", ++ drv->bus->name, drv->name, dev->bus_id); ++ WARN_ON(!list_empty(&dev->devres_head)); ++ ++ dev->driver = drv; ++ if (driver_sysfs_add(dev)) { ++ printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", ++ __FUNCTION__, dev->bus_id); ++ goto probe_failed; ++ } ++ ++ if (dev->bus->probe) { ++ ret = dev->bus->probe(dev); ++ if (ret) ++ goto probe_failed; ++ } else if (drv->probe) { ++ ret = drv->probe(dev); ++ if (ret) ++ goto probe_failed; ++ } ++ ++ driver_bound(dev); ++ ret = 1; ++ pr_debug("%s: Bound Device %s to Driver %s\n", ++ drv->bus->name, dev->bus_id, drv->name); ++ goto done; ++ ++probe_failed: ++ devres_release_all(dev); ++ driver_sysfs_remove(dev); ++ dev->driver = NULL; ++ ++ if (ret != -ENODEV && ret != -ENXIO) { ++ /* driver matched but the probe failed */ ++ printk(KERN_WARNING ++ "%s: probe of %s failed with error %d\n", ++ drv->name, dev->bus_id, ret); ++ } ++ /* ++ * Ignore errors returned by ->probe so that the next driver can try ++ * its luck. 
++ */ ++ ret = 0; ++done: ++ atomic_dec(&probe_count); ++ wake_up(&probe_waitqueue); ++ return ret; ++} ++ ++/** ++ * driver_probe_done ++ * Determine if the probe sequence is finished or not. ++ * ++ * Should somehow figure out how to use a semaphore, not an atomic variable... ++ */ ++int driver_probe_done(void) ++{ ++ pr_debug("%s: probe_count = %d\n", __FUNCTION__, ++ atomic_read(&probe_count)); ++ if (atomic_read(&probe_count)) ++ return -EBUSY; ++ return 0; ++} ++ ++/** ++ * driver_probe_device - attempt to bind device & driver together ++ * @drv: driver to bind a device to ++ * @dev: device to try to bind to the driver ++ * ++ * First, we call the bus's match function, if one present, which should ++ * compare the device IDs the driver supports with the device IDs of the ++ * device. Note we don't do this ourselves because we don't know the ++ * format of the ID structures, nor what is to be considered a match and ++ * what is not. ++ * ++ * This function returns 1 if a match is found, -ENODEV if the device is ++ * not registered, and 0 otherwise. ++ * ++ * This function must be called with @dev->sem held. When called for a ++ * USB interface, @dev->parent->sem must be held as well. ++ */ ++int driver_probe_device(struct device_driver * drv, struct device * dev) ++{ ++ int ret = 0; ++ ++ if (!device_is_registered(dev)) ++ return -ENODEV; ++ if (drv->bus->match && !drv->bus->match(dev, drv)) ++ goto done; ++ ++ pr_debug("%s: Matched Device %s with Driver %s\n", ++ drv->bus->name, dev->bus_id, drv->name); ++ ++ ret = really_probe(dev, drv); ++ ++done: ++ return ret; ++} ++ ++static int __device_attach(struct device_driver * drv, void * data) ++{ ++ struct device * dev = data; ++ return driver_probe_device(drv, dev); ++} ++ ++/** ++ * device_attach - try to attach device to a driver. ++ * @dev: device. ++ * ++ * Walk the list of drivers that the bus has and call ++ * driver_probe_device() for each pair. If a compatible ++ * pair is found, break out and return. ++ * ++ * Returns 1 if the device was bound to a driver; ++ * 0 if no matching device was found; ++ * -ENODEV if the device is not registered. ++ * ++ * When called for a USB interface, @dev->parent->sem must be held. ++ */ ++int device_attach(struct device * dev) ++{ ++ int ret = 0; ++ ++ down(&dev->sem); ++ if (dev->driver) { ++ ret = device_bind_driver(dev); ++ if (ret == 0) ++ ret = 1; ++ else { ++ dev->driver = NULL; ++ ret = 0; ++ } ++ } else { ++ ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); ++ } ++ up(&dev->sem); ++ return ret; ++} ++ ++static int __driver_attach(struct device * dev, void * data) ++{ ++ struct device_driver * drv = data; ++ ++ /* ++ * Lock device and try to bind to it. We drop the error ++ * here and always return 0, because we need to keep trying ++ * to bind to devices and some drivers will return an error ++ * simply if it didn't support the device. ++ * ++ * driver_probe_device() will spit a warning if there ++ * is an error. ++ */ ++ ++ if (dev->parent) /* Needed for USB */ ++ down(&dev->parent->sem); ++ down(&dev->sem); ++ if (!dev->driver) ++ driver_probe_device(drv, dev); ++ up(&dev->sem); ++ if (dev->parent) ++ up(&dev->parent->sem); ++ ++ return 0; ++} ++ ++/** ++ * driver_attach - try to bind driver to devices. ++ * @drv: driver. ++ * ++ * Walk the list of devices that the bus has on it and try to ++ * match the driver with each one. If driver_probe_device() ++ * returns 0 and the @dev->driver is set, we've found a ++ * compatible pair. 
++ */ ++int driver_attach(struct device_driver * drv) ++{ ++ return bus_for_each_dev(drv->bus, NULL, drv, __driver_attach); ++} ++ ++/** ++ * device_release_driver - manually detach device from driver. ++ * @dev: device. ++ * ++ * Manually detach device from driver. ++ * ++ * __device_release_driver() must be called with @dev->sem held. ++ * When called for a USB interface, @dev->parent->sem must be held ++ * as well. ++ */ ++ ++static void __device_release_driver(struct device * dev) ++{ ++ struct device_driver * drv; ++ ++ drv = dev->driver; ++ if (drv) { ++ get_driver(drv); ++ driver_sysfs_remove(dev); ++ sysfs_remove_link(&dev->kobj, "driver"); ++ klist_remove(&dev->knode_driver); ++ ++ if (dev->bus) ++ blocking_notifier_call_chain(&dev->bus->bus_notifier, ++ BUS_NOTIFY_UNBIND_DRIVER, ++ dev); ++ ++ if (dev->bus && dev->bus->remove) ++ dev->bus->remove(dev); ++ else if (drv->remove) ++ drv->remove(dev); ++ devres_release_all(dev); ++ dev->driver = NULL; ++ put_driver(drv); ++ } ++} ++ ++void device_release_driver(struct device * dev) ++{ ++ /* ++ * If anyone calls device_release_driver() recursively from ++ * within their ->remove callback for the same device, they ++ * will deadlock right here. ++ */ ++ down(&dev->sem); ++ __device_release_driver(dev); ++ up(&dev->sem); ++} ++ ++ ++/** ++ * driver_detach - detach driver from all devices it controls. ++ * @drv: driver. ++ */ ++void driver_detach(struct device_driver * drv) ++{ ++ struct device * dev; ++ ++ for (;;) { ++ spin_lock(&drv->klist_devices.k_lock); ++ if (list_empty(&drv->klist_devices.k_list)) { ++ spin_unlock(&drv->klist_devices.k_lock); ++ break; ++ } ++ dev = list_entry(drv->klist_devices.k_list.prev, ++ struct device, knode_driver.n_node); ++ get_device(dev); ++ spin_unlock(&drv->klist_devices.k_lock); ++ ++ if (dev->parent) /* Needed for USB */ ++ down(&dev->parent->sem); ++ down(&dev->sem); ++ if (dev->driver == drv) ++ __device_release_driver(dev); ++ up(&dev->sem); ++ if (dev->parent) ++ up(&dev->parent->sem); ++ put_device(dev); ++ } ++} ++ ++EXPORT_SYMBOL_GPL(device_bind_driver); ++EXPORT_SYMBOL_GPL(device_release_driver); ++EXPORT_SYMBOL_GPL(device_attach); ++EXPORT_SYMBOL_GPL(driver_attach); ++ +diff -Nurb linux-2.6.22-570/drivers/base/firmware_class.c linux-2.6.22-591/drivers/base/firmware_class.c +--- linux-2.6.22-570/drivers/base/firmware_class.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/base/firmware_class.c 2007-12-21 15:36:11.000000000 -0500 +@@ -175,7 +175,7 @@ + static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store); + + static ssize_t +-firmware_data_read(struct kobject *kobj, ++firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buffer, loff_t offset, size_t count) + { + struct device *dev = to_dev(kobj); +@@ -240,7 +240,7 @@ + * the driver as a firmware image. 
+ **/ + static ssize_t +-firmware_data_write(struct kobject *kobj, ++firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr, + char *buffer, loff_t offset, size_t count) + { + struct device *dev = to_dev(kobj); +@@ -271,7 +271,7 @@ + } + + static struct bin_attribute firmware_attr_data_tmpl = { +- .attr = {.name = "data", .mode = 0644, .owner = THIS_MODULE}, ++ .attr = {.name = "data", .mode = 0644}, + .size = 0, + .read = firmware_data_read, + .write = firmware_data_write, +diff -Nurb linux-2.6.22-570/drivers/block/acsi_slm.c linux-2.6.22-591/drivers/block/acsi_slm.c +--- linux-2.6.22-570/drivers/block/acsi_slm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/block/acsi_slm.c 2007-12-21 15:36:11.000000000 -0500 +@@ -367,7 +367,7 @@ + int length; + int end; + +- if (!(page = __get_free_page( GFP_KERNEL ))) ++ if (!(page = __get_free_page(GFP_TEMPORARY))) + return( -ENOMEM ); + + length = slm_getstats( (char *)page, iminor(node) ); +diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoecmd.c linux-2.6.22-591/drivers/block/aoe/aoecmd.c +--- linux-2.6.22-570/drivers/block/aoe/aoecmd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/block/aoe/aoecmd.c 2007-12-21 15:36:14.000000000 -0500 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include "aoe.h" + +@@ -194,7 +195,7 @@ + sl = sl_tail = NULL; + + read_lock(&dev_base_lock); +- for_each_netdev(ifp) { ++ for_each_netdev(&init_net, ifp) { + dev_hold(ifp); + if (!is_aoe_netif(ifp)) + goto cont; +diff -Nurb linux-2.6.22-570/drivers/block/aoe/aoenet.c linux-2.6.22-591/drivers/block/aoe/aoenet.c +--- linux-2.6.22-570/drivers/block/aoe/aoenet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/block/aoe/aoenet.c 2007-12-21 15:36:14.000000000 -0500 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include "aoe.h" + +@@ -114,6 +115,9 @@ + struct aoe_hdr *h; + u32 n; + ++ if (ifp->nd_net != &init_net) ++ goto exit; ++ + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb == NULL) + return 0; +diff -Nurb linux-2.6.22-570/drivers/block/cciss_scsi.c linux-2.6.22-591/drivers/block/cciss_scsi.c +--- linux-2.6.22-570/drivers/block/cciss_scsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/block/cciss_scsi.c 2007-12-21 15:36:11.000000000 -0500 +@@ -555,7 +555,6 @@ + { + struct scsi_cmnd *cmd; + ctlr_info_t *ctlr; +- u64bit addr64; + ErrorInfo_struct *ei; + + ei = cp->err_info; +@@ -569,20 +568,7 @@ + cmd = (struct scsi_cmnd *) cp->scsi_cmd; + ctlr = hba[cp->ctlr]; + +- /* undo the DMA mappings */ +- +- if (cmd->use_sg) { +- pci_unmap_sg(ctlr->pdev, +- cmd->request_buffer, cmd->use_sg, +- cmd->sc_data_direction); +- } +- else if (cmd->request_bufflen) { +- addr64.val32.lower = cp->SG[0].Addr.lower; +- addr64.val32.upper = cp->SG[0].Addr.upper; +- pci_unmap_single(ctlr->pdev, (dma_addr_t) addr64.val, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- } ++ scsi_dma_unmap(cmd); + + cmd->result = (DID_OK << 16); /* host byte */ + cmd->result |= (COMMAND_COMPLETE << 8); /* msg byte */ +@@ -597,7 +583,7 @@ + ei->SenseLen > SCSI_SENSE_BUFFERSIZE ? 
+ SCSI_SENSE_BUFFERSIZE : + ei->SenseLen); +- cmd->resid = ei->ResidualCnt; ++ scsi_set_resid(cmd, ei->ResidualCnt); + + if(ei->CommandStatus != 0) + { /* an error has occurred */ +@@ -1204,46 +1190,29 @@ + CommandList_struct *cp, + struct scsi_cmnd *cmd) + { +- unsigned int use_sg, nsegs=0, len; +- struct scatterlist *scatter = (struct scatterlist *) cmd->request_buffer; ++ unsigned int len; ++ struct scatterlist *sg; + __u64 addr64; ++ int use_sg, i; + +- /* is it just one virtual address? */ +- if (!cmd->use_sg) { +- if (cmd->request_bufflen) { /* anything to xfer? */ +- +- addr64 = (__u64) pci_map_single(pdev, +- cmd->request_buffer, +- cmd->request_bufflen, +- cmd->sc_data_direction); ++ BUG_ON(scsi_sg_count(cmd) > MAXSGENTRIES); + +- cp->SG[0].Addr.lower = ++ use_sg = scsi_dma_map(cmd); ++ if (use_sg) { /* not too many addrs? */ ++ scsi_for_each_sg(cmd, sg, use_sg, i) { ++ addr64 = (__u64) sg_dma_address(sg); ++ len = sg_dma_len(sg); ++ cp->SG[i].Addr.lower = + (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF); +- cp->SG[0].Addr.upper = ++ cp->SG[i].Addr.upper = + (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF); +- cp->SG[0].Len = cmd->request_bufflen; +- nsegs=1; ++ cp->SG[i].Len = len; ++ cp->SG[i].Ext = 0; // we are not chaining + } +- } /* else, must be a list of virtual addresses.... */ +- else if (cmd->use_sg <= MAXSGENTRIES) { /* not too many addrs? */ +- +- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, +- cmd->sc_data_direction); +- +- for (nsegs=0; nsegs < use_sg; nsegs++) { +- addr64 = (__u64) sg_dma_address(&scatter[nsegs]); +- len = sg_dma_len(&scatter[nsegs]); +- cp->SG[nsegs].Addr.lower = +- (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF); +- cp->SG[nsegs].Addr.upper = +- (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF); +- cp->SG[nsegs].Len = len; +- cp->SG[nsegs].Ext = 0; // we are not chaining + } +- } else BUG(); + +- cp->Header.SGList = (__u8) nsegs; /* no. SGs contig in this cmd */ +- cp->Header.SGTotal = (__u16) nsegs; /* total sgs in this cmd list */ ++ cp->Header.SGList = (__u8) use_sg; /* no. 
SGs contig in this cmd */ ++ cp->Header.SGTotal = (__u16) use_sg; /* total sgs in this cmd list */ + return; + } + +diff -Nurb linux-2.6.22-570/drivers/block/loop.c linux-2.6.22-591/drivers/block/loop.c +--- linux-2.6.22-570/drivers/block/loop.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/drivers/block/loop.c 2007-12-21 15:36:11.000000000 -0500 +@@ -68,6 +68,7 @@ + #include + #include + #include ++#include + #include + #include /* for invalidate_bdev() */ + #include +@@ -577,13 +578,6 @@ + struct loop_device *lo = data; + struct bio *bio; + +- /* +- * loop can be used in an encrypted device, +- * hence, it mustn't be stopped at all +- * because it could be indirectly used during suspension +- */ +- current->flags |= PF_NOFREEZE; +- + set_user_nice(current, -20); + + while (!kthread_should_stop() || lo->lo_bio) { +diff -Nurb linux-2.6.22-570/drivers/block/pktcdvd.c linux-2.6.22-591/drivers/block/pktcdvd.c +--- linux-2.6.22-570/drivers/block/pktcdvd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/block/pktcdvd.c 2007-12-21 15:36:11.000000000 -0500 +@@ -146,8 +146,7 @@ + **********************************************************/ + + #define DEF_ATTR(_obj,_name,_mode) \ +- static struct attribute _obj = { \ +- .name = _name, .owner = THIS_MODULE, .mode = _mode } ++ static struct attribute _obj = { .name = _name, .mode = _mode } + + /********************************************************** + /sys/class/pktcdvd/pktcdvd[0-7]/ +@@ -1594,6 +1593,7 @@ + long min_sleep_time, residue; + + set_user_nice(current, -20); ++ set_freezable(); + + for (;;) { + DECLARE_WAITQUEUE(wait, current); +diff -Nurb linux-2.6.22-570/drivers/char/apm-emulation.c linux-2.6.22-591/drivers/char/apm-emulation.c +--- linux-2.6.22-570/drivers/char/apm-emulation.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/char/apm-emulation.c 2007-12-21 15:36:11.000000000 -0500 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -329,13 +330,8 @@ + /* + * Wait for the suspend/resume to complete. If there + * are pending acknowledges, we wait here for them. +- * +- * Note: we need to ensure that the PM subsystem does +- * not kick us out of the wait when it suspends the +- * threads. + */ + flags = current->flags; +- current->flags |= PF_NOFREEZE; + + wait_event(apm_suspend_waitqueue, + as->suspend_state == SUSPEND_DONE); +@@ -365,13 +361,8 @@ + /* + * Wait for the suspend/resume to complete. If there + * are pending acknowledges, we wait here for them. +- * +- * Note: we need to ensure that the PM subsystem does +- * not kick us out of the wait when it suspends the +- * threads. + */ + flags = current->flags; +- current->flags |= PF_NOFREEZE; + + wait_event_interruptible(apm_suspend_waitqueue, + as->suspend_state == SUSPEND_DONE); +@@ -598,7 +589,6 @@ + kapmd_tsk = NULL; + return ret; + } +- kapmd_tsk->flags |= PF_NOFREEZE; + wake_up_process(kapmd_tsk); + + #ifdef CONFIG_PROC_FS +diff -Nurb linux-2.6.22-570/drivers/char/hvc_console.c linux-2.6.22-591/drivers/char/hvc_console.c +--- linux-2.6.22-570/drivers/char/hvc_console.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/char/hvc_console.c 2007-12-21 15:36:11.000000000 -0500 +@@ -674,11 +674,12 @@ + * calling hvc_poll() who determines whether a console adapter support + * interrupts. 
+ */ +-int khvcd(void *unused) ++static int khvcd(void *unused) + { + int poll_mask; + struct hvc_struct *hp; + ++ set_freezable(); + __set_current_state(TASK_RUNNING); + do { + poll_mask = 0; +diff -Nurb linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c +--- linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/char/ipmi/ipmi_msghandler.c 2007-12-21 15:36:11.000000000 -0500 +@@ -2171,52 +2171,42 @@ + int err; + + bmc->device_id_attr.attr.name = "device_id"; +- bmc->device_id_attr.attr.owner = THIS_MODULE; + bmc->device_id_attr.attr.mode = S_IRUGO; + bmc->device_id_attr.show = device_id_show; + + bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs"; +- bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE; + bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO; + bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show; + + bmc->revision_attr.attr.name = "revision"; +- bmc->revision_attr.attr.owner = THIS_MODULE; + bmc->revision_attr.attr.mode = S_IRUGO; + bmc->revision_attr.show = revision_show; + + bmc->firmware_rev_attr.attr.name = "firmware_revision"; +- bmc->firmware_rev_attr.attr.owner = THIS_MODULE; + bmc->firmware_rev_attr.attr.mode = S_IRUGO; + bmc->firmware_rev_attr.show = firmware_rev_show; + + bmc->version_attr.attr.name = "ipmi_version"; +- bmc->version_attr.attr.owner = THIS_MODULE; + bmc->version_attr.attr.mode = S_IRUGO; + bmc->version_attr.show = ipmi_version_show; + + bmc->add_dev_support_attr.attr.name = "additional_device_support"; +- bmc->add_dev_support_attr.attr.owner = THIS_MODULE; + bmc->add_dev_support_attr.attr.mode = S_IRUGO; + bmc->add_dev_support_attr.show = add_dev_support_show; + + bmc->manufacturer_id_attr.attr.name = "manufacturer_id"; +- bmc->manufacturer_id_attr.attr.owner = THIS_MODULE; + bmc->manufacturer_id_attr.attr.mode = S_IRUGO; + bmc->manufacturer_id_attr.show = manufacturer_id_show; + + bmc->product_id_attr.attr.name = "product_id"; +- bmc->product_id_attr.attr.owner = THIS_MODULE; + bmc->product_id_attr.attr.mode = S_IRUGO; + bmc->product_id_attr.show = product_id_show; + + bmc->guid_attr.attr.name = "guid"; +- bmc->guid_attr.attr.owner = THIS_MODULE; + bmc->guid_attr.attr.mode = S_IRUGO; + bmc->guid_attr.show = guid_show; + + bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision"; +- bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE; + bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO; + bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show; + +diff -Nurb linux-2.6.22-570/drivers/char/keyboard.c linux-2.6.22-591/drivers/char/keyboard.c +--- linux-2.6.22-570/drivers/char/keyboard.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/char/keyboard.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1150,6 +1150,7 @@ + sysrq_down = 0; + if (sysrq_down && down && !rep) { + handle_sysrq(kbd_sysrq_xlate[keycode], tty); ++ sysrq_down = 0; /* In case we miss the 'up' event. 
*/ + return; + } + #endif +diff -Nurb linux-2.6.22-570/drivers/connector/connector.c linux-2.6.22-591/drivers/connector/connector.c +--- linux-2.6.22-570/drivers/connector/connector.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/connector/connector.c 2007-12-21 15:36:14.000000000 -0500 +@@ -446,7 +446,7 @@ + dev->id.idx = cn_idx; + dev->id.val = cn_val; + +- dev->nls = netlink_kernel_create(NETLINK_CONNECTOR, ++ dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, + CN_NETLINK_USERS + 0xf, + dev->input, NULL, THIS_MODULE); + if (!dev->nls) +diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c +--- linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/cpufreq/cpufreq_stats.c 2007-12-21 15:36:11.000000000 -0500 +@@ -25,8 +25,7 @@ + + #define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \ + static struct freq_attr _attr_##_name = {\ +- .attr = {.name = __stringify(_name), .owner = THIS_MODULE, \ +- .mode = _mode, }, \ ++ .attr = {.name = __stringify(_name), .mode = _mode, }, \ + .show = _show,\ + }; + +diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c +--- linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/cpufreq/cpufreq_userspace.c 2007-12-21 15:36:11.000000000 -0500 +@@ -120,7 +120,7 @@ + + static struct freq_attr freq_attr_scaling_setspeed = + { +- .attr = { .name = "scaling_setspeed", .mode = 0644, .owner = THIS_MODULE }, ++ .attr = { .name = "scaling_setspeed", .mode = 0644 }, + .show = show_speed, + .store = store_speed, + }; +diff -Nurb linux-2.6.22-570/drivers/cpufreq/freq_table.c linux-2.6.22-591/drivers/cpufreq/freq_table.c +--- linux-2.6.22-570/drivers/cpufreq/freq_table.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/cpufreq/freq_table.c 2007-12-21 15:36:11.000000000 -0500 +@@ -199,7 +199,6 @@ + struct freq_attr cpufreq_freq_attr_scaling_available_freqs = { + .attr = { .name = "scaling_available_frequencies", + .mode = 0444, +- .owner=THIS_MODULE + }, + .show = show_available_freqs, + }; +diff -Nurb linux-2.6.22-570/drivers/cpuidle/Kconfig linux-2.6.22-591/drivers/cpuidle/Kconfig +--- linux-2.6.22-570/drivers/cpuidle/Kconfig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,39 @@ ++menu "CPU idle PM support" ++ ++config CPU_IDLE ++ bool "CPU idle PM support" ++ help ++ CPU idle is a generic framework for supporting software-controlled ++ idle processor power management. It includes modular cross-platform ++ governors that can be swapped during runtime. ++ ++ If you're using a mobile platform that supports CPU idle PM (e.g. ++ an ACPI-capable notebook), you should say Y here. ++ ++if CPU_IDLE ++ ++comment "Governors" ++ ++config CPU_IDLE_GOV_LADDER ++ tristate "'ladder' governor" ++ depends on CPU_IDLE ++ default y ++ help ++ This cpuidle governor promotes and demotes through the supported idle ++ states using residency time and bus master activity as metrics. This ++ algorithm was originally introduced in the old ACPI processor driver. 
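
The residency-ladder heuristic this help text describes can be summarized in a few lines. The sketch below is illustrative only: the structure and function names are invented for exposition and are not the governor's internals (the real implementation follows later in this patch, in drivers/cpuidle/governors/ladder.c).

/*
 * Illustrative sketch of a residency ladder; all names here are
 * hypothetical, not the cpuidle governor API.
 */
struct ladder_rung {
	unsigned int promote_after;	/* consecutive long residencies needed */
	unsigned int demote_after;	/* consecutive short residencies needed */
	unsigned int promote_time_us;	/* residency above this favors a deeper state */
	unsigned int demote_time_us;	/* residency below this favors a shallower state */
};

static int ladder_step(struct ladder_rung *rungs, int cur, int max,
		       unsigned int residency_us,
		       unsigned int *promotes, unsigned int *demotes)
{
	if (cur < max && residency_us > rungs[cur].promote_time_us) {
		*demotes = 0;
		if (++(*promotes) >= rungs[cur].promote_after) {
			*promotes = 0;
			return cur + 1;	/* promote to a deeper state */
		}
	} else if (cur > 0 && residency_us < rungs[cur].demote_time_us) {
		*promotes = 0;
		if (++(*demotes) >= rungs[cur].demote_after) {
			*demotes = 0;
			return cur - 1;	/* demote to a shallower state */
		}
	}
	return cur;	/* otherwise stay on the current rung */
}
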
++ ++config CPU_IDLE_GOV_MENU ++ tristate "'menu' governor" ++ depends on CPU_IDLE && NO_HZ ++ default y ++ help ++ This cpuidle governor evaluates all available states and chooses the ++ deepest state that meets all of the following constraints: BM activity, ++ expected time until next timer interrupt, and last break event time ++ delta. It is designed to minimize power consumption. Currently ++ dynticks is required. ++ ++endif # CPU_IDLE ++ ++endmenu +diff -Nurb linux-2.6.22-570/drivers/cpuidle/Makefile linux-2.6.22-591/drivers/cpuidle/Makefile +--- linux-2.6.22-570/drivers/cpuidle/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,5 @@ ++# ++# Makefile for cpuidle. ++# ++ ++obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ +diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.c linux-2.6.22-591/drivers/cpuidle/cpuidle.c +--- linux-2.6.22-570/drivers/cpuidle/cpuidle.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/cpuidle.c 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,306 @@ ++/* ++ * cpuidle.c - core cpuidle infrastructure ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices); ++EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices); ++ ++DEFINE_MUTEX(cpuidle_lock); ++LIST_HEAD(cpuidle_detected_devices); ++static void (*pm_idle_old)(void); ++ ++/** ++ * cpuidle_idle_call - the main idle loop ++ * ++ * NOTE: no locks or semaphores should be used here ++ */ ++static void cpuidle_idle_call(void) ++{ ++ struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices); ++ struct cpuidle_state *target_state; ++ int next_state; ++ ++ /* check if the device is ready */ ++ if (!dev || dev->status != CPUIDLE_STATUS_DOIDLE) { ++ if (pm_idle_old) ++ pm_idle_old(); ++ else ++ local_irq_enable(); ++ return; ++ } ++ ++ /* ask the governor for the next state */ ++ next_state = cpuidle_curr_governor->select(dev); ++ if (need_resched()) ++ return; ++ target_state = &dev->states[next_state]; ++ ++ /* enter the state and update stats */ ++ dev->last_residency = target_state->enter(dev, target_state); ++ dev->last_state = target_state; ++ target_state->time += dev->last_residency; ++ target_state->usage++; ++ ++ /* give the governor an opportunity to reflect on the outcome */ ++ if (cpuidle_curr_governor->reflect) ++ cpuidle_curr_governor->reflect(dev); ++} ++ ++/** ++ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler ++ */ ++void cpuidle_install_idle_handler(void) ++{ ++ if (pm_idle != cpuidle_idle_call) { ++ /* Make sure all changes finished before we switch to new idle */ ++ smp_wmb(); ++ pm_idle = cpuidle_idle_call; ++ } ++} ++ ++/** ++ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler ++ */ ++void cpuidle_uninstall_idle_handler(void) ++{ ++ if (pm_idle != pm_idle_old) { ++ pm_idle = pm_idle_old; ++ cpu_idle_wait(); ++ } ++} ++ ++/** ++ * cpuidle_rescan_device - prepares for a new state configuration ++ * @dev: the target device ++ * ++ * Must be called with cpuidle_lock aquired. 
++ */ ++void cpuidle_rescan_device(struct cpuidle_device *dev) ++{ ++ int i; ++ ++ if (cpuidle_curr_governor->scan) ++ cpuidle_curr_governor->scan(dev); ++ ++ for (i = 0; i < dev->state_count; i++) { ++ dev->states[i].usage = 0; ++ dev->states[i].time = 0; ++ } ++} ++ ++/** ++ * cpuidle_add_device - attaches the driver to a CPU instance ++ * @sys_dev: the system device (driver model CPU representation) ++ */ ++static int cpuidle_add_device(struct sys_device *sys_dev) ++{ ++ int cpu = sys_dev->id; ++ struct cpuidle_device *dev; ++ ++ dev = per_cpu(cpuidle_devices, cpu); ++ ++ mutex_lock(&cpuidle_lock); ++ if (cpu_is_offline(cpu)) { ++ mutex_unlock(&cpuidle_lock); ++ return 0; ++ } ++ ++ if (!dev) { ++ dev = kzalloc(sizeof(struct cpuidle_device), GFP_KERNEL); ++ if (!dev) { ++ mutex_unlock(&cpuidle_lock); ++ return -ENOMEM; ++ } ++ init_completion(&dev->kobj_unregister); ++ per_cpu(cpuidle_devices, cpu) = dev; ++ } ++ dev->cpu = cpu; ++ ++ if (dev->status & CPUIDLE_STATUS_DETECTED) { ++ mutex_unlock(&cpuidle_lock); ++ return 0; ++ } ++ ++ cpuidle_add_sysfs(sys_dev); ++ ++ if (cpuidle_curr_driver) { ++ if (cpuidle_attach_driver(dev)) ++ goto err_ret; ++ } ++ ++ if (cpuidle_curr_governor) { ++ if (cpuidle_attach_governor(dev)) { ++ cpuidle_detach_driver(dev); ++ goto err_ret; ++ } ++ } ++ ++ if (cpuidle_device_can_idle(dev)) ++ cpuidle_install_idle_handler(); ++ ++ list_add(&dev->device_list, &cpuidle_detected_devices); ++ dev->status |= CPUIDLE_STATUS_DETECTED; ++ ++err_ret: ++ mutex_unlock(&cpuidle_lock); ++ ++ return 0; ++} ++ ++/** ++ * __cpuidle_remove_device - detaches the driver from a CPU instance ++ * @sys_dev: the system device (driver model CPU representation) ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++static int __cpuidle_remove_device(struct sys_device *sys_dev) ++{ ++ struct cpuidle_device *dev; ++ ++ dev = per_cpu(cpuidle_devices, sys_dev->id); ++ ++ if (!(dev->status & CPUIDLE_STATUS_DETECTED)) { ++ return 0; ++ } ++ dev->status &= ~CPUIDLE_STATUS_DETECTED; ++ /* NOTE: we don't wait because the cpu is already offline */ ++ if (cpuidle_curr_governor) ++ cpuidle_detach_governor(dev); ++ if (cpuidle_curr_driver) ++ cpuidle_detach_driver(dev); ++ cpuidle_remove_sysfs(sys_dev); ++ list_del(&dev->device_list); ++ wait_for_completion(&dev->kobj_unregister); ++ per_cpu(cpuidle_devices, sys_dev->id) = NULL; ++ kfree(dev); ++ ++ return 0; ++} ++ ++/** ++ * cpuidle_remove_device - detaches the driver from a CPU instance ++ * @sys_dev: the system device (driver model CPU representation) ++ */ ++static int cpuidle_remove_device(struct sys_device *sys_dev) ++{ ++ int ret; ++ mutex_lock(&cpuidle_lock); ++ ret = __cpuidle_remove_device(sys_dev); ++ mutex_unlock(&cpuidle_lock); ++ ++ return ret; ++} ++ ++static struct sysdev_driver cpuidle_sysdev_driver = { ++ .add = cpuidle_add_device, ++ .remove = cpuidle_remove_device, ++}; ++ ++static int cpuidle_cpu_callback(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ struct sys_device *sys_dev; ++ ++ sys_dev = get_cpu_sysdev((unsigned long)hcpu); ++ ++ switch (action) { ++ case CPU_ONLINE: ++ cpuidle_add_device(sys_dev); ++ break; ++ case CPU_DOWN_PREPARE: ++ mutex_lock(&cpuidle_lock); ++ break; ++ case CPU_DEAD: ++ __cpuidle_remove_device(sys_dev); ++ mutex_unlock(&cpuidle_lock); ++ break; ++ case CPU_DOWN_FAILED: ++ mutex_unlock(&cpuidle_lock); ++ break; ++ } ++ ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block __cpuinitdata cpuidle_cpu_notifier = ++{ ++ .notifier_call = cpuidle_cpu_callback, ++}; ++ 
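
Note the locking shape of the callback above: cpuidle_lock is taken at CPU_DOWN_PREPARE and released only at CPU_DEAD or CPU_DOWN_FAILED, so __cpuidle_remove_device runs with the lock already held. A minimal sketch of that hold-across-notifications pattern, assuming the 2.6.22-era hotplug constants and a hypothetical my_lock:

/* Sketch only; my_lock and the teardown comment stand in for real state. */
static DEFINE_MUTEX(my_lock);

static int my_cpu_callback(struct notifier_block *nfb,
			   unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_DOWN_PREPARE:
		mutex_lock(&my_lock);	/* freeze our state while the CPU goes away */
		break;
	case CPU_DEAD:
		/* tear down per-CPU state, then drop the lock taken above */
		mutex_unlock(&my_lock);
		break;
	case CPU_DOWN_FAILED:
		mutex_unlock(&my_lock);	/* offline aborted; just release */
		break;
	}
	return NOTIFY_OK;
}
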
++#ifdef CONFIG_SMP ++ ++static void smp_callback(void *v) ++{ ++ /* we already woke the CPU up, nothing more to do */ ++} ++ ++/* ++ * This function gets called when a part of the kernel has a new latency ++ * requirement. This means we need to get all processors out of their C-state, ++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that ++ * wakes them all right up. ++ */ ++static int cpuidle_latency_notify(struct notifier_block *b, ++ unsigned long l, void *v) ++{ ++ smp_call_function(smp_callback, NULL, 0, 1); ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block cpuidle_latency_notifier = { ++ .notifier_call = cpuidle_latency_notify, ++}; ++ ++#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0) ++ ++#else /* CONFIG_SMP */ ++ ++#define latency_notifier_init(x) do { } while (0) ++ ++#endif /* CONFIG_SMP */ ++ ++/** ++ * cpuidle_init - core initializer ++ */ ++static int __init cpuidle_init(void) ++{ ++ int ret; ++ ++ pm_idle_old = pm_idle; ++ ++ ret = cpuidle_add_class_sysfs(&cpu_sysdev_class); ++ if (ret) ++ return ret; ++ ++ register_hotcpu_notifier(&cpuidle_cpu_notifier); ++ ++ ret = sysdev_driver_register(&cpu_sysdev_class, &cpuidle_sysdev_driver); ++ ++ if (ret) { ++ cpuidle_remove_class_sysfs(&cpu_sysdev_class); ++ printk(KERN_ERR "cpuidle: failed to initialize\n"); ++ return ret; ++ } ++ ++ latency_notifier_init(&cpuidle_latency_notifier); ++ ++ return 0; ++} ++ ++core_initcall(cpuidle_init); +diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.h linux-2.6.22-591/drivers/cpuidle/cpuidle.h +--- linux-2.6.22-570/drivers/cpuidle/cpuidle.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/cpuidle.h 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,50 @@ ++/* ++ * cpuidle.h - The internal header file ++ */ ++ ++#ifndef __DRIVER_CPUIDLE_H ++#define __DRIVER_CPUIDLE_H ++ ++#include ++ ++/* For internal use only */ ++extern struct cpuidle_governor *cpuidle_curr_governor; ++extern struct cpuidle_driver *cpuidle_curr_driver; ++extern struct list_head cpuidle_drivers; ++extern struct list_head cpuidle_governors; ++extern struct list_head cpuidle_detected_devices; ++extern struct mutex cpuidle_lock; ++ ++/* idle loop */ ++extern void cpuidle_install_idle_handler(void); ++extern void cpuidle_uninstall_idle_handler(void); ++extern void cpuidle_rescan_device(struct cpuidle_device *dev); ++ ++/* drivers */ ++extern int cpuidle_attach_driver(struct cpuidle_device *dev); ++extern void cpuidle_detach_driver(struct cpuidle_device *dev); ++extern int cpuidle_switch_driver(struct cpuidle_driver *drv); ++ ++/* governors */ ++extern int cpuidle_attach_governor(struct cpuidle_device *dev); ++extern void cpuidle_detach_governor(struct cpuidle_device *dev); ++extern int cpuidle_switch_governor(struct cpuidle_governor *gov); ++ ++/* sysfs */ ++extern int cpuidle_add_class_sysfs(struct sysdev_class *cls); ++extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls); ++extern int cpuidle_add_driver_sysfs(struct cpuidle_device *device); ++extern void cpuidle_remove_driver_sysfs(struct cpuidle_device *device); ++extern int cpuidle_add_sysfs(struct sys_device *sysdev); ++extern void cpuidle_remove_sysfs(struct sys_device *sysdev); ++ ++/** ++ * cpuidle_device_can_idle - determines if a CPU can utilize the idle loop ++ * @dev: the target CPU ++ */ ++static inline int cpuidle_device_can_idle(struct cpuidle_device *dev) ++{ ++ return (dev->status == CPUIDLE_STATUS_DOIDLE); ++} ++ ++#endif /* __DRIVER_CPUIDLE_H */ +diff -Nurb 
linux-2.6.22-570/drivers/cpuidle/driver.c linux-2.6.22-591/drivers/cpuidle/driver.c +--- linux-2.6.22-570/drivers/cpuidle/driver.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/driver.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,276 @@ ++/* ++ * driver.c - driver support ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. ++ */ ++ ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++LIST_HEAD(cpuidle_drivers); ++struct cpuidle_driver *cpuidle_curr_driver; ++ ++ ++/** ++ * cpuidle_attach_driver - attaches a driver to a CPU ++ * @dev: the target CPU ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++int cpuidle_attach_driver(struct cpuidle_device *dev) ++{ ++ int ret; ++ ++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) ++ return -EIO; ++ ++ if (!try_module_get(cpuidle_curr_driver->owner)) ++ return -EINVAL; ++ ++ ret = cpuidle_curr_driver->init(dev); ++ if (ret) { ++ module_put(cpuidle_curr_driver->owner); ++ printk(KERN_INFO "cpuidle: driver %s failed to attach to " ++ "cpu %d\n", cpuidle_curr_driver->name, dev->cpu); ++ } else { ++ if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) ++ cpuidle_rescan_device(dev); ++ smp_wmb(); ++ dev->status |= CPUIDLE_STATUS_DRIVER_ATTACHED; ++ cpuidle_add_driver_sysfs(dev); ++ } ++ ++ return ret; ++} ++ ++/** ++ * cpuidle_detach_govenor - detaches a driver from a CPU ++ * @dev: the target CPU ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++void cpuidle_detach_driver(struct cpuidle_device *dev) ++{ ++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) { ++ cpuidle_remove_driver_sysfs(dev); ++ dev->status &= ~CPUIDLE_STATUS_DRIVER_ATTACHED; ++ if (cpuidle_curr_driver->exit) ++ cpuidle_curr_driver->exit(dev); ++ module_put(cpuidle_curr_driver->owner); ++ } ++} ++ ++/** ++ * __cpuidle_find_driver - finds a driver of the specified name ++ * @str: the name ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++static struct cpuidle_driver * __cpuidle_find_driver(const char *str) ++{ ++ struct cpuidle_driver *drv; ++ ++ list_for_each_entry(drv, &cpuidle_drivers, driver_list) ++ if (!strnicmp(str, drv->name, CPUIDLE_NAME_LEN)) ++ return drv; ++ ++ return NULL; ++} ++ ++/** ++ * cpuidle_switch_driver - changes the driver ++ * @drv: the new target driver ++ * ++ * NOTE: "drv" can be NULL to specify disabled ++ * Must be called with cpuidle_lock aquired. 
++ */ ++int cpuidle_switch_driver(struct cpuidle_driver *drv) ++{ ++ struct cpuidle_device *dev; ++ ++ if (drv == cpuidle_curr_driver) ++ return -EINVAL; ++ ++ cpuidle_uninstall_idle_handler(); ++ ++ if (cpuidle_curr_driver) ++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list) ++ cpuidle_detach_driver(dev); ++ ++ cpuidle_curr_driver = drv; ++ ++ if (drv) { ++ int ret = 1; ++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list) ++ if (cpuidle_attach_driver(dev) == 0) ++ ret = 0; ++ ++ /* If attach on all devices fail, switch to NULL driver */ ++ if (ret) ++ cpuidle_curr_driver = NULL; ++ ++ if (cpuidle_curr_driver && cpuidle_curr_governor) { ++ printk(KERN_INFO "cpuidle: using driver %s\n", ++ drv->name); ++ cpuidle_install_idle_handler(); ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * cpuidle_register_driver - registers a driver ++ * @drv: the driver ++ */ ++int cpuidle_register_driver(struct cpuidle_driver *drv) ++{ ++ int ret = -EEXIST; ++ ++ if (!drv || !drv->init) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ if (__cpuidle_find_driver(drv->name) == NULL) { ++ ret = 0; ++ list_add_tail(&drv->driver_list, &cpuidle_drivers); ++ if (!cpuidle_curr_driver) ++ cpuidle_switch_driver(drv); ++ } ++ mutex_unlock(&cpuidle_lock); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_register_driver); ++ ++/** ++ * cpuidle_unregister_driver - unregisters a driver ++ * @drv: the driver ++ */ ++void cpuidle_unregister_driver(struct cpuidle_driver *drv) ++{ ++ if (!drv) ++ return; ++ ++ mutex_lock(&cpuidle_lock); ++ if (drv == cpuidle_curr_driver) ++ cpuidle_switch_driver(NULL); ++ list_del(&drv->driver_list); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); ++ ++static void __cpuidle_force_redetect(struct cpuidle_device *dev) ++{ ++ cpuidle_remove_driver_sysfs(dev); ++ cpuidle_curr_driver->redetect(dev); ++ cpuidle_add_driver_sysfs(dev); ++} ++ ++/** ++ * cpuidle_force_redetect - redetects the idle states of a CPU ++ * ++ * @dev: the CPU to redetect ++ * @drv: the target driver ++ * ++ * Generally, the driver will call this when the supported states set has ++ * changed. (e.g. as the result of an ACPI transition to battery power) ++ */ ++int cpuidle_force_redetect(struct cpuidle_device *dev, ++ struct cpuidle_driver *drv) ++{ ++ int uninstalled = 0; ++ ++ mutex_lock(&cpuidle_lock); ++ ++ if (drv != cpuidle_curr_driver) { ++ mutex_unlock(&cpuidle_lock); ++ return 0; ++ } ++ ++ if (!(dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) || ++ !cpuidle_curr_driver->redetect) { ++ mutex_unlock(&cpuidle_lock); ++ return -EIO; ++ } ++ ++ if (cpuidle_device_can_idle(dev)) { ++ uninstalled = 1; ++ cpuidle_uninstall_idle_handler(); ++ } ++ ++ __cpuidle_force_redetect(dev); ++ ++ if (cpuidle_device_can_idle(dev)) { ++ cpuidle_rescan_device(dev); ++ cpuidle_install_idle_handler(); ++ } ++ ++ /* other devices are still ok */ ++ if (uninstalled) ++ cpuidle_install_idle_handler(); ++ ++ mutex_unlock(&cpuidle_lock); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_force_redetect); ++ ++/** ++ * cpuidle_force_redetect_devices - redetects the idle states of all CPUs ++ * ++ * @drv: the target driver ++ * ++ * Generally, the driver will call this when the supported states set has ++ * changed. (e.g. 
as the result of an ACPI transition to battery power) ++ */ ++int cpuidle_force_redetect_devices(struct cpuidle_driver *drv) ++{ ++ struct cpuidle_device *dev; ++ int ret = 0; ++ ++ mutex_lock(&cpuidle_lock); ++ ++ if (drv != cpuidle_curr_driver) ++ goto out; ++ ++ if (!cpuidle_curr_driver->redetect) { ++ ret = -EIO; ++ goto out; ++ } ++ ++ cpuidle_uninstall_idle_handler(); ++ ++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list) ++ __cpuidle_force_redetect(dev); ++ ++ cpuidle_install_idle_handler(); ++out: ++ mutex_unlock(&cpuidle_lock); ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_force_redetect_devices); ++ ++/** ++ * cpuidle_get_bm_activity - determines if BM activity has occured ++ */ ++int cpuidle_get_bm_activity(void) ++{ ++ if (cpuidle_curr_driver->bm_check) ++ return cpuidle_curr_driver->bm_check(); ++ else ++ return 0; ++} ++EXPORT_SYMBOL_GPL(cpuidle_get_bm_activity); ++ +diff -Nurb linux-2.6.22-570/drivers/cpuidle/governor.c linux-2.6.22-591/drivers/cpuidle/governor.c +--- linux-2.6.22-570/drivers/cpuidle/governor.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/governor.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,160 @@ ++/* ++ * governor.c - governor support ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. ++ */ ++ ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++LIST_HEAD(cpuidle_governors); ++struct cpuidle_governor *cpuidle_curr_governor; ++ ++ ++/** ++ * cpuidle_attach_governor - attaches a governor to a CPU ++ * @dev: the target CPU ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++int cpuidle_attach_governor(struct cpuidle_device *dev) ++{ ++ int ret = 0; ++ ++ if(dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) ++ return -EIO; ++ ++ if (!try_module_get(cpuidle_curr_governor->owner)) ++ return -EINVAL; ++ ++ if (cpuidle_curr_governor->init) ++ ret = cpuidle_curr_governor->init(dev); ++ if (ret) { ++ module_put(cpuidle_curr_governor->owner); ++ printk(KERN_ERR "cpuidle: governor %s failed to attach to cpu %d\n", ++ cpuidle_curr_governor->name, dev->cpu); ++ } else { ++ if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) ++ cpuidle_rescan_device(dev); ++ smp_wmb(); ++ dev->status |= CPUIDLE_STATUS_GOVERNOR_ATTACHED; ++ } ++ ++ return ret; ++} ++ ++/** ++ * cpuidle_detach_govenor - detaches a governor from a CPU ++ * @dev: the target CPU ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++void cpuidle_detach_governor(struct cpuidle_device *dev) ++{ ++ if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) { ++ dev->status &= ~CPUIDLE_STATUS_GOVERNOR_ATTACHED; ++ if (cpuidle_curr_governor->exit) ++ cpuidle_curr_governor->exit(dev); ++ module_put(cpuidle_curr_governor->owner); ++ } ++} ++ ++/** ++ * __cpuidle_find_governor - finds a governor of the specified name ++ * @str: the name ++ * ++ * Must be called with cpuidle_lock aquired. ++ */ ++static struct cpuidle_governor * __cpuidle_find_governor(const char *str) ++{ ++ struct cpuidle_governor *gov; ++ ++ list_for_each_entry(gov, &cpuidle_governors, governor_list) ++ if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN)) ++ return gov; ++ ++ return NULL; ++} ++ ++/** ++ * cpuidle_switch_governor - changes the governor ++ * @gov: the new target governor ++ * ++ * NOTE: "gov" can be NULL to specify disabled ++ * Must be called with cpuidle_lock aquired. 
++ */ ++int cpuidle_switch_governor(struct cpuidle_governor *gov) ++{ ++ struct cpuidle_device *dev; ++ ++ if (gov == cpuidle_curr_governor) ++ return -EINVAL; ++ ++ cpuidle_uninstall_idle_handler(); ++ ++ if (cpuidle_curr_governor) ++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list) ++ cpuidle_detach_governor(dev); ++ ++ cpuidle_curr_governor = gov; ++ ++ if (gov) { ++ list_for_each_entry(dev, &cpuidle_detected_devices, device_list) ++ cpuidle_attach_governor(dev); ++ if (cpuidle_curr_driver) ++ cpuidle_install_idle_handler(); ++ printk(KERN_INFO "cpuidle: using governor %s\n", gov->name); ++ } ++ ++ return 0; ++} ++ ++/** ++ * cpuidle_register_governor - registers a governor ++ * @gov: the governor ++ */ ++int cpuidle_register_governor(struct cpuidle_governor *gov) ++{ ++ int ret = -EEXIST; ++ ++ if (!gov || !gov->select) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ if (__cpuidle_find_governor(gov->name) == NULL) { ++ ret = 0; ++ list_add_tail(&gov->governor_list, &cpuidle_governors); ++ if (!cpuidle_curr_governor) ++ cpuidle_switch_governor(gov); ++ } ++ mutex_unlock(&cpuidle_lock); ++ ++ return ret; ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_register_governor); ++ ++/** ++ * cpuidle_unregister_governor - unregisters a governor ++ * @gov: the governor ++ */ ++void cpuidle_unregister_governor(struct cpuidle_governor *gov) ++{ ++ if (!gov) ++ return; ++ ++ mutex_lock(&cpuidle_lock); ++ if (gov == cpuidle_curr_governor) ++ cpuidle_switch_governor(NULL); ++ list_del(&gov->governor_list); ++ mutex_unlock(&cpuidle_lock); ++} ++ ++EXPORT_SYMBOL_GPL(cpuidle_unregister_governor); +diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/Makefile linux-2.6.22-591/drivers/cpuidle/governors/Makefile +--- linux-2.6.22-570/drivers/cpuidle/governors/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/governors/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,6 @@ ++# ++# Makefile for cpuidle governors. ++# ++ ++obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o ++obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o +diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/ladder.c linux-2.6.22-591/drivers/cpuidle/governors/ladder.c +--- linux-2.6.22-570/drivers/cpuidle/governors/ladder.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/governors/ladder.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,227 @@ ++/* ++ * ladder.c - the residency ladder algorithm ++ * ++ * Copyright (C) 2001, 2002 Andy Grover ++ * Copyright (C) 2001, 2002 Paul Diefenbaugh ++ * Copyright (C) 2004, 2005 Dominik Brodowski ++ * ++ * (C) 2006-2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licenced under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#define PROMOTION_COUNT 4 ++#define DEMOTION_COUNT 1 ++ ++/* ++ * bm_history -- bit-mask with a bit per jiffy of bus-master activity ++ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms ++ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms ++ * 100 HZ: 0x0000000F: 4 jiffies = 40ms ++ * reduce history for more aggressive entry into C3 ++ */ ++static unsigned int bm_history __read_mostly = ++ (HZ >= 800 ? 
0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); ++module_param(bm_history, uint, 0644); ++ ++struct ladder_device_state { ++ struct { ++ u32 promotion_count; ++ u32 demotion_count; ++ u32 promotion_time; ++ u32 demotion_time; ++ u32 bm; ++ } threshold; ++ struct { ++ int promotion_count; ++ int demotion_count; ++ } stats; ++}; ++ ++struct ladder_device { ++ struct ladder_device_state states[CPUIDLE_STATE_MAX]; ++ unsigned int bm_check:1; ++ unsigned long bm_check_timestamp; ++ unsigned long bm_activity; /* FIXME: bm activity should be global */ ++ int last_state_idx; ++}; ++ ++/** ++ * ladder_do_selection - prepares private data for a state change ++ * @ldev: the ladder device ++ * @old_idx: the current state index ++ * @new_idx: the new target state index ++ */ ++static inline void ladder_do_selection(struct ladder_device *ldev, ++ int old_idx, int new_idx) ++{ ++ ldev->states[old_idx].stats.promotion_count = 0; ++ ldev->states[old_idx].stats.demotion_count = 0; ++ ldev->last_state_idx = new_idx; ++} ++ ++/** ++ * ladder_select_state - selects the next state to enter ++ * @dev: the CPU ++ */ ++static int ladder_select_state(struct cpuidle_device *dev) ++{ ++ struct ladder_device *ldev = dev->governor_data; ++ struct ladder_device_state *last_state; ++ int last_residency, last_idx = ldev->last_state_idx; ++ ++ if (unlikely(!ldev)) ++ return 0; ++ ++ last_state = &ldev->states[last_idx]; ++ ++ /* demote if within BM threshold */ ++ if (ldev->bm_check) { ++ unsigned long diff; ++ ++ diff = jiffies - ldev->bm_check_timestamp; ++ if (diff > 31) ++ diff = 31; ++ ++ ldev->bm_activity <<= diff; ++ if (cpuidle_get_bm_activity()) ++ ldev->bm_activity |= ((1 << diff) - 1); ++ ++ ldev->bm_check_timestamp = jiffies; ++ if ((last_idx > 0) && ++ (last_state->threshold.bm & ldev->bm_activity)) { ++ ladder_do_selection(ldev, last_idx, last_idx - 1); ++ return last_idx - 1; ++ } ++ } ++ ++ if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID) ++ last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency; ++ else ++ last_residency = last_state->threshold.promotion_time + 1; ++ ++ /* consider promotion */ ++ if (last_idx < dev->state_count - 1 && ++ last_residency > last_state->threshold.promotion_time && ++ dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) { ++ last_state->stats.promotion_count++; ++ last_state->stats.demotion_count = 0; ++ if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { ++ ladder_do_selection(ldev, last_idx, last_idx + 1); ++ return last_idx + 1; ++ } ++ } ++ ++ /* consider demotion */ ++ if (last_idx > 0 && ++ last_residency < last_state->threshold.demotion_time) { ++ last_state->stats.demotion_count++; ++ last_state->stats.promotion_count = 0; ++ if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { ++ ladder_do_selection(ldev, last_idx, last_idx - 1); ++ return last_idx - 1; ++ } ++ } ++ ++ /* otherwise remain at the current state */ ++ return last_idx; ++} ++ ++/** ++ * ladder_scan_device - scans a CPU's states and does setup ++ * @dev: the CPU ++ */ ++static void ladder_scan_device(struct cpuidle_device *dev) ++{ ++ int i, bm_check = 0; ++ struct ladder_device *ldev = dev->governor_data; ++ struct ladder_device_state *lstate; ++ struct cpuidle_state *state; ++ ++ ldev->last_state_idx = 0; ++ ldev->bm_check_timestamp = 0; ++ ldev->bm_activity = 0; ++ ++ for (i = 0; i < dev->state_count; i++) { ++ state = &dev->states[i]; ++ lstate = &ldev->states[i]; ++ ++ 
lstate->stats.promotion_count = 0; ++ lstate->stats.demotion_count = 0; ++ ++ lstate->threshold.promotion_count = PROMOTION_COUNT; ++ lstate->threshold.demotion_count = DEMOTION_COUNT; ++ ++ if (i < dev->state_count - 1) ++ lstate->threshold.promotion_time = state->exit_latency; ++ if (i > 0) ++ lstate->threshold.demotion_time = state->exit_latency; ++ if (state->flags & CPUIDLE_FLAG_CHECK_BM) { ++ lstate->threshold.bm = bm_history; ++ bm_check = 1; ++ } else ++ lstate->threshold.bm = 0; ++ } ++ ++ ldev->bm_check = bm_check; ++} ++ ++/** ++ * ladder_init_device - initializes a CPU-instance ++ * @dev: the CPU ++ */ ++static int ladder_init_device(struct cpuidle_device *dev) ++{ ++ dev->governor_data = kmalloc(sizeof(struct ladder_device), GFP_KERNEL); ++ ++ return !dev->governor_data; ++} ++ ++/** ++ * ladder_exit_device - exits a CPU-instance ++ * @dev: the CPU ++ */ ++static void ladder_exit_device(struct cpuidle_device *dev) ++{ ++ kfree(dev->governor_data); ++} ++ ++static struct cpuidle_governor ladder_governor = { ++ .name = "ladder", ++ .init = ladder_init_device, ++ .exit = ladder_exit_device, ++ .scan = ladder_scan_device, ++ .select = ladder_select_state, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * init_ladder - initializes the governor ++ */ ++static int __init init_ladder(void) ++{ ++ return cpuidle_register_governor(&ladder_governor); ++} ++ ++/** ++ * exit_ladder - exits the governor ++ */ ++static void __exit exit_ladder(void) ++{ ++ cpuidle_unregister_governor(&ladder_governor); ++} ++ ++MODULE_LICENSE("GPL"); ++module_init(init_ladder); ++module_exit(exit_ladder); +diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/menu.c linux-2.6.22-591/drivers/cpuidle/governors/menu.c +--- linux-2.6.22-570/drivers/cpuidle/governors/menu.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/governors/menu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,152 @@ ++/* ++ * menu.c - the menu idle governor ++ * ++ * Copyright (C) 2006-2007 Adam Belay ++ * ++ * This code is licenced under the GPL. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define BM_HOLDOFF 20000 /* 20 ms */ ++ ++struct menu_device { ++ int last_state_idx; ++ int deepest_bm_state; ++ ++ int break_last_us; ++ int break_elapsed_us; ++ ++ int bm_elapsed_us; ++ int bm_holdoff_us; ++ ++ unsigned long idle_jiffies; ++}; ++ ++static DEFINE_PER_CPU(struct menu_device, menu_devices); ++ ++/** ++ * menu_select - selects the next idle state to enter ++ * @dev: the CPU ++ */ ++static int menu_select(struct cpuidle_device *dev) ++{ ++ struct menu_device *data = &__get_cpu_var(menu_devices); ++ int i, expected_us, max_state = dev->state_count; ++ ++ /* discard BM history because it is sticky */ ++ cpuidle_get_bm_activity(); ++ ++ /* determine the expected residency time */ ++ expected_us = (s32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000; ++ expected_us = min(expected_us, data->break_last_us); ++ ++ /* determine the maximum state compatible with current BM status */ ++ if (cpuidle_get_bm_activity()) ++ data->bm_elapsed_us = 0; ++ if (data->bm_elapsed_us <= data->bm_holdoff_us) ++ max_state = data->deepest_bm_state + 1; ++ ++ /* find the deepest idle state that satisfies our constraints */ ++ for (i = 1; i < max_state; i++) { ++ struct cpuidle_state *s = &dev->states[i]; ++ if (s->target_residency > expected_us) ++ break; ++ if (s->exit_latency > system_latency_constraint()) ++ break; ++ } ++ ++ data->last_state_idx = i - 1; ++ data->idle_jiffies = tick_nohz_get_idle_jiffies(); ++ return i - 1; ++} ++ ++/** ++ * menu_reflect - attempts to guess what happened after entry ++ * @dev: the CPU ++ * ++ * NOTE: it's important to be fast here because this operation will add to ++ * the overall exit latency. ++ */ ++static void menu_reflect(struct cpuidle_device *dev) ++{ ++ struct menu_device *data = &__get_cpu_var(menu_devices); ++ int last_idx = data->last_state_idx; ++ int measured_us = cpuidle_get_last_residency(dev); ++ struct cpuidle_state *target = &dev->states[last_idx]; ++ ++ /* ++ * Ugh, this idle state doesn't support residency measurements, so we ++ * are basically lost in the dark. As a compromise, assume we slept ++ * for one full standard timer tick. However, be aware that this ++ * could potentially result in a suboptimal state transition. ++ */ ++ if (!(target->flags & CPUIDLE_FLAG_TIME_VALID)) ++ measured_us = USEC_PER_SEC / HZ; ++ ++ data->bm_elapsed_us += measured_us; ++ data->break_elapsed_us += measured_us; ++ ++ /* ++ * Did something other than the timer interrupt cause the break event? 
++ */ ++ if (tick_nohz_get_idle_jiffies() == data->idle_jiffies) { ++ data->break_last_us = data->break_elapsed_us; ++ data->break_elapsed_us = 0; ++ } ++} ++ ++/** ++ * menu_scan_device - scans a CPU's states and does setup ++ * @dev: the CPU ++ */ ++static void menu_scan_device(struct cpuidle_device *dev) ++{ ++ struct menu_device *data = &per_cpu(menu_devices, dev->cpu); ++ int i; ++ ++ data->last_state_idx = 0; ++ data->break_last_us = 0; ++ data->break_elapsed_us = 0; ++ data->bm_elapsed_us = 0; ++ data->bm_holdoff_us = BM_HOLDOFF; ++ ++ for (i = 1; i < dev->state_count; i++) ++ if (dev->states[i].flags & CPUIDLE_FLAG_CHECK_BM) ++ break; ++ data->deepest_bm_state = i - 1; ++} ++ ++struct cpuidle_governor menu_governor = { ++ .name = "menu", ++ .scan = menu_scan_device, ++ .select = menu_select, ++ .reflect = menu_reflect, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * init_menu - initializes the governor ++ */ ++static int __init init_menu(void) ++{ ++ return cpuidle_register_governor(&menu_governor); ++} ++ ++/** ++ * exit_menu - exits the governor ++ */ ++static void __exit exit_menu(void) ++{ ++ cpuidle_unregister_governor(&menu_governor); ++} ++ ++MODULE_LICENSE("GPL"); ++module_init(init_menu); ++module_exit(exit_menu); +diff -Nurb linux-2.6.22-570/drivers/cpuidle/sysfs.c linux-2.6.22-591/drivers/cpuidle/sysfs.c +--- linux-2.6.22-570/drivers/cpuidle/sysfs.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/cpuidle/sysfs.c 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,393 @@ ++/* ++ * sysfs.c - sysfs support ++ * ++ * (C) 2006-2007 Shaohua Li ++ * ++ * This code is licenced under the GPL. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "cpuidle.h" ++ ++static unsigned int sysfs_switch; ++static int __init cpuidle_sysfs_setup(char *unused) ++{ ++ sysfs_switch = 1; ++ return 1; ++} ++__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup); ++ ++static ssize_t show_available_drivers(struct sys_device *dev, char *buf) ++{ ++ ssize_t i = 0; ++ struct cpuidle_driver *tmp; ++ ++ mutex_lock(&cpuidle_lock); ++ list_for_each_entry(tmp, &cpuidle_drivers, driver_list) { ++ if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) ++ goto out; ++ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); ++ } ++out: ++ i+= sprintf(&buf[i], "\n"); ++ mutex_unlock(&cpuidle_lock); ++ return i; ++} ++ ++static ssize_t show_available_governors(struct sys_device *dev, char *buf) ++{ ++ ssize_t i = 0; ++ struct cpuidle_governor *tmp; ++ ++ mutex_lock(&cpuidle_lock); ++ list_for_each_entry(tmp, &cpuidle_governors, governor_list) { ++ if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) ++ goto out; ++ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); ++ } ++ if (list_empty(&cpuidle_governors)) ++ i+= sprintf(&buf[i], "no governors"); ++out: ++ i+= sprintf(&buf[i], "\n"); ++ mutex_unlock(&cpuidle_lock); ++ return i; ++} ++ ++static ssize_t show_current_driver(struct sys_device *dev, char *buf) ++{ ++ ssize_t ret; ++ ++ mutex_lock(&cpuidle_lock); ++ ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name); ++ mutex_unlock(&cpuidle_lock); ++ return ret; ++} ++ ++static ssize_t store_current_driver(struct sys_device *dev, ++ const char *buf, size_t count) ++{ ++ char str[CPUIDLE_NAME_LEN]; ++ int len = count; ++ struct cpuidle_driver *tmp, *found = NULL; ++ ++ if (len > CPUIDLE_NAME_LEN) ++ len = CPUIDLE_NAME_LEN; ++ ++ if (sscanf(buf, "%s", str) != 1) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ list_for_each_entry(tmp, 
&cpuidle_drivers, driver_list) { ++ if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) { ++ found = tmp; ++ break; ++ } ++ } ++ if (found) ++ cpuidle_switch_driver(found); ++ mutex_unlock(&cpuidle_lock); ++ ++ return count; ++} ++ ++static ssize_t show_current_governor(struct sys_device *dev, char *buf) ++{ ++ ssize_t i; ++ ++ mutex_lock(&cpuidle_lock); ++ if (cpuidle_curr_governor) ++ i = sprintf(buf, "%s\n", cpuidle_curr_governor->name); ++ else ++ i = sprintf(buf, "no governor\n"); ++ mutex_unlock(&cpuidle_lock); ++ ++ return i; ++} ++ ++static ssize_t store_current_governor(struct sys_device *dev, ++ const char *buf, size_t count) ++{ ++ char str[CPUIDLE_NAME_LEN]; ++ int len = count; ++ struct cpuidle_governor *tmp, *found = NULL; ++ ++ if (len > CPUIDLE_NAME_LEN) ++ len = CPUIDLE_NAME_LEN; ++ ++ if (sscanf(buf, "%s", str) != 1) ++ return -EINVAL; ++ ++ mutex_lock(&cpuidle_lock); ++ list_for_each_entry(tmp, &cpuidle_governors, governor_list) { ++ if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) { ++ found = tmp; ++ break; ++ } ++ } ++ if (found) ++ cpuidle_switch_governor(found); ++ mutex_unlock(&cpuidle_lock); ++ ++ return count; ++} ++ ++static SYSDEV_ATTR(current_driver_ro, 0444, show_current_driver, NULL); ++static SYSDEV_ATTR(current_governor_ro, 0444, show_current_governor, NULL); ++ ++static struct attribute *cpuclass_default_attrs[] = { ++ &attr_current_driver_ro.attr, ++ &attr_current_governor_ro.attr, ++ NULL ++}; ++ ++static SYSDEV_ATTR(available_drivers, 0444, show_available_drivers, NULL); ++static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL); ++static SYSDEV_ATTR(current_driver, 0644, show_current_driver, ++ store_current_driver); ++static SYSDEV_ATTR(current_governor, 0644, show_current_governor, ++ store_current_governor); ++ ++static struct attribute *cpuclass_switch_attrs[] = { ++ &attr_available_drivers.attr, ++ &attr_available_governors.attr, ++ &attr_current_driver.attr, ++ &attr_current_governor.attr, ++ NULL ++}; ++ ++static struct attribute_group cpuclass_attr_group = { ++ .attrs = cpuclass_default_attrs, ++ .name = "cpuidle", ++}; ++ ++/** ++ * cpuidle_add_class_sysfs - add CPU global sysfs attributes ++ */ ++int cpuidle_add_class_sysfs(struct sysdev_class *cls) ++{ ++ if (sysfs_switch) ++ cpuclass_attr_group.attrs = cpuclass_switch_attrs; ++ ++ return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group); ++} ++ ++/** ++ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes ++ */ ++void cpuidle_remove_class_sysfs(struct sysdev_class *cls) ++{ ++ sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group); ++} ++ ++struct cpuidle_attr { ++ struct attribute attr; ++ ssize_t (*show)(struct cpuidle_device *, char *); ++ ssize_t (*store)(struct cpuidle_device *, const char *, size_t count); ++}; ++ ++#define define_one_ro(_name, show) \ ++ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL) ++#define define_one_rw(_name, show, store) \ ++ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) ++ ++#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) ++#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) ++static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) ++{ ++ int ret = -EIO; ++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); ++ struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); ++ ++ if (cattr->show) { ++ mutex_lock(&cpuidle_lock); ++ ret = cattr->show(dev, buf); ++ 
mutex_unlock(&cpuidle_lock); ++ } ++ return ret; ++} ++ ++static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, ++ const char * buf, size_t count) ++{ ++ int ret = -EIO; ++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); ++ struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); ++ ++ if (cattr->store) { ++ mutex_lock(&cpuidle_lock); ++ ret = cattr->store(dev, buf, count); ++ mutex_unlock(&cpuidle_lock); ++ } ++ return ret; ++} ++ ++static struct sysfs_ops cpuidle_sysfs_ops = { ++ .show = cpuidle_show, ++ .store = cpuidle_store, ++}; ++ ++static void cpuidle_sysfs_release(struct kobject *kobj) ++{ ++ struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); ++ ++ complete(&dev->kobj_unregister); ++} ++ ++static struct kobj_type ktype_cpuidle = { ++ .sysfs_ops = &cpuidle_sysfs_ops, ++ .release = cpuidle_sysfs_release, ++}; ++ ++struct cpuidle_state_attr { ++ struct attribute attr; ++ ssize_t (*show)(struct cpuidle_state *, char *); ++ ssize_t (*store)(struct cpuidle_state *, const char *, size_t); ++}; ++ ++#define define_one_state_ro(_name, show) \ ++static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL) ++ ++#define define_show_state_function(_name) \ ++static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \ ++{ \ ++ return sprintf(buf, "%d\n", state->_name);\ ++} ++ ++define_show_state_function(exit_latency) ++define_show_state_function(power_usage) ++define_show_state_function(usage) ++define_show_state_function(time) ++define_one_state_ro(latency, show_state_exit_latency); ++define_one_state_ro(power, show_state_power_usage); ++define_one_state_ro(usage, show_state_usage); ++define_one_state_ro(time, show_state_time); ++ ++static struct attribute *cpuidle_state_default_attrs[] = { ++ &attr_latency.attr, ++ &attr_power.attr, ++ &attr_usage.attr, ++ &attr_time.attr, ++ NULL ++}; ++ ++#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) ++#define kobj_to_state(k) (kobj_to_state_obj(k)->state) ++#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) ++static ssize_t cpuidle_state_show(struct kobject * kobj, ++ struct attribute * attr ,char * buf) ++{ ++ int ret = -EIO; ++ struct cpuidle_state *state = kobj_to_state(kobj); ++ struct cpuidle_state_attr * cattr = attr_to_stateattr(attr); ++ ++ if (cattr->show) ++ ret = cattr->show(state, buf); ++ ++ return ret; ++} ++ ++static struct sysfs_ops cpuidle_state_sysfs_ops = { ++ .show = cpuidle_state_show, ++}; ++ ++static void cpuidle_state_sysfs_release(struct kobject *kobj) ++{ ++ struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj); ++ ++ complete(&state_obj->kobj_unregister); ++} ++ ++static struct kobj_type ktype_state_cpuidle = { ++ .sysfs_ops = &cpuidle_state_sysfs_ops, ++ .default_attrs = cpuidle_state_default_attrs, ++ .release = cpuidle_state_sysfs_release, ++}; ++ ++static void inline cpuidle_free_state_kobj(struct cpuidle_device *device, int i) ++{ ++ kobject_unregister(&device->kobjs[i]->kobj); ++ wait_for_completion(&device->kobjs[i]->kobj_unregister); ++ kfree(device->kobjs[i]); ++ device->kobjs[i] = NULL; ++} ++ ++/** ++ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes ++ * @device: the target device ++ */ ++int cpuidle_add_driver_sysfs(struct cpuidle_device *device) ++{ ++ int i, ret = -ENOMEM; ++ struct cpuidle_state_kobj *kobj; ++ ++ /* state statistics */ ++ for (i = 0; i < device->state_count; i++) { ++ kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); ++ if (!kobj) 
++ goto error_state; ++ kobj->state = &device->states[i]; ++ init_completion(&kobj->kobj_unregister); ++ ++ kobj->kobj.parent = &device->kobj; ++ kobj->kobj.ktype = &ktype_state_cpuidle; ++ kobject_set_name(&kobj->kobj, "state%d", i); ++ ret = kobject_register(&kobj->kobj); ++ if (ret) { ++ kfree(kobj); ++ goto error_state; ++ } ++ device->kobjs[i] = kobj; ++ } ++ ++ return 0; ++ ++error_state: ++ for (i = i - 1; i >= 0; i--) ++ cpuidle_free_state_kobj(device, i); ++ return ret; ++} ++ ++/** ++ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes ++ * @device: the target device ++ */ ++void cpuidle_remove_driver_sysfs(struct cpuidle_device *device) ++{ ++ int i; ++ ++ for (i = 0; i < device->state_count; i++) ++ cpuidle_free_state_kobj(device, i); ++} ++ ++/** ++ * cpuidle_add_sysfs - creates a sysfs instance for the target device ++ * @sysdev: the target device ++ */ ++int cpuidle_add_sysfs(struct sys_device *sysdev) ++{ ++ int cpu = sysdev->id; ++ struct cpuidle_device *dev; ++ ++ dev = per_cpu(cpuidle_devices, cpu); ++ dev->kobj.parent = &sysdev->kobj; ++ dev->kobj.ktype = &ktype_cpuidle; ++ kobject_set_name(&dev->kobj, "%s", "cpuidle"); ++ return kobject_register(&dev->kobj); ++} ++ ++/** ++ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device ++ * @sysdev: the target device ++ */ ++void cpuidle_remove_sysfs(struct sys_device *sysdev) ++{ ++ int cpu = sysdev->id; ++ struct cpuidle_device *dev; ++ ++ dev = per_cpu(cpuidle_devices, cpu); ++ kobject_unregister(&dev->kobj); ++} +diff -Nurb linux-2.6.22-570/drivers/dma/Kconfig linux-2.6.22-591/drivers/dma/Kconfig +--- linux-2.6.22-570/drivers/dma/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/dma/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -8,8 +8,8 @@ + config DMA_ENGINE + bool "Support for DMA engines" + ---help--- +- DMA engines offload copy operations from the CPU to dedicated +- hardware, allowing the copies to happen asynchronously. ++ DMA engines offload bulk memory operations from the CPU to dedicated ++ hardware, allowing the operations to happen asynchronously. + + comment "DMA Clients" + +@@ -32,4 +32,11 @@ + ---help--- + Enable support for the Intel(R) I/OAT DMA engine. + ++config INTEL_IOP_ADMA ++ tristate "Intel IOP ADMA support" ++ depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX) ++ default m ++ ---help--- ++ Enable support for the Intel(R) IOP Series RAID engines. ++ + endmenu +diff -Nurb linux-2.6.22-570/drivers/dma/Makefile linux-2.6.22-591/drivers/dma/Makefile +--- linux-2.6.22-570/drivers/dma/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/dma/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -1,3 +1,4 @@ + obj-$(CONFIG_DMA_ENGINE) += dmaengine.o + obj-$(CONFIG_NET_DMA) += iovlock.o + obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o ++obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +diff -Nurb linux-2.6.22-570/drivers/dma/dmaengine.c linux-2.6.22-591/drivers/dma/dmaengine.c +--- linux-2.6.22-570/drivers/dma/dmaengine.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/dma/dmaengine.c 2007-12-21 15:36:11.000000000 -0500 +@@ -37,11 +37,11 @@ + * Each device has a channels list, which runs unlocked but is never modified + * once the device is registered, it's just setup by the driver. + * +- * Each client has a channels list, it's only modified under the client->lock +- * and in an RCU callback, so it's safe to read under rcu_read_lock(). 
++ * Each client is responsible for keeping track of the channels it uses. See ++ * the definition of dma_event_callback in dmaengine.h. + * + * Each device has a kref, which is initialized to 1 when the device is +- * registered. A kref_put is done for each class_device registered. When the ++ * registered. A kref_get is done for each class_device registered. When the + * class_device is released, the coresponding kref_put is done in the release + * method. Every time one of the device's channels is allocated to a client, + * a kref_get occurs. When the channel is freed, the coresponding kref_put +@@ -51,14 +51,17 @@ + * references to finish. + * + * Each channel has an open-coded implementation of Rusty Russell's "bigref," +- * with a kref and a per_cpu local_t. A single reference is set when on an +- * ADDED event, and removed with a REMOVE event. Net DMA client takes an +- * extra reference per outstanding transaction. The relase function does a +- * kref_put on the device. -ChrisL ++ * with a kref and a per_cpu local_t. A dma_chan_get is called when a client ++ * signals that it wants to use a channel, and dma_chan_put is called when ++ * a channel is removed or a client using it is unregesitered. A client can ++ * take extra references per outstanding transaction, as is the case with ++ * the NET DMA client. The release function does a kref_put on the device. ++ * -ChrisL, DanW + */ + + #include + #include ++#include + #include + #include + #include +@@ -66,6 +69,7 @@ + #include + #include + #include ++#include + + static DEFINE_MUTEX(dma_list_mutex); + static LIST_HEAD(dma_device_list); +@@ -100,8 +104,19 @@ + static ssize_t show_in_use(struct class_device *cd, char *buf) + { + struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev); ++ int in_use = 0; + +- return sprintf(buf, "%d\n", (chan->client ? 1 : 0)); ++ if (unlikely(chan->slow_ref) && ++ atomic_read(&chan->refcount.refcount) > 1) ++ in_use = 1; ++ else { ++ if (local_read(&(per_cpu_ptr(chan->local, ++ get_cpu())->refcount)) > 0) ++ in_use = 1; ++ put_cpu(); ++ } ++ ++ return sprintf(buf, "%d\n", in_use); + } + + static struct class_device_attribute dma_class_attrs[] = { +@@ -127,43 +142,72 @@ + + /* --- client and device registration --- */ + ++#define dma_chan_satisfies_mask(chan, mask) \ ++ __dma_chan_satisfies_mask((chan), &(mask)) ++static int ++__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) ++{ ++ dma_cap_mask_t has; ++ ++ bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, ++ DMA_TX_TYPE_END); ++ return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); ++} ++ + /** +- * dma_client_chan_alloc - try to allocate a channel to a client ++ * dma_client_chan_alloc - try to allocate channels to a client + * @client: &dma_client + * + * Called with dma_list_mutex held. 
+ */ +-static struct dma_chan *dma_client_chan_alloc(struct dma_client *client) ++static void dma_client_chan_alloc(struct dma_client *client) + { + struct dma_device *device; + struct dma_chan *chan; +- unsigned long flags; + int desc; /* allocated descriptor count */ ++ enum dma_state_client ack; + +- /* Find a channel, any DMA engine will do */ +- list_for_each_entry(device, &dma_device_list, global_node) { ++ /* Find a channel */ ++ list_for_each_entry(device, &dma_device_list, global_node) + list_for_each_entry(chan, &device->channels, device_node) { +- if (chan->client) ++ if (!dma_chan_satisfies_mask(chan, client->cap_mask)) + continue; + + desc = chan->device->device_alloc_chan_resources(chan); + if (desc >= 0) { ++ ack = client->event_callback(client, ++ chan, ++ DMA_RESOURCE_AVAILABLE); ++ ++ /* we are done once this client rejects ++ * an available resource ++ */ ++ if (ack == DMA_ACK) { ++ dma_chan_get(chan); + kref_get(&device->refcount); +- kref_init(&chan->refcount); +- chan->slow_ref = 0; +- INIT_RCU_HEAD(&chan->rcu); +- chan->client = client; +- spin_lock_irqsave(&client->lock, flags); +- list_add_tail_rcu(&chan->client_node, +- &client->channels); +- spin_unlock_irqrestore(&client->lock, flags); +- return chan; ++ } else if (ack == DMA_NAK) ++ return; + } + } ++} ++ ++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) ++{ ++ enum dma_status status; ++ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); ++ ++ dma_async_issue_pending(chan); ++ do { ++ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); ++ if (time_after_eq(jiffies, dma_sync_wait_timeout)) { ++ printk(KERN_ERR "dma_sync_wait_timeout!\n"); ++ return DMA_ERROR; + } ++ } while (status == DMA_IN_PROGRESS); + +- return NULL; ++ return status; + } ++EXPORT_SYMBOL(dma_sync_wait); + + /** + * dma_chan_cleanup - release a DMA channel's resources +@@ -173,7 +217,6 @@ + { + struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); + chan->device->device_free_chan_resources(chan); +- chan->client = NULL; + kref_put(&chan->device->refcount, dma_async_device_cleanup); + } + EXPORT_SYMBOL(dma_chan_cleanup); +@@ -189,7 +232,7 @@ + kref_put(&chan->refcount, dma_chan_cleanup); + } + +-static void dma_client_chan_free(struct dma_chan *chan) ++static void dma_chan_release(struct dma_chan *chan) + { + atomic_add(0x7FFFFFFF, &chan->refcount.refcount); + chan->slow_ref = 1; +@@ -197,70 +240,57 @@ + } + + /** +- * dma_chans_rebalance - reallocate channels to clients +- * +- * When the number of DMA channel in the system changes, +- * channels need to be rebalanced among clients. 
++ * dma_chans_notify_available - broadcast available channels to the clients + */ +-static void dma_chans_rebalance(void) ++static void dma_clients_notify_available(void) + { + struct dma_client *client; +- struct dma_chan *chan; +- unsigned long flags; + + mutex_lock(&dma_list_mutex); + +- list_for_each_entry(client, &dma_client_list, global_node) { +- while (client->chans_desired > client->chan_count) { +- chan = dma_client_chan_alloc(client); +- if (!chan) +- break; +- client->chan_count++; +- client->event_callback(client, +- chan, +- DMA_RESOURCE_ADDED); +- } +- while (client->chans_desired < client->chan_count) { +- spin_lock_irqsave(&client->lock, flags); +- chan = list_entry(client->channels.next, +- struct dma_chan, +- client_node); +- list_del_rcu(&chan->client_node); +- spin_unlock_irqrestore(&client->lock, flags); +- client->chan_count--; +- client->event_callback(client, +- chan, +- DMA_RESOURCE_REMOVED); +- dma_client_chan_free(chan); +- } +- } ++ list_for_each_entry(client, &dma_client_list, global_node) ++ dma_client_chan_alloc(client); + + mutex_unlock(&dma_list_mutex); + } + + /** +- * dma_async_client_register - allocate and register a &dma_client +- * @event_callback: callback for notification of channel addition/removal ++ * dma_chans_notify_available - tell the clients that a channel is going away ++ * @chan: channel on its way out + */ +-struct dma_client *dma_async_client_register(dma_event_callback event_callback) ++static void dma_clients_notify_removed(struct dma_chan *chan) + { + struct dma_client *client; ++ enum dma_state_client ack; + +- client = kzalloc(sizeof(*client), GFP_KERNEL); +- if (!client) +- return NULL; ++ mutex_lock(&dma_list_mutex); + +- INIT_LIST_HEAD(&client->channels); +- spin_lock_init(&client->lock); +- client->chans_desired = 0; +- client->chan_count = 0; +- client->event_callback = event_callback; ++ list_for_each_entry(client, &dma_client_list, global_node) { ++ ack = client->event_callback(client, chan, ++ DMA_RESOURCE_REMOVED); + ++ /* client was holding resources for this channel so ++ * free it ++ */ ++ if (ack == DMA_ACK) { ++ dma_chan_put(chan); ++ kref_put(&chan->device->refcount, ++ dma_async_device_cleanup); ++ } ++ } ++ ++ mutex_unlock(&dma_list_mutex); ++} ++ ++/** ++ * dma_async_client_register - register a &dma_client ++ * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' ++ */ ++void dma_async_client_register(struct dma_client *client) ++{ + mutex_lock(&dma_list_mutex); + list_add_tail(&client->global_node, &dma_client_list); + mutex_unlock(&dma_list_mutex); +- +- return client; + } + EXPORT_SYMBOL(dma_async_client_register); + +@@ -272,40 +302,42 @@ + */ + void dma_async_client_unregister(struct dma_client *client) + { ++ struct dma_device *device; + struct dma_chan *chan; ++ enum dma_state_client ack; + + if (!client) + return; + +- rcu_read_lock(); +- list_for_each_entry_rcu(chan, &client->channels, client_node) +- dma_client_chan_free(chan); +- rcu_read_unlock(); +- + mutex_lock(&dma_list_mutex); ++ /* free all channels the client is holding */ ++ list_for_each_entry(device, &dma_device_list, global_node) ++ list_for_each_entry(chan, &device->channels, device_node) { ++ ack = client->event_callback(client, chan, ++ DMA_RESOURCE_REMOVED); ++ ++ if (ack == DMA_ACK) { ++ dma_chan_put(chan); ++ kref_put(&chan->device->refcount, ++ dma_async_device_cleanup); ++ } ++ } ++ + list_del(&client->global_node); + mutex_unlock(&dma_list_mutex); +- +- kfree(client); +- dma_chans_rebalance(); + } + 
+ 
+ /**
+- * dma_async_client_chan_request - request DMA channels
+- * @client: &dma_client
+- * @number: count of DMA channels requested
+- *
+- * Clients call dma_async_client_chan_request() to specify how many
+- * DMA channels they need, 0 to free all currently allocated.
+- * The resulting allocations/frees are indicated to the client via the
+- * event callback.
++ * dma_async_client_chan_request - offer all matching channels to the client
++ * @client: requester
+ */
+-void dma_async_client_chan_request(struct dma_client *client,
+-			unsigned int number)
++void dma_async_client_chan_request(struct dma_client *client)
+ {
+-	client->chans_desired = number;
+-	dma_chans_rebalance();
++	mutex_lock(&dma_list_mutex);
++	dma_client_chan_alloc(client);
++	mutex_unlock(&dma_list_mutex);
+ }
+ EXPORT_SYMBOL(dma_async_client_chan_request);
+ 
+@@ -322,6 +354,25 @@
+ 	if (!device)
+ 		return -ENODEV;
+ 
++	/* validate device routines */
++	BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
++		!device->device_prep_dma_memcpy);
++	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
++		!device->device_prep_dma_xor);
++	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
++		!device->device_prep_dma_zero_sum);
++	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
++		!device->device_prep_dma_memset);
++	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
++		!device->device_prep_dma_interrupt);
++
++	BUG_ON(!device->device_alloc_chan_resources);
++	BUG_ON(!device->device_free_chan_resources);
++	BUG_ON(!device->device_dependency_added);
++	BUG_ON(!device->device_is_tx_complete);
++	BUG_ON(!device->device_issue_pending);
++	BUG_ON(!device->dev);
++
+ 	init_completion(&device->done);
+ 	kref_init(&device->refcount);
+ 	device->dev_id = id++;
+@@ -339,6 +390,9 @@
+ 			device->dev_id, chan->chan_id);
+ 
+ 		kref_get(&device->refcount);
++		kref_init(&chan->refcount);
++		chan->slow_ref = 0;
++		INIT_RCU_HEAD(&chan->rcu);
+ 		class_device_register(&chan->class_dev);
+ 	}
+ 
+@@ -346,7 +400,7 @@
+ 	list_add_tail(&device->global_node, &dma_device_list);
+ 	mutex_unlock(&dma_list_mutex);
+ 
+-	dma_chans_rebalance();
++	dma_clients_notify_available();
+ 
+ 	return 0;
+ }
+@@ -371,32 +425,165 @@
+ void dma_async_device_unregister(struct dma_device *device)
+ {
+ 	struct dma_chan *chan;
+-	unsigned long flags;
+ 
+ 	mutex_lock(&dma_list_mutex);
+ 	list_del(&device->global_node);
+ 	mutex_unlock(&dma_list_mutex);
+ 
+ 	list_for_each_entry(chan, &device->channels, device_node) {
+-		if (chan->client) {
+-			spin_lock_irqsave(&chan->client->lock, flags);
+-			list_del(&chan->client_node);
+-			chan->client->chan_count--;
+-			spin_unlock_irqrestore(&chan->client->lock, flags);
+-			chan->client->event_callback(chan->client,
+-					chan,
+-					DMA_RESOURCE_REMOVED);
+-			dma_client_chan_free(chan);
+-		}
++		dma_clients_notify_removed(chan);
+ 		class_device_unregister(&chan->class_dev);
++		dma_chan_release(chan);
+ 	}
+-	dma_chans_rebalance();
+ 
+ 	kref_put(&device->refcount, dma_async_device_cleanup);
+ 	wait_for_completion(&device->done);
+ }
+ EXPORT_SYMBOL(dma_async_device_unregister);
+ 
++/**
++ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
++ * @chan: DMA channel to offload copy to
++ * @dest: destination address (virtual)
++ * @src: source address (virtual)
++ * @len: length
++ *
++ * Both @dest and @src must be mappable to a bus address according to the
++ * DMA mapping API rules for streaming mappings.
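++ * The helper performs the dma_map_single() calls itself, so callers pass
++ * plain kernel virtual addresses rather than pre-mapped bus addresses.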
++ * Both @dest and @src must stay memory resident (kernel memory or locked ++ * user space pages). ++ */ ++dma_cookie_t ++dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, ++ void *src, size_t len) ++{ ++ struct dma_device *dev = chan->device; ++ struct dma_async_tx_descriptor *tx; ++ dma_addr_t addr; ++ dma_cookie_t cookie; ++ int cpu; ++ ++ tx = dev->device_prep_dma_memcpy(chan, len, 0); ++ if (!tx) ++ return -ENOMEM; ++ ++ tx->ack = 1; ++ tx->callback = NULL; ++ addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); ++ tx->tx_set_src(addr, tx, 0); ++ addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); ++ tx->tx_set_dest(addr, tx, 0); ++ cookie = tx->tx_submit(tx); ++ ++ cpu = get_cpu(); ++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; ++ per_cpu_ptr(chan->local, cpu)->memcpy_count++; ++ put_cpu(); ++ ++ return cookie; ++} ++EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); ++ ++/** ++ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page ++ * @chan: DMA channel to offload copy to ++ * @page: destination page ++ * @offset: offset in page to copy to ++ * @kdata: source address (virtual) ++ * @len: length ++ * ++ * Both @page/@offset and @kdata must be mappable to a bus address according ++ * to the DMA mapping API rules for streaming mappings. ++ * Both @page/@offset and @kdata must stay memory resident (kernel memory or ++ * locked user space pages) ++ */ ++dma_cookie_t ++dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, ++ unsigned int offset, void *kdata, size_t len) ++{ ++ struct dma_device *dev = chan->device; ++ struct dma_async_tx_descriptor *tx; ++ dma_addr_t addr; ++ dma_cookie_t cookie; ++ int cpu; ++ ++ tx = dev->device_prep_dma_memcpy(chan, len, 0); ++ if (!tx) ++ return -ENOMEM; ++ ++ tx->ack = 1; ++ tx->callback = NULL; ++ addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); ++ tx->tx_set_src(addr, tx, 0); ++ addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); ++ tx->tx_set_dest(addr, tx, 0); ++ cookie = tx->tx_submit(tx); ++ ++ cpu = get_cpu(); ++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; ++ per_cpu_ptr(chan->local, cpu)->memcpy_count++; ++ put_cpu(); ++ ++ return cookie; ++} ++EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); ++ ++/** ++ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page ++ * @chan: DMA channel to offload copy to ++ * @dest_pg: destination page ++ * @dest_off: offset in page to copy to ++ * @src_pg: source page ++ * @src_off: offset in page to copy from ++ * @len: length ++ * ++ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus ++ * address according to the DMA mapping API rules for streaming mappings. ++ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident ++ * (kernel memory or locked user space pages). 
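++ *
++ * The returned cookie can be polled with dma_async_is_tx_complete() or,
++ * from non-atomic context, waited on synchronously via dma_sync_wait().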
++ */ ++dma_cookie_t ++dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, ++ unsigned int dest_off, struct page *src_pg, unsigned int src_off, ++ size_t len) ++{ ++ struct dma_device *dev = chan->device; ++ struct dma_async_tx_descriptor *tx; ++ dma_addr_t addr; ++ dma_cookie_t cookie; ++ int cpu; ++ ++ tx = dev->device_prep_dma_memcpy(chan, len, 0); ++ if (!tx) ++ return -ENOMEM; ++ ++ tx->ack = 1; ++ tx->callback = NULL; ++ addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); ++ tx->tx_set_src(addr, tx, 0); ++ addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE); ++ tx->tx_set_dest(addr, tx, 0); ++ cookie = tx->tx_submit(tx); ++ ++ cpu = get_cpu(); ++ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; ++ per_cpu_ptr(chan->local, cpu)->memcpy_count++; ++ put_cpu(); ++ ++ return cookie; ++} ++EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); ++ ++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, ++ struct dma_chan *chan) ++{ ++ tx->chan = chan; ++ spin_lock_init(&tx->lock); ++ INIT_LIST_HEAD(&tx->depend_node); ++ INIT_LIST_HEAD(&tx->depend_list); ++} ++EXPORT_SYMBOL(dma_async_tx_descriptor_init); ++ + static int __init dma_bus_init(void) + { + mutex_init(&dma_list_mutex); +diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.c linux-2.6.22-591/drivers/dma/ioatdma.c +--- linux-2.6.22-570/drivers/dma/ioatdma.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/dma/ioatdma.c 2007-12-21 15:36:11.000000000 -0500 +@@ -39,6 +39,7 @@ + #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) + #define to_ioat_device(dev) container_of(dev, struct ioat_device, common) + #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) ++#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) + + /* internal functions */ + static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent); +@@ -71,13 +72,76 @@ + INIT_LIST_HEAD(&ioat_chan->used_desc); + /* This should be made common somewhere in dmaengine.c */ + ioat_chan->common.device = &device->common; +- ioat_chan->common.client = NULL; + list_add_tail(&ioat_chan->common.device_node, + &device->common.channels); + } + return device->common.chancnt; + } + ++static void ++ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) ++{ ++ struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); ++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); ++ ++ pci_unmap_addr_set(desc, src, addr); ++ ++ list_for_each_entry(iter, &desc->group_list, node) { ++ iter->hw->src_addr = addr; ++ addr += ioat_chan->xfercap; ++ } ++ ++} ++ ++static void ++ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) ++{ ++ struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); ++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); ++ ++ pci_unmap_addr_set(desc, dst, addr); ++ ++ list_for_each_entry(iter, &desc->group_list, node) { ++ iter->hw->dst_addr = addr; ++ addr += ioat_chan->xfercap; ++ } ++} ++ ++static dma_cookie_t ++ioat_tx_submit(struct dma_async_tx_descriptor *tx) ++{ ++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); ++ struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); ++ struct ioat_desc_sw *group_start = list_entry(desc->group_list.next, ++ struct ioat_desc_sw, node); ++ int append = 0; ++ dma_cookie_t cookie; ++ ++ spin_lock_bh(&ioat_chan->desc_lock); ++ /* cookie incr and addition to used_list must be atomic */ ++ cookie = ioat_chan->common.cookie; ++ 
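++	/* dma_cookie_t is a signed sequence number: step past zero and
++	 * negative values on wrap so completion comparisons stay ordered
++	 */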
cookie++; ++ if (cookie < 0) ++ cookie = 1; ++ ioat_chan->common.cookie = desc->async_tx.cookie = cookie; ++ ++ /* write address into NextDescriptor field of last desc in chain */ ++ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = group_start->phys; ++ list_splice_init(&desc->group_list, ioat_chan->used_desc.prev); ++ ++ ioat_chan->pending += desc->group_count; ++ if (ioat_chan->pending >= 4) { ++ append = 1; ++ ioat_chan->pending = 0; ++ } ++ spin_unlock_bh(&ioat_chan->desc_lock); ++ ++ if (append) ++ ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, ++ IOAT_CHANCMD_APPEND); ++ return cookie; ++} ++ + static struct ioat_desc_sw *ioat_dma_alloc_descriptor( + struct ioat_dma_chan *ioat_chan, + gfp_t flags) +@@ -99,6 +163,11 @@ + } + + memset(desc, 0, sizeof(*desc)); ++ dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); ++ desc_sw->async_tx.tx_set_src = ioat_set_src; ++ desc_sw->async_tx.tx_set_dest = ioat_set_dest; ++ desc_sw->async_tx.tx_submit = ioat_tx_submit; ++ INIT_LIST_HEAD(&desc_sw->group_list); + desc_sw->hw = desc; + desc_sw->phys = phys; + +@@ -215,45 +284,25 @@ + ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); + } + +-/** +- * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction +- * @ioat_chan: IOAT DMA channel handle +- * @dest: DMA destination address +- * @src: DMA source address +- * @len: transaction length in bytes +- */ +- +-static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan, +- dma_addr_t dest, +- dma_addr_t src, +- size_t len) +-{ +- struct ioat_desc_sw *first; +- struct ioat_desc_sw *prev; +- struct ioat_desc_sw *new; +- dma_cookie_t cookie; ++static struct dma_async_tx_descriptor * ++ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en) ++{ ++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); ++ struct ioat_desc_sw *first, *prev, *new; + LIST_HEAD(new_chain); + u32 copy; + size_t orig_len; +- dma_addr_t orig_src, orig_dst; +- unsigned int desc_count = 0; +- unsigned int append = 0; +- +- if (!ioat_chan || !dest || !src) +- return -EFAULT; ++ int desc_count = 0; + + if (!len) +- return ioat_chan->common.cookie; ++ return NULL; + + orig_len = len; +- orig_src = src; +- orig_dst = dest; + + first = NULL; + prev = NULL; + + spin_lock_bh(&ioat_chan->desc_lock); +- + while (len) { + if (!list_empty(&ioat_chan->free_desc)) { + new = to_ioat_desc(ioat_chan->free_desc.next); +@@ -270,9 +319,8 @@ + + new->hw->size = copy; + new->hw->ctl = 0; +- new->hw->src_addr = src; +- new->hw->dst_addr = dest; +- new->cookie = 0; ++ new->async_tx.cookie = 0; ++ new->async_tx.ack = 1; + + /* chain together the physical address list for the HW */ + if (!first) +@@ -281,130 +329,26 @@ + prev->hw->next = (u64) new->phys; + + prev = new; +- + len -= copy; +- dest += copy; +- src += copy; +- + list_add_tail(&new->node, &new_chain); + desc_count++; + } +- new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; +- new->hw->next = 0; + +- /* cookie incr and addition to used_list must be atomic */ ++ list_splice(&new_chain, &new->group_list); + +- cookie = ioat_chan->common.cookie; +- cookie++; +- if (cookie < 0) +- cookie = 1; +- ioat_chan->common.cookie = new->cookie = cookie; ++ new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; ++ new->hw->next = 0; ++ new->group_count = desc_count; ++ new->async_tx.ack = 0; /* client is in control of this ack */ ++ new->async_tx.cookie = -EBUSY; + +- pci_unmap_addr_set(new, src, orig_src); +- pci_unmap_addr_set(new, dst, orig_dst); + pci_unmap_len_set(new, src_len, 
orig_len); + pci_unmap_len_set(new, dst_len, orig_len); +- +- /* write address into NextDescriptor field of last desc in chain */ +- to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys; +- list_splice_init(&new_chain, ioat_chan->used_desc.prev); +- +- ioat_chan->pending += desc_count; +- if (ioat_chan->pending >= 20) { +- append = 1; +- ioat_chan->pending = 0; +- } +- + spin_unlock_bh(&ioat_chan->desc_lock); + +- if (append) +- ioatdma_chan_write8(ioat_chan, +- IOAT_CHANCMD_OFFSET, +- IOAT_CHANCMD_APPEND); +- return cookie; +-} +- +-/** +- * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs +- * @chan: IOAT DMA channel handle +- * @dest: DMA destination address +- * @src: DMA source address +- * @len: transaction length in bytes +- */ +- +-static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan, +- void *dest, +- void *src, +- size_t len) +-{ +- dma_addr_t dest_addr; +- dma_addr_t src_addr; +- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); +- +- dest_addr = pci_map_single(ioat_chan->device->pdev, +- dest, len, PCI_DMA_FROMDEVICE); +- src_addr = pci_map_single(ioat_chan->device->pdev, +- src, len, PCI_DMA_TODEVICE); +- +- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); +-} +- +-/** +- * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page +- * @chan: IOAT DMA channel handle +- * @page: pointer to the page to copy to +- * @offset: offset into that page +- * @src: DMA source address +- * @len: transaction length in bytes +- */ +- +-static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan, +- struct page *page, +- unsigned int offset, +- void *src, +- size_t len) +-{ +- dma_addr_t dest_addr; +- dma_addr_t src_addr; +- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); +- +- dest_addr = pci_map_page(ioat_chan->device->pdev, +- page, offset, len, PCI_DMA_FROMDEVICE); +- src_addr = pci_map_single(ioat_chan->device->pdev, +- src, len, PCI_DMA_TODEVICE); +- +- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); ++ return new ? &new->async_tx : NULL; + } + +-/** +- * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages +- * @chan: IOAT DMA channel handle +- * @dest_pg: pointer to the page to copy to +- * @dest_off: offset into that page +- * @src_pg: pointer to the page to copy from +- * @src_off: offset into that page +- * @len: transaction length in bytes. This is guaranteed not to make a copy +- * across a page boundary. +- */ +- +-static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan, +- struct page *dest_pg, +- unsigned int dest_off, +- struct page *src_pg, +- unsigned int src_off, +- size_t len) +-{ +- dma_addr_t dest_addr; +- dma_addr_t src_addr; +- struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); +- +- dest_addr = pci_map_page(ioat_chan->device->pdev, +- dest_pg, dest_off, len, PCI_DMA_FROMDEVICE); +- src_addr = pci_map_page(ioat_chan->device->pdev, +- src_pg, src_off, len, PCI_DMA_TODEVICE); +- +- return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); +-} + + /** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw +@@ -467,8 +411,8 @@ + * exceeding xfercap, perhaps. If so, only the last one will + * have a cookie, and require unmapping. + */ +- if (desc->cookie) { +- cookie = desc->cookie; ++ if (desc->async_tx.cookie) { ++ cookie = desc->async_tx.cookie; + + /* yes we are unmapping both _page and _single alloc'd + regions with unmap_page. Is this *really* that bad? 
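The three copy wrappers deleted above are not lost: they reappear in dmaengine.c as the generic dma_async_memcpy_*() helpers added earlier in this patch, which perform the mapping and submission once for every driver. A minimal synchronous-copy sketch against that generic interface; my_offload_copy is an illustrative name, and it must run in non-atomic context because dma_sync_wait() polls:

	#include <linux/dmaengine.h>

	/* illustrative helper, not part of this patch */
	static int my_offload_copy(struct dma_chan *chan, void *dst,
				   void *src, size_t len)
	{
		dma_cookie_t cookie;

		cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
		if (cookie < 0)
			return cookie;	/* no descriptor was available */

		/* dma_sync_wait() issues pending descriptors and polls
		 * until the cookie completes or five seconds elapse
		 */
		return dma_sync_wait(chan, cookie) == DMA_SUCCESS ? 0 : -EIO;
	}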
+@@ -484,13 +428,18 @@ + } + + if (desc->phys != phys_complete) { +- /* a completed entry, but not the last, so cleanup */ ++ /* a completed entry, but not the last, so cleanup ++ * if the client is done with the descriptor ++ */ ++ if (desc->async_tx.ack) { + list_del(&desc->node); + list_add_tail(&desc->node, &chan->free_desc); ++ } else ++ desc->async_tx.cookie = 0; + } else { + /* last used desc. Do not remove, so we can append from + it, but don't look at it next time, either */ +- desc->cookie = 0; ++ desc->async_tx.cookie = 0; + + /* TODO check status bits? */ + break; +@@ -506,6 +455,17 @@ + spin_unlock(&chan->cleanup_lock); + } + ++static void ioat_dma_dependency_added(struct dma_chan *chan) ++{ ++ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); ++ spin_lock_bh(&ioat_chan->desc_lock); ++ if (ioat_chan->pending == 0) { ++ spin_unlock_bh(&ioat_chan->desc_lock); ++ ioat_dma_memcpy_cleanup(ioat_chan); ++ } else ++ spin_unlock_bh(&ioat_chan->desc_lock); ++} ++ + /** + * ioat_dma_is_complete - poll the status of a IOAT DMA transaction + * @chan: IOAT DMA channel handle +@@ -607,6 +567,7 @@ + + desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + desc->hw->next = 0; ++ desc->async_tx.ack = 1; + + list_add_tail(&desc->node, &ioat_chan->used_desc); + spin_unlock_bh(&ioat_chan->desc_lock); +@@ -633,6 +594,8 @@ + u8 *src; + u8 *dest; + struct dma_chan *dma_chan; ++ struct dma_async_tx_descriptor *tx; ++ dma_addr_t addr; + dma_cookie_t cookie; + int err = 0; + +@@ -658,7 +621,15 @@ + goto out; + } + +- cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE); ++ tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0); ++ async_tx_ack(tx); ++ addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, ++ DMA_TO_DEVICE); ++ ioat_set_src(addr, tx, 0); ++ addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, ++ DMA_FROM_DEVICE); ++ ioat_set_dest(addr, tx, 0); ++ cookie = ioat_tx_submit(tx); + ioat_dma_memcpy_issue_pending(dma_chan); + msleep(1); + +@@ -754,13 +725,14 @@ + INIT_LIST_HEAD(&device->common.channels); + enumerate_dma_channels(device); + ++ dma_cap_set(DMA_MEMCPY, device->common.cap_mask); + device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; + device->common.device_free_chan_resources = ioat_dma_free_chan_resources; +- device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; +- device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; +- device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; +- device->common.device_memcpy_complete = ioat_dma_is_complete; +- device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; ++ device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy; ++ device->common.device_is_tx_complete = ioat_dma_is_complete; ++ device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; ++ device->common.device_dependency_added = ioat_dma_dependency_added; ++ device->common.dev = &pdev->dev; + printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n", + device->common.chancnt); + +diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.h linux-2.6.22-591/drivers/dma/ioatdma.h +--- linux-2.6.22-570/drivers/dma/ioatdma.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/dma/ioatdma.h 2007-12-21 15:36:11.000000000 -0500 +@@ -30,9 +30,6 @@ + + #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 + +-extern struct list_head dma_device_list; +-extern struct list_head dma_client_list; +- + /** + * struct ioat_device - internal representation of a IOAT 
device
+ * @pdev: PCI-Express device
+@@ -105,15 +102,20 @@
+ /**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor
++ * @async_tx: common dma_async_tx_descriptor handle
+ * @node:
++ * @group_list: descriptors making up this transaction
++ * @group_count: number of descriptors in the group
+ * @cookie:
+ * @phys:
+ */
+ 
+ struct ioat_desc_sw {
+ 	struct ioat_dma_descriptor *hw;
++	struct dma_async_tx_descriptor async_tx;
+ 	struct list_head node;
+-	dma_cookie_t cookie;
++	struct list_head group_list;
++	int group_count;
+ 	dma_addr_t phys;
+ 	DECLARE_PCI_UNMAP_ADDR(src)
+ 	DECLARE_PCI_UNMAP_LEN(src_len)
+@@ -122,4 +124,3 @@
+ };
+ 
+ #endif /* IOATDMA_H */
+-
+diff -Nurb linux-2.6.22-570/drivers/dma/iop-adma.c linux-2.6.22-591/drivers/dma/iop-adma.c
+--- linux-2.6.22-570/drivers/dma/iop-adma.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/dma/iop-adma.c	2007-12-21 15:36:11.000000000 -0500
+@@ -0,0 +1,1465 @@
++/*
++ * offload engine driver for the Intel Xscale series of i/o processors
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++
++/*
++ * This driver supports the asynchronous DMA copy and RAID engines available
++ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
++ */
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/async_tx.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++#include <linux/platform_device.h>
++#include <linux/memory.h>
++#include <linux/ioport.h>
++
++#include <asm/arch/adma.h>
++
++#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
++#define to_iop_adma_device(dev) \
++	container_of(dev, struct iop_adma_device, common)
++#define tx_to_iop_adma_slot(tx) \
++	container_of(tx, struct iop_adma_desc_slot, async_tx)
++
++/**
++ * iop_adma_free_slots - flags descriptor slots for reuse
++ * @slot: Slot to free
++ * Caller must hold &iop_chan->lock while calling this function
++ */
++static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
++{
++	int stride = slot->slots_per_op;
++
++	while (stride--) {
++		slot->slots_per_op = 0;
++		slot = list_entry(slot->slot_node.next,
++				struct iop_adma_desc_slot,
++				slot_node);
++	}
++}
++
++static dma_cookie_t
++iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
++	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
++{
++	BUG_ON(desc->async_tx.cookie < 0);
++	spin_lock_bh(&desc->async_tx.lock);
++	if (desc->async_tx.cookie > 0) {
++		cookie = desc->async_tx.cookie;
++		desc->async_tx.cookie = 0;
++
++		/* call the callback (must not sleep or submit new
++		 * operations to this channel)
++		 */
++		if (desc->async_tx.callback)
++			desc->async_tx.callback(
++				desc->async_tx.callback_param);
++
++		/* unmap dma addresses
++		 * (unmap_single vs unmap_page?)
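++		 * dma_unmap_page() is used for both cases below, and the
++		 * group head caches unmap_len/unmap_src_cnt for the group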
++ */ ++ if (desc->group_head && desc->unmap_len) { ++ struct iop_adma_desc_slot *unmap = desc->group_head; ++ struct device *dev = ++ &iop_chan->device->pdev->dev; ++ u32 len = unmap->unmap_len; ++ u32 src_cnt = unmap->unmap_src_cnt; ++ dma_addr_t addr = iop_desc_get_dest_addr(unmap, ++ iop_chan); ++ ++ dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); ++ while (src_cnt--) { ++ addr = iop_desc_get_src_addr(unmap, ++ iop_chan, ++ src_cnt); ++ dma_unmap_page(dev, addr, len, ++ DMA_TO_DEVICE); ++ } ++ desc->group_head = NULL; ++ } ++ } ++ ++ /* run dependent operations */ ++ async_tx_run_dependencies(&desc->async_tx); ++ spin_unlock_bh(&desc->async_tx.lock); ++ ++ return cookie; ++} ++ ++static int ++iop_adma_clean_slot(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *iop_chan) ++{ ++ /* the client is allowed to attach dependent operations ++ * until 'ack' is set ++ */ ++ if (!desc->async_tx.ack) ++ return 0; ++ ++ /* leave the last descriptor in the chain ++ * so we can append to it ++ */ ++ if (desc->chain_node.next == &iop_chan->chain) ++ return 1; ++ ++ dev_dbg(iop_chan->device->common.dev, ++ "\tfree slot: %d slots_per_op: %d\n", ++ desc->idx, desc->slots_per_op); ++ ++ list_del(&desc->chain_node); ++ iop_adma_free_slots(desc); ++ ++ return 0; ++} ++ ++static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) ++{ ++ struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; ++ dma_cookie_t cookie = 0; ++ u32 current_desc = iop_chan_get_current_descriptor(iop_chan); ++ int busy = iop_chan_is_busy(iop_chan); ++ int seen_current = 0, slot_cnt = 0, slots_per_op = 0; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); ++ /* free completed slots from the chain starting with ++ * the oldest descriptor ++ */ ++ list_for_each_entry_safe(iter, _iter, &iop_chan->chain, ++ chain_node) { ++ pr_debug("\tcookie: %d slot: %d busy: %d " ++ "this_desc: %#x next_desc: %#x ack: %d\n", ++ iter->async_tx.cookie, iter->idx, busy, iter->phys, ++ iop_desc_get_next_desc(iter), ++ iter->async_tx.ack); ++ prefetch(_iter); ++ prefetch(&_iter->async_tx); ++ ++ /* do not advance past the current descriptor loaded into the ++ * hardware channel, subsequent descriptors are either in ++ * process or have not been submitted ++ */ ++ if (seen_current) ++ break; ++ ++ /* stop the search if we reach the current descriptor and the ++ * channel is busy, or if it appears that the current descriptor ++ * needs to be re-read (i.e. 
has been appended to) ++ */ ++ if (iter->phys == current_desc) { ++ BUG_ON(seen_current++); ++ if (busy || iop_desc_get_next_desc(iter)) ++ break; ++ } ++ ++ /* detect the start of a group transaction */ ++ if (!slot_cnt && !slots_per_op) { ++ slot_cnt = iter->slot_cnt; ++ slots_per_op = iter->slots_per_op; ++ if (slot_cnt <= slots_per_op) { ++ slot_cnt = 0; ++ slots_per_op = 0; ++ } ++ } ++ ++ if (slot_cnt) { ++ pr_debug("\tgroup++\n"); ++ if (!grp_start) ++ grp_start = iter; ++ slot_cnt -= slots_per_op; ++ } ++ ++ /* all the members of a group are complete */ ++ if (slots_per_op != 0 && slot_cnt == 0) { ++ struct iop_adma_desc_slot *grp_iter, *_grp_iter; ++ int end_of_chain = 0; ++ pr_debug("\tgroup end\n"); ++ ++ /* collect the total results */ ++ if (grp_start->xor_check_result) { ++ u32 zero_sum_result = 0; ++ slot_cnt = grp_start->slot_cnt; ++ grp_iter = grp_start; ++ ++ list_for_each_entry_from(grp_iter, ++ &iop_chan->chain, chain_node) { ++ zero_sum_result |= ++ iop_desc_get_zero_result(grp_iter); ++ pr_debug("\titer%d result: %d\n", ++ grp_iter->idx, zero_sum_result); ++ slot_cnt -= slots_per_op; ++ if (slot_cnt == 0) ++ break; ++ } ++ pr_debug("\tgrp_start->xor_check_result: %p\n", ++ grp_start->xor_check_result); ++ *grp_start->xor_check_result = zero_sum_result; ++ } ++ ++ /* clean up the group */ ++ slot_cnt = grp_start->slot_cnt; ++ grp_iter = grp_start; ++ list_for_each_entry_safe_from(grp_iter, _grp_iter, ++ &iop_chan->chain, chain_node) { ++ cookie = iop_adma_run_tx_complete_actions( ++ grp_iter, iop_chan, cookie); ++ ++ slot_cnt -= slots_per_op; ++ end_of_chain = iop_adma_clean_slot(grp_iter, ++ iop_chan); ++ ++ if (slot_cnt == 0 || end_of_chain) ++ break; ++ } ++ ++ /* the group should be complete at this point */ ++ BUG_ON(slot_cnt); ++ ++ slots_per_op = 0; ++ grp_start = NULL; ++ if (end_of_chain) ++ break; ++ else ++ continue; ++ } else if (slots_per_op) /* wait for group completion */ ++ continue; ++ ++ /* write back zero sum results (single descriptor case) */ ++ if (iter->xor_check_result && iter->async_tx.cookie) ++ *iter->xor_check_result = ++ iop_desc_get_zero_result(iter); ++ ++ cookie = iop_adma_run_tx_complete_actions( ++ iter, iop_chan, cookie); ++ ++ if (iop_adma_clean_slot(iter, iop_chan)) ++ break; ++ } ++ ++ BUG_ON(!seen_current); ++ ++ iop_chan_idle(busy, iop_chan); ++ ++ if (cookie > 0) { ++ iop_chan->completed_cookie = cookie; ++ pr_debug("\tcompleted cookie %d\n", cookie); ++ } ++} ++ ++static void ++iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) ++{ ++ spin_lock_bh(&iop_chan->lock); ++ __iop_adma_slot_cleanup(iop_chan); ++ spin_unlock_bh(&iop_chan->lock); ++} ++ ++static void iop_adma_tasklet(unsigned long data) ++{ ++ struct iop_adma_chan *chan = (struct iop_adma_chan *) data; ++ __iop_adma_slot_cleanup(chan); ++} ++ ++static struct iop_adma_desc_slot * ++iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, ++ int slots_per_op) ++{ ++ struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; ++ struct list_head chain = LIST_HEAD_INIT(chain); ++ int slots_found, retry = 0; ++ ++ /* start search from the last allocated descrtiptor ++ * if a contiguous allocation can not be found start searching ++ * from the beginning of the list ++ */ ++retry: ++ slots_found = 0; ++ if (retry == 0) ++ iter = iop_chan->last_used; ++ else ++ iter = list_entry(&iop_chan->all_slots, ++ struct iop_adma_desc_slot, ++ slot_node); ++ ++ list_for_each_entry_safe_continue( ++ iter, _iter, &iop_chan->all_slots, slot_node) { ++ prefetch(_iter); ++ 
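++		/* warm the embedded descriptor state as well */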
prefetch(&_iter->async_tx); ++ if (iter->slots_per_op) { ++ /* give up after finding the first busy slot ++ * on the second pass through the list ++ */ ++ if (retry) ++ break; ++ ++ slots_found = 0; ++ continue; ++ } ++ ++ /* start the allocation if the slot is correctly aligned */ ++ if (!slots_found++) { ++ if (iop_desc_is_aligned(iter, slots_per_op)) ++ alloc_start = iter; ++ else { ++ slots_found = 0; ++ continue; ++ } ++ } ++ ++ if (slots_found == num_slots) { ++ struct iop_adma_desc_slot *alloc_tail = NULL; ++ struct iop_adma_desc_slot *last_used = NULL; ++ iter = alloc_start; ++ while (num_slots) { ++ int i; ++ dev_dbg(iop_chan->device->common.dev, ++ "allocated slot: %d " ++ "(desc %p phys: %#x) slots_per_op %d\n", ++ iter->idx, iter->hw_desc, iter->phys, ++ slots_per_op); ++ ++ /* pre-ack all but the last descriptor */ ++ if (num_slots != slots_per_op) ++ iter->async_tx.ack = 1; ++ else ++ iter->async_tx.ack = 0; ++ ++ list_add_tail(&iter->chain_node, &chain); ++ alloc_tail = iter; ++ iter->async_tx.cookie = 0; ++ iter->slot_cnt = num_slots; ++ iter->xor_check_result = NULL; ++ for (i = 0; i < slots_per_op; i++) { ++ iter->slots_per_op = slots_per_op - i; ++ last_used = iter; ++ iter = list_entry(iter->slot_node.next, ++ struct iop_adma_desc_slot, ++ slot_node); ++ } ++ num_slots -= slots_per_op; ++ } ++ alloc_tail->group_head = alloc_start; ++ alloc_tail->async_tx.cookie = -EBUSY; ++ list_splice(&chain, &alloc_tail->group_list); ++ iop_chan->last_used = last_used; ++ iop_desc_clear_next_desc(alloc_start); ++ iop_desc_clear_next_desc(alloc_tail); ++ return alloc_tail; ++ } ++ } ++ if (!retry++) ++ goto retry; ++ ++ /* try to free some slots if the allocation fails */ ++ tasklet_schedule(&iop_chan->irq_tasklet); ++ ++ return NULL; ++} ++ ++static dma_cookie_t ++iop_desc_assign_cookie(struct iop_adma_chan *iop_chan, ++ struct iop_adma_desc_slot *desc) ++{ ++ dma_cookie_t cookie = iop_chan->common.cookie; ++ cookie++; ++ if (cookie < 0) ++ cookie = 1; ++ iop_chan->common.cookie = desc->async_tx.cookie = cookie; ++ return cookie; ++} ++ ++static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) ++{ ++ dev_dbg(iop_chan->device->common.dev, "pending: %d\n", ++ iop_chan->pending); ++ ++ if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { ++ iop_chan->pending = 0; ++ iop_chan_append(iop_chan); ++ } ++} ++ ++static dma_cookie_t ++iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) ++{ ++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); ++ struct iop_adma_desc_slot *grp_start, *old_chain_tail; ++ int slot_cnt; ++ int slots_per_op; ++ dma_cookie_t cookie; ++ ++ grp_start = sw_desc->group_head; ++ slot_cnt = grp_start->slot_cnt; ++ slots_per_op = grp_start->slots_per_op; ++ ++ spin_lock_bh(&iop_chan->lock); ++ cookie = iop_desc_assign_cookie(iop_chan, sw_desc); ++ ++ old_chain_tail = list_entry(iop_chan->chain.prev, ++ struct iop_adma_desc_slot, chain_node); ++ list_splice_init(&sw_desc->group_list, &old_chain_tail->chain_node); ++ ++ /* fix up the hardware chain */ ++ iop_desc_set_next_desc(old_chain_tail, grp_start->phys); ++ ++ /* 1/ don't add pre-chained descriptors ++ * 2/ dummy read to flush next_desc write ++ */ ++ BUG_ON(iop_desc_get_next_desc(sw_desc)); ++ ++ /* increment the pending count by the number of slots ++ * memcpy operations have a 1:1 (slot:operation) relation ++ * other operations are heavier and will pop the threshold ++ * more often. 
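++	 * (iop_adma_check_threshold() flushes the chain to hardware via
++	 * iop_chan_append() once pending crosses IOP_ADMA_THRESHOLD)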
++ */ ++ iop_chan->pending += slot_cnt; ++ iop_adma_check_threshold(iop_chan); ++ spin_unlock_bh(&iop_chan->lock); ++ ++ dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", ++ __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx); ++ ++ return cookie; ++} ++ ++static void ++iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, ++ int index) ++{ ++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); ++ ++ /* to do: support transfers lengths > IOP_ADMA_MAX_BYTE_COUNT */ ++ iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr); ++} ++ ++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); ++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); ++ ++/* returns the number of allocated descriptors */ ++static int iop_adma_alloc_chan_resources(struct dma_chan *chan) ++{ ++ char *hw_desc; ++ int idx; ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *slot = NULL; ++ int init = iop_chan->slots_allocated ? 0 : 1; ++ struct iop_adma_platform_data *plat_data = ++ iop_chan->device->pdev->dev.platform_data; ++ int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; ++ ++ /* Allocate descriptor slots */ ++ do { ++ idx = iop_chan->slots_allocated; ++ if (idx == num_descs_in_pool) ++ break; ++ ++ slot = kzalloc(sizeof(*slot), GFP_KERNEL); ++ if (!slot) { ++ printk(KERN_INFO "IOP ADMA Channel only initialized" ++ " %d descriptor slots", idx); ++ break; ++ } ++ hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; ++ slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; ++ ++ dma_async_tx_descriptor_init(&slot->async_tx, chan); ++ slot->async_tx.tx_submit = iop_adma_tx_submit; ++ slot->async_tx.tx_set_dest = iop_adma_set_dest; ++ INIT_LIST_HEAD(&slot->chain_node); ++ INIT_LIST_HEAD(&slot->slot_node); ++ INIT_LIST_HEAD(&slot->group_list); ++ hw_desc = (char *) iop_chan->device->dma_desc_pool; ++ slot->phys = (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; ++ slot->idx = idx; ++ ++ spin_lock_bh(&iop_chan->lock); ++ iop_chan->slots_allocated++; ++ list_add_tail(&slot->slot_node, &iop_chan->all_slots); ++ spin_unlock_bh(&iop_chan->lock); ++ } while (iop_chan->slots_allocated < num_descs_in_pool); ++ ++ if (idx && !iop_chan->last_used) ++ iop_chan->last_used = list_entry(iop_chan->all_slots.next, ++ struct iop_adma_desc_slot, ++ slot_node); ++ ++ dev_dbg(iop_chan->device->common.dev, ++ "allocated %d descriptor slots last_used: %p\n", ++ iop_chan->slots_allocated, iop_chan->last_used); ++ ++ /* initialize the channel and the chain with a null operation */ ++ if (init) { ++ if (dma_has_cap(DMA_MEMCPY, ++ iop_chan->device->common.cap_mask)) ++ iop_chan_start_null_memcpy(iop_chan); ++ else if (dma_has_cap(DMA_XOR, ++ iop_chan->device->common.cap_mask)) ++ iop_chan_start_null_xor(iop_chan); ++ else ++ BUG(); ++ } ++ ++ return (idx > 0) ? 
idx : -ENOMEM; ++} ++ ++static struct dma_async_tx_descriptor * ++iop_adma_prep_dma_interrupt(struct dma_chan *chan) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ int slot_cnt, slots_per_op; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ iop_desc_init_interrupt(grp_start, iop_chan); ++ grp_start->unmap_len = 0; ++ } ++ spin_unlock_bh(&iop_chan->lock); ++ ++ return sw_desc ? &sw_desc->async_tx : NULL; ++} ++ ++static void ++iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, ++ int index) ++{ ++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); ++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head; ++ ++ iop_desc_set_memcpy_src_addr(grp_start, addr); ++} ++ ++static struct dma_async_tx_descriptor * ++iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ int slot_cnt, slots_per_op; ++ ++ if (unlikely(!len)) ++ return NULL; ++ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); ++ ++ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", ++ __FUNCTION__, len); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ iop_desc_init_memcpy(grp_start, int_en); ++ iop_desc_set_byte_count(grp_start, iop_chan, len); ++ sw_desc->unmap_src_cnt = 1; ++ sw_desc->unmap_len = len; ++ sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src; ++ } ++ spin_unlock_bh(&iop_chan->lock); ++ ++ return sw_desc ? &sw_desc->async_tx : NULL; ++} ++ ++static struct dma_async_tx_descriptor * ++iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len, ++ int int_en) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ int slot_cnt, slots_per_op; ++ ++ if (unlikely(!len)) ++ return NULL; ++ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); ++ ++ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", ++ __FUNCTION__, len); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ iop_desc_init_memset(grp_start, int_en); ++ iop_desc_set_byte_count(grp_start, iop_chan, len); ++ iop_desc_set_block_fill_val(grp_start, value); ++ sw_desc->unmap_src_cnt = 1; ++ sw_desc->unmap_len = len; ++ } ++ spin_unlock_bh(&iop_chan->lock); ++ ++ return sw_desc ? 
&sw_desc->async_tx : NULL; ++} ++ ++static void ++iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, ++ int index) ++{ ++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); ++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head; ++ ++ iop_desc_set_xor_src_addr(grp_start, index, addr); ++} ++ ++static struct dma_async_tx_descriptor * ++iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len, ++ int int_en) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ int slot_cnt, slots_per_op; ++ ++ if (unlikely(!len)) ++ return NULL; ++ BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); ++ ++ dev_dbg(iop_chan->device->common.dev, ++ "%s src_cnt: %d len: %u int_en: %d\n", ++ __FUNCTION__, src_cnt, len, int_en); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ iop_desc_init_xor(grp_start, src_cnt, int_en); ++ iop_desc_set_byte_count(grp_start, iop_chan, len); ++ sw_desc->unmap_src_cnt = src_cnt; ++ sw_desc->unmap_len = len; ++ sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src; ++ } ++ spin_unlock_bh(&iop_chan->lock); ++ ++ return sw_desc ? &sw_desc->async_tx : NULL; ++} ++ ++static void ++iop_adma_xor_zero_sum_set_src(dma_addr_t addr, ++ struct dma_async_tx_descriptor *tx, ++ int index) ++{ ++ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); ++ struct iop_adma_desc_slot *grp_start = sw_desc->group_head; ++ ++ iop_desc_set_zero_sum_src_addr(grp_start, index, addr); ++} ++ ++static struct dma_async_tx_descriptor * ++iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt, ++ size_t len, u32 *result, int int_en) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ int slot_cnt, slots_per_op; ++ ++ if (unlikely(!len)) ++ return NULL; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", ++ __FUNCTION__, src_cnt, len); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ iop_desc_init_zero_sum(grp_start, src_cnt, int_en); ++ iop_desc_set_zero_sum_byte_count(grp_start, len); ++ grp_start->xor_check_result = result; ++ pr_debug("\t%s: grp_start->xor_check_result: %p\n", ++ __FUNCTION__, grp_start->xor_check_result); ++ sw_desc->unmap_src_cnt = src_cnt; ++ sw_desc->unmap_len = len; ++ sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src; ++ } ++ spin_unlock_bh(&iop_chan->lock); ++ ++ return sw_desc ? 
&sw_desc->async_tx : NULL; ++} ++ ++static void iop_adma_dependency_added(struct dma_chan *chan) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ tasklet_schedule(&iop_chan->irq_tasklet); ++} ++ ++static void iop_adma_free_chan_resources(struct dma_chan *chan) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ struct iop_adma_desc_slot *iter, *_iter; ++ int in_use_descs = 0; ++ ++ iop_adma_slot_cleanup(iop_chan); ++ ++ spin_lock_bh(&iop_chan->lock); ++ list_for_each_entry_safe(iter, _iter, &iop_chan->chain, ++ chain_node) { ++ in_use_descs++; ++ list_del(&iter->chain_node); ++ } ++ list_for_each_entry_safe_reverse( ++ iter, _iter, &iop_chan->all_slots, slot_node) { ++ list_del(&iter->slot_node); ++ kfree(iter); ++ iop_chan->slots_allocated--; ++ } ++ iop_chan->last_used = NULL; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", ++ __FUNCTION__, iop_chan->slots_allocated); ++ spin_unlock_bh(&iop_chan->lock); ++ ++ /* one is ok since we left it on there on purpose */ ++ if (in_use_descs > 1) ++ printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", ++ in_use_descs - 1); ++} ++ ++/** ++ * iop_adma_is_complete - poll the status of an ADMA transaction ++ * @chan: ADMA channel handle ++ * @cookie: ADMA transaction identifier ++ */ ++static enum dma_status iop_adma_is_complete(struct dma_chan *chan, ++ dma_cookie_t cookie, ++ dma_cookie_t *done, ++ dma_cookie_t *used) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ dma_cookie_t last_used; ++ dma_cookie_t last_complete; ++ enum dma_status ret; ++ ++ last_used = chan->cookie; ++ last_complete = iop_chan->completed_cookie; ++ ++ if (done) ++ *done = last_complete; ++ if (used) ++ *used = last_used; ++ ++ ret = dma_async_is_complete(cookie, last_complete, last_used); ++ if (ret == DMA_SUCCESS) ++ return ret; ++ ++ iop_adma_slot_cleanup(iop_chan); ++ ++ last_used = chan->cookie; ++ last_complete = iop_chan->completed_cookie; ++ ++ if (done) ++ *done = last_complete; ++ if (used) ++ *used = last_used; ++ ++ return dma_async_is_complete(cookie, last_complete, last_used); ++} ++ ++static irqreturn_t iop_adma_eot_handler(int irq, void *data) ++{ ++ struct iop_adma_chan *chan = data; ++ ++ dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); ++ ++ tasklet_schedule(&chan->irq_tasklet); ++ ++ iop_adma_device_clear_eot_status(chan); ++ ++ return IRQ_HANDLED; ++} ++ ++static irqreturn_t iop_adma_eoc_handler(int irq, void *data) ++{ ++ struct iop_adma_chan *chan = data; ++ ++ dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); ++ ++ tasklet_schedule(&chan->irq_tasklet); ++ ++ iop_adma_device_clear_eoc_status(chan); ++ ++ return IRQ_HANDLED; ++} ++ ++static irqreturn_t iop_adma_err_handler(int irq, void *data) ++{ ++ struct iop_adma_chan *chan = data; ++ unsigned long status = iop_chan_get_status(chan); ++ ++ dev_printk(KERN_ERR, chan->device->common.dev, ++ "error ( %s%s%s%s%s%s%s)\n", ++ iop_is_err_int_parity(status, chan) ? "int_parity " : "", ++ iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", ++ iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", ++ iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", ++ iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", ++ iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", ++ iop_is_err_split_tx(status, chan) ? 
"split_tx " : ""); ++ ++ iop_adma_device_clear_err_status(chan); ++ ++ BUG(); ++ ++ return IRQ_HANDLED; ++} ++ ++static void iop_adma_issue_pending(struct dma_chan *chan) ++{ ++ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); ++ ++ if (iop_chan->pending) { ++ iop_chan->pending = 0; ++ iop_chan_append(iop_chan); ++ } ++} ++ ++/* ++ * Perform a transaction to verify the HW works. ++ */ ++#define IOP_ADMA_TEST_SIZE 2000 ++ ++static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) ++{ ++ int i; ++ void *src, *dest; ++ dma_addr_t src_dma, dest_dma; ++ struct dma_chan *dma_chan; ++ dma_cookie_t cookie; ++ struct dma_async_tx_descriptor *tx; ++ int err = 0; ++ struct iop_adma_chan *iop_chan; ++ ++ dev_dbg(device->common.dev, "%s\n", __FUNCTION__); ++ ++ src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); ++ if (!src) ++ return -ENOMEM; ++ dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); ++ if (!dest) { ++ kfree(src); ++ return -ENOMEM; ++ } ++ ++ /* Fill in src buffer */ ++ for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) ++ ((u8 *) src)[i] = (u8)i; ++ ++ memset(dest, 0, IOP_ADMA_TEST_SIZE); ++ ++ /* Start copy, using first DMA channel */ ++ dma_chan = container_of(device->common.channels.next, ++ struct dma_chan, ++ device_node); ++ if (iop_adma_alloc_chan_resources(dma_chan) < 1) { ++ err = -ENODEV; ++ goto out; ++ } ++ ++ tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1); ++ dest_dma = dma_map_single(dma_chan->device->dev, dest, ++ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); ++ iop_adma_set_dest(dest_dma, tx, 0); ++ src_dma = dma_map_single(dma_chan->device->dev, src, ++ IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); ++ iop_adma_memcpy_set_src(src_dma, tx, 0); ++ ++ cookie = iop_adma_tx_submit(tx); ++ iop_adma_issue_pending(dma_chan); ++ async_tx_ack(tx); ++ msleep(1); ++ ++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != ++ DMA_SUCCESS) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test copy timed out, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++ iop_chan = to_iop_adma_chan(dma_chan); ++ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, ++ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); ++ if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test copy failed compare, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++free_resources: ++ iop_adma_free_chan_resources(dma_chan); ++out: ++ kfree(src); ++ kfree(dest); ++ return err; ++} ++ ++#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ ++static int __devinit ++iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) ++{ ++ int i, src_idx; ++ struct page *dest; ++ struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; ++ struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; ++ dma_addr_t dma_addr, dest_dma; ++ struct dma_async_tx_descriptor *tx; ++ struct dma_chan *dma_chan; ++ dma_cookie_t cookie; ++ u8 cmp_byte = 0; ++ u32 cmp_word; ++ u32 zero_sum_result; ++ int err = 0; ++ struct iop_adma_chan *iop_chan; ++ ++ dev_dbg(device->common.dev, "%s\n", __FUNCTION__); ++ ++ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { ++ xor_srcs[src_idx] = alloc_page(GFP_KERNEL); ++ if (!xor_srcs[src_idx]) ++ while (src_idx--) { ++ __free_page(xor_srcs[src_idx]); ++ return -ENOMEM; ++ } ++ } ++ ++ dest = alloc_page(GFP_KERNEL); ++ if (!dest) ++ while (src_idx--) { ++ __free_page(xor_srcs[src_idx]); ++ return -ENOMEM; ++ } ++ ++ /* Fill in src buffers */ ++ for (src_idx = 0; src_idx < 
IOP_ADMA_NUM_SRC_TEST; src_idx++) { ++ u8 *ptr = page_address(xor_srcs[src_idx]); ++ for (i = 0; i < PAGE_SIZE; i++) ++ ptr[i] = (1 << src_idx); ++ } ++ ++ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) ++ cmp_byte ^= (u8) (1 << src_idx); ++ ++ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | ++ (cmp_byte << 8) | cmp_byte; ++ ++ memset(page_address(dest), 0, PAGE_SIZE); ++ ++ dma_chan = container_of(device->common.channels.next, ++ struct dma_chan, ++ device_node); ++ if (iop_adma_alloc_chan_resources(dma_chan) < 1) { ++ err = -ENODEV; ++ goto out; ++ } ++ ++ /* test xor */ ++ tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST, ++ PAGE_SIZE, 1); ++ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, ++ PAGE_SIZE, DMA_FROM_DEVICE); ++ iop_adma_set_dest(dest_dma, tx, 0); ++ ++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { ++ dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0, ++ PAGE_SIZE, DMA_TO_DEVICE); ++ iop_adma_xor_set_src(dma_addr, tx, i); ++ } ++ ++ cookie = iop_adma_tx_submit(tx); ++ iop_adma_issue_pending(dma_chan); ++ async_tx_ack(tx); ++ msleep(8); ++ ++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != ++ DMA_SUCCESS) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test xor timed out, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++ iop_chan = to_iop_adma_chan(dma_chan); ++ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, ++ PAGE_SIZE, DMA_FROM_DEVICE); ++ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { ++ u32 *ptr = page_address(dest); ++ if (ptr[i] != cmp_word) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test xor failed compare, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ } ++ dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, ++ PAGE_SIZE, DMA_TO_DEVICE); ++ ++ /* skip zero sum if the capability is not present */ ++ if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) ++ goto free_resources; ++ ++ /* zero sum the sources with the destintation page */ ++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) ++ zero_sum_srcs[i] = xor_srcs[i]; ++ zero_sum_srcs[i] = dest; ++ ++ zero_sum_result = 1; ++ ++ tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, ++ PAGE_SIZE, &zero_sum_result, 1); ++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { ++ dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], ++ 0, PAGE_SIZE, DMA_TO_DEVICE); ++ iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); ++ } ++ ++ cookie = iop_adma_tx_submit(tx); ++ iop_adma_issue_pending(dma_chan); ++ async_tx_ack(tx); ++ msleep(8); ++ ++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test zero sum timed out, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++ if (zero_sum_result != 0) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test zero sum failed compare, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++ /* test memset */ ++ tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1); ++ dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, ++ PAGE_SIZE, DMA_FROM_DEVICE); ++ iop_adma_set_dest(dma_addr, tx, 0); ++ ++ cookie = iop_adma_tx_submit(tx); ++ iop_adma_issue_pending(dma_chan); ++ async_tx_ack(tx); ++ msleep(8); ++ ++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test memset timed out, disabling\n"); ++ err = -ENODEV; ++ goto 
free_resources; ++ } ++ ++ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { ++ u32 *ptr = page_address(dest); ++ if (ptr[i]) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test memset failed compare, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ } ++ ++ /* test for non-zero parity sum */ ++ zero_sum_result = 0; ++ tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, ++ PAGE_SIZE, &zero_sum_result, 1); ++ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { ++ dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], ++ 0, PAGE_SIZE, DMA_TO_DEVICE); ++ iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); ++ } ++ ++ cookie = iop_adma_tx_submit(tx); ++ iop_adma_issue_pending(dma_chan); ++ async_tx_ack(tx); ++ msleep(8); ++ ++ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test non-zero sum timed out, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++ if (zero_sum_result != 1) { ++ dev_printk(KERN_ERR, dma_chan->device->dev, ++ "Self-test non-zero sum failed compare, disabling\n"); ++ err = -ENODEV; ++ goto free_resources; ++ } ++ ++free_resources: ++ iop_adma_free_chan_resources(dma_chan); ++out: ++ src_idx = IOP_ADMA_NUM_SRC_TEST; ++ while (src_idx--) ++ __free_page(xor_srcs[src_idx]); ++ __free_page(dest); ++ return err; ++} ++ ++static int __devexit iop_adma_remove(struct platform_device *dev) ++{ ++ struct iop_adma_device *device = platform_get_drvdata(dev); ++ struct dma_chan *chan, *_chan; ++ struct iop_adma_chan *iop_chan; ++ int i; ++ struct iop_adma_platform_data *plat_data = dev->dev.platform_data; ++ ++ dma_async_device_unregister(&device->common); ++ ++ for (i = 0; i < 3; i++) { ++ unsigned int irq; ++ irq = platform_get_irq(dev, i); ++ free_irq(irq, device); ++ } ++ ++ dma_free_coherent(&dev->dev, plat_data->pool_size, ++ device->dma_desc_pool_virt, device->dma_desc_pool); ++ ++ do { ++ struct resource *res; ++ res = platform_get_resource(dev, IORESOURCE_MEM, 0); ++ release_mem_region(res->start, res->end - res->start); ++ } while (0); ++ ++ list_for_each_entry_safe(chan, _chan, &device->common.channels, ++ device_node) { ++ iop_chan = to_iop_adma_chan(chan); ++ list_del(&chan->device_node); ++ kfree(iop_chan); ++ } ++ kfree(device); ++ ++ return 0; ++} ++ ++static int __devinit iop_adma_probe(struct platform_device *pdev) ++{ ++ struct resource *res; ++ int ret = 0, i; ++ struct iop_adma_device *adev; ++ struct iop_adma_chan *iop_chan; ++ struct dma_device *dma_dev; ++ struct iop_adma_platform_data *plat_data = pdev->dev.platform_data; ++ ++ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ++ if (!res) ++ return -ENODEV; ++ ++ if (!devm_request_mem_region(&pdev->dev, res->start, ++ res->end - res->start, pdev->name)) ++ return -EBUSY; ++ ++ adev = kzalloc(sizeof(*adev), GFP_KERNEL); ++ if (!adev) ++ return -ENOMEM; ++ dma_dev = &adev->common; ++ ++ /* allocate coherent memory for hardware descriptors ++ * note: writecombine gives slightly better performance, but ++ * requires that we explicitly flush the writes ++ */ ++ if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, ++ plat_data->pool_size, ++ &adev->dma_desc_pool, ++ GFP_KERNEL)) == NULL) { ++ ret = -ENOMEM; ++ goto err_free_adev; ++ } ++ ++ dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n", ++ __FUNCTION__, adev->dma_desc_pool_virt, ++ (void *) adev->dma_desc_pool); ++ ++ adev->id = plat_data->hw_id; ++ ++ /* discover transaction capabilites 
from the platform data */ ++ dma_dev->cap_mask = plat_data->cap_mask; ++ ++ adev->pdev = pdev; ++ platform_set_drvdata(pdev, adev); ++ ++ INIT_LIST_HEAD(&dma_dev->channels); ++ ++ /* set base routines */ ++ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; ++ dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; ++ dma_dev->device_is_tx_complete = iop_adma_is_complete; ++ dma_dev->device_issue_pending = iop_adma_issue_pending; ++ dma_dev->device_dependency_added = iop_adma_dependency_added; ++ dma_dev->dev = &pdev->dev; ++ ++ /* set prep routines based on capability */ ++ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) ++ dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; ++ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) ++ dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; ++ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { ++ dma_dev->max_xor = iop_adma_get_max_xor(); ++ dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; ++ } ++ if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) ++ dma_dev->device_prep_dma_zero_sum = ++ iop_adma_prep_dma_zero_sum; ++ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) ++ dma_dev->device_prep_dma_interrupt = ++ iop_adma_prep_dma_interrupt; ++ ++ iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); ++ if (!iop_chan) { ++ ret = -ENOMEM; ++ goto err_free_dma; ++ } ++ iop_chan->device = adev; ++ ++ iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, ++ res->end - res->start); ++ if (!iop_chan->mmr_base) { ++ ret = -ENOMEM; ++ goto err_free_iop_chan; ++ } ++ tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long) ++ iop_chan); ++ ++ /* clear errors before enabling interrupts */ ++ iop_adma_device_clear_err_status(iop_chan); ++ ++ for (i = 0; i < 3; i++) { ++ irq_handler_t handler[] = { iop_adma_eot_handler, ++ iop_adma_eoc_handler, ++ iop_adma_err_handler }; ++ int irq = platform_get_irq(pdev, i); ++ if (irq < 0) { ++ ret = -ENXIO; ++ goto err_free_iop_chan; ++ } else { ++ ret = devm_request_irq(&pdev->dev, irq, ++ handler[i], 0, pdev->name, iop_chan); ++ if (ret) ++ goto err_free_iop_chan; ++ } ++ } ++ ++ spin_lock_init(&iop_chan->lock); ++ init_timer(&iop_chan->cleanup_watchdog); ++ iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan; ++ iop_chan->cleanup_watchdog.function = iop_adma_tasklet; ++ INIT_LIST_HEAD(&iop_chan->chain); ++ INIT_LIST_HEAD(&iop_chan->all_slots); ++ INIT_RCU_HEAD(&iop_chan->common.rcu); ++ iop_chan->common.device = dma_dev; ++ list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); ++ ++ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { ++ ret = iop_adma_memcpy_self_test(adev); ++ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); ++ if (ret) ++ goto err_free_iop_chan; ++ } ++ ++ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || ++ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { ++ ret = iop_adma_xor_zero_sum_self_test(adev); ++ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); ++ if (ret) ++ goto err_free_iop_chan; ++ } ++ ++ dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " ++ "( %s%s%s%s%s%s%s%s%s%s)\n", ++ dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", ++ dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", ++ dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", ++ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", ++ dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", ++ dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? 
"xor_zero_sum " : "", ++ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", ++ dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", ++ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", ++ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); ++ ++ dma_async_device_register(dma_dev); ++ goto out; ++ ++ err_free_iop_chan: ++ kfree(iop_chan); ++ err_free_dma: ++ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, ++ adev->dma_desc_pool_virt, adev->dma_desc_pool); ++ err_free_adev: ++ kfree(adev); ++ out: ++ return ret; ++} ++ ++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) ++{ ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ dma_cookie_t cookie; ++ int slot_cnt, slots_per_op; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ ++ list_splice_init(&sw_desc->group_list, &iop_chan->chain); ++ sw_desc->async_tx.ack = 1; ++ iop_desc_init_memcpy(grp_start, 0); ++ iop_desc_set_byte_count(grp_start, iop_chan, 0); ++ iop_desc_set_dest_addr(grp_start, iop_chan, 0); ++ iop_desc_set_memcpy_src_addr(grp_start, 0); ++ ++ cookie = iop_chan->common.cookie; ++ cookie++; ++ if (cookie <= 1) ++ cookie = 2; ++ ++ /* initialize the completed cookie to be less than ++ * the most recently used cookie ++ */ ++ iop_chan->completed_cookie = cookie - 1; ++ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; ++ ++ /* channel should not be busy */ ++ BUG_ON(iop_chan_is_busy(iop_chan)); ++ ++ /* clear any prior error-status bits */ ++ iop_adma_device_clear_err_status(iop_chan); ++ ++ /* disable operation */ ++ iop_chan_disable(iop_chan); ++ ++ /* set the descriptor address */ ++ iop_chan_set_next_descriptor(iop_chan, sw_desc->phys); ++ ++ /* 1/ don't add pre-chained descriptors ++ * 2/ dummy read to flush next_desc write ++ */ ++ BUG_ON(iop_desc_get_next_desc(sw_desc)); ++ ++ /* run the descriptor */ ++ iop_chan_enable(iop_chan); ++ } else ++ dev_printk(KERN_ERR, iop_chan->device->common.dev, ++ "failed to allocate null descriptor\n"); ++ spin_unlock_bh(&iop_chan->lock); ++} ++ ++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) ++{ ++ struct iop_adma_desc_slot *sw_desc, *grp_start; ++ dma_cookie_t cookie; ++ int slot_cnt, slots_per_op; ++ ++ dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); ++ ++ spin_lock_bh(&iop_chan->lock); ++ slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); ++ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); ++ if (sw_desc) { ++ grp_start = sw_desc->group_head; ++ list_splice_init(&sw_desc->group_list, &iop_chan->chain); ++ sw_desc->async_tx.ack = 1; ++ iop_desc_init_null_xor(grp_start, 2, 0); ++ iop_desc_set_byte_count(grp_start, iop_chan, 0); ++ iop_desc_set_dest_addr(grp_start, iop_chan, 0); ++ iop_desc_set_xor_src_addr(grp_start, 0, 0); ++ iop_desc_set_xor_src_addr(grp_start, 1, 0); ++ ++ cookie = iop_chan->common.cookie; ++ cookie++; ++ if (cookie <= 1) ++ cookie = 2; ++ ++ /* initialize the completed cookie to be less than ++ * the most recently used cookie ++ */ ++ iop_chan->completed_cookie = cookie - 1; ++ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; ++ ++ /* channel should not be busy */ ++ BUG_ON(iop_chan_is_busy(iop_chan)); ++ ++ /* clear any prior error-status bits */ ++ 
iop_adma_device_clear_err_status(iop_chan); ++ ++ /* disable operation */ ++ iop_chan_disable(iop_chan); ++ ++ /* set the descriptor address */ ++ iop_chan_set_next_descriptor(iop_chan, sw_desc->phys); ++ ++ /* 1/ don't add pre-chained descriptors ++ * 2/ dummy read to flush next_desc write ++ */ ++ BUG_ON(iop_desc_get_next_desc(sw_desc)); ++ ++ /* run the descriptor */ ++ iop_chan_enable(iop_chan); ++ } else ++ dev_printk(KERN_ERR, iop_chan->device->common.dev, ++ "failed to allocate null descriptor\n"); ++ spin_unlock_bh(&iop_chan->lock); ++} ++ ++static struct platform_driver iop_adma_driver = { ++ .probe = iop_adma_probe, ++ .remove = iop_adma_remove, ++ .driver = { ++ .owner = THIS_MODULE, ++ .name = "iop-adma", ++ }, ++}; ++ ++static int __init iop_adma_init (void) ++{ ++ /* it's currently unsafe to unload this module */ ++ /* if forced, worst case is that rmmod hangs */ ++ __unsafe(THIS_MODULE); ++ ++ return platform_driver_register(&iop_adma_driver); ++} ++ ++static void __exit iop_adma_exit (void) ++{ ++ platform_driver_unregister(&iop_adma_driver); ++ return; ++} ++ ++module_init(iop_adma_init); ++module_exit(iop_adma_exit); ++ ++MODULE_AUTHOR("Intel Corporation"); ++MODULE_DESCRIPTION("IOP ADMA Engine Driver"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/drivers/edac/edac_mc.c linux-2.6.22-591/drivers/edac/edac_mc.c +--- linux-2.6.22-570/drivers/edac/edac_mc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/edac/edac_mc.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1906,6 +1906,7 @@ + + static int edac_kernel_thread(void *arg) + { ++ set_freezable(); + while (!kthread_should_stop()) { + do_edac_check(); + +diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.c linux-2.6.22-591/drivers/firmware/dcdbas.c +--- linux-2.6.22-570/drivers/firmware/dcdbas.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/firmware/dcdbas.c 2007-12-21 15:36:11.000000000 -0500 +@@ -149,8 +149,9 @@ + return count; + } + +-static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos, +- size_t count) ++static ssize_t smi_data_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t count) + { + size_t max_read; + ssize_t ret; +@@ -170,8 +171,9 @@ + return ret; + } + +-static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos, +- size_t count) ++static ssize_t smi_data_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t count) + { + ssize_t ret; + +diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.h linux-2.6.22-591/drivers/firmware/dcdbas.h +--- linux-2.6.22-570/drivers/firmware/dcdbas.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/firmware/dcdbas.h 2007-12-21 15:36:11.000000000 -0500 +@@ -67,8 +67,7 @@ + #define DCDBAS_BIN_ATTR_RW(_name) \ + struct bin_attribute bin_attr_##_name = { \ + .attr = { .name = __stringify(_name), \ +- .mode = 0600, \ +- .owner = THIS_MODULE }, \ ++ .mode = 0600 }, \ + .read = _name##_read, \ + .write = _name##_write, \ + } +diff -Nurb linux-2.6.22-570/drivers/firmware/dell_rbu.c linux-2.6.22-591/drivers/firmware/dell_rbu.c +--- linux-2.6.22-570/drivers/firmware/dell_rbu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/firmware/dell_rbu.c 2007-12-21 15:36:11.000000000 -0500 +@@ -543,8 +543,9 @@ + return ret_count; + } + +-static ssize_t read_rbu_data(struct kobject *kobj, char *buffer, +- loff_t pos, size_t count) ++static ssize_t read_rbu_data(struct kobject *kobj, ++ struct bin_attribute 
*bin_attr, ++ char *buffer, loff_t pos, size_t count) + { + ssize_t ret_count = 0; + +@@ -591,8 +592,9 @@ + spin_unlock(&rbu_data.lock); + } + +-static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer, +- loff_t pos, size_t count) ++static ssize_t read_rbu_image_type(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buffer, loff_t pos, size_t count) + { + int size = 0; + if (!pos) +@@ -600,8 +602,9 @@ + return size; + } + +-static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer, +- loff_t pos, size_t count) ++static ssize_t write_rbu_image_type(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buffer, loff_t pos, size_t count) + { + int rc = count; + int req_firm_rc = 0; +@@ -660,8 +663,9 @@ + return rc; + } + +-static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer, +- loff_t pos, size_t count) ++static ssize_t read_rbu_packet_size(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buffer, loff_t pos, size_t count) + { + int size = 0; + if (!pos) { +@@ -672,8 +676,9 @@ + return size; + } + +-static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer, +- loff_t pos, size_t count) ++static ssize_t write_rbu_packet_size(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buffer, loff_t pos, size_t count) + { + unsigned long temp; + spin_lock(&rbu_data.lock); +@@ -687,18 +692,18 @@ + } + + static struct bin_attribute rbu_data_attr = { +- .attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444}, ++ .attr = {.name = "data", .mode = 0444}, + .read = read_rbu_data, + }; + + static struct bin_attribute rbu_image_type_attr = { +- .attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644}, ++ .attr = {.name = "image_type", .mode = 0644}, + .read = read_rbu_image_type, + .write = write_rbu_image_type, + }; + + static struct bin_attribute rbu_packet_size_attr = { +- .attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644}, ++ .attr = {.name = "packet_size", .mode = 0644}, + .read = read_rbu_packet_size, + .write = write_rbu_packet_size, + }; +diff -Nurb linux-2.6.22-570/drivers/firmware/edd.c linux-2.6.22-591/drivers/firmware/edd.c +--- linux-2.6.22-570/drivers/firmware/edd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/firmware/edd.c 2007-12-21 15:36:11.000000000 -0500 +@@ -74,7 +74,7 @@ + + #define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \ + struct edd_attribute edd_attr_##_name = { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ ++ .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .test = _test, \ + }; +diff -Nurb linux-2.6.22-570/drivers/firmware/efivars.c linux-2.6.22-591/drivers/firmware/efivars.c +--- linux-2.6.22-570/drivers/firmware/efivars.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/firmware/efivars.c 2007-12-21 15:36:11.000000000 -0500 +@@ -131,21 +131,21 @@ + + #define EFI_ATTR(_name, _mode, _show, _store) \ + struct subsys_attribute efi_attr_##_name = { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ ++ .attr = {.name = __stringify(_name), .mode = _mode}, \ + .show = _show, \ + .store = _store, \ + }; + + #define EFIVAR_ATTR(_name, _mode, _show, _store) \ + struct efivar_attribute efivar_attr_##_name = { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ ++ .attr = {.name = __stringify(_name), .mode = _mode}, \ + .show = _show, \ + .store = _store, \ + }; + + #define 
VAR_SUBSYS_ATTR(_name, _mode, _show, _store) \ + struct subsys_attribute var_subsys_attr_##_name = { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ ++ .attr = {.name = __stringify(_name), .mode = _mode}, \ + .show = _show, \ + .store = _store, \ + }; +diff -Nurb linux-2.6.22-570/drivers/i2c/chips/eeprom.c linux-2.6.22-591/drivers/i2c/chips/eeprom.c +--- linux-2.6.22-570/drivers/i2c/chips/eeprom.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/i2c/chips/eeprom.c 2007-12-21 15:36:11.000000000 -0500 +@@ -110,7 +110,8 @@ + mutex_unlock(&data->update_lock); + } + +-static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj)); + struct eeprom_data *data = i2c_get_clientdata(client); +@@ -150,7 +151,6 @@ + .attr = { + .name = "eeprom", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = EEPROM_SIZE, + .read = eeprom_read, +diff -Nurb linux-2.6.22-570/drivers/i2c/chips/max6875.c linux-2.6.22-591/drivers/i2c/chips/max6875.c +--- linux-2.6.22-570/drivers/i2c/chips/max6875.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/i2c/chips/max6875.c 2007-12-21 15:36:11.000000000 -0500 +@@ -125,8 +125,9 @@ + mutex_unlock(&data->update_lock); + } + +-static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++static ssize_t max6875_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct i2c_client *client = kobj_to_i2c_client(kobj); + struct max6875_data *data = i2c_get_clientdata(client); +@@ -152,7 +153,6 @@ + .attr = { + .name = "eeprom", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = USER_EEPROM_SIZE, + .read = max6875_read, +diff -Nurb linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c +--- linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/ieee1394/ieee1394_core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1133,8 +1134,6 @@ + struct list_head tmp; + int may_schedule; + +- current->flags |= PF_NOFREEZE; +- + while (!kthread_should_stop()) { + + INIT_LIST_HEAD(&tmp); +diff -Nurb linux-2.6.22-570/drivers/ieee1394/nodemgr.c linux-2.6.22-591/drivers/ieee1394/nodemgr.c +--- linux-2.6.22-570/drivers/ieee1394/nodemgr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/ieee1394/nodemgr.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1669,6 +1669,7 @@ + unsigned int g, generation = 0; + int i, reset_cycles = 0; + ++ set_freezable(); + /* Setup our device-model entries */ + nodemgr_create_host_dev_files(host); + +diff -Nurb linux-2.6.22-570/drivers/ieee1394/sbp2.c linux-2.6.22-591/drivers/ieee1394/sbp2.c +--- linux-2.6.22-570/drivers/ieee1394/sbp2.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/ieee1394/sbp2.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1505,69 +1505,6 @@ + } + } + +-static void sbp2_prep_command_orb_no_sg(struct sbp2_command_orb *orb, +- struct sbp2_fwhost_info *hi, +- struct sbp2_command_info *cmd, +- struct scatterlist *sgpnt, +- u32 orb_direction, +- unsigned int scsi_request_bufflen, +- void *scsi_request_buffer, +- enum dma_data_direction dma_dir) 
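/* An illustrative aside rather than part of the patch: the function
 * being deleted here existed only to handle SCSI commands that arrived
 * without a scatterlist attached. The surviving code relies on the
 * midlayer always supplying one, read through the accessors this hunk
 * converts to, roughly:
 *
 *	struct scatterlist *sg = scsi_sglist(SCpnt);
 *	unsigned int nseg = scsi_sg_count(SCpnt);
 *	unsigned int len = scsi_bufflen(SCpnt);
 *
 * where SCpnt is assumed to be a fully initialized struct scsi_cmnd;
 * the retained sbp2_prep_command_orb_sg() path consumes exactly these
 * values.
 */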
+-{ +- cmd->dma_dir = dma_dir; +- cmd->dma_size = scsi_request_bufflen; +- cmd->dma_type = CMD_DMA_SINGLE; +- cmd->cmd_dma = dma_map_single(hi->host->device.parent, +- scsi_request_buffer, +- cmd->dma_size, cmd->dma_dir); +- orb->data_descriptor_hi = ORB_SET_NODE_ID(hi->host->node_id); +- orb->misc |= ORB_SET_DIRECTION(orb_direction); +- +- /* handle case where we get a command w/o s/g enabled +- * (but check for transfers larger than 64K) */ +- if (scsi_request_bufflen <= SBP2_MAX_SG_ELEMENT_LENGTH) { +- +- orb->data_descriptor_lo = cmd->cmd_dma; +- orb->misc |= ORB_SET_DATA_SIZE(scsi_request_bufflen); +- +- } else { +- /* The buffer is too large. Turn this into page tables. */ +- +- struct sbp2_unrestricted_page_table *sg_element = +- &cmd->scatter_gather_element[0]; +- u32 sg_count, sg_len; +- dma_addr_t sg_addr; +- +- orb->data_descriptor_lo = cmd->sge_dma; +- orb->misc |= ORB_SET_PAGE_TABLE_PRESENT(0x1); +- +- /* fill out our SBP-2 page tables; split up the large buffer */ +- sg_count = 0; +- sg_len = scsi_request_bufflen; +- sg_addr = cmd->cmd_dma; +- while (sg_len) { +- sg_element[sg_count].segment_base_lo = sg_addr; +- if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) { +- sg_element[sg_count].length_segment_base_hi = +- PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH); +- sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH; +- sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH; +- } else { +- sg_element[sg_count].length_segment_base_hi = +- PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len); +- sg_len = 0; +- } +- sg_count++; +- } +- +- orb->misc |= ORB_SET_DATA_SIZE(sg_count); +- +- sbp2util_cpu_to_be32_buffer(sg_element, +- (sizeof(struct sbp2_unrestricted_page_table)) * +- sg_count); +- } +-} +- + static void sbp2_create_command_orb(struct sbp2_lu *lu, + struct sbp2_command_info *cmd, + unchar *scsi_cmd, +@@ -1611,13 +1548,9 @@ + orb->data_descriptor_hi = 0x0; + orb->data_descriptor_lo = 0x0; + orb->misc |= ORB_SET_DIRECTION(1); +- } else if (scsi_use_sg) ++ } else + sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sgpnt, + orb_direction, dma_dir); +- else +- sbp2_prep_command_orb_no_sg(orb, hi, cmd, sgpnt, orb_direction, +- scsi_request_bufflen, +- scsi_request_buffer, dma_dir); + + sbp2util_cpu_to_be32_buffer(orb, sizeof(*orb)); + +@@ -1706,15 +1639,15 @@ + void (*done)(struct scsi_cmnd *)) + { + unchar *scsi_cmd = (unchar *)SCpnt->cmnd; +- unsigned int request_bufflen = SCpnt->request_bufflen; ++ unsigned int request_bufflen = scsi_bufflen(SCpnt); + struct sbp2_command_info *cmd; + + cmd = sbp2util_allocate_command_orb(lu, SCpnt, done); + if (!cmd) + return -EIO; + +- sbp2_create_command_orb(lu, cmd, scsi_cmd, SCpnt->use_sg, +- request_bufflen, SCpnt->request_buffer, ++ sbp2_create_command_orb(lu, cmd, scsi_cmd, scsi_sg_count(SCpnt), ++ request_bufflen, scsi_sglist(SCpnt), + SCpnt->sc_data_direction); + sbp2_link_orb_command(lu, cmd); + +diff -Nurb linux-2.6.22-570/drivers/infiniband/core/addr.c linux-2.6.22-591/drivers/infiniband/core/addr.c +--- linux-2.6.22-570/drivers/infiniband/core/addr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/core/addr.c 2007-12-21 15:36:14.000000000 -0500 +@@ -110,7 +110,7 @@ + __be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr; + int ret; + +- dev = ip_dev_find(ip); ++ dev = ip_dev_find(&init_net, ip); + if (!dev) + return -EADDRNOTAVAIL; + +@@ -157,6 +157,7 @@ + u32 dst_ip = dst_in->sin_addr.s_addr; + + memset(&fl, 0, sizeof fl); ++ fl.fl_net = &init_net; + fl.nl_u.ip4_u.daddr = dst_ip; + if (ip_route_output_key(&rt, &fl)) + return; +@@ 
-178,6 +179,7 @@ + int ret; + + memset(&fl, 0, sizeof fl); ++ fl.fl_net = &init_net; + fl.nl_u.ip4_u.daddr = dst_ip; + fl.nl_u.ip4_u.saddr = src_ip; + ret = ip_route_output_key(&rt, &fl); +@@ -262,7 +264,7 @@ + __be32 dst_ip = dst_in->sin_addr.s_addr; + int ret; + +- dev = ip_dev_find(dst_ip); ++ dev = ip_dev_find(&init_net, dst_ip); + if (!dev) + return -EADDRNOTAVAIL; + +diff -Nurb linux-2.6.22-570/drivers/infiniband/core/cma.c linux-2.6.22-591/drivers/infiniband/core/cma.c +--- linux-2.6.22-570/drivers/infiniband/core/cma.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/core/cma.c 2007-12-21 15:36:14.000000000 -0500 +@@ -1267,7 +1267,7 @@ + atomic_inc(&conn_id->dev_remove); + conn_id->state = CMA_CONNECT; + +- dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr); ++ dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr); + if (!dev) { + ret = -EADDRNOTAVAIL; + cma_enable_remove(conn_id); +@@ -1880,18 +1880,18 @@ + if (ret) + goto err1; + +- if (port > sysctl_local_port_range[1]) { +- if (next_port != sysctl_local_port_range[0]) { ++ if (port > init_net.sysctl_local_port_range[1]) { ++ if (next_port != init_net.sysctl_local_port_range[0]) { + idr_remove(ps, port); +- next_port = sysctl_local_port_range[0]; ++ next_port = init_net.sysctl_local_port_range[0]; + goto retry; + } + ret = -EADDRNOTAVAIL; + goto err2; + } + +- if (port == sysctl_local_port_range[1]) +- next_port = sysctl_local_port_range[0]; ++ if (port == init_net.sysctl_local_port_range[1]) ++ next_port = init_net.sysctl_local_port_range[0]; + else + next_port = port + 1; + +@@ -2774,8 +2774,9 @@ + + get_random_bytes(&next_port, sizeof next_port); + next_port = ((unsigned int) next_port % +- (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + +- sysctl_local_port_range[0]; ++ (init_net.sysctl_local_port_range[1] - ++ init_net.sysctl_local_port_range[0])) + ++ init_net.sysctl_local_port_range[0]; + cma_wq = create_singlethread_workqueue("rdma_cm"); + if (!cma_wq) + return -ENOMEM; +diff -Nurb linux-2.6.22-570/drivers/infiniband/core/sysfs.c linux-2.6.22-591/drivers/infiniband/core/sysfs.c +--- linux-2.6.22-570/drivers/infiniband/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/core/sysfs.c 2007-12-21 15:36:11.000000000 -0500 +@@ -479,7 +479,6 @@ + + element->attr.attr.name = element->name; + element->attr.attr.mode = S_IRUGO; +- element->attr.attr.owner = THIS_MODULE; + element->attr.show = show; + element->index = i; + +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c +--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.c 2007-12-21 15:36:11.000000000 -0500 +@@ -134,19 +134,9 @@ + { + struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; +- struct scsi_cmnd *sc = ctask->sc; + + iser_ctask->command_sent = 0; + iser_ctask->iser_conn = iser_conn; +- +- if (sc->sc_data_direction == DMA_TO_DEVICE) { +- BUG_ON(ctask->total_length == 0); +- +- debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", +- ctask->itt, ctask->total_length, ctask->imm_count, +- ctask->unsol_count); +- } +- + iser_ctask_rdma_init(iser_ctask); + } + +@@ -219,6 +209,14 @@ + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; + int error = 0; + ++ if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) { ++ 
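/* A DMA_TO_DEVICE command must carry data, so a zero-length buffer
 * here is a driver bug; the check and the debug_scsi() trace moved to
 * this spot from the command-init path above, with scsi_bufflen()
 * replacing the removed ctask->total_length field. */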
BUG_ON(scsi_bufflen(ctask->sc) == 0); ++ ++ debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n", ++ ctask->itt, scsi_bufflen(ctask->sc), ++ ctask->imm_count, ctask->unsol_count); ++ } ++ + debug_scsi("ctask deq [cid %d itt 0x%x]\n", + conn->id, ctask->itt); + +@@ -375,6 +373,7 @@ + static struct iscsi_cls_session * + iscsi_iser_session_create(struct iscsi_transport *iscsit, + struct scsi_transport_template *scsit, ++ uint16_t cmds_max, uint16_t qdepth, + uint32_t initial_cmdsn, uint32_t *hostno) + { + struct iscsi_cls_session *cls_session; +@@ -386,7 +385,13 @@ + struct iscsi_iser_cmd_task *iser_ctask; + struct iser_desc *desc; + ++ /* ++ * we do not support setting can_queue cmd_per_lun from userspace yet ++ * because we preallocate so many resources ++ */ + cls_session = iscsi_session_setup(iscsit, scsit, ++ ISCSI_DEF_XMIT_CMDS_MAX, ++ ISCSI_MAX_CMD_PER_LUN, + sizeof(struct iscsi_iser_cmd_task), + sizeof(struct iser_desc), + initial_cmdsn, &hn); +@@ -545,7 +550,7 @@ + static struct scsi_host_template iscsi_iser_sht = { + .name = "iSCSI Initiator over iSER, v." DRV_VER, + .queuecommand = iscsi_queuecommand, +- .can_queue = ISCSI_XMIT_CMDS_MAX - 1, ++ .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, + .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, + .max_sectors = 1024, + .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, +@@ -574,8 +579,12 @@ + ISCSI_EXP_STATSN | + ISCSI_PERSISTENT_PORT | + ISCSI_PERSISTENT_ADDRESS | +- ISCSI_TARGET_NAME | +- ISCSI_TPGT, ++ ISCSI_TARGET_NAME | ISCSI_TPGT | ++ ISCSI_USERNAME | ISCSI_PASSWORD | ++ ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN, ++ .host_param_mask = ISCSI_HOST_HWADDRESS | ++ ISCSI_HOST_NETDEV_NAME | ++ ISCSI_HOST_INITIATOR_NAME, + .host_template = &iscsi_iser_sht, + .conndata_size = sizeof(struct iscsi_conn), + .max_lun = ISCSI_ISER_MAX_LUN, +@@ -592,6 +601,9 @@ + .get_session_param = iscsi_session_get_param, + .start_conn = iscsi_iser_conn_start, + .stop_conn = iscsi_conn_stop, ++ /* iscsi host params */ ++ .get_host_param = iscsi_host_get_param, ++ .set_host_param = iscsi_host_set_param, + /* IO */ + .send_pdu = iscsi_conn_send_pdu, + .get_stats = iscsi_iser_conn_get_stats, +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h +--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iscsi_iser.h 2007-12-21 15:36:11.000000000 -0500 +@@ -98,7 +98,7 @@ + #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * + * SCSI_TMFUNC(2), LOGOUT(1) */ + +-#define ISER_QP_MAX_RECV_DTOS (ISCSI_XMIT_CMDS_MAX + \ ++#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \ + ISER_MAX_RX_MISC_PDUS + \ + ISER_MAX_TX_MISC_PDUS) + +@@ -110,7 +110,7 @@ + + #define ISER_INFLIGHT_DATAOUTS 8 + +-#define ISER_QP_MAX_REQ_DTOS (ISCSI_XMIT_CMDS_MAX * \ ++#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \ + (1 + ISER_INFLIGHT_DATAOUTS) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c +--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_initiator.c 2007-12-21 15:36:11.000000000 -0500 +@@ -351,18 +351,12 @@ + else + data_buf = &iser_ctask->data[ISER_DIR_OUT]; + +- if (sc->use_sg) { /* using a scatter list */ +- data_buf->buf = sc->request_buffer; +- data_buf->size = sc->use_sg; +- } 
else if (sc->request_bufflen) { +- /* using a single buffer - convert it into one entry SG */ +- sg_init_one(&data_buf->sg_single, +- sc->request_buffer, sc->request_bufflen); +- data_buf->buf = &data_buf->sg_single; +- data_buf->size = 1; ++ if (scsi_sg_count(sc)) { /* using a scatter list */ ++ data_buf->buf = scsi_sglist(sc); ++ data_buf->size = scsi_sg_count(sc); + } + +- data_buf->data_len = sc->request_bufflen; ++ data_buf->data_len = scsi_bufflen(sc); + + if (hdr->flags & ISCSI_FLAG_CMD_READ) { + err = iser_prepare_read_cmd(ctask, edtl); +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c +--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/iser/iser_verbs.c 2007-12-21 15:36:11.000000000 -0500 +@@ -155,8 +155,8 @@ + params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; + /* make the pool size twice the max number of SCSI commands * + * the ML is expected to queue, watermark for unmap at 50% */ +- params.pool_size = ISCSI_XMIT_CMDS_MAX * 2; +- params.dirty_watermark = ISCSI_XMIT_CMDS_MAX; ++ params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2; ++ params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX; + params.cache = 0; + params.flush_function = NULL; + params.access = (IB_ACCESS_LOCAL_WRITE | +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c +--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.c 2007-12-21 15:36:11.000000000 -0500 +@@ -455,10 +455,7 @@ + struct srp_target_port *target, + struct srp_request *req) + { +- struct scatterlist *scat; +- int nents; +- +- if (!scmnd->request_buffer || ++ if (!scsi_sglist(scmnd) || + (scmnd->sc_data_direction != DMA_TO_DEVICE && + scmnd->sc_data_direction != DMA_FROM_DEVICE)) + return; +@@ -468,20 +465,8 @@ + req->fmr = NULL; + } + +- /* +- * This handling of non-SG commands can be killed when the +- * SCSI midlayer no longer generates non-SG commands. 
+- */ +- if (likely(scmnd->use_sg)) { +- nents = scmnd->use_sg; +- scat = scmnd->request_buffer; +- } else { +- nents = 1; +- scat = &req->fake_sg; +- } +- +- ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents, +- scmnd->sc_data_direction); ++ ib_dma_unmap_sg(target->srp_host->dev->dev, scsi_sglist(scmnd), ++ scsi_sg_count(scmnd), scmnd->sc_data_direction); + } + + static void srp_remove_req(struct srp_target_port *target, struct srp_request *req) +@@ -595,6 +580,7 @@ + int ret; + struct srp_device *dev = target->srp_host->dev; + struct ib_device *ibdev = dev->dev; ++ struct scatterlist *sg; + + if (!dev->fmr_pool) + return -ENODEV; +@@ -604,16 +590,16 @@ + return -EINVAL; + + len = page_cnt = 0; +- for (i = 0; i < sg_cnt; ++i) { +- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); ++ scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { ++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + +- if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) { ++ if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) { + if (i > 0) + return -EINVAL; + else + ++page_cnt; + } +- if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) & ++ if ((ib_sg_dma_address(ibdev, sg) + dma_len) & + ~dev->fmr_page_mask) { + if (i < sg_cnt - 1) + return -EINVAL; +@@ -633,12 +619,12 @@ + return -ENOMEM; + + page_cnt = 0; +- for (i = 0; i < sg_cnt; ++i) { +- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); ++ scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) { ++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + + for (j = 0; j < dma_len; j += dev->fmr_page_size) + dma_pages[page_cnt++] = +- (ib_sg_dma_address(ibdev, &scat[i]) & ++ (ib_sg_dma_address(ibdev, sg) & + dev->fmr_page_mask) + j; + } + +@@ -673,7 +659,7 @@ + struct srp_device *dev; + struct ib_device *ibdev; + +- if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE) ++ if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE) + return sizeof (struct srp_cmd); + + if (scmnd->sc_data_direction != DMA_FROM_DEVICE && +@@ -683,18 +669,8 @@ + return -EINVAL; + } + +- /* +- * This handling of non-SG commands can be killed when the +- * SCSI midlayer no longer generates non-SG commands. +- */ +- if (likely(scmnd->use_sg)) { +- nents = scmnd->use_sg; +- scat = scmnd->request_buffer; +- } else { +- nents = 1; +- scat = &req->fake_sg; +- sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen); +- } ++ nents = scsi_sg_count(scmnd); ++ scat = scsi_sglist(scmnd); + + dev = target->srp_host->dev; + ibdev = dev->dev; +@@ -724,6 +700,7 @@ + * descriptor. 
+ */ + struct srp_indirect_buf *buf = (void *) cmd->add_data; ++ struct scatterlist *sg; + u32 datalen = 0; + int i; + +@@ -732,11 +709,11 @@ + sizeof (struct srp_indirect_buf) + + count * sizeof (struct srp_direct_buf); + +- for (i = 0; i < count; ++i) { +- unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]); ++ scsi_for_each_sg(scmnd, sg, count, i) { ++ unsigned int dma_len = ib_sg_dma_len(ibdev, sg); + + buf->desc_list[i].va = +- cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i])); ++ cpu_to_be64(ib_sg_dma_address(ibdev, sg)); + buf->desc_list[i].key = + cpu_to_be32(dev->mr->rkey); + buf->desc_list[i].len = cpu_to_be32(dma_len); +@@ -802,9 +779,9 @@ + } + + if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) +- scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt); ++ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); + else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) +- scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt); ++ scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + + if (!req->tsk_mgmt) { + scmnd->host_scribble = (void *) -1L; +diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h +--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/infiniband/ulp/srp/ib_srp.h 2007-12-21 15:36:11.000000000 -0500 +@@ -106,11 +106,6 @@ + struct srp_iu *cmd; + struct srp_iu *tsk_mgmt; + struct ib_pool_fmr *fmr; +- /* +- * Fake scatterlist used when scmnd->use_sg==0. Can be killed +- * when the SCSI midlayer no longer generates non-SG commands. +- */ +- struct scatterlist fake_sg; + struct completion done; + short index; + u8 cmd_done; +diff -Nurb linux-2.6.22-570/drivers/input/gameport/gameport.c linux-2.6.22-591/drivers/input/gameport/gameport.c +--- linux-2.6.22-570/drivers/input/gameport/gameport.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/input/gameport/gameport.c 2007-12-21 15:36:11.000000000 -0500 +@@ -445,6 +445,7 @@ + + static int gameport_thread(void *nothing) + { ++ set_freezable(); + do { + gameport_handle_event(); + wait_event_interruptible(gameport_wait, +diff -Nurb linux-2.6.22-570/drivers/input/mouse/psmouse.h linux-2.6.22-591/drivers/input/mouse/psmouse.h +--- linux-2.6.22-570/drivers/input/mouse/psmouse.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/input/mouse/psmouse.h 2007-12-21 15:36:11.000000000 -0500 +@@ -118,7 +118,6 @@ + .attr = { \ + .name = __stringify(_name), \ + .mode = _mode, \ +- .owner = THIS_MODULE, \ + }, \ + .show = psmouse_attr_show_helper, \ + .store = psmouse_attr_set_helper, \ +diff -Nurb linux-2.6.22-570/drivers/input/serio/serio.c linux-2.6.22-591/drivers/input/serio/serio.c +--- linux-2.6.22-570/drivers/input/serio/serio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/input/serio/serio.c 2007-12-21 15:36:11.000000000 -0500 +@@ -384,6 +384,7 @@ + + static int serio_thread(void *nothing) + { ++ set_freezable(); + do { + serio_handle_event(); + wait_event_interruptible(serio_wait, +diff -Nurb linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c +--- linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/input/touchscreen/ucb1400_ts.c 2007-12-21 15:36:11.000000000 -0500 +@@ -292,6 +292,7 @@ + + sched_setscheduler(tsk, SCHED_FIFO, ¶m); + ++ set_freezable(); + while (!kthread_should_stop()) { + unsigned 
int x, y, p; + long timeout; +diff -Nurb linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c +--- linux-2.6.22-570/drivers/isdn/divert/divert_procfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/isdn/divert/divert_procfs.c 2007-12-21 15:36:14.000000000 -0500 +@@ -17,6 +17,7 @@ + #include + #endif + #include ++#include + #include "isdn_divert.h" + + +@@ -284,12 +285,12 @@ + init_waitqueue_head(&rd_queue); + + #ifdef CONFIG_PROC_FS +- isdn_proc_entry = proc_mkdir("net/isdn", NULL); ++ isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net); + if (!isdn_proc_entry) + return (-1); + isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry); + if (!isdn_divert_entry) { +- remove_proc_entry("net/isdn", NULL); ++ remove_proc_entry("isdn", init_net.proc_net); + return (-1); + } + isdn_divert_entry->proc_fops = &isdn_fops; +@@ -309,7 +310,7 @@ + + #ifdef CONFIG_PROC_FS + remove_proc_entry("divert", isdn_proc_entry); +- remove_proc_entry("net/isdn", NULL); ++ remove_proc_entry("isdn", init_net.proc_net); + #endif /* CONFIG_PROC_FS */ + + return (0); +diff -Nurb linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c +--- linux-2.6.22-570/drivers/isdn/hardware/eicon/diva_didd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/isdn/hardware/eicon/diva_didd.c 2007-12-21 15:36:14.000000000 -0500 +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + + #include "platform.h" + #include "di_defs.h" +@@ -86,7 +87,7 @@ + + static int DIVA_INIT_FUNCTION create_proc(void) + { +- proc_net_eicon = proc_mkdir("net/eicon", NULL); ++ proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); + + if (proc_net_eicon) { + if ((proc_didd = +@@ -102,7 +103,7 @@ + static void remove_proc(void) + { + remove_proc_entry(DRIVERLNAME, proc_net_eicon); +- remove_proc_entry("net/eicon", NULL); ++ remove_proc_entry("eicon", init_net.proc_net); + } + + static int DIVA_INIT_FUNCTION divadidd_init(void) +diff -Nurb linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c +--- linux-2.6.22-570/drivers/isdn/hysdn/hysdn_procconf.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/isdn/hysdn/hysdn_procconf.c 2007-12-21 15:36:14.000000000 -0500 +@@ -392,7 +392,7 @@ + hysdn_card *card; + unsigned char conf_name[20]; + +- hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net); ++ hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net); + if (!hysdn_proc_entry) { + printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n"); + return (-1); +@@ -437,5 +437,5 @@ + card = card->next; /* point to next card */ + } + +- remove_proc_entry(PROC_SUBDIR_NAME, proc_net); ++ remove_proc_entry(PROC_SUBDIR_NAME, init_net.proc_net); + } +diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_adt746x.c linux-2.6.22-591/drivers/macintosh/therm_adt746x.c +--- linux-2.6.22-570/drivers/macintosh/therm_adt746x.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/macintosh/therm_adt746x.c 2007-12-21 15:36:11.000000000 -0500 +@@ -335,6 +335,7 @@ + { + struct thermostat* th = arg; + ++ set_freezable(); + while(!kthread_should_stop()) { + try_to_freeze(); + msleep_interruptible(2000); +diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_pm72.c linux-2.6.22-591/drivers/macintosh/therm_pm72.c +--- linux-2.6.22-570/drivers/macintosh/therm_pm72.c 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-591/drivers/macintosh/therm_pm72.c 2007-12-21 15:36:11.000000000 -0500 +@@ -1770,7 +1770,8 @@ + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + +- return call_usermodehelper(critical_overtemp_path, argv, envp, 0); ++ return call_usermodehelper(critical_overtemp_path, ++ argv, envp, UMH_WAIT_EXEC); + } + + +diff -Nurb linux-2.6.22-570/drivers/macintosh/windfarm_core.c linux-2.6.22-591/drivers/macintosh/windfarm_core.c +--- linux-2.6.22-570/drivers/macintosh/windfarm_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/macintosh/windfarm_core.c 2007-12-21 15:36:11.000000000 -0500 +@@ -80,7 +80,8 @@ + "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + NULL }; + +- return call_usermodehelper(critical_overtemp_path, argv, envp, 0); ++ return call_usermodehelper(critical_overtemp_path, ++ argv, envp, UMH_WAIT_EXEC); + } + EXPORT_SYMBOL_GPL(wf_critical_overtemp); + +@@ -92,6 +93,7 @@ + + DBG("wf: thread started\n"); + ++ set_freezable(); + while(!kthread_should_stop()) { + if (time_after_eq(jiffies, next)) { + wf_notify(WF_EVENT_TICK, NULL); +@@ -212,7 +214,6 @@ + list_add(&new_ct->link, &wf_controls); + + new_ct->attr.attr.name = new_ct->name; +- new_ct->attr.attr.owner = THIS_MODULE; + new_ct->attr.attr.mode = 0644; + new_ct->attr.show = wf_show_control; + new_ct->attr.store = wf_store_control; +@@ -325,7 +326,6 @@ + list_add(&new_sr->link, &wf_sensors); + + new_sr->attr.attr.name = new_sr->name; +- new_sr->attr.attr.owner = THIS_MODULE; + new_sr->attr.attr.mode = 0444; + new_sr->attr.show = wf_show_sensor; + new_sr->attr.store = NULL; +diff -Nurb linux-2.6.22-570/drivers/md/Kconfig linux-2.6.22-591/drivers/md/Kconfig +--- linux-2.6.22-570/drivers/md/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/md/Kconfig 2007-12-21 15:36:11.000000000 -0500 +@@ -109,6 +109,8 @@ + config MD_RAID456 + tristate "RAID-4/RAID-5/RAID-6 mode" + depends on BLK_DEV_MD ++ select ASYNC_MEMCPY ++ select ASYNC_XOR + ---help--- + A RAID-5 set of N drives with a capacity of C MB per drive provides + the capacity of C * (N - 1) MB, and protects against a failure +@@ -271,6 +273,11 @@ + + If unsure, say N. + ++config DM_NETLINK ++ bool "DM netlink events (EXPERIMENTAL)" ++ depends on BLK_DEV_DM && EXPERIMENTAL ++ ---help--- ++ Generate netlink events for DM events. + endmenu + + endif +diff -Nurb linux-2.6.22-570/drivers/md/Makefile linux-2.6.22-591/drivers/md/Makefile +--- linux-2.6.22-570/drivers/md/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/md/Makefile 2007-12-21 15:36:11.000000000 -0500 +@@ -17,7 +17,7 @@ + hostprogs-y := mktables + + # Note: link order is important. All raid personalities +-# and xor.o must come before md.o, as they each initialise ++# must come before md.o, as they each initialise + # themselves, and md.o may use the personalities when it + # auto-initialised. 
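The two select lines added to MD_RAID456 above exist because the reworked raid5.c later in this patch hands its copy and XOR work to the async_tx API rather than linking the old synchronous xor.o. A minimal sketch of how a client chains those helpers, using only the async_memcpy()/async_xor() signatures that appear in the raid5.c hunks below; the function name, page arguments, and flag choices are illustrative assumptions, not code from the patch:

	#include <linux/async_tx.h>

	/* Copy src into dest, then XOR dest into parity. Handing the first
	 * descriptor to the second call as depend_tx (with ASYNC_TX_DEP_ACK)
	 * orders the two operations on the engine without blocking the CPU;
	 * the trailing NULL, NULL means no completion callback is wanted. */
	static void sketch_copy_then_xor(struct page *dest, struct page *src,
					 struct page *parity)
	{
		struct page *xor_srcs[1] = { dest };
		struct dma_async_tx_descriptor *tx;

		tx = async_memcpy(dest, src, 0, 0, PAGE_SIZE, 0,
				  NULL, NULL, NULL);
		tx = async_xor(parity, xor_srcs, 0, 1, PAGE_SIZE,
			       ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
			       NULL, NULL);
	}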
+ +@@ -25,7 +25,7 @@ + obj-$(CONFIG_MD_RAID0) += raid0.o + obj-$(CONFIG_MD_RAID1) += raid1.o + obj-$(CONFIG_MD_RAID10) += raid10.o +-obj-$(CONFIG_MD_RAID456) += raid456.o xor.o ++obj-$(CONFIG_MD_RAID456) += raid456.o + obj-$(CONFIG_MD_MULTIPATH) += multipath.o + obj-$(CONFIG_MD_FAULTY) += faulty.o + obj-$(CONFIG_BLK_DEV_MD) += md-mod.o +@@ -46,6 +46,10 @@ + altivec_flags := -maltivec -mabi=altivec + endif + ++ifeq ($(CONFIG_DM_NETLINK),y) ++dm-mod-objs += dm-netlink.o ++endif ++ + targets += raid6int1.c + $(obj)/raid6int1.c: UNROLL := 1 + $(obj)/raid6int1.c: $(src)/raid6int.uc $(src)/unroll.pl FORCE +diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.c linux-2.6.22-591/drivers/md/dm-netlink.c +--- linux-2.6.22-570/drivers/md/dm-netlink.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/md/dm-netlink.c 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,103 @@ ++/* ++ * Device Mapper Netlink Support (dm-netlink) ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Copyright IBM Corporation, 2005, 2006 ++ * Author: Mike Anderson ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "dm.h" ++#include "dm-netlink.h" ++ ++#define DM_MSG_PREFIX "netlink" ++ ++#define DM_EVENT_SKB_SIZE NLMSG_GOODSIZE ++ ++struct dm_event_cache { ++ struct kmem_cache *cache; ++ unsigned skb_size; ++}; ++ ++static struct dm_event_cache _dme_cache; ++ ++static int dme_cache_init(struct dm_event_cache *dc, unsigned skb_size) ++{ ++ dc->skb_size = skb_size; ++ ++ dc->cache = KMEM_CACHE(dm_event, 0); ++ if (!dc->cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void dme_cache_destroy(struct dm_event_cache *dc) ++{ ++ kmem_cache_destroy(dc->cache); ++} ++ ++static void dme_cache_event_put(struct dm_event *evt) ++{ ++ struct dm_event_cache *dc = evt->cdata; ++ ++ kmem_cache_free(dc->cache, evt); ++} ++ ++static struct dm_event *dme_cache_event_get(struct dm_event_cache *dc, ++ struct mapped_device *md) ++{ ++ struct dm_event *evt; ++ ++ evt = kmem_cache_alloc(dc->cache, GFP_ATOMIC); ++ if (!evt) ++ return NULL; ++ ++ INIT_LIST_HEAD(&evt->elist); ++ evt->cdata = dc; ++ evt->md = md; ++ evt->skb = alloc_skb(dc->skb_size, GFP_ATOMIC); ++ if (!evt->skb) ++ goto cache_err; ++ ++ return evt; ++ ++cache_err: ++ dme_cache_event_put(evt); ++ return NULL; ++} ++ ++int __init dm_netlink_init(void) ++{ ++ int r; ++ ++ r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE); ++ if (!r) ++ DMINFO("version 1.0.0 loaded"); ++ ++ return r; ++} ++ ++void dm_netlink_exit(void) ++{ ++ dme_cache_destroy(&_dme_cache); ++} +diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.h linux-2.6.22-591/drivers/md/dm-netlink.h +--- linux-2.6.22-570/drivers/md/dm-netlink.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/md/dm-netlink.h 2007-12-21 15:36:11.000000000 -0500 +@@ -0,0 +1,50 @@ ++/* 
++ * Device Mapper Netlink Support ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Copyright IBM Corporation, 2005, 2006 ++ * Author: Mike Anderson ++ */ ++#ifndef DM_NETLINK_H ++#define DM_NETLINK_H ++ ++struct dm_event_cache; ++struct mapped_device; ++struct dm_event { ++ struct dm_event_cache *cdata; ++ struct mapped_device *md; ++ struct sk_buff *skb; ++ struct list_head elist; ++}; ++ ++#ifdef CONFIG_DM_NETLINK ++ ++int dm_netlink_init(void); ++void dm_netlink_exit(void); ++ ++#else /* CONFIG_DM_NETLINK */ ++ ++static inline int __init dm_netlink_init(void) ++{ ++ return 0; ++} ++static inline void dm_netlink_exit(void) ++{ ++} ++ ++#endif /* CONFIG_DM_NETLINK */ ++ ++#endif /* DM_NETLINK_H */ +diff -Nurb linux-2.6.22-570/drivers/md/dm.c linux-2.6.22-591/drivers/md/dm.c +--- linux-2.6.22-570/drivers/md/dm.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/drivers/md/dm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -7,6 +7,7 @@ + + #include "dm.h" + #include "dm-bio-list.h" ++#include "dm-netlink.h" + + #include + #include +@@ -180,6 +181,7 @@ + dm_linear_init, + dm_stripe_init, + dm_interface_init, ++ dm_netlink_init, + }; + + void (*_exits[])(void) = { +@@ -188,6 +190,7 @@ + dm_linear_exit, + dm_stripe_exit, + dm_interface_exit, ++ dm_netlink_exit, + }; + + static int __init dm_init(void) +diff -Nurb linux-2.6.22-570/drivers/md/md.c linux-2.6.22-591/drivers/md/md.c +--- linux-2.6.22-570/drivers/md/md.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/md/md.c 2007-12-21 15:36:12.000000000 -0500 +@@ -4642,7 +4642,6 @@ + * many dirty RAID5 blocks. + */ + +- current->flags |= PF_NOFREEZE; + allow_signal(SIGKILL); + while (!kthread_should_stop()) { + +@@ -5814,7 +5813,7 @@ + } + } + +-module_init(md_init) ++subsys_initcall(md_init); + module_exit(md_exit) + + static int get_ro(char *buffer, struct kernel_param *kp) +diff -Nurb linux-2.6.22-570/drivers/md/raid5.c linux-2.6.22-591/drivers/md/raid5.c +--- linux-2.6.22-570/drivers/md/raid5.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/md/raid5.c 2007-12-21 15:36:12.000000000 -0500 +@@ -52,6 +52,7 @@ + #include "raid6.h" + + #include ++#include + + /* + * Stripe cache +@@ -80,7 +81,6 @@ + /* + * The following can be used to debug the driver + */ +-#define RAID5_DEBUG 0 + #define RAID5_PARANOIA 1 + #if RAID5_PARANOIA && defined(CONFIG_SMP) + # define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock) +@@ -88,8 +88,7 @@ + # define CHECK_DEVLOCK() + #endif + +-#define PRINTK(x...) 
((void)(RAID5_DEBUG && printk(x))) +-#if RAID5_DEBUG ++#ifdef DEBUG + #define inline + #define __inline__ + #endif +@@ -125,6 +124,7 @@ + } + md_wakeup_thread(conf->mddev->thread); + } else { ++ BUG_ON(sh->ops.pending); + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + atomic_dec(&conf->preread_active_stripes); + if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) +@@ -152,7 +152,8 @@ + + static inline void remove_hash(struct stripe_head *sh) + { +- PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); ++ pr_debug("remove_hash(), stripe %llu\n", ++ (unsigned long long)sh->sector); + + hlist_del_init(&sh->hash); + } +@@ -161,7 +162,8 @@ + { + struct hlist_head *hp = stripe_hash(conf, sh->sector); + +- PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); ++ pr_debug("insert_hash(), stripe %llu\n", ++ (unsigned long long)sh->sector); + + CHECK_DEVLOCK(); + hlist_add_head(&sh->hash, hp); +@@ -224,9 +226,10 @@ + + BUG_ON(atomic_read(&sh->count) != 0); + BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); ++ BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); + + CHECK_DEVLOCK(); +- PRINTK("init_stripe called, stripe %llu\n", ++ pr_debug("init_stripe called, stripe %llu\n", + (unsigned long long)sh->sector); + + remove_hash(sh); +@@ -240,11 +243,11 @@ + for (i = sh->disks; i--; ) { + struct r5dev *dev = &sh->dev[i]; + +- if (dev->toread || dev->towrite || dev->written || ++ if (dev->toread || dev->read || dev->towrite || dev->written || + test_bit(R5_LOCKED, &dev->flags)) { +- printk("sector=%llx i=%d %p %p %p %d\n", ++ printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n", + (unsigned long long)sh->sector, i, dev->toread, +- dev->towrite, dev->written, ++ dev->read, dev->towrite, dev->written, + test_bit(R5_LOCKED, &dev->flags)); + BUG(); + } +@@ -260,11 +263,11 @@ + struct hlist_node *hn; + + CHECK_DEVLOCK(); +- PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); ++ pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); + hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) + if (sh->sector == sector && sh->disks == disks) + return sh; +- PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); ++ pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector); + return NULL; + } + +@@ -276,7 +279,7 @@ + { + struct stripe_head *sh; + +- PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector); ++ pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); + + spin_lock_irq(&conf->device_lock); + +@@ -324,179 +327,762 @@ + return sh; + } + +-static int grow_one_stripe(raid5_conf_t *conf) ++/* test_and_ack_op() ensures that we only dequeue an operation once */ ++#define test_and_ack_op(op, pend) \ ++do { \ ++ if (test_bit(op, &sh->ops.pending) && \ ++ !test_bit(op, &sh->ops.complete)) { \ ++ if (test_and_set_bit(op, &sh->ops.ack)) \ ++ clear_bit(op, &pend); \ ++ else \ ++ ack++; \ ++ } else \ ++ clear_bit(op, &pend); \ ++} while (0) ++ ++/* find new work to run, do not resubmit work that is already ++ * in flight ++ */ ++static unsigned long get_stripe_work(struct stripe_head *sh) ++{ ++ unsigned long pending; ++ int ack = 0; ++ ++ pending = sh->ops.pending; ++ ++ test_and_ack_op(STRIPE_OP_BIOFILL, pending); ++ test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending); ++ test_and_ack_op(STRIPE_OP_PREXOR, pending); ++ test_and_ack_op(STRIPE_OP_BIODRAIN, pending); ++ test_and_ack_op(STRIPE_OP_POSTXOR, pending); ++ test_and_ack_op(STRIPE_OP_CHECK, pending); ++ if 
(test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ ack++; ++ ++ sh->ops.count -= ack; ++ BUG_ON(sh->ops.count < 0); ++ ++ return pending; ++} ++ ++static int ++raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); ++static int ++raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error); ++ ++static void ops_run_io(struct stripe_head *sh) + { +- struct stripe_head *sh; +- sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); +- if (!sh) +- return 0; +- memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); +- sh->raid_conf = conf; +- spin_lock_init(&sh->lock); ++ raid5_conf_t *conf = sh->raid_conf; ++ int i, disks = sh->disks; + +- if (grow_buffers(sh, conf->raid_disks)) { +- shrink_buffers(sh, conf->raid_disks); +- kmem_cache_free(conf->slab_cache, sh); +- return 0; +- } +- sh->disks = conf->raid_disks; +- /* we just created an active stripe so... */ +- atomic_set(&sh->count, 1); +- atomic_inc(&conf->active_stripes); +- INIT_LIST_HEAD(&sh->lru); +- release_stripe(sh); +- return 1; +-} ++ might_sleep(); + +-static int grow_stripes(raid5_conf_t *conf, int num) +-{ +- struct kmem_cache *sc; +- int devs = conf->raid_disks; ++ for (i = disks; i--; ) { ++ int rw; ++ struct bio *bi; ++ mdk_rdev_t *rdev; ++ if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) ++ rw = WRITE; ++ else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) ++ rw = READ; ++ else ++ continue; + +- sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); +- sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); +- conf->active_name = 0; +- sc = kmem_cache_create(conf->cache_name[conf->active_name], +- sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), +- 0, 0, NULL, NULL); +- if (!sc) +- return 1; +- conf->slab_cache = sc; +- conf->pool_size = devs; +- while (num--) +- if (!grow_one_stripe(conf)) +- return 1; +- return 0; ++ bi = &sh->dev[i].req; ++ ++ bi->bi_rw = rw; ++ if (rw == WRITE) ++ bi->bi_end_io = raid5_end_write_request; ++ else ++ bi->bi_end_io = raid5_end_read_request; ++ ++ rcu_read_lock(); ++ rdev = rcu_dereference(conf->disks[i].rdev); ++ if (rdev && test_bit(Faulty, &rdev->flags)) ++ rdev = NULL; ++ if (rdev) ++ atomic_inc(&rdev->nr_pending); ++ rcu_read_unlock(); ++ ++ if (rdev) { ++ if (test_bit(STRIPE_SYNCING, &sh->state) || ++ test_bit(STRIPE_EXPAND_SOURCE, &sh->state) || ++ test_bit(STRIPE_EXPAND_READY, &sh->state)) ++ md_sync_acct(rdev->bdev, STRIPE_SECTORS); ++ ++ bi->bi_bdev = rdev->bdev; ++ pr_debug("%s: for %llu schedule op %ld on disc %d\n", ++ __FUNCTION__, (unsigned long long)sh->sector, ++ bi->bi_rw, i); ++ atomic_inc(&sh->count); ++ bi->bi_sector = sh->sector + rdev->data_offset; ++ bi->bi_flags = 1 << BIO_UPTODATE; ++ bi->bi_vcnt = 1; ++ bi->bi_max_vecs = 1; ++ bi->bi_idx = 0; ++ bi->bi_io_vec = &sh->dev[i].vec; ++ bi->bi_io_vec[0].bv_len = STRIPE_SIZE; ++ bi->bi_io_vec[0].bv_offset = 0; ++ bi->bi_size = STRIPE_SIZE; ++ bi->bi_next = NULL; ++ if (rw == WRITE && ++ test_bit(R5_ReWrite, &sh->dev[i].flags)) ++ atomic_add(STRIPE_SECTORS, ++ &rdev->corrected_errors); ++ generic_make_request(bi); ++ } else { ++ if (rw == WRITE) ++ set_bit(STRIPE_DEGRADED, &sh->state); ++ pr_debug("skip op %ld on disc %d for sector %llu\n", ++ bi->bi_rw, i, (unsigned long long)sh->sector); ++ clear_bit(R5_LOCKED, &sh->dev[i].flags); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ } ++ } + } + +-#ifdef CONFIG_MD_RAID5_RESHAPE +-static int resize_stripes(raid5_conf_t *conf, int newsize) ++static struct dma_async_tx_descriptor * 
++async_copy_data(int frombio, struct bio *bio, struct page *page, ++ sector_t sector, struct dma_async_tx_descriptor *tx) + { +- /* Make all the stripes able to hold 'newsize' devices. +- * New slots in each stripe get 'page' set to a new page. +- * +- * This happens in stages: +- * 1/ create a new kmem_cache and allocate the required number of +- * stripe_heads. +- * 2/ gather all the old stripe_heads and tranfer the pages across +- * to the new stripe_heads. This will have the side effect of +- * freezing the array as once all stripe_heads have been collected, +- * no IO will be possible. Old stripe heads are freed once their +- * pages have been transferred over, and the old kmem_cache is +- * freed when all stripes are done. +- * 3/ reallocate conf->disks to be suitable bigger. If this fails, +- * we simple return a failre status - no need to clean anything up. +- * 4/ allocate new pages for the new slots in the new stripe_heads. +- * If this fails, we don't bother trying the shrink the +- * stripe_heads down again, we just leave them as they are. +- * As each stripe_head is processed the new one is released into +- * active service. +- * +- * Once step2 is started, we cannot afford to wait for a write, +- * so we use GFP_NOIO allocations. +- */ +- struct stripe_head *osh, *nsh; +- LIST_HEAD(newstripes); +- struct disk_info *ndisks; +- int err = 0; +- struct kmem_cache *sc; ++ struct bio_vec *bvl; ++ struct page *bio_page; + int i; ++ int page_offset; + +- if (newsize <= conf->pool_size) +- return 0; /* never bother to shrink */ ++ if (bio->bi_sector >= sector) ++ page_offset = (signed)(bio->bi_sector - sector) * 512; ++ else ++ page_offset = (signed)(sector - bio->bi_sector) * -512; ++ bio_for_each_segment(bvl, bio, i) { ++ int len = bio_iovec_idx(bio, i)->bv_len; ++ int clen; ++ int b_offset = 0; + +- md_allow_write(conf->mddev); ++ if (page_offset < 0) { ++ b_offset = -page_offset; ++ page_offset += b_offset; ++ len -= b_offset; ++ } + +- /* Step 1 */ +- sc = kmem_cache_create(conf->cache_name[1-conf->active_name], +- sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), +- 0, 0, NULL, NULL); +- if (!sc) +- return -ENOMEM; ++ if (len > 0 && page_offset + len > STRIPE_SIZE) ++ clen = STRIPE_SIZE - page_offset; ++ else ++ clen = len; + +- for (i = conf->max_nr_stripes; i; i--) { +- nsh = kmem_cache_alloc(sc, GFP_KERNEL); +- if (!nsh) ++ if (clen > 0) { ++ b_offset += bio_iovec_idx(bio, i)->bv_offset; ++ bio_page = bio_iovec_idx(bio, i)->bv_page; ++ if (frombio) ++ tx = async_memcpy(page, bio_page, page_offset, ++ b_offset, clen, ++ ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC, ++ tx, NULL, NULL); ++ else ++ tx = async_memcpy(bio_page, page, b_offset, ++ page_offset, clen, ++ ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST, ++ tx, NULL, NULL); ++ } ++ if (clen < len) /* hit end of page */ + break; ++ page_offset += len; ++ } + +- memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); ++ return tx; ++} + +- nsh->raid_conf = conf; +- spin_lock_init(&nsh->lock); ++static void ops_complete_biofill(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ struct bio *return_bi = NULL, *bi; ++ raid5_conf_t *conf = sh->raid_conf; ++ int i, more_to_read = 0; + +- list_add(&nsh->lru, &newstripes); +- } +- if (i) { +- /* didn't get enough, give up */ +- while (!list_empty(&newstripes)) { +- nsh = list_entry(newstripes.next, struct stripe_head, lru); +- list_del(&nsh->lru); +- kmem_cache_free(sc, nsh); +- } +- kmem_cache_destroy(sc); +- return -ENOMEM; +- } +- /* Step 2 - Must use 
GFP_NOIO now.
+- * OK, we have enough stripes, start collecting inactive
+- * stripes and copying them over
++ pr_debug("%s: stripe %llu\n", __FUNCTION__,
++ (unsigned long long)sh->sector);
++
++ /* clear completed biofills */
++ for (i = sh->disks; i--; ) {
++ struct r5dev *dev = &sh->dev[i];
++ /* check if this stripe has new incoming reads */
++ if (dev->toread)
++ more_to_read++;
++
++ /* acknowledge completion of a biofill operation */
++ /* and check if we need to reply to a read request
+ */
++ if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) {
++ struct bio *rbi, *rbi2;
++ clear_bit(R5_Wantfill, &dev->flags);
++
++ /* The access to dev->read is outside of the
++ * spin_lock_irq(&conf->device_lock), but is protected
++ * by the STRIPE_OP_BIOFILL pending bit
++ */
++ BUG_ON(!dev->read);
++ rbi = dev->read;
++ dev->read = NULL;
++ while (rbi && rbi->bi_sector <
++ dev->sector + STRIPE_SECTORS) {
++ rbi2 = r5_next_bio(rbi, dev->sector);
+ spin_lock_irq(&conf->device_lock);
+- wait_event_lock_irq(conf->wait_for_stripe,
+- !list_empty(&conf->inactive_list),
+- conf->device_lock,
+- unplug_slaves(conf->mddev)
+- );
+- osh = get_free_stripe(conf);
++ if (--rbi->bi_phys_segments == 0) {
++ rbi->bi_next = return_bi;
++ return_bi = rbi;
++ }
+ spin_unlock_irq(&conf->device_lock);
+- atomic_set(&nsh->count, 1);
+- for(i=0; i<conf->pool_size; i++)
+- nsh->dev[i].page = osh->dev[i].page;
+- for( ; i<newsize; i++)
+- nsh->dev[i].page = NULL;
+- kmem_cache_free(conf->slab_cache, osh);
++ rbi = rbi2;
+ }
+- kmem_cache_destroy(conf->slab_cache);
++ }
++ }
++ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
++ clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+
+- /* Step 3.
+- * At this point, we are holding all the stripes so the array
+- * is completely stalled, so now is a good time to resize
+- * conf->disks.
+- */
+- ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+- if (ndisks) {
+- for (i=0; i<conf->raid_disks; i++)
+- ndisks[i] = conf->disks[i];
+- kfree(conf->disks);
+- conf->disks = ndisks;
+- } else
+- err = -ENOMEM;
++ bi = return_bi;
++ while (bi) {
++ int bytes = bi->bi_size;
+
+- /* Step 4, return new stripes to service */
+- while(!list_empty(&newstripes)) {
+- nsh = list_entry(newstripes.next, struct stripe_head, lru);
+- list_del_init(&nsh->lru);
+- for (i=conf->raid_disks; i < newsize; i++)
+- if (nsh->dev[i].page == NULL) {
+- struct page *p = alloc_page(GFP_NOIO);
+- nsh->dev[i].page = p;
+- if (!p)
+- err = -ENOMEM;
+- }
+- release_stripe(nsh);
++ return_bi = bi->bi_next;
++ bi->bi_next = NULL;
++ bi->bi_size = 0;
++ bi->bi_end_io(bi, bytes,
++ test_bit(BIO_UPTODATE, &bi->bi_flags) ?
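
ops_complete_biofill() above strings finished read bios onto a private return_bi list through bi_next, then completes each one with the 2.6.22-era bi_end_io(bio, bytes, error) convention. A user-space sketch of that drain pattern (this struct bio is a cut-down stand-in with only the fields the loop touches, not the kernel definition):

#include <stdio.h>
#include <stddef.h>

struct bio {			/* stand-in with only the fields used */
	struct bio *bi_next;
	int bi_size;
	void (*bi_end_io)(struct bio *, int bytes, int error);
};

static void end_io(struct bio *b, int bytes, int error)
{
	printf("completed %d bytes, error=%d\n", bytes, error);
}

/* Drain a return_bi chain: detach each bio, zero its size, complete it. */
static void drain(struct bio *return_bi)
{
	struct bio *bi = return_bi;
	while (bi) {
		int bytes = bi->bi_size;
		return_bi = bi->bi_next;
		bi->bi_next = NULL;
		bi->bi_size = 0;
		bi->bi_end_io(bi, bytes, 0);
		bi = return_bi;
	}
}

int main(void)
{
	struct bio b2 = { NULL, 512, end_io };
	struct bio b1 = { &b2, 4096, end_io };
	drain(&b1);
	return 0;
}
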
0 : -EIO); ++ bi = return_bi; + } +- /* critical section pass, GFP_NOIO no longer needed */ + +- conf->slab_cache = sc; +- conf->active_name = 1-conf->active_name; +- conf->pool_size = newsize; +- return err; ++ if (more_to_read) ++ set_bit(STRIPE_HANDLE, &sh->state); ++ release_stripe(sh); + } +-#endif + +-static int drop_one_stripe(raid5_conf_t *conf) ++static void ops_run_biofill(struct stripe_head *sh) + { +- struct stripe_head *sh; ++ struct dma_async_tx_descriptor *tx = NULL; ++ raid5_conf_t *conf = sh->raid_conf; ++ int i; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ for (i = sh->disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (test_bit(R5_Wantfill, &dev->flags)) { ++ struct bio *rbi; ++ spin_lock_irq(&conf->device_lock); ++ dev->read = rbi = dev->toread; ++ dev->toread = NULL; ++ spin_unlock_irq(&conf->device_lock); ++ while (rbi && rbi->bi_sector < ++ dev->sector + STRIPE_SECTORS) { ++ tx = async_copy_data(0, rbi, dev->page, ++ dev->sector, tx); ++ rbi = r5_next_bio(rbi, dev->sector); ++ } ++ } ++ } ++ ++ atomic_inc(&sh->count); ++ async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, ++ ops_complete_biofill, sh); ++} ++ ++static void ops_complete_compute5(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ int target = sh->ops.target; ++ struct r5dev *tgt = &sh->dev[target]; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ set_bit(R5_UPTODATE, &tgt->flags); ++ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); ++ clear_bit(R5_Wantcompute, &tgt->flags); ++ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ release_stripe(sh); ++} ++ ++static struct dma_async_tx_descriptor * ++ops_run_compute5(struct stripe_head *sh, unsigned long pending) ++{ ++ /* kernel stack size limits the total number of disks */ ++ int disks = sh->disks; ++ struct page *xor_srcs[disks]; ++ int target = sh->ops.target; ++ struct r5dev *tgt = &sh->dev[target]; ++ struct page *xor_dest = tgt->page; ++ int count = 0; ++ struct dma_async_tx_descriptor *tx; ++ int i; ++ ++ pr_debug("%s: stripe %llu block: %d\n", ++ __FUNCTION__, (unsigned long long)sh->sector, target); ++ BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); ++ ++ for (i = disks; i--; ) ++ if (i != target) ++ xor_srcs[count++] = sh->dev[i].page; ++ ++ atomic_inc(&sh->count); ++ ++ if (unlikely(count == 1)) ++ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, ++ 0, NULL, ops_complete_compute5, sh); ++ else ++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, ++ ASYNC_TX_XOR_ZERO_DST, NULL, ++ ops_complete_compute5, sh); ++ ++ /* ack now if postxor is not set to be run */ ++ if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) ++ async_tx_ack(tx); ++ ++ return tx; ++} ++ ++static void ops_complete_prexor(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); ++} ++ ++static struct dma_async_tx_descriptor * ++ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ++{ ++ /* kernel stack size limits the total number of disks */ ++ int disks = sh->disks; ++ struct page *xor_srcs[disks]; ++ int count = 0, pd_idx = sh->pd_idx, i; ++ ++ /* existing parity data subtracted */ ++ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long 
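
ops_run_compute5() rebuilds the target block as the XOR of every other block in the stripe, degenerating to a plain copy when only one source remains. The identity it relies on is that XOR-ing all surviving blocks, parity included, regenerates the missing one. A minimal synchronous illustration, with made-up buffer sizes and values, of what async_xor() computes here:

#include <stdio.h>
#include <string.h>

#define NDISKS 4
#define BLK 8

/* Rebuild block 'target' as the XOR of all other blocks. */
static void compute_block(unsigned char blk[NDISKS][BLK], int target)
{
	int i, j;
	memset(blk[target], 0, BLK);
	for (i = 0; i < NDISKS; i++) {
		if (i == target)
			continue;
		for (j = 0; j < BLK; j++)
			blk[target][j] ^= blk[i][j];
	}
}

int main(void)
{
	unsigned char blk[NDISKS][BLK] = {
		"data0..", "data1..", "data2..", ""
	};
	int j;
	/* disk 3 holds parity: XOR of disks 0-2 */
	compute_block(blk, 3);
	/* now "lose" disk 1 and regenerate it the same way */
	memset(blk[1], 0xff, BLK);
	compute_block(blk, 1);
	for (j = 0; j < BLK; j++)
		putchar(blk[1][j] ? blk[1][j] : '.');
	putchar('\n');
	return 0;
}
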
long)sh->sector); ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ /* Only process blocks that are known to be uptodate */ ++ if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags)) ++ xor_srcs[count++] = dev->page; ++ } ++ ++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, ++ ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, ++ ops_complete_prexor, sh); ++ ++ return tx; ++} ++ ++static struct dma_async_tx_descriptor * ++ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ++{ ++ int disks = sh->disks; ++ int pd_idx = sh->pd_idx, i; ++ ++ /* check if prexor is active which means only process blocks ++ * that are part of a read-modify-write (Wantprexor) ++ */ ++ int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ struct bio *chosen; ++ int towrite; ++ ++ towrite = 0; ++ if (prexor) { /* rmw */ ++ if (dev->towrite && ++ test_bit(R5_Wantprexor, &dev->flags)) ++ towrite = 1; ++ } else { /* rcw */ ++ if (i != pd_idx && dev->towrite && ++ test_bit(R5_LOCKED, &dev->flags)) ++ towrite = 1; ++ } ++ ++ if (towrite) { ++ struct bio *wbi; ++ ++ spin_lock(&sh->lock); ++ chosen = dev->towrite; ++ dev->towrite = NULL; ++ BUG_ON(dev->written); ++ wbi = dev->written = chosen; ++ spin_unlock(&sh->lock); ++ ++ while (wbi && wbi->bi_sector < ++ dev->sector + STRIPE_SECTORS) { ++ tx = async_copy_data(1, wbi, dev->page, ++ dev->sector, tx); ++ wbi = r5_next_bio(wbi, dev->sector); ++ } ++ } ++ } ++ ++ return tx; ++} ++ ++static void ops_complete_postxor(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ release_stripe(sh); ++} ++ ++static void ops_complete_write(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ int disks = sh->disks, i, pd_idx = sh->pd_idx; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (dev->written || i == pd_idx) ++ set_bit(R5_UPTODATE, &dev->flags); ++ } ++ ++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); ++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); ++ ++ set_bit(STRIPE_HANDLE, &sh->state); ++ release_stripe(sh); ++} ++ ++static void ++ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) ++{ ++ /* kernel stack size limits the total number of disks */ ++ int disks = sh->disks; ++ struct page *xor_srcs[disks]; ++ ++ int count = 0, pd_idx = sh->pd_idx, i; ++ struct page *xor_dest; ++ int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); ++ unsigned long flags; ++ dma_async_tx_callback callback; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ /* check if prexor is active which means only process blocks ++ * that are part of a read-modify-write (written) ++ */ ++ if (prexor) { ++ xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (dev->written) ++ xor_srcs[count++] = dev->page; ++ } ++ } else { ++ xor_dest = sh->dev[pd_idx].page; ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (i != pd_idx) ++ xor_srcs[count++] = dev->page; ++ } ++ } ++ ++ /* check whether this postxor is part of a 
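
The prexor/biodrain/postxor sequence implements the read-modify-write identity: prexor XORs the old data out of the old parity (ASYNC_TX_XOR_DROP_DST keeps the destination page in play as source 0), biodrain copies the new data into the stripe cache, and postxor XORs the new data back into the parity. In scalar form, new_P = old_P ^ old_D ^ new_D, which must agree with recomputing parity from scratch; a tiny check with arbitrary byte values:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned char d0 = 0x5a, d1 = 0xc3, d2 = 0x0f;	/* data blocks */
	unsigned char p = d0 ^ d1 ^ d2;			/* full parity */
	unsigned char new_d1 = 0x77;

	/* rmw update: subtract old d1 (prexor), add new d1 (postxor) */
	unsigned char new_p = (p ^ d1) ^ new_d1;

	/* must equal parity recomputed from scratch (the rcw path) */
	assert(new_p == (d0 ^ new_d1 ^ d2));
	printf("old parity %02x, new parity %02x\n", p, new_p);
	return 0;
}
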
write */ ++ callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? ++ ops_complete_write : ops_complete_postxor; ++ ++ /* 1/ if we prexor'd then the dest is reused as a source ++ * 2/ if we did not prexor then we are redoing the parity ++ * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST ++ * for the synchronous xor case ++ */ ++ flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | ++ (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); ++ ++ atomic_inc(&sh->count); ++ ++ if (unlikely(count == 1)) { ++ flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); ++ tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, ++ flags, tx, callback, sh); ++ } else ++ tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, ++ flags, tx, callback, sh); ++} ++ ++static void ops_complete_check(void *stripe_head_ref) ++{ ++ struct stripe_head *sh = stripe_head_ref; ++ int pd_idx = sh->pd_idx; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && ++ sh->ops.zero_sum_result == 0) ++ set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); ++ ++ set_bit(STRIPE_OP_CHECK, &sh->ops.complete); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ release_stripe(sh); ++} ++ ++static void ops_run_check(struct stripe_head *sh) ++{ ++ /* kernel stack size limits the total number of disks */ ++ int disks = sh->disks; ++ struct page *xor_srcs[disks]; ++ struct dma_async_tx_descriptor *tx; ++ ++ int count = 0, pd_idx = sh->pd_idx, i; ++ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; ++ ++ pr_debug("%s: stripe %llu\n", __FUNCTION__, ++ (unsigned long long)sh->sector); ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (i != pd_idx) ++ xor_srcs[count++] = dev->page; ++ } ++ ++ tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, ++ &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); ++ ++ if (tx) ++ set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); ++ else ++ clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); ++ ++ atomic_inc(&sh->count); ++ tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, ++ ops_complete_check, sh); ++} ++ ++static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) ++{ ++ int overlap_clear = 0, i, disks = sh->disks; ++ struct dma_async_tx_descriptor *tx = NULL; ++ ++ if (test_bit(STRIPE_OP_BIOFILL, &pending)) { ++ ops_run_biofill(sh); ++ overlap_clear++; ++ } ++ ++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending)) ++ tx = ops_run_compute5(sh, pending); ++ ++ if (test_bit(STRIPE_OP_PREXOR, &pending)) ++ tx = ops_run_prexor(sh, tx); ++ ++ if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { ++ tx = ops_run_biodrain(sh, tx); ++ overlap_clear++; ++ } ++ ++ if (test_bit(STRIPE_OP_POSTXOR, &pending)) ++ ops_run_postxor(sh, tx); ++ ++ if (test_bit(STRIPE_OP_CHECK, &pending)) ++ ops_run_check(sh); ++ ++ if (test_bit(STRIPE_OP_IO, &pending)) ++ ops_run_io(sh); ++ ++ if (overlap_clear) ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (test_and_clear_bit(R5_Overlap, &dev->flags)) ++ wake_up(&sh->raid_conf->wait_for_overlap); ++ } ++} ++ ++static int grow_one_stripe(raid5_conf_t *conf) ++{ ++ struct stripe_head *sh; ++ sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); ++ if (!sh) ++ return 0; ++ memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev)); ++ sh->raid_conf = conf; ++ spin_lock_init(&sh->lock); ++ ++ if (grow_buffers(sh, conf->raid_disks)) { ++ shrink_buffers(sh, conf->raid_disks); ++ 
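
grow_one_stripe() here and grow_stripes() just below size their slab objects as sizeof(struct stripe_head) + (devs-1)*sizeof(struct r5dev): stripe_head ends in a one-element r5dev array, so allocating devs-1 extra elements makes dev[0..devs-1] valid storage. A freestanding sketch of the same sizing trick (struct fields trimmed to a minimum; alloc_stripe() is illustrative, not from the patch):

#include <stdio.h>
#include <stdlib.h>

struct r5dev { void *page; unsigned long flags; };

struct stripe_head {
	unsigned long sector;
	int disks;
	struct r5dev dev[1];	/* really dev[disks], allocated oversize */
};

static struct stripe_head *alloc_stripe(int devs)
{
	struct stripe_head *sh =
		calloc(1, sizeof(*sh) + (devs - 1) * sizeof(struct r5dev));
	if (sh)
		sh->disks = devs;
	return sh;
}

int main(void)
{
	struct stripe_head *sh = alloc_stripe(6);
	if (!sh)
		return 1;
	sh->dev[5].flags = 1;	/* last slot is valid storage */
	printf("allocated %zu bytes for %d devs\n",
	       sizeof(*sh) + 5 * sizeof(struct r5dev), sh->disks);
	free(sh);
	return 0;
}
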
kmem_cache_free(conf->slab_cache, sh); ++ return 0; ++ } ++ sh->disks = conf->raid_disks; ++ /* we just created an active stripe so... */ ++ atomic_set(&sh->count, 1); ++ atomic_inc(&conf->active_stripes); ++ INIT_LIST_HEAD(&sh->lru); ++ release_stripe(sh); ++ return 1; ++} ++ ++static int grow_stripes(raid5_conf_t *conf, int num) ++{ ++ struct kmem_cache *sc; ++ int devs = conf->raid_disks; ++ ++ sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev)); ++ sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev)); ++ conf->active_name = 0; ++ sc = kmem_cache_create(conf->cache_name[conf->active_name], ++ sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), ++ 0, 0, NULL, NULL); ++ if (!sc) ++ return 1; ++ conf->slab_cache = sc; ++ conf->pool_size = devs; ++ while (num--) ++ if (!grow_one_stripe(conf)) ++ return 1; ++ return 0; ++} ++ ++#ifdef CONFIG_MD_RAID5_RESHAPE ++static int resize_stripes(raid5_conf_t *conf, int newsize) ++{ ++ /* Make all the stripes able to hold 'newsize' devices. ++ * New slots in each stripe get 'page' set to a new page. ++ * ++ * This happens in stages: ++ * 1/ create a new kmem_cache and allocate the required number of ++ * stripe_heads. ++ * 2/ gather all the old stripe_heads and tranfer the pages across ++ * to the new stripe_heads. This will have the side effect of ++ * freezing the array as once all stripe_heads have been collected, ++ * no IO will be possible. Old stripe heads are freed once their ++ * pages have been transferred over, and the old kmem_cache is ++ * freed when all stripes are done. ++ * 3/ reallocate conf->disks to be suitable bigger. If this fails, ++ * we simple return a failre status - no need to clean anything up. ++ * 4/ allocate new pages for the new slots in the new stripe_heads. ++ * If this fails, we don't bother trying the shrink the ++ * stripe_heads down again, we just leave them as they are. ++ * As each stripe_head is processed the new one is released into ++ * active service. ++ * ++ * Once step2 is started, we cannot afford to wait for a write, ++ * so we use GFP_NOIO allocations. ++ */ ++ struct stripe_head *osh, *nsh; ++ LIST_HEAD(newstripes); ++ struct disk_info *ndisks; ++ int err = 0; ++ struct kmem_cache *sc; ++ int i; ++ ++ if (newsize <= conf->pool_size) ++ return 0; /* never bother to shrink */ ++ ++ md_allow_write(conf->mddev); ++ ++ /* Step 1 */ ++ sc = kmem_cache_create(conf->cache_name[1-conf->active_name], ++ sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), ++ 0, 0, NULL, NULL); ++ if (!sc) ++ return -ENOMEM; ++ ++ for (i = conf->max_nr_stripes; i; i--) { ++ nsh = kmem_cache_alloc(sc, GFP_KERNEL); ++ if (!nsh) ++ break; ++ ++ memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); ++ ++ nsh->raid_conf = conf; ++ spin_lock_init(&nsh->lock); ++ ++ list_add(&nsh->lru, &newstripes); ++ } ++ if (i) { ++ /* didn't get enough, give up */ ++ while (!list_empty(&newstripes)) { ++ nsh = list_entry(newstripes.next, struct stripe_head, lru); ++ list_del(&nsh->lru); ++ kmem_cache_free(sc, nsh); ++ } ++ kmem_cache_destroy(sc); ++ return -ENOMEM; ++ } ++ /* Step 2 - Must use GFP_NOIO now. 
++ * OK, we have enough stripes, start collecting inactive
++ * stripes and copying them over
++ */
++ list_for_each_entry(nsh, &newstripes, lru) {
++ spin_lock_irq(&conf->device_lock);
++ wait_event_lock_irq(conf->wait_for_stripe,
++ !list_empty(&conf->inactive_list),
++ conf->device_lock,
++ unplug_slaves(conf->mddev)
++ );
++ osh = get_free_stripe(conf);
++ spin_unlock_irq(&conf->device_lock);
++ atomic_set(&nsh->count, 1);
++ for(i=0; i<conf->pool_size; i++)
++ nsh->dev[i].page = osh->dev[i].page;
++ for( ; i<newsize; i++)
++ nsh->dev[i].page = NULL;
++ kmem_cache_free(conf->slab_cache, osh);
++ }
++ kmem_cache_destroy(conf->slab_cache);
++
++ /* Step 3.
++ * At this point, we are holding all the stripes so the array
++ * is completely stalled, so now is a good time to resize
++ * conf->disks.
++ */
++ ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
++ if (ndisks) {
++ for (i=0; i<conf->raid_disks; i++)
++ ndisks[i] = conf->disks[i];
++ kfree(conf->disks);
++ conf->disks = ndisks;
++ } else
++ err = -ENOMEM;
++
++ /* Step 4, return new stripes to service */
++ while(!list_empty(&newstripes)) {
++ nsh = list_entry(newstripes.next, struct stripe_head, lru);
++ list_del_init(&nsh->lru);
++ for (i=conf->raid_disks; i < newsize; i++)
++ if (nsh->dev[i].page == NULL) {
++ struct page *p = alloc_page(GFP_NOIO);
++ nsh->dev[i].page = p;
++ if (!p)
++ err = -ENOMEM;
++ }
++ release_stripe(nsh);
++ }
++ /* critical section pass, GFP_NOIO no longer needed */
++
++ conf->slab_cache = sc;
++ conf->active_name = 1-conf->active_name;
++ conf->pool_size = newsize;
++ return err;
++}
++#endif
++
++static int drop_one_stripe(raid5_conf_t *conf)
++{
++ struct stripe_head *sh;
+
+ spin_lock_irq(&conf->device_lock);
+ sh = get_free_stripe(conf);
+@@ -537,7 +1123,7 @@
+ if (bi == &sh->dev[i].req)
+ break;
+
+- PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n",
++ pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+ (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ uptodate);
+ if (i == disks) {
+@@ -613,7 +1199,7 @@
+ if (bi == &sh->dev[i].req)
+ break;
+
+- PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n",
++ pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+ (unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ uptodate);
+ if (i == disks) {
+@@ -658,7 +1244,7 @@
+ {
+ char b[BDEVNAME_SIZE];
+ raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
+- PRINTK("raid5: error called\n");
++ pr_debug("raid5: error called\n");
+
+ if (!test_bit(Faulty, &rdev->flags)) {
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
+@@ -918,135 +1504,11 @@
+
+ #define check_xor() do { \
+ if (count == MAX_XOR_BLOCKS) { \
+- xor_block(count, STRIPE_SIZE, ptr); \
+- count = 1; \
++ xor_blocks(count, STRIPE_SIZE, dest, ptr);\
++ count = 0; \
+ } \
+ } while(0)
+-
+-
+-static void compute_block(struct stripe_head *sh, int dd_idx)
+-{
+- int i, count, disks = sh->disks;
+- void *ptr[MAX_XOR_BLOCKS], *p;
+-
+- PRINTK("compute_block, stripe %llu, idx %d\n",
+- (unsigned long long)sh->sector, dd_idx);
+-
+- ptr[0] = page_address(sh->dev[dd_idx].page);
+- memset(ptr[0], 0, STRIPE_SIZE);
+- count = 1;
+- for (i = disks ; i--; ) {
+- if (i == dd_idx)
+- continue;
+- p = page_address(sh->dev[i].page);
+- if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
+- ptr[count++] = p;
+- else
+- printk(KERN_ERR "compute_block() %d, stripe %llu, %d"
+- " not present\n", dd_idx,
+- (unsigned long long)sh->sector, i);
+-
+- check_xor();
+- }
+- if (count != 1)
+- xor_block(count, STRIPE_SIZE, ptr);
+-
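
The reworked check_xor() macro above tracks the xor_block() -> xor_blocks() API change: the destination now travels separately from the source array, so the batch counter restarts at 0 instead of 1 (slot 0 no longer holds the destination). In miniature, the batching looks like this, with xor_into() standing in for xor_blocks() and toy block sizes:

#include <stdio.h>

#define MAX_XOR_BLOCKS 4
#define BLK 4

/* stand-in for xor_blocks(): dest ^= each of count sources */
static void xor_into(int count, unsigned char *dest, unsigned char **srcs)
{
	int i, j;
	for (i = 0; i < count; i++)
		for (j = 0; j < BLK; j++)
			dest[j] ^= srcs[i][j];
}

int main(void)
{
	unsigned char blocks[6][BLK] = {
		{1,2,3,4}, {5,6,7,8}, {9,1,2,3}, {4,5,6,7}, {8,9,1,2}, {3,4,5,6}
	};
	unsigned char dest[BLK] = {0};
	unsigned char *ptr[MAX_XOR_BLOCKS];
	int count = 0, i;

	for (i = 0; i < 6; i++) {
		ptr[count++] = blocks[i];
		if (count == MAX_XOR_BLOCKS) {	/* the check_xor() flush */
			xor_into(count, dest, ptr);
			count = 0;
		}
	}
	if (count)				/* final partial batch */
		xor_into(count, dest, ptr);
	printf("%02x %02x %02x %02x\n", dest[0], dest[1], dest[2], dest[3]);
	return 0;
}
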
set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); +-} +- +-static void compute_parity5(struct stripe_head *sh, int method) +-{ +- raid5_conf_t *conf = sh->raid_conf; +- int i, pd_idx = sh->pd_idx, disks = sh->disks, count; +- void *ptr[MAX_XOR_BLOCKS]; +- struct bio *chosen; +- +- PRINTK("compute_parity5, stripe %llu, method %d\n", +- (unsigned long long)sh->sector, method); +- +- count = 1; +- ptr[0] = page_address(sh->dev[pd_idx].page); +- switch(method) { +- case READ_MODIFY_WRITE: +- BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags)); +- for (i=disks ; i-- ;) { +- if (i==pd_idx) +- continue; +- if (sh->dev[i].towrite && +- test_bit(R5_UPTODATE, &sh->dev[i].flags)) { +- ptr[count++] = page_address(sh->dev[i].page); +- chosen = sh->dev[i].towrite; +- sh->dev[i].towrite = NULL; +- +- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) +- wake_up(&conf->wait_for_overlap); +- +- BUG_ON(sh->dev[i].written); +- sh->dev[i].written = chosen; +- check_xor(); +- } +- } +- break; +- case RECONSTRUCT_WRITE: +- memset(ptr[0], 0, STRIPE_SIZE); +- for (i= disks; i-- ;) +- if (i!=pd_idx && sh->dev[i].towrite) { +- chosen = sh->dev[i].towrite; +- sh->dev[i].towrite = NULL; +- +- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) +- wake_up(&conf->wait_for_overlap); +- +- BUG_ON(sh->dev[i].written); +- sh->dev[i].written = chosen; +- } +- break; +- case CHECK_PARITY: +- break; +- } +- if (count>1) { +- xor_block(count, STRIPE_SIZE, ptr); +- count = 1; +- } +- +- for (i = disks; i--;) +- if (sh->dev[i].written) { +- sector_t sector = sh->dev[i].sector; +- struct bio *wbi = sh->dev[i].written; +- while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { +- copy_data(1, wbi, sh->dev[i].page, sector); +- wbi = r5_next_bio(wbi, sector); +- } +- +- set_bit(R5_LOCKED, &sh->dev[i].flags); +- set_bit(R5_UPTODATE, &sh->dev[i].flags); +- } +- +- switch(method) { +- case RECONSTRUCT_WRITE: +- case CHECK_PARITY: +- for (i=disks; i--;) +- if (i != pd_idx) { +- ptr[count++] = page_address(sh->dev[i].page); +- check_xor(); +- } +- break; +- case READ_MODIFY_WRITE: +- for (i = disks; i--;) +- if (sh->dev[i].written) { +- ptr[count++] = page_address(sh->dev[i].page); +- check_xor(); +- } +- } +- if (count != 1) +- xor_block(count, STRIPE_SIZE, ptr); +- +- if (method != CHECK_PARITY) { +- set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); +- set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); +- } else +- clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); +-} +- + static void compute_parity6(struct stripe_head *sh, int method) + { + raid6_conf_t *conf = sh->raid_conf; +@@ -1058,7 +1520,7 @@ + qd_idx = raid6_next_disk(pd_idx, disks); + d0_idx = raid6_next_disk(qd_idx, disks); + +- PRINTK("compute_parity, stripe %llu, method %d\n", ++ pr_debug("compute_parity, stripe %llu, method %d\n", + (unsigned long long)sh->sector, method); + + switch(method) { +@@ -1132,20 +1594,20 @@ + static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) + { + int i, count, disks = sh->disks; +- void *ptr[MAX_XOR_BLOCKS], *p; ++ void *ptr[MAX_XOR_BLOCKS], *dest, *p; + int pd_idx = sh->pd_idx; + int qd_idx = raid6_next_disk(pd_idx, disks); + +- PRINTK("compute_block_1, stripe %llu, idx %d\n", ++ pr_debug("compute_block_1, stripe %llu, idx %d\n", + (unsigned long long)sh->sector, dd_idx); + + if ( dd_idx == qd_idx ) { + /* We're actually computing the Q drive */ + compute_parity6(sh, UPDATE_PARITY); + } else { +- ptr[0] = page_address(sh->dev[dd_idx].page); +- if (!nozero) memset(ptr[0], 0, STRIPE_SIZE); +- count = 1; ++ dest = 
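
compute_parity6() maintains two syndromes: P is the plain XOR of the data blocks, and Q is the Reed-Solomon syndrome over GF(2^8), Q = sum of g^i * D_i with generator g = 2, where multiply-by-2 is a shift plus conditional reduction by the field polynomial 0x11d. A one-byte-per-disk illustration of generating both (the kernel's raid6 code is table-driven and vectorized; this shows only the math):

#include <stdio.h>

/* multiply by 2 in GF(2^8) with polynomial x^8+x^4+x^3+x^2+1 (0x11d) */
static unsigned char gf2_mul2(unsigned char v)
{
	return (unsigned char)((v << 1) ^ ((v & 0x80) ? 0x1d : 0));
}

int main(void)
{
	unsigned char d[3] = { 0x12, 0x34, 0x56 };	/* one byte per data disk */
	unsigned char p = 0, q = 0;
	int i;

	/* Horner's rule, highest disk first: q = ((d2)*2 ^ d1)*2 ^ d0 */
	for (i = 2; i >= 0; i--) {
		q = gf2_mul2(q) ^ d[i];
		p ^= d[i];
	}
	printf("P=%02x Q=%02x\n", p, q);
	return 0;
}
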
page_address(sh->dev[dd_idx].page); ++ if (!nozero) memset(dest, 0, STRIPE_SIZE); ++ count = 0; + for (i = disks ; i--; ) { + if (i == dd_idx || i == qd_idx) + continue; +@@ -1159,8 +1621,8 @@ + + check_xor(); + } +- if (count != 1) +- xor_block(count, STRIPE_SIZE, ptr); ++ if (count) ++ xor_blocks(count, STRIPE_SIZE, dest, ptr); + if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); + } +@@ -1183,7 +1645,7 @@ + BUG_ON(faila == failb); + if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } + +- PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", ++ pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", + (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); + + if ( failb == disks-1 ) { +@@ -1229,7 +1691,79 @@ + } + } + ++static int ++handle_write_operations5(struct stripe_head *sh, int rcw, int expand) ++{ ++ int i, pd_idx = sh->pd_idx, disks = sh->disks; ++ int locked = 0; ++ ++ if (rcw) { ++ /* if we are not expanding this is a proper write request, and ++ * there will be bios with new data to be drained into the ++ * stripe cache ++ */ ++ if (!expand) { ++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); ++ sh->ops.count++; ++ } ++ ++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); ++ sh->ops.count++; ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ ++ if (dev->towrite) { ++ set_bit(R5_LOCKED, &dev->flags); ++ if (!expand) ++ clear_bit(R5_UPTODATE, &dev->flags); ++ locked++; ++ } ++ } ++ } else { ++ BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || ++ test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); ++ ++ set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); ++ set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); ++ set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); ++ ++ sh->ops.count += 3; ++ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (i == pd_idx) ++ continue; ++ ++ /* For a read-modify write there may be blocks that are ++ * locked for reading while others are ready to be ++ * written so we distinguish these blocks by the ++ * R5_Wantprexor bit ++ */ ++ if (dev->towrite && ++ (test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Wantcompute, &dev->flags))) { ++ set_bit(R5_Wantprexor, &dev->flags); ++ set_bit(R5_LOCKED, &dev->flags); ++ clear_bit(R5_UPTODATE, &dev->flags); ++ locked++; ++ } ++ } ++ } ++ ++ /* keep the parity disk locked while asynchronous operations ++ * are in flight ++ */ ++ set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); ++ clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); ++ locked++; ++ ++ pr_debug("%s: stripe %llu locked: %d pending: %lx\n", ++ __FUNCTION__, (unsigned long long)sh->sector, ++ locked, sh->ops.pending); + ++ return locked; ++} + + /* + * Each stripe/dev can have one or more bion attached. +@@ -1242,7 +1776,7 @@ + raid5_conf_t *conf = sh->raid_conf; + int firstwrite=0; + +- PRINTK("adding bh b#%llu to stripe s#%llu\n", ++ pr_debug("adding bh b#%llu to stripe s#%llu\n", + (unsigned long long)bi->bi_sector, + (unsigned long long)sh->sector); + +@@ -1271,7 +1805,7 @@ + spin_unlock_irq(&conf->device_lock); + spin_unlock(&sh->lock); + +- PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n", ++ pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", + (unsigned long long)bi->bi_sector, + (unsigned long long)sh->sector, dd_idx); + +@@ -1326,116 +1860,14 @@ + return pd_idx; + } + +- +-/* +- * handle_stripe - do things to a stripe. 
+- * +- * We lock the stripe and then examine the state of various bits +- * to see what needs to be done. +- * Possible results: +- * return some read request which now have data +- * return some write requests which are safely on disc +- * schedule a read on some buffers +- * schedule a write of some buffers +- * return confirmation of parity correctness +- * +- * Parity calculations are done inside the stripe lock +- * buffers are taken off read_list or write_list, and bh_cache buffers +- * get BH_Lock set before the stripe lock is released. +- * +- */ +- +-static void handle_stripe5(struct stripe_head *sh) ++static void ++handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, ++ struct stripe_head_state *s, int disks, ++ struct bio **return_bi) + { +- raid5_conf_t *conf = sh->raid_conf; +- int disks = sh->disks; +- struct bio *return_bi= NULL; +- struct bio *bi; + int i; +- int syncing, expanding, expanded; +- int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; +- int non_overwrite = 0; +- int failed_num=0; +- struct r5dev *dev; +- +- PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n", +- (unsigned long long)sh->sector, atomic_read(&sh->count), +- sh->pd_idx); +- +- spin_lock(&sh->lock); +- clear_bit(STRIPE_HANDLE, &sh->state); +- clear_bit(STRIPE_DELAYED, &sh->state); +- +- syncing = test_bit(STRIPE_SYNCING, &sh->state); +- expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); +- expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); +- /* Now to look around and see what can be done */ +- +- rcu_read_lock(); +- for (i=disks; i--; ) { +- mdk_rdev_t *rdev; +- dev = &sh->dev[i]; +- clear_bit(R5_Insync, &dev->flags); +- +- PRINTK("check %d: state 0x%lx read %p write %p written %p\n", +- i, dev->flags, dev->toread, dev->towrite, dev->written); +- /* maybe we can reply to a read */ +- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { +- struct bio *rbi, *rbi2; +- PRINTK("Return read for disc %d\n", i); +- spin_lock_irq(&conf->device_lock); +- rbi = dev->toread; +- dev->toread = NULL; +- if (test_and_clear_bit(R5_Overlap, &dev->flags)) +- wake_up(&conf->wait_for_overlap); +- spin_unlock_irq(&conf->device_lock); +- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { +- copy_data(0, rbi, dev->page, dev->sector); +- rbi2 = r5_next_bio(rbi, dev->sector); +- spin_lock_irq(&conf->device_lock); +- if (--rbi->bi_phys_segments == 0) { +- rbi->bi_next = return_bi; +- return_bi = rbi; +- } +- spin_unlock_irq(&conf->device_lock); +- rbi = rbi2; +- } +- } +- +- /* now count some things */ +- if (test_bit(R5_LOCKED, &dev->flags)) locked++; +- if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; +- +- +- if (dev->toread) to_read++; +- if (dev->towrite) { +- to_write++; +- if (!test_bit(R5_OVERWRITE, &dev->flags)) +- non_overwrite++; +- } +- if (dev->written) written++; +- rdev = rcu_dereference(conf->disks[i].rdev); +- if (!rdev || !test_bit(In_sync, &rdev->flags)) { +- /* The ReadError flag will just be confusing now */ +- clear_bit(R5_ReadError, &dev->flags); +- clear_bit(R5_ReWrite, &dev->flags); +- } +- if (!rdev || !test_bit(In_sync, &rdev->flags) +- || test_bit(R5_ReadError, &dev->flags)) { +- failed++; +- failed_num = i; +- } else +- set_bit(R5_Insync, &dev->flags); +- } +- rcu_read_unlock(); +- PRINTK("locked=%d uptodate=%d to_read=%d" +- " to_write=%d failed=%d failed_num=%d\n", +- locked, uptodate, to_read, to_write, failed, failed_num); +- /* check if the array has lost two devices and, if so, some requests might +- * need to be failed +- 
*/ +- if (failed > 1 && to_read+to_write+written) { +- for (i=disks; i--; ) { ++ for (i = disks; i--; ) { ++ struct bio *bi; + int bitmap_end = 0; + + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { +@@ -1447,23 +1879,26 @@ + md_error(conf->mddev, rdev); + rcu_read_unlock(); + } +- + spin_lock_irq(&conf->device_lock); + /* fail all writes first */ + bi = sh->dev[i].towrite; + sh->dev[i].towrite = NULL; +- if (bi) { to_write--; bitmap_end = 1; } ++ if (bi) { ++ s->to_write--; ++ bitmap_end = 1; ++ } + + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) + wake_up(&conf->wait_for_overlap); + +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ ++ while (bi && bi->bi_sector < ++ sh->dev[i].sector + STRIPE_SECTORS) { + struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); + clear_bit(BIO_UPTODATE, &bi->bi_flags); + if (--bi->bi_phys_segments == 0) { + md_write_end(conf->mddev); +- bi->bi_next = return_bi; +- return_bi = bi; ++ bi->bi_next = *return_bi; ++ *return_bi = bi; + } + bi = nextbi; + } +@@ -1471,78 +1906,235 @@ + bi = sh->dev[i].written; + sh->dev[i].written = NULL; + if (bi) bitmap_end = 1; +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { ++ while (bi && bi->bi_sector < ++ sh->dev[i].sector + STRIPE_SECTORS) { + struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); + clear_bit(BIO_UPTODATE, &bi->bi_flags); + if (--bi->bi_phys_segments == 0) { + md_write_end(conf->mddev); +- bi->bi_next = return_bi; +- return_bi = bi; ++ bi->bi_next = *return_bi; ++ *return_bi = bi; ++ } ++ bi = bi2; ++ } ++ ++ /* fail any reads if this device is non-operational and ++ * the data has not reached the cache yet. ++ */ ++ if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && ++ (!test_bit(R5_Insync, &sh->dev[i].flags) || ++ test_bit(R5_ReadError, &sh->dev[i].flags))) { ++ bi = sh->dev[i].toread; ++ sh->dev[i].toread = NULL; ++ if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) ++ wake_up(&conf->wait_for_overlap); ++ if (bi) s->to_read--; ++ while (bi && bi->bi_sector < ++ sh->dev[i].sector + STRIPE_SECTORS) { ++ struct bio *nextbi = ++ r5_next_bio(bi, sh->dev[i].sector); ++ clear_bit(BIO_UPTODATE, &bi->bi_flags); ++ if (--bi->bi_phys_segments == 0) { ++ bi->bi_next = *return_bi; ++ *return_bi = bi; ++ } ++ bi = nextbi; ++ } ++ } ++ spin_unlock_irq(&conf->device_lock); ++ if (bitmap_end) ++ bitmap_endwrite(conf->mddev->bitmap, sh->sector, ++ STRIPE_SECTORS, 0, 0); ++ } ++ ++} ++ ++/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks ++ * to process ++ */ ++static int __handle_issuing_new_read_requests5(struct stripe_head *sh, ++ struct stripe_head_state *s, int disk_idx, int disks) ++{ ++ struct r5dev *dev = &sh->dev[disk_idx]; ++ struct r5dev *failed_dev = &sh->dev[s->failed_num]; ++ ++ /* don't schedule compute operations or reads on the parity block while ++ * a check is in flight ++ */ ++ if ((disk_idx == sh->pd_idx) && ++ test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) ++ return ~0; ++ ++ /* is the data in this block needed, and can we get it? */ ++ if (!test_bit(R5_LOCKED, &dev->flags) && ++ !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || ++ (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || ++ s->syncing || s->expanding || (s->failed && ++ (failed_dev->toread || (failed_dev->towrite && ++ !test_bit(R5_OVERWRITE, &failed_dev->flags) ++ ))))) { ++ /* 1/ We would like to get this block, possibly by computing it, ++ * but we might not be able to. 
++ * ++ * 2/ Since parity check operations potentially make the parity ++ * block !uptodate it will need to be refreshed before any ++ * compute operations on data disks are scheduled. ++ * ++ * 3/ We hold off parity block re-reads until check operations ++ * have quiesced. ++ */ ++ if ((s->uptodate == disks - 1) && ++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { ++ set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); ++ set_bit(R5_Wantcompute, &dev->flags); ++ sh->ops.target = disk_idx; ++ s->req_compute = 1; ++ sh->ops.count++; ++ /* Careful: from this point on 'uptodate' is in the eye ++ * of raid5_run_ops which services 'compute' operations ++ * before writes. R5_Wantcompute flags a block that will ++ * be R5_UPTODATE by the time it is needed for a ++ * subsequent operation. ++ */ ++ s->uptodate++; ++ return 0; /* uptodate + compute == disks */ ++ } else if ((s->uptodate < disks - 1) && ++ test_bit(R5_Insync, &dev->flags)) { ++ /* Note: we hold off compute operations while checks are ++ * in flight, but we still prefer 'compute' over 'read' ++ * hence we only read if (uptodate < * disks-1) ++ */ ++ set_bit(R5_LOCKED, &dev->flags); ++ set_bit(R5_Wantread, &dev->flags); ++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ s->locked++; ++ pr_debug("Reading block %d (sync=%d)\n", disk_idx, ++ s->syncing); + } +- bi = bi2; + } + +- /* fail any reads if this device is non-operational */ +- if (!test_bit(R5_Insync, &sh->dev[i].flags) || +- test_bit(R5_ReadError, &sh->dev[i].flags)) { +- bi = sh->dev[i].toread; +- sh->dev[i].toread = NULL; +- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) +- wake_up(&conf->wait_for_overlap); +- if (bi) to_read--; +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ +- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); +- clear_bit(BIO_UPTODATE, &bi->bi_flags); +- if (--bi->bi_phys_segments == 0) { +- bi->bi_next = return_bi; +- return_bi = bi; +- } +- bi = nextbi; ++ return ~0; ++} ++ ++static void handle_issuing_new_read_requests5(struct stripe_head *sh, ++ struct stripe_head_state *s, int disks) ++{ ++ int i; ++ ++ /* Clear completed compute operations. 
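
The policy in __handle_issuing_new_read_requests5() reduces to: with disks-1 blocks up to date, the last one is cheaper to XOR-compute than to read; with fewer, it has to be read from an in-sync device; and nothing is scheduled for the parity block while a check is in flight. Distilled into one function (the enum and parameter names are illustrative, not from the patch):

#include <stdio.h>

enum fetch { FETCH_NONE, FETCH_COMPUTE, FETCH_READ };

/* Decide how to obtain one wanted block. */
static enum fetch how_to_fetch(int uptodate, int disks,
			       int check_pending, int insync)
{
	if (uptodate == disks - 1 && !check_pending)
		return FETCH_COMPUTE;	/* all other blocks present: XOR it */
	if (uptodate < disks - 1 && insync)
		return FETCH_READ;	/* must go to the disk */
	return FETCH_NONE;
}

int main(void)
{
	printf("%d\n", how_to_fetch(5, 6, 0, 1));	/* 1: compute */
	printf("%d\n", how_to_fetch(3, 6, 0, 1));	/* 2: read */
	printf("%d\n", how_to_fetch(5, 6, 1, 1));	/* 0: wait for check */
	return 0;
}
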
Parity recovery ++ * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled ++ * later on in this routine ++ */ ++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && ++ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); ++ } ++ ++ /* look for blocks to read/compute, skip this if a compute ++ * is already in flight, or if the stripe contents are in the ++ * midst of changing due to a write ++ */ ++ if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && ++ !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && ++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { ++ for (i = disks; i--; ) ++ if (__handle_issuing_new_read_requests5( ++ sh, s, i, disks) == 0) ++ break; + } ++ set_bit(STRIPE_HANDLE, &sh->state); ++} ++ ++static void handle_issuing_new_read_requests6(struct stripe_head *sh, ++ struct stripe_head_state *s, struct r6_state *r6s, ++ int disks) ++{ ++ int i; ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (!test_bit(R5_LOCKED, &dev->flags) && ++ !test_bit(R5_UPTODATE, &dev->flags) && ++ (dev->toread || (dev->towrite && ++ !test_bit(R5_OVERWRITE, &dev->flags)) || ++ s->syncing || s->expanding || ++ (s->failed >= 1 && ++ (sh->dev[r6s->failed_num[0]].toread || ++ s->to_write)) || ++ (s->failed >= 2 && ++ (sh->dev[r6s->failed_num[1]].toread || ++ s->to_write)))) { ++ /* we would like to get this block, possibly ++ * by computing it, but we might not be able to ++ */ ++ if (s->uptodate == disks-1) { ++ pr_debug("Computing stripe %llu block %d\n", ++ (unsigned long long)sh->sector, i); ++ compute_block_1(sh, i, 0); ++ s->uptodate++; ++ } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { ++ /* Computing 2-failure is *very* expensive; only ++ * do it if failed >= 2 ++ */ ++ int other; ++ for (other = disks; other--; ) { ++ if (other == i) ++ continue; ++ if (!test_bit(R5_UPTODATE, ++ &sh->dev[other].flags)) ++ break; + } +- spin_unlock_irq(&conf->device_lock); +- if (bitmap_end) +- bitmap_endwrite(conf->mddev->bitmap, sh->sector, +- STRIPE_SECTORS, 0, 0); ++ BUG_ON(other < 0); ++ pr_debug("Computing stripe %llu blocks %d,%d\n", ++ (unsigned long long)sh->sector, ++ i, other); ++ compute_block_2(sh, i, other); ++ s->uptodate += 2; ++ } else if (test_bit(R5_Insync, &dev->flags)) { ++ set_bit(R5_LOCKED, &dev->flags); ++ set_bit(R5_Wantread, &dev->flags); ++ s->locked++; ++ pr_debug("Reading block %d (sync=%d)\n", ++ i, s->syncing); + } + } +- if (failed > 1 && syncing) { +- md_done_sync(conf->mddev, STRIPE_SECTORS,0); +- clear_bit(STRIPE_SYNCING, &sh->state); +- syncing = 0; + } ++ set_bit(STRIPE_HANDLE, &sh->state); ++} + +- /* might be able to return some write requests if the parity block +- * is safe, or on a failed drive +- */ +- dev = &sh->dev[sh->pd_idx]; +- if ( written && +- ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && +- test_bit(R5_UPTODATE, &dev->flags)) +- || (failed == 1 && failed_num == sh->pd_idx)) +- ) { +- /* any written block on an uptodate or failed drive can be returned. ++ ++/* handle_completed_write_requests ++ * any written block on an uptodate or failed drive can be returned. + * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but + * never LOCKED, so we don't need to test 'failed' directly. 
+ */ +- for (i=disks; i--; ) ++static void handle_completed_write_requests(raid5_conf_t *conf, ++ struct stripe_head *sh, int disks, struct bio **return_bi) ++{ ++ int i; ++ struct r5dev *dev; ++ ++ for (i = disks; i--; ) + if (sh->dev[i].written) { + dev = &sh->dev[i]; + if (!test_bit(R5_LOCKED, &dev->flags) && +- test_bit(R5_UPTODATE, &dev->flags) ) { ++ test_bit(R5_UPTODATE, &dev->flags)) { + /* We can return any write requests */ + struct bio *wbi, *wbi2; + int bitmap_end = 0; +- PRINTK("Return write for disc %d\n", i); ++ pr_debug("Return write for disc %d\n", i); + spin_lock_irq(&conf->device_lock); + wbi = dev->written; + dev->written = NULL; +- while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { ++ while (wbi && wbi->bi_sector < ++ dev->sector + STRIPE_SECTORS) { + wbi2 = r5_next_bio(wbi, dev->sector); + if (--wbi->bi_phys_segments == 0) { + md_write_end(conf->mddev); +- wbi->bi_next = return_bi; +- return_bi = wbi; ++ wbi->bi_next = *return_bi; ++ *return_bi = wbi; + } + wbi = wbi2; + } +@@ -1550,89 +2142,63 @@ + bitmap_end = 1; + spin_unlock_irq(&conf->device_lock); + if (bitmap_end) +- bitmap_endwrite(conf->mddev->bitmap, sh->sector, ++ bitmap_endwrite(conf->mddev->bitmap, ++ sh->sector, + STRIPE_SECTORS, +- !test_bit(STRIPE_DEGRADED, &sh->state), 0); +- } +- } +- } +- +- /* Now we might consider reading some blocks, either to check/generate +- * parity, or to satisfy requests +- * or to load a block that is being partially written. +- */ +- if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) { +- for (i=disks; i--;) { +- dev = &sh->dev[i]; +- if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && +- (dev->toread || +- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || +- syncing || +- expanding || +- (failed && (sh->dev[failed_num].toread || +- (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags)))) +- ) +- ) { +- /* we would like to get this block, possibly +- * by computing it, but we might not be able to +- */ +- if (uptodate == disks-1) { +- PRINTK("Computing block %d\n", i); +- compute_block(sh, i); +- uptodate++; +- } else if (test_bit(R5_Insync, &dev->flags)) { +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantread, &dev->flags); +- locked++; +- PRINTK("Reading block %d (sync=%d)\n", +- i, syncing); +- } ++ !test_bit(STRIPE_DEGRADED, &sh->state), ++ 0); + } + } +- set_bit(STRIPE_HANDLE, &sh->state); +- } ++} + +- /* now to consider writing and what else, if anything should be read */ +- if (to_write) { +- int rmw=0, rcw=0; +- for (i=disks ; i--;) { ++static void handle_issuing_new_write_requests5(raid5_conf_t *conf, ++ struct stripe_head *sh, struct stripe_head_state *s, int disks) ++{ ++ int rmw = 0, rcw = 0, i; ++ for (i = disks; i--; ) { + /* would I have to read this buffer for read_modify_write */ +- dev = &sh->dev[i]; ++ struct r5dev *dev = &sh->dev[i]; + if ((dev->towrite || i == sh->pd_idx) && +- (!test_bit(R5_LOCKED, &dev->flags) +- ) && +- !test_bit(R5_UPTODATE, &dev->flags)) { +- if (test_bit(R5_Insync, &dev->flags) +-/* && !(!mddev->insync && i == sh->pd_idx) */ +- ) ++ !test_bit(R5_LOCKED, &dev->flags) && ++ !(test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Wantcompute, &dev->flags))) { ++ if (test_bit(R5_Insync, &dev->flags)) + rmw++; +- else rmw += 2*disks; /* cannot read it */ ++ else ++ rmw += 2*disks; /* cannot read it */ + } + /* Would I have to read this buffer for reconstruct_write */ + if (!test_bit(R5_OVERWRITE, &dev->flags) && i != 
sh->pd_idx && +- (!test_bit(R5_LOCKED, &dev->flags) +- ) && +- !test_bit(R5_UPTODATE, &dev->flags)) { ++ !test_bit(R5_LOCKED, &dev->flags) && ++ !(test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Wantcompute, &dev->flags))) { + if (test_bit(R5_Insync, &dev->flags)) rcw++; +- else rcw += 2*disks; ++ else ++ rcw += 2*disks; + } + } +- PRINTK("for sector %llu, rmw=%d rcw=%d\n", ++ pr_debug("for sector %llu, rmw=%d rcw=%d\n", + (unsigned long long)sh->sector, rmw, rcw); + set_bit(STRIPE_HANDLE, &sh->state); + if (rmw < rcw && rmw > 0) + /* prefer read-modify-write, but need to get some data */ +- for (i=disks; i--;) { +- dev = &sh->dev[i]; ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; + if ((dev->towrite || i == sh->pd_idx) && +- !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && ++ !test_bit(R5_LOCKED, &dev->flags) && ++ !(test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Wantcompute, &dev->flags)) && + test_bit(R5_Insync, &dev->flags)) { +- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) +- { +- PRINTK("Read_old block %d for r-m-w\n", i); ++ if ( ++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { ++ pr_debug("Read_old block " ++ "%d for r-m-w\n", i); + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); +- locked++; ++ if (!test_and_set_bit( ++ STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ s->locked++; + } else { + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); +@@ -1641,165 +2207,367 @@ + } + if (rcw <= rmw && rcw > 0) + /* want reconstruct write, but need to get some data */ +- for (i=disks; i--;) { +- dev = &sh->dev[i]; +- if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && +- !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (!test_bit(R5_OVERWRITE, &dev->flags) && ++ i != sh->pd_idx && ++ !test_bit(R5_LOCKED, &dev->flags) && ++ !(test_bit(R5_UPTODATE, &dev->flags) || ++ test_bit(R5_Wantcompute, &dev->flags)) && + test_bit(R5_Insync, &dev->flags)) { +- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) +- { +- PRINTK("Read_old block %d for Reconstruct\n", i); ++ if ( ++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { ++ pr_debug("Read_old block " ++ "%d for Reconstruct\n", i); + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantread, &dev->flags); +- locked++; ++ if (!test_and_set_bit( ++ STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ s->locked++; ++ } else { ++ set_bit(STRIPE_DELAYED, &sh->state); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ } ++ } ++ } ++ /* now if nothing is locked, and if we have enough data, ++ * we can start a write request ++ */ ++ /* since handle_stripe can be called at any time we need to handle the ++ * case where a compute block operation has been submitted and then a ++ * subsequent call wants to start a write request. raid5_run_ops only ++ * handles the case where compute block and postxor are requested ++ * simultaneously. If this is not the case then new writes need to be ++ * held off until the compute completes. 
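
The rmw/rcw tally in handle_issuing_new_write_requests5() weighs how many reads each strategy needs: a missing block costs 1 if it can come from an in-sync device and a prohibitive 2*disks if it cannot, and whichever total is lower picks read-modify-write or reconstruct-write. A condensed version of that costing (per-disk state boiled down to three flags; fully overwritten blocks, which the kernel exempts from the rcw count, are ignored here):

#include <stdio.h>

struct dev_state { int towrite, uptodate, insync; };

/* Count reads needed for rmw vs rcw over 'disks' devices; pd is parity. */
static void tally(struct dev_state *d, int disks, int pd,
		  int *rmw, int *rcw)
{
	int i;
	*rmw = *rcw = 0;
	for (i = 0; i < disks; i++) {
		/* rmw needs old data of written blocks plus old parity */
		if ((d[i].towrite || i == pd) && !d[i].uptodate)
			*rmw += d[i].insync ? 1 : 2 * disks;
		/* rcw needs every non-written, non-parity block */
		if (!d[i].towrite && i != pd && !d[i].uptodate)
			*rcw += d[i].insync ? 1 : 2 * disks;
	}
}

int main(void)
{
	/* 5 disks, parity on 4, one small write to disk 0 */
	struct dev_state d[5] = {
		{1, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}, {0, 0, 1}
	};
	int rmw, rcw;
	tally(d, 5, 4, &rmw, &rcw);
	printf("rmw=%d rcw=%d -> %s\n", rmw, rcw,
	       rmw < rcw ? "read-modify-write" : "reconstruct-write");
	return 0;
}
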
++ */ ++ if ((s->req_compute || ++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) && ++ (s->locked == 0 && (rcw == 0 || rmw == 0) && ++ !test_bit(STRIPE_BIT_DELAY, &sh->state))) ++ s->locked += handle_write_operations5(sh, rcw == 0, 0); ++} ++ ++static void handle_issuing_new_write_requests6(raid5_conf_t *conf, ++ struct stripe_head *sh, struct stripe_head_state *s, ++ struct r6_state *r6s, int disks) ++{ ++ int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; ++ int qd_idx = r6s->qd_idx; ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ /* Would I have to read this buffer for reconstruct_write */ ++ if (!test_bit(R5_OVERWRITE, &dev->flags) ++ && i != pd_idx && i != qd_idx ++ && (!test_bit(R5_LOCKED, &dev->flags) ++ ) && ++ !test_bit(R5_UPTODATE, &dev->flags)) { ++ if (test_bit(R5_Insync, &dev->flags)) rcw++; ++ else { ++ pr_debug("raid6: must_compute: " ++ "disk %d flags=%#lx\n", i, dev->flags); ++ must_compute++; ++ } ++ } ++ } ++ pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", ++ (unsigned long long)sh->sector, rcw, must_compute); ++ set_bit(STRIPE_HANDLE, &sh->state); ++ ++ if (rcw > 0) ++ /* want reconstruct write, but need to get some data */ ++ for (i = disks; i--; ) { ++ struct r5dev *dev = &sh->dev[i]; ++ if (!test_bit(R5_OVERWRITE, &dev->flags) ++ && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) ++ && !test_bit(R5_LOCKED, &dev->flags) && ++ !test_bit(R5_UPTODATE, &dev->flags) && ++ test_bit(R5_Insync, &dev->flags)) { ++ if ( ++ test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { ++ pr_debug("Read_old stripe %llu " ++ "block %d for Reconstruct\n", ++ (unsigned long long)sh->sector, i); ++ set_bit(R5_LOCKED, &dev->flags); ++ set_bit(R5_Wantread, &dev->flags); ++ s->locked++; + } else { ++ pr_debug("Request delayed stripe %llu " ++ "block %d for Reconstruct\n", ++ (unsigned long long)sh->sector, i); + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); + } + } + } +- /* now if nothing is locked, and if we have enough data, we can start a write request */ +- if (locked == 0 && (rcw == 0 ||rmw == 0) && ++ /* now if nothing is locked, and if we have enough data, we can start a ++ * write request ++ */ ++ if (s->locked == 0 && rcw == 0 && + !test_bit(STRIPE_BIT_DELAY, &sh->state)) { +- PRINTK("Computing parity...\n"); +- compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); ++ if (must_compute > 0) { ++ /* We have failed blocks and need to compute them */ ++ switch (s->failed) { ++ case 0: ++ BUG(); ++ case 1: ++ compute_block_1(sh, r6s->failed_num[0], 0); ++ break; ++ case 2: ++ compute_block_2(sh, r6s->failed_num[0], ++ r6s->failed_num[1]); ++ break; ++ default: /* This request should have been failed? 
*/ ++ BUG(); ++ } ++ } ++ ++ pr_debug("Computing parity for stripe %llu\n", ++ (unsigned long long)sh->sector); ++ compute_parity6(sh, RECONSTRUCT_WRITE); + /* now every locked buffer is ready to be written */ +- for (i=disks; i--;) ++ for (i = disks; i--; ) + if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { +- PRINTK("Writing block %d\n", i); +- locked++; ++ pr_debug("Writing stripe %llu block %d\n", ++ (unsigned long long)sh->sector, i); ++ s->locked++; + set_bit(R5_Wantwrite, &sh->dev[i].flags); +- if (!test_bit(R5_Insync, &sh->dev[i].flags) +- || (i==sh->pd_idx && failed == 0)) +- set_bit(STRIPE_INSYNC, &sh->state); + } ++ /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ ++ set_bit(STRIPE_INSYNC, &sh->state); ++ + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { + atomic_dec(&conf->preread_active_stripes); +- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) ++ if (atomic_read(&conf->preread_active_stripes) < ++ IO_THRESHOLD) + md_wakeup_thread(conf->mddev->thread); + } + } +- } ++} + +- /* maybe we need to check and possibly fix the parity for this stripe +- * Any reads will already have been scheduled, so we just see if enough data +- * is available +- */ +- if (syncing && locked == 0 && +- !test_bit(STRIPE_INSYNC, &sh->state)) { ++static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, ++ struct stripe_head_state *s, int disks) ++{ + set_bit(STRIPE_HANDLE, &sh->state); +- if (failed == 0) { +- BUG_ON(uptodate != disks); +- compute_parity5(sh, CHECK_PARITY); +- uptodate--; +- if (page_is_zero(sh->dev[sh->pd_idx].page)) { +- /* parity is correct (on disc, not in buffer any more) */ ++ /* Take one of the following actions: ++ * 1/ start a check parity operation if (uptodate == disks) ++ * 2/ finish a check parity operation and act on the result ++ * 3/ skip to the writeback section if we previously ++ * initiated a recovery operation ++ */ ++ if (s->failed == 0 && ++ !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { ++ if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { ++ BUG_ON(s->uptodate != disks); ++ clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); ++ sh->ops.count++; ++ s->uptodate--; ++ } else if ( ++ test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { ++ clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); ++ clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); ++ ++ if (sh->ops.zero_sum_result == 0) ++ /* parity is correct (on disc, ++ * not in buffer any more) ++ */ + set_bit(STRIPE_INSYNC, &sh->state); +- } else { +- conf->mddev->resync_mismatches += STRIPE_SECTORS; +- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) ++ else { ++ conf->mddev->resync_mismatches += ++ STRIPE_SECTORS; ++ if (test_bit( ++ MD_RECOVERY_CHECK, &conf->mddev->recovery)) + /* don't try to repair!! 
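
The check path in handle_parity_checks5() rests on the zero-sum property used by async_xor_zero_sum(): if parity really is the XOR of the data blocks, then XOR-ing all blocks, parity included, leaves zero in sh->ops.zero_sum_result. In scalar form, with toy values:

#include <stdio.h>

int main(void)
{
	unsigned char d[3] = { 0x12, 0x34, 0x56 };
	unsigned char p = d[0] ^ d[1] ^ d[2];
	unsigned char sum = p ^ d[0] ^ d[1] ^ d[2];

	printf("zero_sum_result=%u (0 means parity is consistent)\n", sum);

	d[1] ^= 0x01;			/* corrupt one data block */
	sum = p ^ d[0] ^ d[1] ^ d[2];
	printf("zero_sum_result=%u (non-zero: mismatch detected)\n", sum);
	return 0;
}
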
*/ + set_bit(STRIPE_INSYNC, &sh->state); + else { +- compute_block(sh, sh->pd_idx); +- uptodate++; ++ set_bit(STRIPE_OP_COMPUTE_BLK, ++ &sh->ops.pending); ++ set_bit(STRIPE_OP_MOD_REPAIR_PD, ++ &sh->ops.pending); ++ set_bit(R5_Wantcompute, ++ &sh->dev[sh->pd_idx].flags); ++ sh->ops.target = sh->pd_idx; ++ sh->ops.count++; ++ s->uptodate++; ++ } ++ } + } + } ++ ++ /* check if we can clear a parity disk reconstruct */ ++ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && ++ test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { ++ ++ clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); ++ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } +- if (!test_bit(STRIPE_INSYNC, &sh->state)) { ++ ++ /* Wait for check parity and compute block operations to complete ++ * before write-back ++ */ ++ if (!test_bit(STRIPE_INSYNC, &sh->state) && ++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && ++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { ++ struct r5dev *dev; + /* either failed parity check, or recovery is happening */ +- if (failed==0) +- failed_num = sh->pd_idx; +- dev = &sh->dev[failed_num]; ++ if (s->failed == 0) ++ s->failed_num = sh->pd_idx; ++ dev = &sh->dev[s->failed_num]; + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); +- BUG_ON(uptodate != disks); ++ BUG_ON(s->uptodate != disks); + + set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); ++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ + clear_bit(STRIPE_DEGRADED, &sh->state); +- locked++; ++ s->locked++; + set_bit(STRIPE_INSYNC, &sh->state); + } ++} ++ ++ ++static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, ++ struct stripe_head_state *s, ++ struct r6_state *r6s, struct page *tmp_page, ++ int disks) ++{ ++ int update_p = 0, update_q = 0; ++ struct r5dev *dev; ++ int pd_idx = sh->pd_idx; ++ int qd_idx = r6s->qd_idx; ++ ++ set_bit(STRIPE_HANDLE, &sh->state); ++ ++ BUG_ON(s->failed > 2); ++ BUG_ON(s->uptodate < disks); ++ /* Want to check and possibly repair P and Q. ++ * However there could be one 'failed' device, in which ++ * case we can only check one of them, possibly using the ++ * other to generate missing data ++ */ ++ ++ /* If !tmp_page, we cannot do the calculations, ++ * but as we have set STRIPE_HANDLE, we will soon be called ++ * by stripe_handle with a tmp_page - just wait until then. ++ */ ++ if (tmp_page) { ++ if (s->failed == r6s->q_failed) { ++ /* The only possible failed device holds 'Q', so it ++ * makes sense to check P (If anything else were failed, ++ * we would have used P to recreate it). 
++ */ ++ compute_block_1(sh, pd_idx, 1); ++ if (!page_is_zero(sh->dev[pd_idx].page)) { ++ compute_block_1(sh, pd_idx, 0); ++ update_p = 1; ++ } + } +- if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { +- md_done_sync(conf->mddev, STRIPE_SECTORS,1); +- clear_bit(STRIPE_SYNCING, &sh->state); ++ if (!r6s->q_failed && s->failed < 2) { ++ /* q is not failed, and we didn't use it to generate ++ * anything, so it makes sense to check it ++ */ ++ memcpy(page_address(tmp_page), ++ page_address(sh->dev[qd_idx].page), ++ STRIPE_SIZE); ++ compute_parity6(sh, UPDATE_PARITY); ++ if (memcmp(page_address(tmp_page), ++ page_address(sh->dev[qd_idx].page), ++ STRIPE_SIZE) != 0) { ++ clear_bit(STRIPE_INSYNC, &sh->state); ++ update_q = 1; ++ } ++ } ++ if (update_p || update_q) { ++ conf->mddev->resync_mismatches += STRIPE_SECTORS; ++ if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) ++ /* don't try to repair!! */ ++ update_p = update_q = 0; + } + +- /* If the failed drive is just a ReadError, then we might need to progress +- * the repair/check process ++ /* now write out any block on a failed drive, ++ * or P or Q if they need it + */ +- if (failed == 1 && ! conf->mddev->ro && +- test_bit(R5_ReadError, &sh->dev[failed_num].flags) +- && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) +- && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) +- ) { +- dev = &sh->dev[failed_num]; +- if (!test_bit(R5_ReWrite, &dev->flags)) { ++ ++ if (s->failed == 2) { ++ dev = &sh->dev[r6s->failed_num[1]]; ++ s->locked++; ++ set_bit(R5_LOCKED, &dev->flags); + set_bit(R5_Wantwrite, &dev->flags); +- set_bit(R5_ReWrite, &dev->flags); ++ } ++ if (s->failed >= 1) { ++ dev = &sh->dev[r6s->failed_num[0]]; ++ s->locked++; + set_bit(R5_LOCKED, &dev->flags); +- locked++; +- } else { +- /* let's read it back */ +- set_bit(R5_Wantread, &dev->flags); ++ set_bit(R5_Wantwrite, &dev->flags); ++ } ++ ++ if (update_p) { ++ dev = &sh->dev[pd_idx]; ++ s->locked++; + set_bit(R5_LOCKED, &dev->flags); +- locked++; ++ set_bit(R5_Wantwrite, &dev->flags); + } ++ if (update_q) { ++ dev = &sh->dev[qd_idx]; ++ s->locked++; ++ set_bit(R5_LOCKED, &dev->flags); ++ set_bit(R5_Wantwrite, &dev->flags); + } ++ clear_bit(STRIPE_DEGRADED, &sh->state); + +- if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { +- /* Need to write out all blocks after computing parity */ +- sh->disks = conf->raid_disks; +- sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); +- compute_parity5(sh, RECONSTRUCT_WRITE); +- for (i= conf->raid_disks; i--;) { +- set_bit(R5_LOCKED, &sh->dev[i].flags); +- locked++; +- set_bit(R5_Wantwrite, &sh->dev[i].flags); +- } +- clear_bit(STRIPE_EXPANDING, &sh->state); +- } else if (expanded) { +- clear_bit(STRIPE_EXPAND_READY, &sh->state); +- atomic_dec(&conf->reshape_stripes); +- wake_up(&conf->wait_for_overlap); +- md_done_sync(conf->mddev, STRIPE_SECTORS, 1); ++ set_bit(STRIPE_INSYNC, &sh->state); + } ++} ++ ++static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, ++ struct r6_state *r6s) ++{ ++ int i; + +- if (expanding && locked == 0) { + /* We have read all the blocks in this stripe and now we need to + * copy some of them into a target stripe for expand. 
+ */
++ struct dma_async_tx_descriptor *tx = NULL;
+ clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- for (i=0; i< sh->disks; i++)
+- if (i != sh->pd_idx) {
++ for (i = 0; i < sh->disks; i++)
++ if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) {
+ int dd_idx, pd_idx, j;
+ struct stripe_head *sh2;
+
+ sector_t bn = compute_blocknr(sh, i);
+ sector_t s = raid5_compute_sector(bn, conf->raid_disks,
+- conf->raid_disks-1,
+- &dd_idx, &pd_idx, conf);
+- sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1);
++ conf->raid_disks -
++ conf->max_degraded, &dd_idx,
++ &pd_idx, conf);
++ sh2 = get_active_stripe(conf, s, conf->raid_disks,
++ pd_idx, 1);
+ if (sh2 == NULL)
+ /* so far only the early blocks of this stripe
+ * have been requested. When later blocks
+ * get requested, we will try again
+ */
+ continue;
+- if(!test_bit(STRIPE_EXPANDING, &sh2->state) ||
++ if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+ test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
+ /* must have already done this block */
+ release_stripe(sh2);
+ continue;
+ }
+- memcpy(page_address(sh2->dev[dd_idx].page),
+- page_address(sh->dev[i].page),
+- STRIPE_SIZE);
++
++ /* place all the copies on one channel */
++ tx = async_memcpy(sh2->dev[dd_idx].page,
++ sh->dev[i].page, 0, 0, STRIPE_SIZE,
++ ASYNC_TX_DEP_ACK, tx, NULL, NULL);
++
+ set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
+ set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
+- for (j=0; j<conf->raid_disks; j++)
++ for (j = 0; j < conf->raid_disks; j++)
+ if (j != sh2->pd_idx &&
++ (r6s && j != r6s->qd_idx) &&
+ !test_bit(R5_Expanded, &sh2->dev[j].flags))
+ break;
+ if (j == conf->raid_disks) {
+@@ -1807,153 +2575,91 @@
+ set_bit(STRIPE_HANDLE, &sh2->state);
+ }
+ release_stripe(sh2);
+- }
+- }
+-
+- spin_unlock(&sh->lock);
+-
+- while ((bi=return_bi)) {
+- int bytes = bi->bi_size;
+-
+- return_bi = bi->bi_next;
+- bi->bi_next = NULL;
+- bi->bi_size = 0;
+- bi->bi_end_io(bi, bytes,
+- test_bit(BIO_UPTODATE, &bi->bi_flags)
+- ? 
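
/* Sketch of the dependency-chained copy pattern introduced above: each
 * async_memcpy() is given the previous descriptor as its dependency so
 * all copies land on one channel in order, and only the tail of the
 * chain is acked and waited on.  Modeled here with a trivial
 * descriptor list; the real API is the kernel's async_tx layer. */
#include <stdio.h>
#include <string.h>

struct tx_desc { struct tx_desc *dep; int id; };

static struct tx_desc pool[8];
static int next_id;

static struct tx_desc *fake_async_memcpy(void *dst, const void *src,
					 size_t len, struct tx_desc *dep)
{
	struct tx_desc *tx = &pool[next_id];
	tx->dep = dep;         /* runs only after 'dep' completes */
	tx->id = next_id++;
	memcpy(dst, src, len); /* stand-in for the DMA engine */
	return tx;
}

static void fake_wait(struct tx_desc *tx)
{
	/* waiting on the tail implicitly waits on the whole chain */
	printf("waited on tx %d (dep %d)\n",
	       tx->id, tx->dep ? tx->dep->id : -1);
}

int main(void)
{
	char src[4][16] = { "blk0", "blk1", "blk2", "blk3" }, dst[4][16];
	struct tx_desc *tx = NULL;

	for (int i = 0; i < 4; i++)
		tx = fake_async_memcpy(dst[i], src[i], 16, tx);
	fake_wait(tx);
	return 0;
}
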
0 : -EIO); +- } +- for (i=disks; i-- ;) { +- int rw; +- struct bio *bi; +- mdk_rdev_t *rdev; +- if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) +- rw = WRITE; +- else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) +- rw = READ; +- else +- continue; +- +- bi = &sh->dev[i].req; +- +- bi->bi_rw = rw; +- if (rw == WRITE) +- bi->bi_end_io = raid5_end_write_request; +- else +- bi->bi_end_io = raid5_end_read_request; +- +- rcu_read_lock(); +- rdev = rcu_dereference(conf->disks[i].rdev); +- if (rdev && test_bit(Faulty, &rdev->flags)) +- rdev = NULL; +- if (rdev) +- atomic_inc(&rdev->nr_pending); +- rcu_read_unlock(); +- +- if (rdev) { +- if (syncing || expanding || expanded) +- md_sync_acct(rdev->bdev, STRIPE_SECTORS); + +- bi->bi_bdev = rdev->bdev; +- PRINTK("for %llu schedule op %ld on disc %d\n", +- (unsigned long long)sh->sector, bi->bi_rw, i); +- atomic_inc(&sh->count); +- bi->bi_sector = sh->sector + rdev->data_offset; +- bi->bi_flags = 1 << BIO_UPTODATE; +- bi->bi_vcnt = 1; +- bi->bi_max_vecs = 1; +- bi->bi_idx = 0; +- bi->bi_io_vec = &sh->dev[i].vec; +- bi->bi_io_vec[0].bv_len = STRIPE_SIZE; +- bi->bi_io_vec[0].bv_offset = 0; +- bi->bi_size = STRIPE_SIZE; +- bi->bi_next = NULL; +- if (rw == WRITE && +- test_bit(R5_ReWrite, &sh->dev[i].flags)) +- atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); +- generic_make_request(bi); +- } else { +- if (rw == WRITE) +- set_bit(STRIPE_DEGRADED, &sh->state); +- PRINTK("skip op %ld on disc %d for sector %llu\n", +- bi->bi_rw, i, (unsigned long long)sh->sector); +- clear_bit(R5_LOCKED, &sh->dev[i].flags); +- set_bit(STRIPE_HANDLE, &sh->state); ++ /* done submitting copies, wait for them to complete */ ++ if (i + 1 >= sh->disks) { ++ async_tx_ack(tx); ++ dma_wait_for_async_tx(tx); + } + } + } + +-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ++/* ++ * handle_stripe - do things to a stripe. ++ * ++ * We lock the stripe and then examine the state of various bits ++ * to see what needs to be done. ++ * Possible results: ++ * return some read request which now have data ++ * return some write requests which are safely on disc ++ * schedule a read on some buffers ++ * schedule a write of some buffers ++ * return confirmation of parity correctness ++ * ++ * buffers are taken off read_list or write_list, and bh_cache buffers ++ * get BH_Lock set before the stripe lock is released. 
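
/* The block removed above was raid5's per-device bio submission loop:
 * for each disk it picked WRITE or READ from the R5_Want* flags and
 * skipped devices with nothing queued.  A compact stand-in for that
 * selection, with made-up flag values: */
#include <stdio.h>

#define R5_Wantwrite (1 << 0)
#define R5_Wantread  (1 << 1)

static const char *pick_op(unsigned long *flags)
{
	if (*flags & R5_Wantwrite) { *flags &= ~R5_Wantwrite; return "WRITE"; }
	if (*flags & R5_Wantread)  { *flags &= ~R5_Wantread;  return "READ";  }
	return NULL; /* nothing queued for this disk */
}

int main(void)
{
	unsigned long devs[3] = { R5_Wantwrite, 0, R5_Wantread };
	for (int i = 0; i < 3; i++) {
		const char *op = pick_op(&devs[i]);
		printf("disk %d: %s\n", i, op ? op : "skip");
	}
	return 0;
}
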
++ * ++ */ ++ ++static void handle_stripe5(struct stripe_head *sh) + { +- raid6_conf_t *conf = sh->raid_conf; +- int disks = sh->disks; +- struct bio *return_bi= NULL; +- struct bio *bi; +- int i; +- int syncing, expanding, expanded; +- int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; +- int non_overwrite = 0; +- int failed_num[2] = {0, 0}; +- struct r5dev *dev, *pdev, *qdev; +- int pd_idx = sh->pd_idx; +- int qd_idx = raid6_next_disk(pd_idx, disks); +- int p_failed, q_failed; ++ raid5_conf_t *conf = sh->raid_conf; ++ int disks = sh->disks, i; ++ struct bio *return_bi = NULL, *bi; ++ struct stripe_head_state s; ++ struct r5dev *dev; ++ unsigned long pending = 0; + +- PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n", +- (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count), +- pd_idx, qd_idx); ++ memset(&s, 0, sizeof(s)); ++ pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " ++ "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, ++ atomic_read(&sh->count), sh->pd_idx, ++ sh->ops.pending, sh->ops.ack, sh->ops.complete); + + spin_lock(&sh->lock); + clear_bit(STRIPE_HANDLE, &sh->state); + clear_bit(STRIPE_DELAYED, &sh->state); + +- syncing = test_bit(STRIPE_SYNCING, &sh->state); +- expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); +- expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); ++ s.syncing = test_bit(STRIPE_SYNCING, &sh->state); ++ s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); ++ s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + /* Now to look around and see what can be done */ + + rcu_read_lock(); + for (i=disks; i--; ) { + mdk_rdev_t *rdev; +- dev = &sh->dev[i]; ++ struct r5dev *dev = &sh->dev[i]; + clear_bit(R5_Insync, &dev->flags); + +- PRINTK("check %d: state 0x%lx read %p write %p written %p\n", +- i, dev->flags, dev->toread, dev->towrite, dev->written); +- /* maybe we can reply to a read */ +- if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { +- struct bio *rbi, *rbi2; +- PRINTK("Return read for disc %d\n", i); +- spin_lock_irq(&conf->device_lock); +- rbi = dev->toread; +- dev->toread = NULL; +- if (test_and_clear_bit(R5_Overlap, &dev->flags)) +- wake_up(&conf->wait_for_overlap); +- spin_unlock_irq(&conf->device_lock); +- while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { +- copy_data(0, rbi, dev->page, dev->sector); +- rbi2 = r5_next_bio(rbi, dev->sector); +- spin_lock_irq(&conf->device_lock); +- if (--rbi->bi_phys_segments == 0) { +- rbi->bi_next = return_bi; +- return_bi = rbi; +- } +- spin_unlock_irq(&conf->device_lock); +- rbi = rbi2; +- } +- } +- +- /* now count some things */ +- if (test_bit(R5_LOCKED, &dev->flags)) locked++; +- if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; ++ pr_debug("check %d: state 0x%lx toread %p read %p write %p " ++ "written %p\n", i, dev->flags, dev->toread, dev->read, ++ dev->towrite, dev->written); + ++ /* maybe we can request a biofill operation ++ * ++ * new wantfill requests are only permitted while ++ * STRIPE_OP_BIOFILL is clear ++ */ ++ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && ++ !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) ++ set_bit(R5_Wantfill, &dev->flags); + +- if (dev->toread) to_read++; ++ /* now count some things */ ++ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; ++ if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; ++ if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; ++ ++ if (test_bit(R5_Wantfill, &dev->flags)) ++ s.to_fill++; ++ else if (dev->toread) ++ 
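
/* The handle_stripe5() rewrite above replaces a pile of local counters
 * with one 'stripe_head_state' summary filled in a single pass over
 * the devices (locked, uptodate, compute, to_fill, ...).  Minimal
 * stand-in; field and flag names are abbreviated, not the kernel's: */
#include <stdio.h>

#define F_LOCKED   (1 << 0)
#define F_UPTODATE (1 << 1)
#define F_WANTFILL (1 << 2)

struct state { int locked, uptodate, to_fill; };

int main(void)
{
	unsigned long dev_flags[4] = {
		F_UPTODATE, F_LOCKED, F_UPTODATE | F_WANTFILL, 0
	};
	struct state s = { 0 };

	for (int i = 4; i--; ) {       /* same reverse walk as the driver */
		if (dev_flags[i] & F_LOCKED)   s.locked++;
		if (dev_flags[i] & F_UPTODATE) s.uptodate++;
		if (dev_flags[i] & F_WANTFILL) s.to_fill++;
	}
	printf("locked=%d uptodate=%d to_fill=%d\n",
	       s.locked, s.uptodate, s.to_fill);
	return 0;
}
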
s.to_read++; + if (dev->towrite) { +- to_write++; ++ s.to_write++; + if (!test_bit(R5_OVERWRITE, &dev->flags)) +- non_overwrite++; ++ s.non_overwrite++; + } +- if (dev->written) written++; ++ if (dev->written) ++ s.written++; + rdev = rcu_dereference(conf->disks[i].rdev); + if (!rdev || !test_bit(In_sync, &rdev->flags)) { + /* The ReadError flag will just be confusing now */ +@@ -1962,376 +2668,361 @@ + } + if (!rdev || !test_bit(In_sync, &rdev->flags) + || test_bit(R5_ReadError, &dev->flags)) { +- if ( failed < 2 ) +- failed_num[failed] = i; +- failed++; ++ s.failed++; ++ s.failed_num = i; + } else + set_bit(R5_Insync, &dev->flags); + } + rcu_read_unlock(); +- PRINTK("locked=%d uptodate=%d to_read=%d" +- " to_write=%d failed=%d failed_num=%d,%d\n", +- locked, uptodate, to_read, to_write, failed, +- failed_num[0], failed_num[1]); +- /* check if the array has lost >2 devices and, if so, some requests might +- * need to be failed +- */ +- if (failed > 2 && to_read+to_write+written) { +- for (i=disks; i--; ) { +- int bitmap_end = 0; +- +- if (test_bit(R5_ReadError, &sh->dev[i].flags)) { +- mdk_rdev_t *rdev; +- rcu_read_lock(); +- rdev = rcu_dereference(conf->disks[i].rdev); +- if (rdev && test_bit(In_sync, &rdev->flags)) +- /* multiple read failures in one stripe */ +- md_error(conf->mddev, rdev); +- rcu_read_unlock(); +- } +- +- spin_lock_irq(&conf->device_lock); +- /* fail all writes first */ +- bi = sh->dev[i].towrite; +- sh->dev[i].towrite = NULL; +- if (bi) { to_write--; bitmap_end = 1; } +- +- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) +- wake_up(&conf->wait_for_overlap); + +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ +- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); +- clear_bit(BIO_UPTODATE, &bi->bi_flags); +- if (--bi->bi_phys_segments == 0) { +- md_write_end(conf->mddev); +- bi->bi_next = return_bi; +- return_bi = bi; +- } +- bi = nextbi; +- } +- /* and fail all 'written' */ +- bi = sh->dev[i].written; +- sh->dev[i].written = NULL; +- if (bi) bitmap_end = 1; +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { +- struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); +- clear_bit(BIO_UPTODATE, &bi->bi_flags); +- if (--bi->bi_phys_segments == 0) { +- md_write_end(conf->mddev); +- bi->bi_next = return_bi; +- return_bi = bi; +- } +- bi = bi2; +- } ++ if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) ++ sh->ops.count++; + +- /* fail any reads if this device is non-operational */ +- if (!test_bit(R5_Insync, &sh->dev[i].flags) || +- test_bit(R5_ReadError, &sh->dev[i].flags)) { +- bi = sh->dev[i].toread; +- sh->dev[i].toread = NULL; +- if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) +- wake_up(&conf->wait_for_overlap); +- if (bi) to_read--; +- while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ +- struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); +- clear_bit(BIO_UPTODATE, &bi->bi_flags); +- if (--bi->bi_phys_segments == 0) { +- bi->bi_next = return_bi; +- return_bi = bi; +- } +- bi = nextbi; +- } +- } +- spin_unlock_irq(&conf->device_lock); +- if (bitmap_end) +- bitmap_endwrite(conf->mddev->bitmap, sh->sector, +- STRIPE_SECTORS, 0, 0); +- } +- } +- if (failed > 2 && syncing) { ++ pr_debug("locked=%d uptodate=%d to_read=%d" ++ " to_write=%d failed=%d failed_num=%d\n", ++ s.locked, s.uptodate, s.to_read, s.to_write, ++ s.failed, s.failed_num); ++ /* check if the array has lost two devices and, if so, some requests might ++ * need to be failed ++ */ ++ if (s.failed > 1 && 
s.to_read+s.to_write+s.written) ++ handle_requests_to_failed_array(conf, sh, &s, disks, ++ &return_bi); ++ if (s.failed > 1 && s.syncing) { + md_done_sync(conf->mddev, STRIPE_SECTORS,0); + clear_bit(STRIPE_SYNCING, &sh->state); +- syncing = 0; ++ s.syncing = 0; + } + +- /* +- * might be able to return some write requests if the parity blocks +- * are safe, or on a failed drive +- */ +- pdev = &sh->dev[pd_idx]; +- p_failed = (failed >= 1 && failed_num[0] == pd_idx) +- || (failed >= 2 && failed_num[1] == pd_idx); +- qdev = &sh->dev[qd_idx]; +- q_failed = (failed >= 1 && failed_num[0] == qd_idx) +- || (failed >= 2 && failed_num[1] == qd_idx); +- +- if ( written && +- ( p_failed || ((test_bit(R5_Insync, &pdev->flags) +- && !test_bit(R5_LOCKED, &pdev->flags) +- && test_bit(R5_UPTODATE, &pdev->flags))) ) && +- ( q_failed || ((test_bit(R5_Insync, &qdev->flags) +- && !test_bit(R5_LOCKED, &qdev->flags) +- && test_bit(R5_UPTODATE, &qdev->flags))) ) ) { +- /* any written block on an uptodate or failed drive can be +- * returned. Note that if we 'wrote' to a failed drive, +- * it will be UPTODATE, but never LOCKED, so we don't need +- * to test 'failed' directly. ++ /* might be able to return some write requests if the parity block ++ * is safe, or on a failed drive + */ +- for (i=disks; i--; ) +- if (sh->dev[i].written) { +- dev = &sh->dev[i]; +- if (!test_bit(R5_LOCKED, &dev->flags) && +- test_bit(R5_UPTODATE, &dev->flags) ) { +- /* We can return any write requests */ +- int bitmap_end = 0; +- struct bio *wbi, *wbi2; +- PRINTK("Return write for stripe %llu disc %d\n", +- (unsigned long long)sh->sector, i); +- spin_lock_irq(&conf->device_lock); +- wbi = dev->written; +- dev->written = NULL; +- while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { +- wbi2 = r5_next_bio(wbi, dev->sector); +- if (--wbi->bi_phys_segments == 0) { +- md_write_end(conf->mddev); +- wbi->bi_next = return_bi; +- return_bi = wbi; +- } +- wbi = wbi2; +- } +- if (dev->towrite == NULL) +- bitmap_end = 1; +- spin_unlock_irq(&conf->device_lock); +- if (bitmap_end) +- bitmap_endwrite(conf->mddev->bitmap, sh->sector, +- STRIPE_SECTORS, +- !test_bit(STRIPE_DEGRADED, &sh->state), 0); +- } +- } +- } ++ dev = &sh->dev[sh->pd_idx]; ++ if ( s.written && ++ ((test_bit(R5_Insync, &dev->flags) && ++ !test_bit(R5_LOCKED, &dev->flags) && ++ test_bit(R5_UPTODATE, &dev->flags)) || ++ (s.failed == 1 && s.failed_num == sh->pd_idx))) ++ handle_completed_write_requests(conf, sh, disks, &return_bi); + + /* Now we might consider reading some blocks, either to check/generate + * parity, or to satisfy requests + * or to load a block that is being partially written. 
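
/* The condition above gates returning written blocks to their bios:
 * the parity device must be safely up to date (Insync, not Locked,
 * Uptodate) or itself be the single failed device.  Expressed as a
 * tiny predicate over assumed field names: */
#include <stdbool.h>
#include <stdio.h>

struct dev { bool insync, locked, uptodate; };

static bool writes_can_complete(struct dev *parity, int failed,
				int failed_num, int pd_idx)
{
	return (parity->insync && !parity->locked && parity->uptodate) ||
	       (failed == 1 && failed_num == pd_idx);
}

int main(void)
{
	struct dev p = { .insync = true, .locked = false, .uptodate = true };
	printf("%d\n", writes_can_complete(&p, 0, -1, 2)); /* 1: safe */
	p.locked = true;
	printf("%d\n", writes_can_complete(&p, 0, -1, 2)); /* 0: in flight */
	return 0;
}
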
+ */ +- if (to_read || non_overwrite || (to_write && failed) || +- (syncing && (uptodate < disks)) || expanding) { +- for (i=disks; i--;) { +- dev = &sh->dev[i]; +- if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && +- (dev->toread || +- (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || +- syncing || +- expanding || +- (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) || +- (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write)) +- ) +- ) { +- /* we would like to get this block, possibly +- * by computing it, but we might not be able to ++ if (s.to_read || s.non_overwrite || ++ (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding || ++ test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) ++ handle_issuing_new_read_requests5(sh, &s, disks); ++ ++ /* Now we check to see if any write operations have recently ++ * completed + */ +- if (uptodate == disks-1) { +- PRINTK("Computing stripe %llu block %d\n", +- (unsigned long long)sh->sector, i); +- compute_block_1(sh, i, 0); +- uptodate++; +- } else if ( uptodate == disks-2 && failed >= 2 ) { +- /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */ +- int other; +- for (other=disks; other--;) { +- if ( other == i ) +- continue; +- if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) ) +- break; +- } +- BUG_ON(other < 0); +- PRINTK("Computing stripe %llu blocks %d,%d\n", +- (unsigned long long)sh->sector, i, other); +- compute_block_2(sh, i, other); +- uptodate += 2; +- } else if (test_bit(R5_Insync, &dev->flags)) { +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantread, &dev->flags); +- locked++; +- PRINTK("Reading block %d (sync=%d)\n", +- i, syncing); +- } +- } +- } +- set_bit(STRIPE_HANDLE, &sh->state); ++ ++ /* leave prexor set until postxor is done, allows us to distinguish ++ * a rmw from a rcw during biodrain ++ */ ++ if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && ++ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { ++ ++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); ++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); ++ clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); ++ ++ for (i = disks; i--; ) ++ clear_bit(R5_Wantprexor, &sh->dev[i].flags); + } + +- /* now to consider writing and what else, if anything should be read */ +- if (to_write) { +- int rcw=0, must_compute=0; +- for (i=disks ; i--;) { ++ /* if only POSTXOR is set then this is an 'expand' postxor */ ++ if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) && ++ test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { ++ ++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); ++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack); ++ clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); ++ ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); ++ ++ /* All the 'written' buffers and the parity block are ready to ++ * be written back to disk ++ */ ++ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); ++ for (i = disks; i--; ) { + dev = &sh->dev[i]; +- /* Would I have to read this buffer for reconstruct_write */ +- if (!test_bit(R5_OVERWRITE, &dev->flags) +- && i != pd_idx && i != qd_idx +- && (!test_bit(R5_LOCKED, &dev->flags) +- ) && +- !test_bit(R5_UPTODATE, &dev->flags)) { +- if (test_bit(R5_Insync, &dev->flags)) rcw++; +- else { +- PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags); +- must_compute++; ++ if (test_bit(R5_LOCKED, &dev->flags) && ++ (i == sh->pd_idx || dev->written)) { ++ pr_debug("Writing 
block %d\n", i); ++ set_bit(R5_Wantwrite, &dev->flags); ++ if (!test_and_set_bit( ++ STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ if (!test_bit(R5_Insync, &dev->flags) || ++ (i == sh->pd_idx && s.failed == 0)) ++ set_bit(STRIPE_INSYNC, &sh->state); ++ } + } ++ if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { ++ atomic_dec(&conf->preread_active_stripes); ++ if (atomic_read(&conf->preread_active_stripes) < ++ IO_THRESHOLD) ++ md_wakeup_thread(conf->mddev->thread); + } + } +- PRINTK("for sector %llu, rcw=%d, must_compute=%d\n", +- (unsigned long long)sh->sector, rcw, must_compute); +- set_bit(STRIPE_HANDLE, &sh->state); + +- if (rcw > 0) +- /* want reconstruct write, but need to get some data */ +- for (i=disks; i--;) { +- dev = &sh->dev[i]; +- if (!test_bit(R5_OVERWRITE, &dev->flags) +- && !(failed == 0 && (i == pd_idx || i == qd_idx)) +- && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && +- test_bit(R5_Insync, &dev->flags)) { +- if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) +- { +- PRINTK("Read_old stripe %llu block %d for Reconstruct\n", +- (unsigned long long)sh->sector, i); ++ /* Now to consider new write requests and what else, if anything ++ * should be read. We do not handle new writes when: ++ * 1/ A 'write' operation (copy+xor) is already in flight. ++ * 2/ A 'check' operation is in flight, as it may clobber the parity ++ * block. ++ */ ++ if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && ++ !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) ++ handle_issuing_new_write_requests5(conf, sh, &s, disks); ++ ++ /* maybe we need to check and possibly fix the parity for this stripe ++ * Any reads will already have been scheduled, so we just see if enough ++ * data is available. The parity check is held off while parity ++ * dependent operations are in flight. 
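
/* The prexor/postxor pairing retired above implements a
 * read-modify-write: prexor XORs the old data out of the parity, the
 * postxor XORs the new data in.  A one-byte demonstration that this
 * equals recomputing parity from scratch (reconstruct-write): */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t d[3] = { 0x11, 0x22, 0x33 };
	uint8_t parity = d[0] ^ d[1] ^ d[2];
	uint8_t new_d1 = 0xAA;

	/* rmw path: prexor removes old d[1], postxor adds the new value */
	uint8_t rmw = parity ^ d[1] ^ new_d1;

	/* rcw path: recompute from all (new) data blocks */
	d[1] = new_d1;
	uint8_t rcw = d[0] ^ d[1] ^ d[2];

	printf("rmw=%#x rcw=%#x %s\n", rmw, rcw,
	       rmw == rcw ? "(match)" : "(bug)");
	return 0;
}
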
++ */ ++ if ((s.syncing && s.locked == 0 && ++ !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && ++ !test_bit(STRIPE_INSYNC, &sh->state)) || ++ test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || ++ test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) ++ handle_parity_checks5(conf, sh, &s, disks); ++ ++ if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { ++ md_done_sync(conf->mddev, STRIPE_SECTORS,1); ++ clear_bit(STRIPE_SYNCING, &sh->state); ++ } ++ ++ /* If the failed drive is just a ReadError, then we might need to progress ++ * the repair/check process ++ */ ++ if (s.failed == 1 && !conf->mddev->ro && ++ test_bit(R5_ReadError, &sh->dev[s.failed_num].flags) ++ && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags) ++ && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags) ++ ) { ++ dev = &sh->dev[s.failed_num]; ++ if (!test_bit(R5_ReWrite, &dev->flags)) { ++ set_bit(R5_Wantwrite, &dev->flags); ++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ set_bit(R5_ReWrite, &dev->flags); + set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantread, &dev->flags); +- locked++; ++ s.locked++; + } else { +- PRINTK("Request delayed stripe %llu block %d for Reconstruct\n", +- (unsigned long long)sh->sector, i); +- set_bit(STRIPE_DELAYED, &sh->state); +- set_bit(STRIPE_HANDLE, &sh->state); ++ /* let's read it back */ ++ set_bit(R5_Wantread, &dev->flags); ++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; ++ set_bit(R5_LOCKED, &dev->flags); ++ s.locked++; + } + } ++ ++ /* Finish postxor operations initiated by the expansion ++ * process ++ */ ++ if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) && ++ !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) { ++ ++ clear_bit(STRIPE_EXPANDING, &sh->state); ++ ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); ++ clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); ++ ++ for (i = conf->raid_disks; i--; ) { ++ set_bit(R5_Wantwrite, &sh->dev[i].flags); ++ if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) ++ sh->ops.count++; + } +- /* now if nothing is locked, and if we have enough data, we can start a write request */ +- if (locked == 0 && rcw == 0 && +- !test_bit(STRIPE_BIT_DELAY, &sh->state)) { +- if ( must_compute > 0 ) { +- /* We have failed blocks and need to compute them */ +- switch ( failed ) { +- case 0: BUG(); +- case 1: compute_block_1(sh, failed_num[0], 0); break; +- case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break; +- default: BUG(); /* This request should have been failed? 
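
/* Several hunks above bump sh->ops.count only when test_and_set_bit()
 * reports the bit was previously clear, so one in-flight I/O op is
 * accounted once no matter how many devices request it.  Userspace
 * stand-in for that idiom: */
#include <stdio.h>

static int test_and_set_bit(int b, unsigned long *w)
{
	int old = (*w >> b) & 1;
	*w |= 1UL << b;
	return old;
}

int main(void)
{
	unsigned long pending = 0;
	int count = 0;

	for (int dev = 0; dev < 5; dev++)    /* five devices want I/O */
		if (!test_and_set_bit(0, &pending))
			count++;             /* ...but the op is queued once */
	printf("ops.count = %d\n", count);   /* prints 1 */
	return 0;
}
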
*/ + } ++ ++ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && ++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { ++ /* Need to write out all blocks after computing parity */ ++ sh->disks = conf->raid_disks; ++ sh->pd_idx = stripe_to_pdidx(sh->sector, conf, ++ conf->raid_disks); ++ s.locked += handle_write_operations5(sh, 0, 1); ++ } else if (s.expanded && ++ !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { ++ clear_bit(STRIPE_EXPAND_READY, &sh->state); ++ atomic_dec(&conf->reshape_stripes); ++ wake_up(&conf->wait_for_overlap); ++ md_done_sync(conf->mddev, STRIPE_SECTORS, 1); + } + +- PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector); +- compute_parity6(sh, RECONSTRUCT_WRITE); +- /* now every locked buffer is ready to be written */ +- for (i=disks; i--;) +- if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { +- PRINTK("Writing stripe %llu block %d\n", +- (unsigned long long)sh->sector, i); +- locked++; +- set_bit(R5_Wantwrite, &sh->dev[i].flags); ++ if (s.expanding && s.locked == 0) ++ handle_stripe_expansion(conf, sh, NULL); ++ ++ if (sh->ops.count) ++ pending = get_stripe_work(sh); ++ ++ spin_unlock(&sh->lock); ++ ++ if (pending) ++ raid5_run_ops(sh, pending); ++ ++ while ((bi=return_bi)) { ++ int bytes = bi->bi_size; ++ ++ return_bi = bi->bi_next; ++ bi->bi_next = NULL; ++ bi->bi_size = 0; ++ bi->bi_end_io(bi, bytes, ++ test_bit(BIO_UPTODATE, &bi->bi_flags) ++ ? 0 : -EIO); + } +- /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ +- set_bit(STRIPE_INSYNC, &sh->state); ++} + +- if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { +- atomic_dec(&conf->preread_active_stripes); +- if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) +- md_wakeup_thread(conf->mddev->thread); ++static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) ++{ ++ raid6_conf_t *conf = sh->raid_conf; ++ int disks = sh->disks; ++ struct bio *return_bi = NULL; ++ struct bio *bi; ++ int i, pd_idx = sh->pd_idx; ++ struct stripe_head_state s; ++ struct r6_state r6s; ++ struct r5dev *dev, *pdev, *qdev; ++ ++ r6s.qd_idx = raid6_next_disk(pd_idx, disks); ++ pr_debug("handling stripe %llu, state=%#lx cnt=%d, " ++ "pd_idx=%d, qd_idx=%d\n", ++ (unsigned long long)sh->sector, sh->state, ++ atomic_read(&sh->count), pd_idx, r6s.qd_idx); ++ memset(&s, 0, sizeof(s)); ++ ++ spin_lock(&sh->lock); ++ clear_bit(STRIPE_HANDLE, &sh->state); ++ clear_bit(STRIPE_DELAYED, &sh->state); ++ ++ s.syncing = test_bit(STRIPE_SYNCING, &sh->state); ++ s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); ++ s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); ++ /* Now to look around and see what can be done */ ++ ++ rcu_read_lock(); ++ for (i=disks; i--; ) { ++ mdk_rdev_t *rdev; ++ dev = &sh->dev[i]; ++ clear_bit(R5_Insync, &dev->flags); ++ ++ pr_debug("check %d: state 0x%lx read %p write %p written %p\n", ++ i, dev->flags, dev->toread, dev->towrite, dev->written); ++ /* maybe we can reply to a read */ ++ if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { ++ struct bio *rbi, *rbi2; ++ pr_debug("Return read for disc %d\n", i); ++ spin_lock_irq(&conf->device_lock); ++ rbi = dev->toread; ++ dev->toread = NULL; ++ if (test_and_clear_bit(R5_Overlap, &dev->flags)) ++ wake_up(&conf->wait_for_overlap); ++ spin_unlock_irq(&conf->device_lock); ++ while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { ++ copy_data(0, rbi, dev->page, dev->sector); ++ rbi2 = r5_next_bio(rbi, dev->sector); ++ spin_lock_irq(&conf->device_lock); ++ if (--rbi->bi_phys_segments == 0) { 
++ rbi->bi_next = return_bi; ++ return_bi = rbi; + } ++ spin_unlock_irq(&conf->device_lock); ++ rbi = rbi2; + } + } + +- /* maybe we need to check and possibly fix the parity for this stripe +- * Any reads will already have been scheduled, so we just see if enough data +- * is available +- */ +- if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { +- int update_p = 0, update_q = 0; +- struct r5dev *dev; +- +- set_bit(STRIPE_HANDLE, &sh->state); ++ /* now count some things */ ++ if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; ++ if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; + +- BUG_ON(failed>2); +- BUG_ON(uptodate < disks); +- /* Want to check and possibly repair P and Q. +- * However there could be one 'failed' device, in which +- * case we can only check one of them, possibly using the +- * other to generate missing data +- */ + +- /* If !tmp_page, we cannot do the calculations, +- * but as we have set STRIPE_HANDLE, we will soon be called +- * by stripe_handle with a tmp_page - just wait until then. +- */ +- if (tmp_page) { +- if (failed == q_failed) { +- /* The only possible failed device holds 'Q', so it makes +- * sense to check P (If anything else were failed, we would +- * have used P to recreate it). +- */ +- compute_block_1(sh, pd_idx, 1); +- if (!page_is_zero(sh->dev[pd_idx].page)) { +- compute_block_1(sh,pd_idx,0); +- update_p = 1; +- } ++ if (dev->toread) ++ s.to_read++; ++ if (dev->towrite) { ++ s.to_write++; ++ if (!test_bit(R5_OVERWRITE, &dev->flags)) ++ s.non_overwrite++; + } +- if (!q_failed && failed < 2) { +- /* q is not failed, and we didn't use it to generate +- * anything, so it makes sense to check it +- */ +- memcpy(page_address(tmp_page), +- page_address(sh->dev[qd_idx].page), +- STRIPE_SIZE); +- compute_parity6(sh, UPDATE_PARITY); +- if (memcmp(page_address(tmp_page), +- page_address(sh->dev[qd_idx].page), +- STRIPE_SIZE)!= 0) { +- clear_bit(STRIPE_INSYNC, &sh->state); +- update_q = 1; ++ if (dev->written) ++ s.written++; ++ rdev = rcu_dereference(conf->disks[i].rdev); ++ if (!rdev || !test_bit(In_sync, &rdev->flags)) { ++ /* The ReadError flag will just be confusing now */ ++ clear_bit(R5_ReadError, &dev->flags); ++ clear_bit(R5_ReWrite, &dev->flags); + } ++ if (!rdev || !test_bit(In_sync, &rdev->flags) ++ || test_bit(R5_ReadError, &dev->flags)) { ++ if (s.failed < 2) ++ r6s.failed_num[s.failed] = i; ++ s.failed++; ++ } else ++ set_bit(R5_Insync, &dev->flags); + } +- if (update_p || update_q) { +- conf->mddev->resync_mismatches += STRIPE_SECTORS; +- if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) +- /* don't try to repair!! 
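
/* For RAID-6 the rewrite tracks up to two failed slots in
 * r6s.failed_num[] while s.failed keeps the raw count, exactly the
 * bookkeeping in the device scan above.  Reduced model: */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool dev_ok[6] = { true, false, true, true, false, true };
	int failed = 0, failed_num[2] = { 0, 0 };

	for (int i = 0; i < 6; i++)
		if (!dev_ok[i]) {
			if (failed < 2)    /* remember only the first two */
				failed_num[failed] = i;
			failed++;
		}
	printf("failed=%d (%d,%d)\n", failed, failed_num[0], failed_num[1]);
	return 0;
}
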
*/ +- update_p = update_q = 0; ++ rcu_read_unlock(); ++ pr_debug("locked=%d uptodate=%d to_read=%d" ++ " to_write=%d failed=%d failed_num=%d,%d\n", ++ s.locked, s.uptodate, s.to_read, s.to_write, s.failed, ++ r6s.failed_num[0], r6s.failed_num[1]); ++ /* check if the array has lost >2 devices and, if so, some requests ++ * might need to be failed ++ */ ++ if (s.failed > 2 && s.to_read+s.to_write+s.written) ++ handle_requests_to_failed_array(conf, sh, &s, disks, ++ &return_bi); ++ if (s.failed > 2 && s.syncing) { ++ md_done_sync(conf->mddev, STRIPE_SECTORS,0); ++ clear_bit(STRIPE_SYNCING, &sh->state); ++ s.syncing = 0; + } + +- /* now write out any block on a failed drive, +- * or P or Q if they need it ++ /* ++ * might be able to return some write requests if the parity blocks ++ * are safe, or on a failed drive + */ ++ pdev = &sh->dev[pd_idx]; ++ r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) ++ || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); ++ qdev = &sh->dev[r6s.qd_idx]; ++ r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx) ++ || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx); + +- if (failed == 2) { +- dev = &sh->dev[failed_num[1]]; +- locked++; +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantwrite, &dev->flags); +- } +- if (failed >= 1) { +- dev = &sh->dev[failed_num[0]]; +- locked++; +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantwrite, &dev->flags); +- } ++ if ( s.written && ++ ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) ++ && !test_bit(R5_LOCKED, &pdev->flags) ++ && test_bit(R5_UPTODATE, &pdev->flags)))) && ++ ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) ++ && !test_bit(R5_LOCKED, &qdev->flags) ++ && test_bit(R5_UPTODATE, &qdev->flags))))) ++ handle_completed_write_requests(conf, sh, disks, &return_bi); + +- if (update_p) { +- dev = &sh->dev[pd_idx]; +- locked ++; +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantwrite, &dev->flags); +- } +- if (update_q) { +- dev = &sh->dev[qd_idx]; +- locked++; +- set_bit(R5_LOCKED, &dev->flags); +- set_bit(R5_Wantwrite, &dev->flags); +- } +- clear_bit(STRIPE_DEGRADED, &sh->state); ++ /* Now we might consider reading some blocks, either to check/generate ++ * parity, or to satisfy requests ++ * or to load a block that is being partially written. ++ */ ++ if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || ++ (s.syncing && (s.uptodate < disks)) || s.expanding) ++ handle_issuing_new_read_requests6(sh, &s, &r6s, disks); + +- set_bit(STRIPE_INSYNC, &sh->state); +- } +- } ++ /* now to consider writing and what else, if anything should be read */ ++ if (s.to_write) ++ handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks); ++ ++ /* maybe we need to check and possibly fix the parity for this stripe ++ * Any reads will already have been scheduled, so we just see if enough ++ * data is available ++ */ ++ if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) ++ handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); + +- if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { ++ if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { + md_done_sync(conf->mddev, STRIPE_SECTORS,1); + clear_bit(STRIPE_SYNCING, &sh->state); + } +@@ -2339,9 +3030,9 @@ + /* If the failed drives are just a ReadError, then we might need + * to progress the repair/check process + */ +- if (failed <= 2 && ! 
conf->mddev->ro)
+- for (i=0; i<failed;i++) {
+- dev = &sh->dev[failed_num[i]];
++ if (s.failed <= 2 && !conf->mddev->ro)
++ for (i = 0; i < s.failed; i++) {
++ dev = &sh->dev[r6s.failed_num[i]];
+ if (test_bit(R5_ReadError, &dev->flags)
+ && !test_bit(R5_LOCKED, &dev->flags)
+ && test_bit(R5_UPTODATE, &dev->flags)
+@@ -2358,7 +3049,7 @@
+ }
+ }
+
+- if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
++ if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ /* Need to write out all blocks after computing P&Q */
+ sh->disks = conf->raid_disks;
+ sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
+@@ -2366,69 +3057,19 @@
+ compute_parity6(sh, RECONSTRUCT_WRITE);
+ for (i = conf->raid_disks ; i-- ; ) {
+ set_bit(R5_LOCKED, &sh->dev[i].flags);
+- locked++;
++ s.locked++;
+ set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ }
+ clear_bit(STRIPE_EXPANDING, &sh->state);
+- } else if (expanded) {
++ } else if (s.expanded) {
+ clear_bit(STRIPE_EXPAND_READY, &sh->state);
+ atomic_dec(&conf->reshape_stripes);
+ wake_up(&conf->wait_for_overlap);
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ }
+
+- if (expanding && locked == 0) {
+- /* We have read all the blocks in this stripe and now we need to
+- * copy some of them into a target stripe for expand.
+- */
+- clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+- for (i = 0; i < sh->disks ; i++)
+- if (i != pd_idx && i != qd_idx) {
+- int dd_idx2, pd_idx2, j;
+- struct stripe_head *sh2;
+-
+- sector_t bn = compute_blocknr(sh, i);
+- sector_t s = raid5_compute_sector(
+- bn, conf->raid_disks,
+- conf->raid_disks - conf->max_degraded,
+- &dd_idx2, &pd_idx2, conf);
+- sh2 = get_active_stripe(conf, s,
+- conf->raid_disks,
+- pd_idx2, 1);
+- if (sh2 == NULL)
+- /* so for only the early blocks of
+- * this stripe have been requests.
+- * When later blocks get requests, we
+- * will try again
+- */
+- continue;
+- if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+- test_bit(R5_Expanded,
+- &sh2->dev[dd_idx2].flags)) {
+- /* must have already done this block */
+- release_stripe(sh2);
+- continue;
+- }
+- memcpy(page_address(sh2->dev[dd_idx2].page),
+- page_address(sh->dev[i].page),
+- STRIPE_SIZE);
+- set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags);
+- set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags);
+- for (j = 0 ; j < conf->raid_disks ; j++)
+- if (j != sh2->pd_idx &&
+- j != raid6_next_disk(sh2->pd_idx,
+- sh2->disks) &&
+- !test_bit(R5_Expanded,
+- &sh2->dev[j].flags))
+- break;
+- if (j == conf->raid_disks) {
+- set_bit(STRIPE_EXPAND_READY,
+- &sh2->state);
+- set_bit(STRIPE_HANDLE, &sh2->state);
+- }
+- release_stripe(sh2);
+- }
+- }
++ if (s.expanding && s.locked == 0)
++ handle_stripe_expansion(conf, sh, &r6s);
+
+ spin_unlock(&sh->lock);
+
+@@ -2470,11 +3111,11 @@
+ rcu_read_unlock();
+
+ if (rdev) {
+- if (syncing || expanding || expanded)
++ if (s.syncing || s.expanding || s.expanded)
+ md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+
+ bi->bi_bdev = rdev->bdev;
+- PRINTK("for %llu schedule op %ld on disc %d\n",
++ pr_debug("for %llu schedule op %ld on disc %d\n",
+ (unsigned long long)sh->sector, bi->bi_rw, i);
+ atomic_inc(&sh->count);
+ bi->bi_sector = sh->sector + rdev->data_offset;
+@@ -2494,7 +3135,7 @@
+ } else {
+ if (rw == WRITE)
+ set_bit(STRIPE_DEGRADED, &sh->state);
+- PRINTK("skip op %ld on disc %d for sector %llu\n",
++ pr_debug("skip op %ld on disc %d for sector %llu\n",
+ bi->bi_rw, i, (unsigned long long)sh->sector);
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ set_bit(STRIPE_HANDLE, &sh->state);
+@@ -2738,7 +3379,7 @@
+ }
+
+
+- PRINTK("raid5_align_endio : io 
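
/* The PRINTK()->pr_debug() conversions above (and the RAID5_DEBUG ->
 * DEBUG switch later in the patch) move raid5 onto the kernel's
 * standard debug-print idiom, where the call compiles away unless
 * DEBUG is defined.  Userspace approximation of that macro shape,
 * using the GNU ##__VA_ARGS__ extension: */
#include <stdio.h>

#ifdef DEBUG
#define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) do { } while (0) /* compiles to nothing */
#endif

int main(void)
{
	pr_debug("for %llu schedule op %d on disc %d\n", 8ULL, 0, 1);
	printf("done\n"); /* the line above emits only when built -DDEBUG */
	return 0;
}
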
error...handing IO for a retry\n");
++ pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
+
+ add_bio_to_retry(raid_bi, conf);
+ return 0;
+@@ -2776,7 +3417,7 @@
+ mdk_rdev_t *rdev;
+
+ if (!in_chunk_boundary(mddev, raid_bio)) {
+- PRINTK("chunk_aligned_read : non aligned\n");
++ pr_debug("chunk_aligned_read : non aligned\n");
+ return 0;
+ }
+ /*
+@@ -2900,7 +3541,7 @@
+
+ new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
+ &dd_idx, &pd_idx, conf);
+- PRINTK("raid5: make_request, sector %llu logical %llu\n",
++ pr_debug("raid5: make_request, sector %llu logical %llu\n",
+ (unsigned long long)new_sector,
+ (unsigned long long)logical_sector);
+
+@@ -3273,7 +3914,7 @@
+ raid5_conf_t *conf = mddev_to_conf(mddev);
+ int handled;
+
+- PRINTK("+++ raid5d active\n");
++ pr_debug("+++ raid5d active\n");
+
+ md_check_recovery(mddev);
+
+@@ -3308,8 +3949,10 @@
+ handled++;
+ }
+
+- if (list_empty(&conf->handle_list))
++ if (list_empty(&conf->handle_list)) {
++ async_tx_issue_pending_all();
+ break;
++ }
+
+ first = conf->handle_list.next;
+ sh = list_entry(first, struct stripe_head, lru);
+@@ -3325,13 +3968,13 @@
+
+ spin_lock_irq(&conf->device_lock);
+ }
+- PRINTK("%d stripes handled\n", handled);
++ pr_debug("%d stripes handled\n", handled);
+
+ spin_unlock_irq(&conf->device_lock);
+
+ unplug_slaves(mddev);
+
+- PRINTK("--- raid5d inactive\n");
++ pr_debug("--- raid5d inactive\n");
+ }
+
+ static ssize_t
+@@ -3507,7 +4150,7 @@
+ atomic_set(&conf->preread_active_stripes, 0);
+ atomic_set(&conf->active_aligned_reads, 0);
+
+- PRINTK("raid5: run(%s) called.\n", mdname(mddev));
++ pr_debug("raid5: run(%s) called.\n", mdname(mddev));
+
+ ITERATE_RDEV(mddev,rdev,tmp) {
+ raid_disk = rdev->raid_disk;
+@@ -3690,7 +4333,7 @@
+ return 0;
+ }
+
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
+ {
+ int i;
+@@ -3737,7 +4380,7 @@
+ conf->disks[i].rdev &&
+ test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
+ seq_printf (seq, "]");
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ seq_printf (seq, "\n");
+ printall(seq, conf);
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/md/xor.c linux-2.6.22-591/drivers/md/xor.c
+--- linux-2.6.22-570/drivers/md/xor.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/md/xor.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,154 +0,0 @@
+-/*
+- * xor.c : Multiple Devices driver for Linux
+- *
+- * Copyright (C) 1996, 1997, 1998, 1999, 2000,
+- * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
+- *
+- * Dispatch optimized RAID-5 checksumming functions.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2, or (at your option)
+- * any later version.
+- *
+- * You should have received a copy of the GNU General Public License
+- * (for example /usr/src/linux/COPYING); if not, write to the Free
+- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#define BH_TRACE 0
+-#include <linux/module.h>
+-#include <linux/raid/md.h>
+-#include <linux/raid/xor.h>
+-#include <asm/xor.h>
+-
+-/* The xor routines to use. 
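
/* The raid5d hunk above defers hardware submission: descriptors built
 * by raid5_run_ops stay queued until the handle_list drains, then one
 * async_tx_issue_pending_all() flushes every channel.  Skeleton of
 * that drain-then-flush loop, with stand-in list and flush calls: */
#include <stdio.h>

static int pending_stripes = 3;

static int list_empty(void)         { return pending_stripes == 0; }
static void handle_stripe(void)     { pending_stripes--; printf("handled stripe\n"); }
static void issue_pending_all(void) { printf("flushed dma channels\n"); }

int main(void)
{
	for (;;) {
		if (list_empty()) {
			issue_pending_all(); /* one flush per batch */
			break;
		}
		handle_stripe();
	}
	return 0;
}
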
*/ +-static struct xor_block_template *active_template; +- +-void +-xor_block(unsigned int count, unsigned int bytes, void **ptr) +-{ +- unsigned long *p0, *p1, *p2, *p3, *p4; +- +- p0 = (unsigned long *) ptr[0]; +- p1 = (unsigned long *) ptr[1]; +- if (count == 2) { +- active_template->do_2(bytes, p0, p1); +- return; +- } +- +- p2 = (unsigned long *) ptr[2]; +- if (count == 3) { +- active_template->do_3(bytes, p0, p1, p2); +- return; +- } +- +- p3 = (unsigned long *) ptr[3]; +- if (count == 4) { +- active_template->do_4(bytes, p0, p1, p2, p3); +- return; +- } +- +- p4 = (unsigned long *) ptr[4]; +- active_template->do_5(bytes, p0, p1, p2, p3, p4); +-} +- +-/* Set of all registered templates. */ +-static struct xor_block_template *template_list; +- +-#define BENCH_SIZE (PAGE_SIZE) +- +-static void +-do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2) +-{ +- int speed; +- unsigned long now; +- int i, count, max; +- +- tmpl->next = template_list; +- template_list = tmpl; +- +- /* +- * Count the number of XORs done during a whole jiffy, and use +- * this to calculate the speed of checksumming. We use a 2-page +- * allocation to have guaranteed color L1-cache layout. +- */ +- max = 0; +- for (i = 0; i < 5; i++) { +- now = jiffies; +- count = 0; +- while (jiffies == now) { +- mb(); +- tmpl->do_2(BENCH_SIZE, b1, b2); +- mb(); +- count++; +- mb(); +- } +- if (count > max) +- max = count; +- } +- +- speed = max * (HZ * BENCH_SIZE / 1024); +- tmpl->speed = speed; +- +- printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name, +- speed / 1000, speed % 1000); +-} +- +-static int +-calibrate_xor_block(void) +-{ +- void *b1, *b2; +- struct xor_block_template *f, *fastest; +- +- b1 = (void *) __get_free_pages(GFP_KERNEL, 2); +- if (! b1) { +- printk("raid5: Yikes! No memory available.\n"); +- return -ENOMEM; +- } +- b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; +- +- /* +- * If this arch/cpu has a short-circuited selection, don't loop through all +- * the possible functions, just test the best one +- */ +- +- fastest = NULL; +- +-#ifdef XOR_SELECT_TEMPLATE +- fastest = XOR_SELECT_TEMPLATE(fastest); +-#endif +- +-#define xor_speed(templ) do_xor_speed((templ), b1, b2) +- +- if (fastest) { +- printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n", +- fastest->name); +- xor_speed(fastest); +- } else { +- printk(KERN_INFO "raid5: measuring checksumming speed\n"); +- XOR_TRY_TEMPLATES; +- fastest = template_list; +- for (f = fastest; f; f = f->next) +- if (f->speed > fastest->speed) +- fastest = f; +- } +- +- printk("raid5: using function: %s (%d.%03d MB/sec)\n", +- fastest->name, fastest->speed / 1000, fastest->speed % 1000); +- +-#undef xor_speed +- +- free_pages((unsigned long)b1, 2); +- +- active_template = fastest; +- return 0; +-} +- +-static __exit void xor_exit(void) { } +- +-EXPORT_SYMBOL(xor_block); +-MODULE_LICENSE("GPL"); +- +-module_init(calibrate_xor_block); +-module_exit(xor_exit); +diff -Nurb linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c +--- linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/dvb/dvb-core/dvb_frontend.c 2007-12-21 15:36:12.000000000 -0500 +@@ -523,6 +523,7 @@ + + dvb_frontend_init(fe); + ++ set_freezable(); + while (1) { + up(&fepriv->sem); /* is locked when we enter the thread... 
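
/* The calibration loop in the file removed above counts how many
 * BENCH_SIZE XOR passes fit in one clock tick and converts that to
 * throughput via speed = max * (HZ * BENCH_SIZE / 1024).  A rough
 * userspace analogue using clock() instead of jiffies: */
#include <stdio.h>
#include <time.h>

#define BENCH_SIZE 4096

static void xor_2(size_t n, unsigned long *a, unsigned long *b)
{
	for (size_t i = 0; i < n / sizeof(long); i++)
		a[i] ^= b[i];
}

int main(void)
{
	static unsigned long b1[BENCH_SIZE / sizeof(long)];
	static unsigned long b2[BENCH_SIZE / sizeof(long)];
	long count = 0;
	clock_t start = clock();

	while (clock() - start < CLOCKS_PER_SEC / 100) { /* ~one "tick" */
		xor_2(BENCH_SIZE, b1, b2);
		count++;
	}

	/* passes-per-second * bytes-per-pass, scaled to MB/s */
	printf("%ld passes, ~%lld MB/s\n", count,
	       (long long)count * 100 * BENCH_SIZE / (1024 * 1024));
	return 0;
}
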
*/ + restart: +diff -Nurb linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c +--- linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/video/cx88/cx88-tvaudio.c 2007-12-21 15:36:12.000000000 -0500 +@@ -906,6 +906,7 @@ + u32 mode = 0; + + dprintk("cx88: tvaudio thread started\n"); ++ set_freezable(); + for (;;) { + msleep_interruptible(1000); + if (kthread_should_stop()) +diff -Nurb linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c +--- linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/video/msp3400-kthreads.c 2007-12-21 15:36:12.000000000 -0500 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -468,6 +469,7 @@ + + + v4l_dbg(1, msp_debug, client, "msp3400 daemon started\n"); ++ set_freezable(); + for (;;) { + v4l_dbg(2, msp_debug, client, "msp3400 thread: sleep\n"); + msp_sleep(state, -1); +@@ -646,7 +648,7 @@ + int val, i, std, count; + + v4l_dbg(1, msp_debug, client, "msp3410 daemon started\n"); +- ++ set_freezable(); + for (;;) { + v4l_dbg(2, msp_debug, client, "msp3410 thread: sleep\n"); + msp_sleep(state,-1); +@@ -940,7 +942,7 @@ + int val, i; + + v4l_dbg(1, msp_debug, client, "msp34xxg daemon started\n"); +- ++ set_freezable(); + for (;;) { + v4l_dbg(2, msp_debug, client, "msp34xxg thread: sleep\n"); + msp_sleep(state, -1); +diff -Nurb linux-2.6.22-570/drivers/media/video/tvaudio.c linux-2.6.22-591/drivers/media/video/tvaudio.c +--- linux-2.6.22-570/drivers/media/video/tvaudio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/video/tvaudio.c 2007-12-21 15:36:12.000000000 -0500 +@@ -271,7 +271,7 @@ + struct CHIPDESC *desc = chiplist + chip->type; + + v4l_dbg(1, debug, &chip->c, "%s: thread started\n", chip->c.name); +- ++ set_freezable(); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) +diff -Nurb linux-2.6.22-570/drivers/media/video/video-buf-dvb.c linux-2.6.22-591/drivers/media/video/video-buf-dvb.c +--- linux-2.6.22-570/drivers/media/video/video-buf-dvb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/video/video-buf-dvb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -47,6 +47,7 @@ + int err; + + dprintk("dvb thread started\n"); ++ set_freezable(); + videobuf_read_start(&dvb->dvbq); + + for (;;) { +diff -Nurb linux-2.6.22-570/drivers/media/video/vivi.c linux-2.6.22-591/drivers/media/video/vivi.c +--- linux-2.6.22-570/drivers/media/video/vivi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/media/video/vivi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -573,6 +573,7 @@ + dprintk(1,"thread started\n"); + + mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT); ++ set_freezable(); + + for (;;) { + vivi_sleep(dma_q); +diff -Nurb linux-2.6.22-570/drivers/message/fusion/linux_compat.h linux-2.6.22-591/drivers/message/fusion/linux_compat.h +--- linux-2.6.22-570/drivers/message/fusion/linux_compat.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/linux_compat.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,9 +0,0 @@ +-/* drivers/message/fusion/linux_compat.h */ +- +-#ifndef FUSION_LINUX_COMPAT_H +-#define FUSION_LINUX_COMPAT_H +- +-#include +-#include +- +-#endif /* _LINUX_COMPAT_H */ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h 
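
/* Every media driver touched above gains set_freezable() because
 * kernel threads became non-freezable by default: a thread that should
 * park for suspend must now opt in and poll the freezer.  Typical
 * shape of such a loop, with userspace stand-ins for the kernel
 * calls: */
#include <stdio.h>

static int freezing; /* stand-in for freezer state; suspend would set it */

static void set_freezable(void)      { printf("opted in to freezing\n"); }
static int try_to_freeze(void)       { return freezing; }
static int kthread_should_stop(void) { static int n; return ++n > 3; }

int main(void)
{
	set_freezable(); /* must come before the main loop */
	for (;;) {
		if (try_to_freeze())
			continue; /* parked here during suspend */
		if (kthread_should_stop())
			break;
		printf("do work\n");
	}
	return 0;
}
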
linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,12 +1,12 @@ + /* +- * Copyright (c) 2000-2006 LSI Logic Corporation. ++ * Copyright (c) 2000-2007 LSI Logic Corporation. + * + * + * Name: mpi.h + * Title: MPI Message independent structures and definitions + * Creation Date: July 27, 2000 + * +- * mpi.h Version: 01.05.12 ++ * mpi.h Version: 01.05.13 + * + * Version History + * --------------- +@@ -78,6 +78,7 @@ + * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target. + * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT. + * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. ++ * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. + * -------------------------------------------------------------------------- + */ + +@@ -108,7 +109,7 @@ + /* Note: The major versions of 0xe0 through 0xff are reserved */ + + /* versioning for this MPI header set */ +-#define MPI_HEADER_VERSION_UNIT (0x0E) ++#define MPI_HEADER_VERSION_UNIT (0x10) + #define MPI_HEADER_VERSION_DEV (0x00) + #define MPI_HEADER_VERSION_UNIT_MASK (0xFF00) + #define MPI_HEADER_VERSION_UNIT_SHIFT (8) +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_cnfg.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,12 +1,12 @@ + /* +- * Copyright (c) 2000-2006 LSI Logic Corporation. ++ * Copyright (c) 2000-2007 LSI Logic Corporation. + * + * + * Name: mpi_cnfg.h + * Title: MPI Config message, structures, and Pages + * Creation Date: July 27, 2000 + * +- * mpi_cnfg.h Version: 01.05.13 ++ * mpi_cnfg.h Version: 01.05.15 + * + * Version History + * --------------- +@@ -293,6 +293,21 @@ + * Added more AccessStatus values for SAS Device Page 0. + * Added bit for SATA Asynchronous Notification Support in + * Flags field of SAS Device Page 0. ++ * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4. ++ * Added Disable SMART Polling for CapabilitiesFlags of ++ * IOC Page 6. ++ * Added Disable SMART Polling to DeviceSettings of BIOS ++ * Page 1. ++ * Added Multi-Port Domain bit for DiscoveryStatus field ++ * of SAS IO Unit Page. ++ * Added Multi-Port Domain Illegal flag for SAS IO Unit ++ * Page 1 AdditionalControlFlags field. ++ * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID ++ * Metadata bit to Manufacturing Page 4 ExtFlags field. ++ * Added Internal Connector to End Device Present bit to ++ * Expander Page 0 Flags field. ++ * Fixed define for ++ * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. 
+ * -------------------------------------------------------------------------- + */ + +@@ -639,7 +654,7 @@ + U8 InfoSize1; /* 0Bh */ + U8 InquirySize; /* 0Ch */ + U8 Flags; /* 0Dh */ +- U16 Reserved2; /* 0Eh */ ++ U16 ExtFlags; /* 0Eh */ + U8 InquiryData[56]; /* 10h */ + U32 ISVolumeSettings; /* 48h */ + U32 IMEVolumeSettings; /* 4Ch */ +@@ -658,7 +673,7 @@ + } CONFIG_PAGE_MANUFACTURING_4, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_4, + ManufacturingPage4_t, MPI_POINTER pManufacturingPage4_t; + +-#define MPI_MANUFACTURING4_PAGEVERSION (0x04) ++#define MPI_MANUFACTURING4_PAGEVERSION (0x05) + + /* defines for the Flags field */ + #define MPI_MANPAGE4_FORCE_BAD_BLOCK_TABLE (0x80) +@@ -670,6 +685,12 @@ + #define MPI_MANPAGE4_IM_RESYNC_CACHE_ENABLE (0x02) + #define MPI_MANPAGE4_IR_NO_MIX_SAS_SATA (0x01) + ++/* defines for the ExtFlags field */ ++#define MPI_MANPAGE4_EXTFLAGS_HIDE_NON_IR_METADATA (0x0008) ++#define MPI_MANPAGE4_EXTFLAGS_SAS_CACHE_DISABLE (0x0004) ++#define MPI_MANPAGE4_EXTFLAGS_SATA_CACHE_DISABLE (0x0002) ++#define MPI_MANPAGE4_EXTFLAGS_LEGACY_MODE (0x0001) ++ + + #ifndef MPI_MANPAGE5_NUM_FORCEWWID + #define MPI_MANPAGE5_NUM_FORCEWWID (1) +@@ -781,7 +802,7 @@ + } CONFIG_PAGE_MANUFACTURING_9, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_9, + ManufacturingPage9_t, MPI_POINTER pManufacturingPage9_t; + +-#define MPI_MANUFACTURING6_PAGEVERSION (0x00) ++#define MPI_MANUFACTURING9_PAGEVERSION (0x00) + + + typedef struct _CONFIG_PAGE_MANUFACTURING_10 +@@ -1138,6 +1159,8 @@ + + /* IOC Page 6 Capabilities Flags */ + ++#define MPI_IOCPAGE6_CAP_FLAGS_DISABLE_SMART_POLLING (0x00000008) ++ + #define MPI_IOCPAGE6_CAP_FLAGS_MASK_METADATA_SIZE (0x00000006) + #define MPI_IOCPAGE6_CAP_FLAGS_64MB_METADATA_SIZE (0x00000000) + #define MPI_IOCPAGE6_CAP_FLAGS_512MB_METADATA_SIZE (0x00000002) +@@ -1208,6 +1231,7 @@ + #define MPI_BIOSPAGE1_IOCSET_ALTERNATE_CHS (0x00000008) + + /* values for the DeviceSettings field */ ++#define MPI_BIOSPAGE1_DEVSET_DISABLE_SMART_POLLING (0x00000010) + #define MPI_BIOSPAGE1_DEVSET_DISABLE_SEQ_LUN (0x00000008) + #define MPI_BIOSPAGE1_DEVSET_DISABLE_RM_LUN (0x00000004) + #define MPI_BIOSPAGE1_DEVSET_DISABLE_NON_RM_LUN (0x00000002) +@@ -2281,11 +2305,11 @@ + typedef struct _CONFIG_PAGE_RAID_VOL_1 + { + CONFIG_PAGE_HEADER Header; /* 00h */ +- U8 VolumeID; /* 01h */ +- U8 VolumeBus; /* 02h */ +- U8 VolumeIOC; /* 03h */ +- U8 Reserved0; /* 04h */ +- U8 GUID[24]; /* 05h */ ++ U8 VolumeID; /* 04h */ ++ U8 VolumeBus; /* 05h */ ++ U8 VolumeIOC; /* 06h */ ++ U8 Reserved0; /* 07h */ ++ U8 GUID[24]; /* 08h */ + U8 Name[32]; /* 20h */ + U64 WWID; /* 40h */ + U32 Reserved1; /* 48h */ +@@ -2340,7 +2364,7 @@ + } RAID_PHYS_DISK0_STATUS, MPI_POINTER PTR_RAID_PHYS_DISK0_STATUS, + RaidPhysDiskStatus_t, MPI_POINTER pRaidPhysDiskStatus_t; + +-/* RAID Volume 2 IM Physical Disk DiskStatus flags */ ++/* RAID Physical Disk PhysDiskStatus flags */ + + #define MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC (0x01) + #define MPI_PHYSDISK0_STATUS_FLAG_QUIESCED (0x02) +@@ -2544,6 +2568,7 @@ + #define MPI_SAS_IOUNIT0_DS_TABLE_LINK (0x00000400) + #define MPI_SAS_IOUNIT0_DS_UNSUPPORTED_DEVICE (0x00000800) + #define MPI_SAS_IOUNIT0_DS_MAX_SATA_TARGETS (0x00001000) ++#define MPI_SAS_IOUNIT0_DS_MULTI_PORT_DOMAIN (0x00002000) + + + typedef struct _MPI_SAS_IO_UNIT1_PHY_DATA +@@ -2607,6 +2632,7 @@ + #define MPI_SAS_IOUNIT1_CONTROL_CLEAR_AFFILIATION (0x0001) + + /* values for SAS IO Unit Page 1 AdditionalControlFlags */ ++#define MPI_SAS_IOUNIT1_ACONTROL_MULTI_PORT_DOMAIN_ILLEGAL (0x0080) + #define 
MPI_SAS_IOUNIT1_ACONTROL_SATA_ASYNCHROUNOUS_NOTIFICATION (0x0040) + #define MPI_SAS_IOUNIT1_ACONTROL_HIDE_NONZERO_ATTACHED_PHY_IDENT (0x0020) + #define MPI_SAS_IOUNIT1_ACONTROL_PORT_ENABLE_ONLY_SATA_LINK_RESET (0x0010) +@@ -2734,6 +2760,7 @@ + #define MPI_SAS_EXPANDER0_DS_UNSUPPORTED_DEVICE (0x00000800) + + /* values for SAS Expander Page 0 Flags field */ ++#define MPI_SAS_EXPANDER0_FLAGS_CONNECTOR_END_DEVICE (0x04) + #define MPI_SAS_EXPANDER0_FLAGS_ROUTE_TABLE_CONFIG (0x02) + #define MPI_SAS_EXPANDER0_FLAGS_CONFIG_IN_PROGRESS (0x01) + +@@ -2774,7 +2801,7 @@ + /* see mpi_sas.h for values for SAS Expander Page 1 AttachedDeviceInfo values */ + + /* values for SAS Expander Page 1 DiscoveryInfo field */ +-#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY DISABLED (0x04) ++#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED (0x04) + #define MPI_SAS_EXPANDER1_DISCINFO_LINK_STATUS_CHANGE (0x02) + #define MPI_SAS_EXPANDER1_DISCINFO_NO_ROUTING_ENTRIES (0x01) + +@@ -2895,11 +2922,11 @@ + U8 AttachedPhyIdentifier; /* 16h */ + U8 Reserved2; /* 17h */ + U32 AttachedDeviceInfo; /* 18h */ +- U8 ProgrammedLinkRate; /* 20h */ +- U8 HwLinkRate; /* 21h */ +- U8 ChangeCount; /* 22h */ +- U8 Flags; /* 23h */ +- U32 PhyInfo; /* 24h */ ++ U8 ProgrammedLinkRate; /* 1Ch */ ++ U8 HwLinkRate; /* 1Dh */ ++ U8 ChangeCount; /* 1Eh */ ++ U8 Flags; /* 1Fh */ ++ U32 PhyInfo; /* 20h */ + } CONFIG_PAGE_SAS_PHY_0, MPI_POINTER PTR_CONFIG_PAGE_SAS_PHY_0, + SasPhyPage0_t, MPI_POINTER pSasPhyPage0_t; + +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_history.txt 2007-12-21 15:36:12.000000000 -0500 +@@ -3,28 +3,28 @@ + MPI Header File Change History + ============================== + +- Copyright (c) 2000-2006 LSI Logic Corporation. ++ Copyright (c) 2000-2007 LSI Logic Corporation. + + --------------------------------------- +- Header Set Release Version: 01.05.14 +- Header Set Release Date: 10-11-06 ++ Header Set Release Version: 01.05.16 ++ Header Set Release Date: 05-24-07 + --------------------------------------- + + Filename Current version Prior version + ---------- --------------- ------------- +- mpi.h 01.05.12 01.05.11 +- mpi_ioc.h 01.05.12 01.05.11 +- mpi_cnfg.h 01.05.13 01.05.12 +- mpi_init.h 01.05.08 01.05.07 ++ mpi.h 01.05.13 01.05.12 ++ mpi_ioc.h 01.05.14 01.05.13 ++ mpi_cnfg.h 01.05.15 01.05.14 ++ mpi_init.h 01.05.09 01.05.09 + mpi_targ.h 01.05.06 01.05.06 + mpi_fc.h 01.05.01 01.05.01 + mpi_lan.h 01.05.01 01.05.01 +- mpi_raid.h 01.05.02 01.05.02 ++ mpi_raid.h 01.05.03 01.05.03 + mpi_tool.h 01.05.03 01.05.03 + mpi_inb.h 01.05.01 01.05.01 +- mpi_sas.h 01.05.04 01.05.03 ++ mpi_sas.h 01.05.04 01.05.04 + mpi_type.h 01.05.02 01.05.02 +- mpi_history.txt 01.05.14 01.05.13 ++ mpi_history.txt 01.05.14 01.05.14 + + + * Date Version Description +@@ -95,6 +95,7 @@ + * 08-30-05 01.05.10 Added 2 new IOCStatus codes for Target. + * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT. + * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. ++ * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. + * -------------------------------------------------------------------------- + + mpi_ioc.h +@@ -191,6 +192,13 @@ + * data structure. + * Added new ImageType values for FWDownload and FWUpload + * requests. 
++ * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS ++ * Broadcast Event Data (replacing _RESERVED2). ++ * For Discovery Error Event Data DiscoveryStatus field, ++ * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and ++ * added _MULTI_PORT_DOMAIN. ++ * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. ++ * Added Common Boot Block type to FWUpload Request. + * -------------------------------------------------------------------------- + + mpi_cnfg.h +@@ -473,6 +481,21 @@ + * Added more AccessStatus values for SAS Device Page 0. + * Added bit for SATA Asynchronous Notification Support in + * Flags field of SAS Device Page 0. ++ * 02-28-07 01.05.14 Added ExtFlags field to Manufacturing Page 4. ++ * Added Disable SMART Polling for CapabilitiesFlags of ++ * IOC Page 6. ++ * Added Disable SMART Polling to DeviceSettings of BIOS ++ * Page 1. ++ * Added Multi-Port Domain bit for DiscoveryStatus field ++ * of SAS IO Unit Page. ++ * Added Multi-Port Domain Illegal flag for SAS IO Unit ++ * Page 1 AdditionalControlFlags field. ++ * 05-24-07 01.05.15 Added Hide Physical Disks with Non-Integrated RAID ++ * Metadata bit to Manufacturing Page 4 ExtFlags field. ++ * Added Internal Connector to End Device Present bit to ++ * Expander Page 0 Flags field. ++ * Fixed define for ++ * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. + * -------------------------------------------------------------------------- + + mpi_init.h +@@ -517,6 +540,8 @@ + * unique in the first 32 characters. + * 03-27-06 01.05.07 Added Task Management type of Clear ACA. + * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA. ++ * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management ++ * Request: Do Not Send Task IU and Soft Reset Option. + * -------------------------------------------------------------------------- + + mpi_targ.h +@@ -571,7 +596,7 @@ + * 11-02-00 01.01.01 Original release for post 1.0 work + * 12-04-00 01.01.02 Added messages for Common Transport Send and + * Primitive Send. +- * 01-09-01 01.01.03 Modified some of the new flags to have an MPI prefix ++ * 01-09-01 01.01.03 Modifed some of the new flags to have an MPI prefix + * and modified the FcPrimitiveSend flags. + * 01-25-01 01.01.04 Move InitiatorIndex in LinkServiceRsp reply to a larger + * field. +@@ -634,6 +659,8 @@ + * 08-19-04 01.05.01 Original release for MPI v1.5. + * 01-15-05 01.05.02 Added defines for the two new RAID Actions for + * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. ++ * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and ++ * associated defines. 
+ * -------------------------------------------------------------------------- + + mpi_tool.h +@@ -682,7 +709,22 @@ + + mpi_history.txt Parts list history + +-Filename 01.05.13 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09 ++Filename 01.05.15 01.05.15 ++---------- -------- -------- ++mpi.h 01.05.12 01.05.13 ++mpi_ioc.h 01.05.13 01.05.14 ++mpi_cnfg.h 01.05.14 01.05.15 ++mpi_init.h 01.05.09 01.05.09 ++mpi_targ.h 01.05.06 01.05.06 ++mpi_fc.h 01.05.01 01.05.01 ++mpi_lan.h 01.05.01 01.05.01 ++mpi_raid.h 01.05.03 01.05.03 ++mpi_tool.h 01.05.03 01.05.03 ++mpi_inb.h 01.05.01 01.05.01 ++mpi_sas.h 01.05.04 01.05.04 ++mpi_type.h 01.05.02 01.05.02 ++ ++Filename 01.05.14 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09 + ---------- -------- -------- -------- -------- -------- -------- + mpi.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.08 01.05.07 + mpi_ioc.h 01.05.12 01.05.11 01.05.10 01.05.09 01.05.09 01.05.08 +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_inb.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,221 +0,0 @@ +-/* +- * Copyright (c) 2003-2004 LSI Logic Corporation. +- * +- * +- * Name: mpi_inb.h +- * Title: MPI Inband structures and definitions +- * Creation Date: September 30, 2003 +- * +- * mpi_inb.h Version: 01.05.01 +- * +- * Version History +- * --------------- +- * +- * Date Version Description +- * -------- -------- ------------------------------------------------------ +- * 05-11-04 01.03.01 Original release. +- * 08-19-04 01.05.01 Original release for MPI v1.5. +- * -------------------------------------------------------------------------- +- */ +- +-#ifndef MPI_INB_H +-#define MPI_INB_H +- +-/****************************************************************************** +-* +-* I n b a n d M e s s a g e s +-* +-*******************************************************************************/ +- +- +-/****************************************************************************/ +-/* Inband Buffer Post Request */ +-/****************************************************************************/ +- +-typedef struct _MSG_INBAND_BUFFER_POST_REQUEST +-{ +- U8 Reserved1; /* 00h */ +- U8 BufferCount; /* 01h */ +- U8 ChainOffset; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U32 Reserved4; /* 0Ch */ +- SGE_TRANS_SIMPLE_UNION SGL; /* 10h */ +-} MSG_INBAND_BUFFER_POST_REQUEST, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REQUEST, +- MpiInbandBufferPostRequest_t , MPI_POINTER pMpiInbandBufferPostRequest_t; +- +- +-typedef struct _WWN_FC_FORMAT +-{ +- U64 NodeName; /* 00h */ +- U64 PortName; /* 08h */ +-} WWN_FC_FORMAT, MPI_POINTER PTR_WWN_FC_FORMAT, +- WwnFcFormat_t, MPI_POINTER pWwnFcFormat_t; +- +-typedef struct _WWN_SAS_FORMAT +-{ +- U64 WorldWideID; /* 00h */ +- U32 Reserved1; /* 08h */ +- U32 Reserved2; /* 0Ch */ +-} WWN_SAS_FORMAT, MPI_POINTER PTR_WWN_SAS_FORMAT, +- WwnSasFormat_t, MPI_POINTER pWwnSasFormat_t; +- +-typedef union _WWN_INBAND_FORMAT +-{ +- WWN_FC_FORMAT Fc; +- WWN_SAS_FORMAT Sas; +-} WWN_INBAND_FORMAT, MPI_POINTER PTR_WWN_INBAND_FORMAT, +- WwnInbandFormat, MPI_POINTER pWwnInbandFormat; +- +- +-/* Inband Buffer Post reply message */ +- +-typedef struct _MSG_INBAND_BUFFER_POST_REPLY +-{ +- U16 Reserved1; /* 00h */ +- U8 MsgLength; /* 02h */ +- U8 Function; /* 03h */ +- U16 
Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U16 Reserved4; /* 0Ch */ +- U16 IOCStatus; /* 0Eh */ +- U32 IOCLogInfo; /* 10h */ +- U32 TransferLength; /* 14h */ +- U32 TransactionContext; /* 18h */ +- WWN_INBAND_FORMAT Wwn; /* 1Ch */ +- U32 IOCIdentifier[4]; /* 2Ch */ +-} MSG_INBAND_BUFFER_POST_REPLY, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REPLY, +- MpiInbandBufferPostReply_t, MPI_POINTER pMpiInbandBufferPostReply_t; +- +- +-/****************************************************************************/ +-/* Inband Send Request */ +-/****************************************************************************/ +- +-typedef struct _MSG_INBAND_SEND_REQUEST +-{ +- U16 Reserved1; /* 00h */ +- U8 ChainOffset; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U32 Reserved4; /* 0Ch */ +- WWN_INBAND_FORMAT Wwn; /* 10h */ +- U32 Reserved5; /* 20h */ +- SGE_IO_UNION SGL; /* 24h */ +-} MSG_INBAND_SEND_REQUEST, MPI_POINTER PTR_MSG_INBAND_SEND_REQUEST, +- MpiInbandSendRequest_t , MPI_POINTER pMpiInbandSendRequest_t; +- +- +-/* Inband Send reply message */ +- +-typedef struct _MSG_INBAND_SEND_REPLY +-{ +- U16 Reserved1; /* 00h */ +- U8 MsgLength; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U16 Reserved4; /* 0Ch */ +- U16 IOCStatus; /* 0Eh */ +- U32 IOCLogInfo; /* 10h */ +- U32 ResponseLength; /* 14h */ +-} MSG_INBAND_SEND_REPLY, MPI_POINTER PTR_MSG_INBAND_SEND_REPLY, +- MpiInbandSendReply_t, MPI_POINTER pMpiInbandSendReply_t; +- +- +-/****************************************************************************/ +-/* Inband Response Request */ +-/****************************************************************************/ +- +-typedef struct _MSG_INBAND_RSP_REQUEST +-{ +- U16 Reserved1; /* 00h */ +- U8 ChainOffset; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U32 Reserved4; /* 0Ch */ +- WWN_INBAND_FORMAT Wwn; /* 10h */ +- U32 IOCIdentifier[4]; /* 20h */ +- U32 ResponseLength; /* 30h */ +- SGE_IO_UNION SGL; /* 34h */ +-} MSG_INBAND_RSP_REQUEST, MPI_POINTER PTR_MSG_INBAND_RSP_REQUEST, +- MpiInbandRspRequest_t , MPI_POINTER pMpiInbandRspRequest_t; +- +- +-/* Inband Response reply message */ +- +-typedef struct _MSG_INBAND_RSP_REPLY +-{ +- U16 Reserved1; /* 00h */ +- U8 MsgLength; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U16 Reserved4; /* 0Ch */ +- U16 IOCStatus; /* 0Eh */ +- U32 IOCLogInfo; /* 10h */ +-} MSG_INBAND_RSP_REPLY, MPI_POINTER PTR_MSG_INBAND_RSP_REPLY, +- MpiInbandRspReply_t, MPI_POINTER pMpiInbandRspReply_t; +- +- +-/****************************************************************************/ +-/* Inband Abort Request */ +-/****************************************************************************/ +- +-typedef struct _MSG_INBAND_ABORT_REQUEST +-{ +- U8 Reserved1; /* 00h */ +- U8 AbortType; /* 01h */ +- U8 ChainOffset; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U32 Reserved4; /* 0Ch */ +- U32 ContextToAbort; /* 10h */ +-} MSG_INBAND_ABORT_REQUEST, MPI_POINTER PTR_MSG_INBAND_ABORT_REQUEST, +- MpiInbandAbortRequest_t , 
MPI_POINTER pMpiInbandAbortRequest_t; +- +-#define MPI_INBAND_ABORT_TYPE_ALL_BUFFERS (0x00) +-#define MPI_INBAND_ABORT_TYPE_EXACT_BUFFER (0x01) +-#define MPI_INBAND_ABORT_TYPE_SEND_REQUEST (0x02) +-#define MPI_INBAND_ABORT_TYPE_RESPONSE_REQUEST (0x03) +- +- +-/* Inband Abort reply message */ +- +-typedef struct _MSG_INBAND_ABORT_REPLY +-{ +- U8 Reserved1; /* 00h */ +- U8 AbortType; /* 01h */ +- U8 MsgLength; /* 02h */ +- U8 Function; /* 03h */ +- U16 Reserved2; /* 04h */ +- U8 Reserved3; /* 06h */ +- U8 MsgFlags; /* 07h */ +- U32 MsgContext; /* 08h */ +- U16 Reserved4; /* 0Ch */ +- U16 IOCStatus; /* 0Eh */ +- U32 IOCLogInfo; /* 10h */ +-} MSG_INBAND_ABORT_REPLY, MPI_POINTER PTR_MSG_INBAND_ABORT_REPLY, +- MpiInbandAbortReply_t, MPI_POINTER pMpiInbandAbortReply_t; +- +- +-#endif +- +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_init.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,12 +1,12 @@ + /* +- * Copyright (c) 2000-2006 LSI Logic Corporation. ++ * Copyright (c) 2000-2007 LSI Logic Corporation. + * + * + * Name: mpi_init.h + * Title: MPI initiator mode messages and structures + * Creation Date: June 8, 2000 + * +- * mpi_init.h Version: 01.05.08 ++ * mpi_init.h Version: 01.05.09 + * + * Version History + * --------------- +@@ -54,6 +54,8 @@ + * unique in the first 32 characters. + * 03-27-06 01.05.07 Added Task Management type of Clear ACA. + * 10-11-06 01.05.08 Shortened define for Task Management type of Clear ACA. ++ * 02-28-07 01.05.09 Defined two new MsgFlags bits for SCSI Task Management ++ * Request: Do Not Send Task IU and Soft Reset Option. + * -------------------------------------------------------------------------- + */ + +@@ -432,10 +434,14 @@ + #define MPI_SCSITASKMGMT_TASKTYPE_CLR_ACA (0x08) + + /* MsgFlags bits */ ++#define MPI_SCSITASKMGMT_MSGFLAGS_DO_NOT_SEND_TASK_IU (0x01) ++ + #define MPI_SCSITASKMGMT_MSGFLAGS_TARGET_RESET_OPTION (0x00) + #define MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION (0x02) + #define MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION (0x04) + ++#define MPI_SCSITASKMGMT_MSGFLAGS_SOFT_RESET_OPTION (0x08) ++ + /* SCSI Task Management Reply */ + typedef struct _MSG_SCSI_TASK_MGMT_REPLY + { +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_ioc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,12 +1,12 @@ + /* +- * Copyright (c) 2000-2006 LSI Logic Corporation. ++ * Copyright (c) 2000-2007 LSI Logic Corporation. + * + * + * Name: mpi_ioc.h + * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages + * Creation Date: August 11, 2000 + * +- * mpi_ioc.h Version: 01.05.12 ++ * mpi_ioc.h Version: 01.05.14 + * + * Version History + * --------------- +@@ -106,6 +106,13 @@ + * data structure. + * Added new ImageType values for FWDownload and FWUpload + * requests. ++ * 02-28-07 01.05.13 Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS ++ * Broadcast Event Data (replacing _RESERVED2). ++ * For Discovery Error Event Data DiscoveryStatus field, ++ * replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and ++ * added _MULTI_PORT_DOMAIN. ++ * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. 
++ * Added Common Boot Block type to FWUpload Request. + * -------------------------------------------------------------------------- + */ + +@@ -792,7 +799,7 @@ + + #define MPI_EVENT_PRIMITIVE_CHANGE (0x01) + #define MPI_EVENT_PRIMITIVE_EXPANDER (0x03) +-#define MPI_EVENT_PRIMITIVE_RESERVED2 (0x04) ++#define MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT (0x04) + #define MPI_EVENT_PRIMITIVE_RESERVED3 (0x05) + #define MPI_EVENT_PRIMITIVE_RESERVED4 (0x06) + #define MPI_EVENT_PRIMITIVE_CHANGE0_RESERVED (0x07) +@@ -857,8 +864,9 @@ + #define MPI_EVENT_DSCVRY_ERR_DS_SMP_CRC_ERROR (0x00000100) + #define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_SUBTRACTIVE (0x00000200) + #define MPI_EVENT_DSCVRY_ERR_DS_TABLE_TO_TABLE (0x00000400) +-#define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_PATHS (0x00000800) ++#define MPI_EVENT_DSCVRY_ERR_DS_UNSUPPORTED_DEVICE (0x00000800) + #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS (0x00001000) ++#define MPI_EVENT_DSCVRY_ERR_DS_MULTI_PORT_DOMAIN (0x00002000) + + /* SAS SMP Error Event data */ + +@@ -990,6 +998,7 @@ + #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_1 (0x07) + #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_2 (0x08) + #define MPI_FW_DOWNLOAD_ITYPE_MEGARAID (0x09) ++#define MPI_FW_DOWNLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B) + + + typedef struct _FWDownloadTCSGE +@@ -1049,6 +1058,7 @@ + #define MPI_FW_UPLOAD_ITYPE_CONFIG_2 (0x08) + #define MPI_FW_UPLOAD_ITYPE_MEGARAID (0x09) + #define MPI_FW_UPLOAD_ITYPE_COMPLETE (0x0A) ++#define MPI_FW_UPLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B) + + typedef struct _FWUploadTCSGE + { +diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h +--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/lsi/mpi_raid.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,12 +1,12 @@ + /* +- * Copyright (c) 2001-2005 LSI Logic Corporation. ++ * Copyright (c) 2001-2007 LSI Logic Corporation. + * + * + * Name: mpi_raid.h + * Title: MPI RAID message and structures + * Creation Date: February 27, 2001 + * +- * mpi_raid.h Version: 01.05.02 ++ * mpi_raid.h Version: 01.05.03 + * + * Version History + * --------------- +@@ -32,6 +32,8 @@ + * 08-19-04 01.05.01 Original release for MPI v1.5. + * 01-15-05 01.05.02 Added defines for the two new RAID Actions for + * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. ++ * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and ++ * associated defines. 
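
[Note on the mpi_raid.h hunk: the new Device FW Update Mode action packs its ActionDataWord as an enable bit in bit 0 plus an 8-bit timeout in bits 8..15, per the _ADATA_ defines added further down in this hunk. A minimal standalone sketch of building and decoding that word, reusing the same mask/shift values:]

    #include <stdint.h>
    #include <stdio.h>

    #define ADATA_ENABLE_FW_UPDATE        0x00000001u
    #define ADATA_MASK_FW_UPDATE_TIMEOUT  0x0000FF00u
    #define ADATA_SHIFT_FW_UPDATE_TIMEOUT 8

    /* Build an ActionDataWord: enable bit plus an 8-bit timeout
     * (unit is firmware-defined; the field holds 0..255). */
    static uint32_t fw_update_adata(int enable, uint8_t timeout)
    {
        uint32_t w = enable ? ADATA_ENABLE_FW_UPDATE : 0;
        w |= ((uint32_t)timeout << ADATA_SHIFT_FW_UPDATE_TIMEOUT) &
             ADATA_MASK_FW_UPDATE_TIMEOUT;
        return w;
    }

    int main(void)
    {
        uint32_t w = fw_update_adata(1, 30);
        printf("ActionDataWord = 0x%08x, timeout = %u\n", w,
               (w & ADATA_MASK_FW_UPDATE_TIMEOUT) >>
               ADATA_SHIFT_FW_UPDATE_TIMEOUT);
        return 0;   /* prints 0x00001e01, timeout = 30 */
    }
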
+ * -------------------------------------------------------------------------- + */ + +@@ -90,6 +92,7 @@ + #define MPI_RAID_ACTION_INACTIVATE_VOLUME (0x12) + #define MPI_RAID_ACTION_SET_RESYNC_RATE (0x13) + #define MPI_RAID_ACTION_SET_DATA_SCRUB_RATE (0x14) ++#define MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE (0x15) + + /* ActionDataWord defines for use with MPI_RAID_ACTION_CREATE_VOLUME action */ + #define MPI_RAID_ACTION_ADATA_DO_NOT_SYNC (0x00000001) +@@ -111,6 +114,10 @@ + /* ActionDataWord defines for use with MPI_RAID_ACTION_SET_DATA_SCRUB_RATE action */ + #define MPI_RAID_ACTION_ADATA_DATA_SCRUB_RATE_MASK (0x000000FF) + ++/* ActionDataWord defines for use with MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE action */ ++#define MPI_RAID_ACTION_ADATA_ENABLE_FW_UPDATE (0x00000001) ++#define MPI_RAID_ACTION_ADATA_MASK_FW_UPDATE_TIMEOUT (0x0000FF00) ++#define MPI_RAID_ACTION_ADATA_SHIFT_FW_UPDATE_TIMEOUT (8) + + + /* RAID Action reply message */ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.c linux-2.6.22-591/drivers/message/fusion/mptbase.c +--- linux-2.6.22-570/drivers/message/fusion/mptbase.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptbase.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,7 +6,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -64,6 +64,7 @@ + #endif + + #include "mptbase.h" ++#include "lsi/mpi_log_fc.h" + + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ + #define my_NAME "Fusion MPT base driver" +@@ -6349,14 +6350,37 @@ + static void + mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info) + { +- static char *subcl_str[8] = { +- "FCP Initiator", "FCP Target", "LAN", "MPI Message Layer", +- "FC Link", "Context Manager", "Invalid Field Offset", "State Change Info" +- }; +- u8 subcl = (log_info >> 24) & 0x7; ++ char *desc = "unknown"; ++ ++ switch (log_info & 0xFF000000) { ++ case MPI_IOCLOGINFO_FC_INIT_BASE: ++ desc = "FCP Initiator"; ++ break; ++ case MPI_IOCLOGINFO_FC_TARGET_BASE: ++ desc = "FCP Target"; ++ break; ++ case MPI_IOCLOGINFO_FC_LAN_BASE: ++ desc = "LAN"; ++ break; ++ case MPI_IOCLOGINFO_FC_MSG_BASE: ++ desc = "MPI Message Layer"; ++ break; ++ case MPI_IOCLOGINFO_FC_LINK_BASE: ++ desc = "FC Link"; ++ break; ++ case MPI_IOCLOGINFO_FC_CTX_BASE: ++ desc = "Context Manager"; ++ break; ++ case MPI_IOCLOGINFO_FC_INVALID_FIELD_BYTE_OFFSET: ++ desc = "Invalid Field Offset"; ++ break; ++ case MPI_IOCLOGINFO_FC_STATE_CHANGE: ++ desc = "State Change Info"; ++ break; ++ } + +- printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubCl={%s}\n", +- ioc->name, log_info, subcl_str[subcl]); ++ printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubClass={%s}, Value=(0x%06x)\n", ++ ioc->name, log_info, desc, (log_info & 0xFFFFFF)); + } + + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.h linux-2.6.22-591/drivers/message/fusion/mptbase.h +--- linux-2.6.22-570/drivers/message/fusion/mptbase.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptbase.h 2007-12-21 15:36:12.000000000 -0500 +@@ -6,7 +6,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
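
[Note on the mpt_fc_log_info() rework above: the old code indexed a fixed string array with (log_info >> 24) & 0x7, which silently aliased any subclass outside 0..7 onto a wrong name. The new code switches on the full top byte using the bases from the newly included lsi/mpi_log_fc.h, falls back to "unknown", and also prints the low 24-bit value. A standalone sketch of the decode; the two base constants below are illustrative placeholders, not values taken from mpi_log_fc.h:]

    #include <stdint.h>
    #include <stdio.h>

    #define EXAMPLE_FC_INIT_BASE 0x20000000u   /* hypothetical value */
    #define EXAMPLE_FC_LINK_BASE 0x24000000u   /* hypothetical value */

    /* Top byte selects the subclass; low 24 bits carry the value. */
    static const char *subclass_name(uint32_t log_info)
    {
        switch (log_info & 0xFF000000u) {
        case EXAMPLE_FC_INIT_BASE: return "FCP Initiator";
        case EXAMPLE_FC_LINK_BASE: return "FC Link";
        default:                   return "unknown";
        }
    }

    int main(void)
    {
        uint32_t log_info = EXAMPLE_FC_LINK_BASE | 0x000123u;
        printf("LogInfo(0x%08x): SubClass={%s}, Value=(0x%06x)\n",
               log_info, subclass_name(log_info), log_info & 0xFFFFFFu);
        return 0;
    }
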
+ * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -75,8 +75,8 @@ + #define COPYRIGHT "Copyright (c) 1999-2007 " MODULEAUTHOR + #endif + +-#define MPT_LINUX_VERSION_COMMON "3.04.04" +-#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.04" ++#define MPT_LINUX_VERSION_COMMON "3.04.05" ++#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.04.05" + #define WHAT_MAGIC_STRING "@" "(" "#" ")" + + #define show_mptmod_ver(s,ver) \ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.c linux-2.6.22-591/drivers/message/fusion/mptctl.c +--- linux-2.6.22-570/drivers/message/fusion/mptctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptctl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -5,7 +5,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.h linux-2.6.22-591/drivers/message/fusion/mptctl.h +--- linux-2.6.22-570/drivers/message/fusion/mptctl.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptctl.h 2007-12-21 15:36:12.000000000 -0500 +@@ -6,7 +6,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptfc.c linux-2.6.22-591/drivers/message/fusion/mptfc.c +--- linux-2.6.22-570/drivers/message/fusion/mptfc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptfc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -4,7 +4,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -43,7 +43,6 @@ + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +-#include "linux_compat.h" /* linux-2.6 tweaks */ + #include + #include + #include +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.c linux-2.6.22-591/drivers/message/fusion/mptlan.c +--- linux-2.6.22-570/drivers/message/fusion/mptlan.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptlan.c 2007-12-21 15:36:12.000000000 -0500 +@@ -5,7 +5,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
+ * + * Copyright (c) 2000-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.h linux-2.6.22-591/drivers/message/fusion/mptlan.h +--- linux-2.6.22-570/drivers/message/fusion/mptlan.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptlan.h 2007-12-21 15:36:12.000000000 -0500 +@@ -5,7 +5,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 2000-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptsas.c linux-2.6.22-591/drivers/message/fusion/mptsas.c +--- linux-2.6.22-570/drivers/message/fusion/mptsas.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptsas.c 2007-12-21 15:36:12.000000000 -0500 +@@ -4,7 +4,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * Copyright (c) 2005-2007 Dell + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.c linux-2.6.22-591/drivers/message/fusion/mptscsih.c +--- linux-2.6.22-570/drivers/message/fusion/mptscsih.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptscsih.c 2007-12-21 15:36:12.000000000 -0500 +@@ -4,7 +4,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -44,7 +44,6 @@ + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ + +-#include "linux_compat.h" /* linux-2.6 tweaks */ + #include + #include + #include +@@ -260,30 +259,13 @@ + /* Map the data portion, if any. + * sges_left = 0 if no data transfer. + */ +- if ( (sges_left = SCpnt->use_sg) ) { +- sges_left = pci_map_sg(ioc->pcidev, +- (struct scatterlist *) SCpnt->request_buffer, +- SCpnt->use_sg, +- SCpnt->sc_data_direction); +- if (sges_left == 0) ++ sges_left = scsi_dma_map(SCpnt); ++ if (sges_left < 0) + return FAILED; +- } else if (SCpnt->request_bufflen) { +- SCpnt->SCp.dma_handle = pci_map_single(ioc->pcidev, +- SCpnt->request_buffer, +- SCpnt->request_bufflen, +- SCpnt->sc_data_direction); +- dsgprintk((MYIOC_s_INFO_FMT "SG: non-SG for %p, len=%d\n", +- ioc->name, SCpnt, SCpnt->request_bufflen)); +- mptscsih_add_sge((char *) &pReq->SGL, +- 0xD1000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|SCpnt->request_bufflen, +- SCpnt->SCp.dma_handle); +- +- return SUCCESS; +- } + + /* Handle the SG case. 
+ */ +- sg = (struct scatterlist *) SCpnt->request_buffer; ++ sg = scsi_sglist(SCpnt); + sg_done = 0; + sgeOffset = sizeof(SCSIIORequest_t) - sizeof(SGE_IO_UNION); + chainSge = NULL; +@@ -465,7 +447,12 @@ + MPT_FRAME_HDR *mf; + SEPRequest_t *SEPMsg; + +- if (ioc->bus_type == FC) ++ if (ioc->bus_type != SAS) ++ return; ++ ++ /* Not supported for hidden raid components ++ */ ++ if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) + return; + + if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) { +@@ -662,7 +649,7 @@ + scsi_state = pScsiReply->SCSIState; + scsi_status = pScsiReply->SCSIStatus; + xfer_cnt = le32_to_cpu(pScsiReply->TransferCount); +- sc->resid = sc->request_bufflen - xfer_cnt; ++ scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt); + log_info = le32_to_cpu(pScsiReply->IOCLogInfo); + + /* +@@ -767,7 +754,7 @@ + break; + + case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH: /* 0x0049 */ +- sc->resid = sc->request_bufflen - xfer_cnt; ++ scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt); + if((xfer_cnt==0)||(sc->underflow > xfer_cnt)) + sc->result=DID_SOFT_ERROR << 16; + else /* Sufficient data transfer occurred */ +@@ -816,7 +803,7 @@ + break; + + case MPI_IOCSTATUS_SCSI_DATA_OVERRUN: /* 0x0044 */ +- sc->resid=0; ++ scsi_set_resid(sc, 0); + case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR: /* 0x0040 */ + case MPI_IOCSTATUS_SUCCESS: /* 0x0000 */ + sc->result = (DID_OK << 16) | scsi_status; +@@ -900,22 +887,17 @@ + + dreplyprintk(("%s: [%d:%d:%d:%d] resid=%d " + "bufflen=%d xfer_cnt=%d\n", __FUNCTION__, +- sc->device->host->host_no, sc->device->channel, sc->device->id, +- sc->device->lun, sc->resid, sc->request_bufflen, +- xfer_cnt)); ++ sc->device->host->host_no, ++ sc->device->channel, sc->device->id, ++ sc->device->lun, scsi_get_resid(sc), ++ scsi_bufflen(sc), xfer_cnt)); + } + #endif + + } /* end of address reply case */ + + /* Unmap the DMA buffers, if any. 
*/ +- if (sc->use_sg) { +- pci_unmap_sg(ioc->pcidev, (struct scatterlist *) sc->request_buffer, +- sc->use_sg, sc->sc_data_direction); +- } else if (sc->request_bufflen) { +- pci_unmap_single(ioc->pcidev, sc->SCp.dma_handle, +- sc->request_bufflen, sc->sc_data_direction); +- } ++ scsi_dma_unmap(sc); + + sc->scsi_done(sc); /* Issue the command callback */ + +@@ -970,17 +952,8 @@ + /* Set status, free OS resources (SG DMA buffers) + * Do OS callback + */ +- if (SCpnt->use_sg) { +- pci_unmap_sg(ioc->pcidev, +- (struct scatterlist *) SCpnt->request_buffer, +- SCpnt->use_sg, +- SCpnt->sc_data_direction); +- } else if (SCpnt->request_bufflen) { +- pci_unmap_single(ioc->pcidev, +- SCpnt->SCp.dma_handle, +- SCpnt->request_bufflen, +- SCpnt->sc_data_direction); +- } ++ scsi_dma_unmap(SCpnt); ++ + SCpnt->result = DID_RESET << 16; + SCpnt->host_scribble = NULL; + +@@ -1023,14 +996,19 @@ + mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii); + if (mf == NULL) + continue; ++ /* If the device is a hidden raid component, then its ++ * expected that the mf->function will be RAID_SCSI_IO ++ */ ++ if (vdevice->vtarget->tflags & ++ MPT_TARGET_FLAGS_RAID_COMPONENT && mf->Function != ++ MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH) ++ continue; ++ + int_to_scsilun(vdevice->lun, &lun); + if ((mf->Bus != vdevice->vtarget->channel) || + (mf->TargetID != vdevice->vtarget->id) || + memcmp(lun.scsi_lun, mf->LUN, 8)) + continue; +- dsprintk(( "search_running: found (sc=%p, mf = %p) " +- "channel %d id %d, lun %d \n", hd->ScsiLookup[ii], +- mf, mf->Bus, mf->TargetID, vdevice->lun)); + + /* Cleanup + */ +@@ -1039,19 +1017,12 @@ + mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf); + if ((unsigned char *)mf != sc->host_scribble) + continue; +- if (sc->use_sg) { +- pci_unmap_sg(hd->ioc->pcidev, +- (struct scatterlist *) sc->request_buffer, +- sc->use_sg, +- sc->sc_data_direction); +- } else if (sc->request_bufflen) { +- pci_unmap_single(hd->ioc->pcidev, +- sc->SCp.dma_handle, +- sc->request_bufflen, +- sc->sc_data_direction); +- } ++ scsi_dma_unmap(sc); + sc->host_scribble = NULL; + sc->result = DID_NO_CONNECT << 16; ++ dsprintk(( "search_running: found (sc=%p, mf = %p) " ++ "channel %d id %d, lun %d \n", sc, mf, ++ vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun)); + sc->scsi_done(sc); + } + } +@@ -1380,10 +1351,10 @@ + * will be no data transfer! GRRRRR... + */ + if (SCpnt->sc_data_direction == DMA_FROM_DEVICE) { +- datalen = SCpnt->request_bufflen; ++ datalen = scsi_bufflen(SCpnt); + scsidir = MPI_SCSIIO_CONTROL_READ; /* DATA IN (host<--ioc<--dev) */ + } else if (SCpnt->sc_data_direction == DMA_TO_DEVICE) { +- datalen = SCpnt->request_bufflen; ++ datalen = scsi_bufflen(SCpnt); + scsidir = MPI_SCSIIO_CONTROL_WRITE; /* DATA OUT (host-->ioc-->dev) */ + } else { + datalen = 0; +@@ -1768,20 +1739,45 @@ + u32 ctx2abort; + int scpnt_idx; + int retval; +- VirtDevice *vdev; ++ VirtDevice *vdevice; + ulong sn = SCpnt->serial_number; ++ MPT_ADAPTER *ioc; + + /* If we can't locate our host adapter structure, return FAILED status. + */ + if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) { + SCpnt->result = DID_RESET << 16; + SCpnt->scsi_done(SCpnt); +- dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: " +- "Can't locate host! (sc=%p)\n", +- SCpnt)); ++ dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: Can't locate " ++ "host! (sc=%p)\n", SCpnt)); + return FAILED; + } + ++ ioc = hd->ioc; ++ printk(MYIOC_s_INFO_FMT "attempting task abort! 
(sc=%p)\n", ++ ioc->name, SCpnt); ++ scsi_print_command(SCpnt); ++ ++ vdevice = SCpnt->device->hostdata; ++ if (!vdevice || !vdevice->vtarget) { ++ dtmprintk((MYIOC_s_DEBUG_FMT "task abort: device has been " ++ "deleted (sc=%p)\n", ioc->name, SCpnt)); ++ SCpnt->result = DID_NO_CONNECT << 16; ++ SCpnt->scsi_done(SCpnt); ++ retval = 0; ++ goto out; ++ } ++ ++ /* Task aborts are not supported for hidden raid components. ++ */ ++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) { ++ dtmprintk((MYIOC_s_DEBUG_FMT "task abort: hidden raid " ++ "component (sc=%p)\n", ioc->name, SCpnt)); ++ SCpnt->result = DID_RESET << 16; ++ retval = FAILED; ++ goto out; ++ } ++ + /* Find this command + */ + if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) { +@@ -1790,21 +1786,20 @@ + */ + SCpnt->result = DID_RESET << 16; + dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: " +- "Command not in the active list! (sc=%p)\n", +- hd->ioc->name, SCpnt)); +- return SUCCESS; ++ "Command not in the active list! (sc=%p)\n", ioc->name, ++ SCpnt)); ++ retval = 0; ++ goto out; + } + +- if (hd->resetPending) +- return FAILED; ++ if (hd->resetPending) { ++ retval = FAILED; ++ goto out; ++ } + + if (hd->timeouts < -1) + hd->timeouts++; + +- printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n", +- hd->ioc->name, SCpnt); +- scsi_print_command(SCpnt); +- + /* Most important! Set TaskMsgContext to SCpnt's MsgContext! + * (the IO to be ABORT'd) + * +@@ -1817,18 +1812,17 @@ + + hd->abortSCpnt = SCpnt; + +- vdev = SCpnt->device->hostdata; + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK, +- vdev->vtarget->channel, vdev->vtarget->id, vdev->lun, +- ctx2abort, mptscsih_get_tm_timeout(hd->ioc)); ++ vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun, ++ ctx2abort, mptscsih_get_tm_timeout(ioc)); + + if (SCPNT_TO_LOOKUP_IDX(SCpnt) == scpnt_idx && + SCpnt->serial_number == sn) + retval = FAILED; + +- printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n", +- hd->ioc->name, +- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); ++ out: ++ printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n", ++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + + if (retval == 0) + return SUCCESS; +@@ -1850,32 +1844,47 @@ + { + MPT_SCSI_HOST *hd; + int retval; +- VirtDevice *vdev; ++ VirtDevice *vdevice; ++ MPT_ADAPTER *ioc; + + /* If we can't locate our host adapter structure, return FAILED status. + */ + if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ +- dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: " +- "Can't locate host! (sc=%p)\n", +- SCpnt)); ++ dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: Can't " ++ "locate host! (sc=%p)\n", SCpnt)); + return FAILED; + } + +- if (hd->resetPending) +- return FAILED; +- +- printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n", +- hd->ioc->name, SCpnt); ++ ioc = hd->ioc; ++ printk(MYIOC_s_INFO_FMT "attempting target reset! 
(sc=%p)\n", ++ ioc->name, SCpnt); + scsi_print_command(SCpnt); + +- vdev = SCpnt->device->hostdata; ++ if (hd->resetPending) { ++ retval = FAILED; ++ goto out; ++ } ++ ++ vdevice = SCpnt->device->hostdata; ++ if (!vdevice || !vdevice->vtarget) { ++ retval = 0; ++ goto out; ++ } ++ ++ /* Target reset to hidden raid component is not supported ++ */ ++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) { ++ retval = FAILED; ++ goto out; ++ } ++ + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, +- vdev->vtarget->channel, vdev->vtarget->id, +- 0, 0, mptscsih_get_tm_timeout(hd->ioc)); ++ vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0, ++ mptscsih_get_tm_timeout(ioc)); + +- printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n", +- hd->ioc->name, +- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); ++ out: ++ printk (MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n", ++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + + if (retval == 0) + return SUCCESS; +@@ -1899,18 +1908,19 @@ + MPT_SCSI_HOST *hd; + int retval; + VirtDevice *vdev; ++ MPT_ADAPTER *ioc; + + /* If we can't locate our host adapter structure, return FAILED status. + */ + if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ +- dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: " +- "Can't locate host! (sc=%p)\n", +- SCpnt ) ); ++ dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: Can't " ++ "locate host! (sc=%p)\n", SCpnt )); + return FAILED; + } + +- printk(KERN_WARNING MYNAM ": %s: attempting bus reset! (sc=%p)\n", +- hd->ioc->name, SCpnt); ++ ioc = hd->ioc; ++ printk(MYIOC_s_INFO_FMT "attempting bus reset! (sc=%p)\n", ++ ioc->name, SCpnt); + scsi_print_command(SCpnt); + + if (hd->timeouts < -1) +@@ -1918,11 +1928,10 @@ + + vdev = SCpnt->device->hostdata; + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, +- vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(hd->ioc)); ++ vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc)); + +- printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n", +- hd->ioc->name, +- ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); ++ printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n", ++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + + if (retval == 0) + return SUCCESS; +@@ -1943,37 +1952,38 @@ + mptscsih_host_reset(struct scsi_cmnd *SCpnt) + { + MPT_SCSI_HOST * hd; +- int status = SUCCESS; ++ int retval; ++ MPT_ADAPTER *ioc; + + /* If we can't locate the host to reset, then we failed. */ + if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ +- dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: " +- "Can't locate host! (sc=%p)\n", +- SCpnt ) ); ++ dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: Can't " ++ "locate host! (sc=%p)\n", SCpnt)); + return FAILED; + } + +- printk(KERN_WARNING MYNAM ": %s: Attempting host reset! (sc=%p)\n", +- hd->ioc->name, SCpnt); ++ ioc = hd->ioc; ++ printk(MYIOC_s_INFO_FMT "attempting host reset! (sc=%p)\n", ++ ioc->name, SCpnt); + + /* If our attempts to reset the host failed, then return a failed + * status. The host will be taken off line by the SCSI mid-layer. + */ +- if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0){ +- status = FAILED; ++ if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0) { ++ retval = FAILED; + } else { + /* Make sure TM pending is cleared and TM state is set to + * NONE. 
+ */ ++ retval = 0; + hd->tmPending = 0; + hd->tmState = TM_STATE_NONE; + } + +- dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: " +- "Status = %s\n", +- (status == SUCCESS) ? "SUCCESS" : "FAILED" ) ); ++ printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n", ++ ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + +- return status; ++ return retval; + } + + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -3150,6 +3160,16 @@ + { + INTERNAL_CMD iocmd; + ++ /* Ignore hidden raid components, this is handled when the command ++ * is sent to the volume ++ */ ++ if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) ++ return; ++ ++ if (vdevice->vtarget->type != TYPE_DISK || vdevice->vtarget->deleted || ++ !vdevice->configured_lun) ++ return; ++ + /* Following parameters will not change + * in this routine. + */ +@@ -3164,8 +3184,6 @@ + iocmd.id = vdevice->vtarget->id; + iocmd.lun = vdevice->lun; + +- if ((vdevice->vtarget->type == TYPE_DISK) && +- (vdevice->configured_lun)) + mptscsih_do_cmd(hd, &iocmd); + } + +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.h linux-2.6.22-591/drivers/message/fusion/mptscsih.h +--- linux-2.6.22-570/drivers/message/fusion/mptscsih.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptscsih.h 2007-12-21 15:36:12.000000000 -0500 +@@ -6,7 +6,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptspi.c linux-2.6.22-591/drivers/message/fusion/mptspi.c +--- linux-2.6.22-570/drivers/message/fusion/mptspi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/fusion/mptspi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -4,7 +4,7 @@ + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. 
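
[Note on the mptscsih hunks above (and the i2o_scsi one just below): they all make the same conversion. The open-coded use_sg/request_buffer branches, which had to handle scatter-gather and single-buffer I/O separately, are replaced by the scsi_dma_map()/scsi_dma_unmap() helpers and the scsi_sglist()/scsi_bufflen()/scsi_set_resid() accessors, under which every command presents as a scatterlist. A condensed view of the pattern; kernel context is assumed (not a standalone program), and build_hw_sg() is a placeholder for the driver-specific SG setup:]

    static int queue_io(struct scsi_cmnd *sc)
    {
            struct scatterlist *sg;
            int nseg;

            nseg = scsi_dma_map(sc);    /* <0 error, 0 no data, >0 sg count */
            if (nseg < 0)
                    return FAILED;

            sg = scsi_sglist(sc);       /* valid even for single-buffer I/O */
            build_hw_sg(sg, nseg);      /* placeholder: driver SG list setup */
            return 0;
    }

    static void complete_io(struct scsi_cmnd *sc, u32 xfer_cnt)
    {
            scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
            scsi_dma_unmap(sc);         /* replaces both old unmap branches */
            sc->scsi_done(sc);
    }
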
+ * + * Copyright (c) 1999-2007 LSI Logic Corporation +- * (mailto:mpt_linux_developer@lsi.com) ++ * (mailto:DL-MPTFusionLinux@lsi.com) + * + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +@@ -44,7 +44,6 @@ + */ + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ + +-#include "linux_compat.h" /* linux-2.6 tweaks */ + #include + #include + #include +diff -Nurb linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c +--- linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/message/i2o/i2o_scsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -377,12 +377,8 @@ + osm_err("SCSI error %08x\n", error); + + dev = &c->pdev->dev; +- if (cmd->use_sg) +- dma_unmap_sg(dev, cmd->request_buffer, cmd->use_sg, +- cmd->sc_data_direction); +- else if (cmd->SCp.dma_handle) +- dma_unmap_single(dev, cmd->SCp.dma_handle, cmd->request_bufflen, +- cmd->sc_data_direction); ++ ++ scsi_dma_unmap(cmd); + + cmd->scsi_done(cmd); + +@@ -664,21 +660,15 @@ + + if (sgl_offset != SGL_OFFSET_0) { + /* write size of data addressed by SGL */ +- *mptr++ = cpu_to_le32(SCpnt->request_bufflen); ++ *mptr++ = cpu_to_le32(scsi_bufflen(SCpnt)); + + /* Now fill in the SGList and command */ +- if (SCpnt->use_sg) { +- if (!i2o_dma_map_sg(c, SCpnt->request_buffer, +- SCpnt->use_sg, ++ ++ if (scsi_sg_count(SCpnt)) { ++ if (!i2o_dma_map_sg(c, scsi_sglist(SCpnt), ++ scsi_sg_count(SCpnt), + SCpnt->sc_data_direction, &mptr)) + goto nomem; +- } else { +- SCpnt->SCp.dma_handle = +- i2o_dma_map_single(c, SCpnt->request_buffer, +- SCpnt->request_bufflen, +- SCpnt->sc_data_direction, &mptr); +- if (dma_mapping_error(SCpnt->SCp.dma_handle)) +- goto nomem; + } + } + +diff -Nurb linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c +--- linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mfd/ucb1x00-ts.c 2007-12-21 15:36:12.000000000 -0500 +@@ -209,6 +209,7 @@ + DECLARE_WAITQUEUE(wait, tsk); + int valid = 0; + ++ set_freezable(); + add_wait_queue(&ts->irq_wait, &wait); + while (!kthread_should_stop()) { + unsigned int x, y, p; +diff -Nurb linux-2.6.22-570/drivers/misc/asus-laptop.c linux-2.6.22-591/drivers/misc/asus-laptop.c +--- linux-2.6.22-570/drivers/misc/asus-laptop.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/misc/asus-laptop.c 2007-12-21 15:36:12.000000000 -0500 +@@ -737,8 +737,7 @@ + struct device_attribute dev_attr_##_name = { \ + .attr = { \ + .name = __stringify(_name), \ +- .mode = 0, \ +- .owner = THIS_MODULE }, \ ++ .mode = 0 }, \ + .show = NULL, \ + .store = NULL, \ + } +diff -Nurb linux-2.6.22-570/drivers/mmc/card/Kconfig linux-2.6.22-591/drivers/mmc/card/Kconfig +--- linux-2.6.22-570/drivers/mmc/card/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/card/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -14,3 +14,21 @@ + mount the filesystem. Almost everyone wishing MMC support + should say Y or M here. + ++config MMC_BLOCK_BOUNCE ++ bool "Use bounce buffer for simple hosts" ++ depends on MMC_BLOCK ++ default y ++ help ++ SD/MMC is a high latency protocol where it is crucial to ++ send large requests in order to get high performance. Many ++ controllers, however, are restricted to continuous memory ++ (i.e. they can't do scatter-gather), something the kernel ++ rarely can provide. 
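
[Note on the MMC bounce-buffer feature introduced here: the Kconfig help above states the rationale, and mmc/card/queue.c below implements it by mapping the request into a temporary scatterlist (bounce_sg) and funnelling the data through one contiguous buffer that the single-segment host can handle. The core of the data movement is copy_sg(): a two-cursor copy between scatterlists whose segment boundaries do not line up. A standalone model over plain buffers, with "struct seg" standing in for struct scatterlist and the original's BUG_ON checks omitted:]

    #include <stdio.h>
    #include <string.h>

    struct seg { char *buf; unsigned int len; };

    /* Walk two segment lists with independent cursors, copying
     * min(dst-remaining, src-remaining) sized chunks. */
    static void copy_segs(struct seg *dst, unsigned int dst_len,
                          struct seg *src, unsigned int src_len)
    {
        char *d = NULL, *s = NULL;
        unsigned int dsz = 0, ssz = 0, chunk;

        while (src_len) {
            if (dsz == 0) { d = dst->buf; dsz = dst->len; }
            if (ssz == 0) { s = src->buf; ssz = src->len; }

            chunk = dsz < ssz ? dsz : ssz;
            memcpy(d, s, chunk);
            d += chunk; s += chunk; dsz -= chunk; ssz -= chunk;

            if (dsz == 0) { dst++; dst_len--; }
            if (ssz == 0) { src++; src_len--; }
        }
    }

    int main(void)
    {
        char a[3] = "ab", b[4] = "cde", out[6] = {0};
        struct seg src[] = { { a, 2 }, { b, 3 } };
        struct seg dst[] = { { out, 5 } };

        copy_segs(dst, 1, src, 2);
        printf("%s\n", out);   /* "abcde": two fragments, one bounce buffer */
        return 0;
    }
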
++ ++ Say Y here to help these restricted hosts by bouncing ++ requests back and forth from a large buffer. You will get ++ a big performance gain at the cost of up to 64 KiB of ++ physical memory. ++ ++ If unsure, say Y here. ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/card/block.c linux-2.6.22-591/drivers/mmc/card/block.c +--- linux-2.6.22-570/drivers/mmc/card/block.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/card/block.c 2007-12-21 15:36:12.000000000 -0500 +@@ -262,7 +262,9 @@ + } + + brq.data.sg = mq->sg; +- brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg); ++ brq.data.sg_len = mmc_queue_map_sg(mq); ++ ++ mmc_queue_bounce_pre(mq); + + if (brq.data.blocks != + (req->nr_sectors >> (md->block_bits - 9))) { +@@ -279,6 +281,9 @@ + } + + mmc_wait_for_req(card->host, &brq.mrq); ++ ++ mmc_queue_bounce_post(mq); ++ + if (brq.cmd.error) { + printk(KERN_ERR "%s: error %d sending read/write command\n", + req->rq_disk->disk_name, brq.cmd.error); +diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.c linux-2.6.22-591/drivers/mmc/card/queue.c +--- linux-2.6.22-570/drivers/mmc/card/queue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/card/queue.c 2007-12-21 15:36:12.000000000 -0500 +@@ -11,12 +11,15 @@ + */ + #include + #include ++#include + #include + + #include + #include + #include "queue.h" + ++#define MMC_QUEUE_BOUNCESZ 65536 ++ + #define MMC_QUEUE_SUSPENDED (1 << 0) + + /* +@@ -42,11 +45,7 @@ + struct mmc_queue *mq = d; + struct request_queue *q = mq->queue; + +- /* +- * Set iothread to ensure that we aren't put to sleep by +- * the process freezing. We handle suspension ourselves. +- */ +- current->flags |= PF_MEMALLOC|PF_NOFREEZE; ++ current->flags |= PF_MEMALLOC; + + down(&mq->thread_sem); + do { +@@ -118,6 +117,7 @@ + struct mmc_host *host = card->host; + u64 limit = BLK_BOUNCE_HIGH; + int ret; ++ unsigned int bouncesz; + + if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) + limit = *mmc_dev(host)->dma_mask; +@@ -127,36 +127,83 @@ + if (!mq->queue) + return -ENOMEM; + ++ mq->queue->queuedata = mq; ++ mq->req = NULL; ++ + blk_queue_prep_rq(mq->queue, mmc_prep_request); ++ ++#ifdef CONFIG_MMC_BLOCK_BOUNCE ++ if (host->max_hw_segs == 1) { ++ bouncesz = MMC_QUEUE_BOUNCESZ; ++ ++ if (bouncesz > host->max_req_size) ++ bouncesz = host->max_req_size; ++ if (bouncesz > host->max_seg_size) ++ bouncesz = host->max_seg_size; ++ ++ mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL); ++ if (!mq->bounce_buf) { ++ printk(KERN_WARNING "%s: unable to allocate " ++ "bounce buffer\n", mmc_card_name(card)); ++ } else { ++ blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH); ++ blk_queue_max_sectors(mq->queue, bouncesz / 512); ++ blk_queue_max_phys_segments(mq->queue, bouncesz / 512); ++ blk_queue_max_hw_segments(mq->queue, bouncesz / 512); ++ blk_queue_max_segment_size(mq->queue, bouncesz); ++ ++ mq->sg = kmalloc(sizeof(struct scatterlist), ++ GFP_KERNEL); ++ if (!mq->sg) { ++ ret = -ENOMEM; ++ goto free_bounce_buf; ++ } ++ ++ mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * ++ bouncesz / 512, GFP_KERNEL); ++ if (!mq->bounce_sg) { ++ ret = -ENOMEM; ++ goto free_sg; ++ } ++ } ++ } ++#endif ++ ++ if (!mq->bounce_buf) { + blk_queue_bounce_limit(mq->queue, limit); + blk_queue_max_sectors(mq->queue, host->max_req_size / 512); + blk_queue_max_phys_segments(mq->queue, host->max_phys_segs); + blk_queue_max_hw_segments(mq->queue, host->max_hw_segs); + blk_queue_max_segment_size(mq->queue, host->max_seg_size); + +- mq->queue->queuedata = mq; +- mq->req = 
NULL; +- +- mq->sg = kmalloc(sizeof(struct scatterlist) * host->max_phys_segs, +- GFP_KERNEL); ++ mq->sg = kmalloc(sizeof(struct scatterlist) * ++ host->max_phys_segs, GFP_KERNEL); + if (!mq->sg) { + ret = -ENOMEM; + goto cleanup_queue; + } ++ } + + init_MUTEX(&mq->thread_sem); + + mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd"); + if (IS_ERR(mq->thread)) { + ret = PTR_ERR(mq->thread); +- goto free_sg; ++ goto free_bounce_sg; + } + + return 0; +- ++ free_bounce_sg: ++ if (mq->bounce_sg) ++ kfree(mq->bounce_sg); ++ mq->bounce_sg = NULL; + free_sg: + kfree(mq->sg); + mq->sg = NULL; ++ free_bounce_buf: ++ if (mq->bounce_buf) ++ kfree(mq->bounce_buf); ++ mq->bounce_buf = NULL; + cleanup_queue: + blk_cleanup_queue(mq->queue); + return ret; +@@ -178,9 +225,17 @@ + /* Then terminate our worker thread */ + kthread_stop(mq->thread); + ++ if (mq->bounce_sg) ++ kfree(mq->bounce_sg); ++ mq->bounce_sg = NULL; ++ + kfree(mq->sg); + mq->sg = NULL; + ++ if (mq->bounce_buf) ++ kfree(mq->bounce_buf); ++ mq->bounce_buf = NULL; ++ + blk_cleanup_queue(mq->queue); + + mq->card = NULL; +@@ -231,3 +286,108 @@ + } + } + ++static void copy_sg(struct scatterlist *dst, unsigned int dst_len, ++ struct scatterlist *src, unsigned int src_len) ++{ ++ unsigned int chunk; ++ char *dst_buf, *src_buf; ++ unsigned int dst_size, src_size; ++ ++ dst_buf = NULL; ++ src_buf = NULL; ++ dst_size = 0; ++ src_size = 0; ++ ++ while (src_len) { ++ BUG_ON(dst_len == 0); ++ ++ if (dst_size == 0) { ++ dst_buf = page_address(dst->page) + dst->offset; ++ dst_size = dst->length; ++ } ++ ++ if (src_size == 0) { ++ src_buf = page_address(src->page) + src->offset; ++ src_size = src->length; ++ } ++ ++ chunk = min(dst_size, src_size); ++ ++ memcpy(dst_buf, src_buf, chunk); ++ ++ dst_buf += chunk; ++ src_buf += chunk; ++ dst_size -= chunk; ++ src_size -= chunk; ++ ++ if (dst_size == 0) { ++ dst++; ++ dst_len--; ++ } ++ ++ if (src_size == 0) { ++ src++; ++ src_len--; ++ } ++ } ++} ++ ++unsigned int mmc_queue_map_sg(struct mmc_queue *mq) ++{ ++ unsigned int sg_len; ++ ++ if (!mq->bounce_buf) ++ return blk_rq_map_sg(mq->queue, mq->req, mq->sg); ++ ++ BUG_ON(!mq->bounce_sg); ++ ++ sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg); ++ ++ mq->bounce_sg_len = sg_len; ++ ++ /* ++ * Shortcut in the event we only get a single entry. 
++ */ ++ if (sg_len == 1) { ++ memcpy(mq->sg, mq->bounce_sg, sizeof(struct scatterlist)); ++ return 1; ++ } ++ ++ mq->sg[0].page = virt_to_page(mq->bounce_buf); ++ mq->sg[0].offset = offset_in_page(mq->bounce_buf); ++ mq->sg[0].length = 0; ++ ++ while (sg_len) { ++ mq->sg[0].length += mq->bounce_sg[sg_len - 1].length; ++ sg_len--; ++ } ++ ++ return 1; ++} ++ ++void mmc_queue_bounce_pre(struct mmc_queue *mq) ++{ ++ if (!mq->bounce_buf) ++ return; ++ ++ if (mq->bounce_sg_len == 1) ++ return; ++ if (rq_data_dir(mq->req) != WRITE) ++ return; ++ ++ copy_sg(mq->sg, 1, mq->bounce_sg, mq->bounce_sg_len); ++} ++ ++void mmc_queue_bounce_post(struct mmc_queue *mq) ++{ ++ if (!mq->bounce_buf) ++ return; ++ ++ if (mq->bounce_sg_len == 1) ++ return; ++ if (rq_data_dir(mq->req) != READ) ++ return; ++ ++ copy_sg(mq->bounce_sg, mq->bounce_sg_len, mq->sg, 1); ++} ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.h linux-2.6.22-591/drivers/mmc/card/queue.h +--- linux-2.6.22-570/drivers/mmc/card/queue.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/card/queue.h 2007-12-21 15:36:12.000000000 -0500 +@@ -14,6 +14,9 @@ + void *data; + struct request_queue *queue; + struct scatterlist *sg; ++ char *bounce_buf; ++ struct scatterlist *bounce_sg; ++ unsigned int bounce_sg_len; + }; + + extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *); +@@ -21,4 +24,8 @@ + extern void mmc_queue_suspend(struct mmc_queue *); + extern void mmc_queue_resume(struct mmc_queue *); + ++extern unsigned int mmc_queue_map_sg(struct mmc_queue *); ++extern void mmc_queue_bounce_pre(struct mmc_queue *); ++extern void mmc_queue_bounce_post(struct mmc_queue *); ++ + #endif +diff -Nurb linux-2.6.22-570/drivers/mmc/core/Kconfig linux-2.6.22-591/drivers/mmc/core/Kconfig +--- linux-2.6.22-570/drivers/mmc/core/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -14,3 +14,16 @@ + This option is usually just for embedded systems which use + a MMC/SD card for rootfs. Most people should say N here. + ++config MMC_PASSWORDS ++ boolean "MMC card lock/unlock passwords (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ select KEYS ++ help ++ Say Y here to enable the use of passwords to lock and unlock ++ MMC cards. This uses the access key retention support, using ++ request_key to look up the key associated with each card. ++ ++ For example, if you have an MMC card that was locked using ++ Symbian OS on your cell phone, you won't be able to read it ++ on Linux without this support. ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/Makefile linux-2.6.22-591/drivers/mmc/core/Makefile +--- linux-2.6.22-570/drivers/mmc/core/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -7,5 +7,6 @@ + endif + + obj-$(CONFIG_MMC) += mmc_core.o +-mmc_core-y := core.o sysfs.o mmc.o mmc_ops.o sd.o sd_ops.o ++mmc_core-y := core.o sysfs.o bus.o host.o mmc.o mmc_ops.o sd.o sd_ops.o ++mmc_core-$(CONFIG_MMC_PASSWORDS) += lock.o + +diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.c linux-2.6.22-591/drivers/mmc/core/bus.c +--- linux-2.6.22-570/drivers/mmc/core/bus.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/bus.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,245 @@ ++/* ++ * linux/drivers/mmc/core/bus.c ++ * ++ * Copyright (C) 2003 Russell King, All Rights Reserved. 
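
[Note on MMC_PASSWORDS above: it builds on the kernel keys API. The core looks up a password for each locked card via request_key(), and mmc_bus_match() in bus.c below deliberately refuses to bind drivers while a card is locked, deferring block-driver probing until an unlock succeeds. A rough sketch of such a lookup; the key type name and the description format are assumptions for illustration, not symbols from this patch (whose lock.c is truncated at the end of this section):]

    /* Kernel-context sketch; "mmc_key_type" and the "mmc:%04x"
     * description format are illustrative assumptions only. */
    static struct key *mmc_lookup_key(struct mmc_card *card)
    {
            char desc[16];

            snprintf(desc, sizeof(desc), "mmc:%04x", card->rca);
            return request_key(&mmc_key_type, desc, NULL);
            /* IS_ERR() result: no password available; the card stays
             * locked and mmc_bus_match() keeps drivers away from it. */
    }
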
++ * Copyright (C) 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * MMC card bus driver model ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++#include "sysfs.h" ++#include "bus.h" ++ ++#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) ++#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) ++ ++static ssize_t mmc_type_show(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ switch (card->type) { ++ case MMC_TYPE_MMC: ++ return sprintf(buf, "MMC\n"); ++ case MMC_TYPE_SD: ++ return sprintf(buf, "SD\n"); ++ default: ++ return -EFAULT; ++ } ++} ++ ++static struct device_attribute mmc_dev_attrs[] = { ++ MMC_ATTR_RO(type), ++ __ATTR_NULL, ++}; ++ ++/* ++ * This currently matches any MMC driver to any MMC card - drivers ++ * themselves make the decision whether to drive this card in their ++ * probe method. ++ * ++ * We also fail for all locked cards; drivers expect to be able to do block ++ * I/O still on probe(), which is not possible while the card is locked. ++ * Device probing must be triggered sometime later to make the card available ++ * to the block driver. ++ */ ++static int mmc_bus_match(struct device *dev, struct device_driver *drv) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ if (mmc_card_locked(card)) { ++ dev_dbg(&card->dev, "card is locked; binding is deferred\n"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int ++mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf, ++ int buf_size) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ int retval = 0, i = 0, length = 0; ++ ++#define add_env(fmt,val) do { \ ++ retval = add_uevent_var(envp, num_envp, &i, \ ++ buf, buf_size, &length, \ ++ fmt, val); \ ++ if (retval) \ ++ return retval; \ ++} while (0); ++ ++ switch (card->type) { ++ case MMC_TYPE_MMC: ++ add_env("MMC_TYPE=%s", "MMC"); ++ break; ++ case MMC_TYPE_SD: ++ add_env("MMC_TYPE=%s", "SD"); ++ break; ++ } ++ ++ add_env("MMC_NAME=%s", mmc_card_name(card)); ++ ++#undef add_env ++ ++ envp[i] = NULL; ++ ++ return 0; ++} ++ ++static int mmc_bus_probe(struct device *dev) ++{ ++ struct mmc_driver *drv = to_mmc_driver(dev->driver); ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ return drv->probe(card); ++} ++ ++static int mmc_bus_remove(struct device *dev) ++{ ++ struct mmc_driver *drv = to_mmc_driver(dev->driver); ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ drv->remove(card); ++ ++ return 0; ++} ++ ++static int mmc_bus_suspend(struct device *dev, pm_message_t state) ++{ ++ struct mmc_driver *drv = to_mmc_driver(dev->driver); ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ int ret = 0; ++ ++ if (dev->driver && drv->suspend) ++ ret = drv->suspend(card, state); ++ return ret; ++} ++ ++static int mmc_bus_resume(struct device *dev) ++{ ++ struct mmc_driver *drv = to_mmc_driver(dev->driver); ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ int ret = 0; ++ ++ if (dev->driver && drv->resume) ++ ret = drv->resume(card); ++ return ret; ++} ++ ++static struct bus_type mmc_bus_type = { ++ .name = "mmc", ++ .dev_attrs = mmc_dev_attrs, ++ .match = mmc_bus_match, ++ .uevent = mmc_bus_uevent, ++ .probe = mmc_bus_probe, ++ .remove = mmc_bus_remove, ++ .suspend = mmc_bus_suspend, ++ .resume = mmc_bus_resume, ++}; ++ ++int mmc_register_bus(void) ++{ ++ 
return bus_register(&mmc_bus_type); ++} ++ ++void mmc_unregister_bus(void) ++{ ++ bus_unregister(&mmc_bus_type); ++} ++ ++/** ++ * mmc_register_driver - register a media driver ++ * @drv: MMC media driver ++ */ ++int mmc_register_driver(struct mmc_driver *drv) ++{ ++ drv->drv.bus = &mmc_bus_type; ++ return driver_register(&drv->drv); ++} ++ ++EXPORT_SYMBOL(mmc_register_driver); ++ ++/** ++ * mmc_unregister_driver - unregister a media driver ++ * @drv: MMC media driver ++ */ ++void mmc_unregister_driver(struct mmc_driver *drv) ++{ ++ drv->drv.bus = &mmc_bus_type; ++ driver_unregister(&drv->drv); ++} ++ ++EXPORT_SYMBOL(mmc_unregister_driver); ++ ++static void mmc_release_card(struct device *dev) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ kfree(card); ++} ++ ++/* ++ * Allocate and initialise a new MMC card structure. ++ */ ++struct mmc_card *mmc_alloc_card(struct mmc_host *host) ++{ ++ struct mmc_card *card; ++ ++ card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL); ++ if (!card) ++ return ERR_PTR(-ENOMEM); ++ ++ memset(card, 0, sizeof(struct mmc_card)); ++ ++ card->host = host; ++ ++ device_initialize(&card->dev); ++ ++ card->dev.parent = mmc_classdev(host); ++ card->dev.bus = &mmc_bus_type; ++ card->dev.release = mmc_release_card; ++ ++ return card; ++} ++ ++/* ++ * Register a new MMC card with the driver model. ++ */ ++int mmc_add_card(struct mmc_card *card) ++{ ++ int ret; ++ ++ snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), ++ "%s:%04x", mmc_hostname(card->host), card->rca); ++ ++ ret = device_add(&card->dev); ++ if (ret == 0) ++ mmc_card_set_present(card); ++ ++ return ret; ++} ++ ++/* ++ * Unregister a new MMC card with the driver model, and ++ * (eventually) free it. ++ */ ++void mmc_remove_card(struct mmc_card *card) ++{ ++ if (mmc_card_present(card)) ++ device_del(&card->dev); ++ ++ put_device(&card->dev); ++} ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.h linux-2.6.22-591/drivers/mmc/core/bus.h +--- linux-2.6.22-570/drivers/mmc/core/bus.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/bus.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,22 @@ ++/* ++ * linux/drivers/mmc/core/bus.h ++ * ++ * Copyright (C) 2003 Russell King, All Rights Reserved. ++ * Copyright 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#ifndef _MMC_CORE_BUS_H ++#define _MMC_CORE_BUS_H ++ ++struct mmc_card *mmc_alloc_card(struct mmc_host *host); ++int mmc_add_card(struct mmc_card *card); ++void mmc_remove_card(struct mmc_card *card); ++ ++int mmc_register_bus(void); ++void mmc_unregister_bus(void); ++ ++#endif ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.c linux-2.6.22-591/drivers/mmc/core/core.c +--- linux-2.6.22-570/drivers/mmc/core/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -27,7 +27,9 @@ + #include + + #include "core.h" +-#include "sysfs.h" ++#include "bus.h" ++#include "host.h" ++#include "lock.h" + + #include "mmc_ops.h" + #include "sd_ops.h" +@@ -35,6 +37,25 @@ + extern int mmc_attach_mmc(struct mmc_host *host, u32 ocr); + extern int mmc_attach_sd(struct mmc_host *host, u32 ocr); + ++static struct workqueue_struct *workqueue; ++ ++/* ++ * Internal function. Schedule delayed work in the MMC work queue. 
++ */ ++static int mmc_schedule_delayed_work(struct delayed_work *work, ++ unsigned long delay) ++{ ++ return queue_delayed_work(workqueue, work, delay); ++} ++ ++/* ++ * Internal function. Flush all scheduled work from the MMC work queue. ++ */ ++static void mmc_flush_scheduled_work(void) ++{ ++ flush_workqueue(workqueue); ++} ++ + /** + * mmc_request_done - finish processing an MMC request + * @host: MMC host which completed request +@@ -369,22 +390,6 @@ + } + + /* +- * Allocate a new MMC card +- */ +-struct mmc_card *mmc_alloc_card(struct mmc_host *host) +-{ +- struct mmc_card *card; +- +- card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL); +- if (!card) +- return ERR_PTR(-ENOMEM); +- +- mmc_init_card(card, host); +- +- return card; +-} +- +-/* + * Apply power to the MMC stack. This is a two-stage process. + * First, we enable power to the card without the clock running. + * We then wait a bit for the power to stabilise. Finally, +@@ -512,7 +517,7 @@ + EXPORT_SYMBOL(mmc_detect_change); + + +-static void mmc_rescan(struct work_struct *work) ++void mmc_rescan(struct work_struct *work) + { + struct mmc_host *host = + container_of(work, struct mmc_host, detect.work); +@@ -561,69 +566,13 @@ + } + } + +- +-/** +- * mmc_alloc_host - initialise the per-host structure. +- * @extra: sizeof private data structure +- * @dev: pointer to host device model structure +- * +- * Initialise the per-host structure. +- */ +-struct mmc_host *mmc_alloc_host(int extra, struct device *dev) +-{ +- struct mmc_host *host; +- +- host = mmc_alloc_host_sysfs(extra, dev); +- if (host) { +- spin_lock_init(&host->lock); +- init_waitqueue_head(&host->wq); +- INIT_DELAYED_WORK(&host->detect, mmc_rescan); +- +- /* +- * By default, hosts do not support SGIO or large requests. +- * They have to set these according to their abilities. +- */ +- host->max_hw_segs = 1; +- host->max_phys_segs = 1; +- host->max_seg_size = PAGE_CACHE_SIZE; +- +- host->max_req_size = PAGE_CACHE_SIZE; +- host->max_blk_size = 512; +- host->max_blk_count = PAGE_CACHE_SIZE / 512; +- } +- +- return host; +-} +- +-EXPORT_SYMBOL(mmc_alloc_host); +- +-/** +- * mmc_add_host - initialise host hardware +- * @host: mmc host +- */ +-int mmc_add_host(struct mmc_host *host) ++void mmc_start_host(struct mmc_host *host) + { +- int ret; +- +- ret = mmc_add_host_sysfs(host); +- if (ret == 0) { + mmc_power_off(host); + mmc_detect_change(host, 0); +- } +- +- return ret; + } + +-EXPORT_SYMBOL(mmc_add_host); +- +-/** +- * mmc_remove_host - remove host hardware +- * @host: mmc host +- * +- * Unregister and remove all cards associated with this host, +- * and power down the MMC bus. +- */ +-void mmc_remove_host(struct mmc_host *host) ++void mmc_stop_host(struct mmc_host *host) + { + #ifdef CONFIG_MMC_DEBUG + unsigned long flags; +@@ -648,24 +597,8 @@ + BUG_ON(host->card); + + mmc_power_off(host); +- mmc_remove_host_sysfs(host); + } + +-EXPORT_SYMBOL(mmc_remove_host); +- +-/** +- * mmc_free_host - free the host structure +- * @host: mmc host +- * +- * Free the host once all references to it have been dropped. 
+- */ +-void mmc_free_host(struct mmc_host *host) +-{ +- mmc_free_host_sysfs(host); +-} +- +-EXPORT_SYMBOL(mmc_free_host); +- + #ifdef CONFIG_PM + + /** +@@ -726,4 +659,47 @@ + + #endif + ++static int __init mmc_init(void) ++{ ++ int ret; ++ ++ workqueue = create_singlethread_workqueue("kmmcd"); ++ if (!workqueue) ++ return -ENOMEM; ++ ++ ret = mmc_register_bus(); ++ if (ret) ++ goto destroy_workqueue; ++ ++ ret = mmc_register_host_class(); ++ if (ret) ++ goto unregister_bus; ++ ++ ret = mmc_register_key_type(); ++ if (ret) ++ goto unregister_host_class; ++ ++ return 0; ++ ++unregister_host_class: ++ mmc_unregister_host_class(); ++unregister_bus: ++ mmc_unregister_bus(); ++destroy_workqueue: ++ destroy_workqueue(workqueue); ++ ++ return ret; ++} ++ ++static void __exit mmc_exit(void) ++{ ++ mmc_unregister_key_type(); ++ mmc_unregister_host_class(); ++ mmc_unregister_bus(); ++ destroy_workqueue(workqueue); ++} ++ ++module_init(mmc_init); ++module_exit(mmc_exit); ++ + MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.h linux-2.6.22-591/drivers/mmc/core/core.h +--- linux-2.6.22-570/drivers/mmc/core/core.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/core.h 2007-12-21 15:36:12.000000000 -0500 +@@ -54,8 +54,6 @@ + u32 mmc_select_voltage(struct mmc_host *host, u32 ocr); + void mmc_set_timing(struct mmc_host *host, unsigned int timing); + +-struct mmc_card *mmc_alloc_card(struct mmc_host *host); +- + static inline void mmc_delay(unsigned int ms) + { + if (ms < 1000 / HZ) { +@@ -66,5 +64,9 @@ + } + } + ++void mmc_rescan(struct work_struct *work); ++void mmc_start_host(struct mmc_host *host); ++void mmc_stop_host(struct mmc_host *host); ++ + #endif + +diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.c linux-2.6.22-591/drivers/mmc/core/host.c +--- linux-2.6.22-570/drivers/mmc/core/host.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/host.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,156 @@ ++/* ++ * linux/drivers/mmc/core/host.c ++ * ++ * Copyright (C) 2003 Russell King, All Rights Reserved. ++ * Copyright (C) 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * MMC host class device management ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "core.h" ++#include "host.h" ++ ++#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) ++ ++static void mmc_host_classdev_release(struct device *dev) ++{ ++ struct mmc_host *host = cls_dev_to_mmc_host(dev); ++ kfree(host); ++} ++ ++static struct class mmc_host_class = { ++ .name = "mmc_host", ++ .dev_release = mmc_host_classdev_release, ++}; ++ ++int mmc_register_host_class(void) ++{ ++ return class_register(&mmc_host_class); ++} ++ ++void mmc_unregister_host_class(void) ++{ ++ class_unregister(&mmc_host_class); ++} ++ ++static DEFINE_IDR(mmc_host_idr); ++static DEFINE_SPINLOCK(mmc_host_lock); ++ ++/** ++ * mmc_alloc_host - initialise the per-host structure. ++ * @extra: sizeof private data structure ++ * @dev: pointer to host device model structure ++ * ++ * Initialise the per-host structure. 
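++ *
++ * Returns NULL if the allocation fails. The @extra bytes are allocated
++ * past the end of struct mmc_host and hold the caller's private data.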
++ */ ++struct mmc_host *mmc_alloc_host(int extra, struct device *dev) ++{ ++ struct mmc_host *host; ++ ++ host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); ++ if (!host) ++ return NULL; ++ ++ memset(host, 0, sizeof(struct mmc_host) + extra); ++ ++ host->parent = dev; ++ host->class_dev.parent = dev; ++ host->class_dev.class = &mmc_host_class; ++ device_initialize(&host->class_dev); ++ ++ spin_lock_init(&host->lock); ++ init_waitqueue_head(&host->wq); ++ INIT_DELAYED_WORK(&host->detect, mmc_rescan); ++ ++ /* ++ * By default, hosts do not support SGIO or large requests. ++ * They have to set these according to their abilities. ++ */ ++ host->max_hw_segs = 1; ++ host->max_phys_segs = 1; ++ host->max_seg_size = PAGE_CACHE_SIZE; ++ ++ host->max_req_size = PAGE_CACHE_SIZE; ++ host->max_blk_size = 512; ++ host->max_blk_count = PAGE_CACHE_SIZE / 512; ++ ++ return host; ++} ++ ++EXPORT_SYMBOL(mmc_alloc_host); ++ ++/** ++ * mmc_add_host - initialise host hardware ++ * @host: mmc host ++ */ ++int mmc_add_host(struct mmc_host *host) ++{ ++ int err; ++ ++ if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ spin_lock(&mmc_host_lock); ++ err = idr_get_new(&mmc_host_idr, host, &host->index); ++ spin_unlock(&mmc_host_lock); ++ if (err) ++ return err; ++ ++ snprintf(host->class_dev.bus_id, BUS_ID_SIZE, ++ "mmc%d", host->index); ++ ++ err = device_add(&host->class_dev); ++ if (err) ++ return err; ++ ++ mmc_start_host(host); ++ ++ return 0; ++} ++ ++EXPORT_SYMBOL(mmc_add_host); ++ ++/** ++ * mmc_remove_host - remove host hardware ++ * @host: mmc host ++ * ++ * Unregister and remove all cards associated with this host, ++ * and power down the MMC bus. ++ */ ++void mmc_remove_host(struct mmc_host *host) ++{ ++ mmc_stop_host(host); ++ ++ device_del(&host->class_dev); ++ ++ spin_lock(&mmc_host_lock); ++ idr_remove(&mmc_host_idr, host->index); ++ spin_unlock(&mmc_host_lock); ++} ++ ++EXPORT_SYMBOL(mmc_remove_host); ++ ++/** ++ * mmc_free_host - free the host structure ++ * @host: mmc host ++ * ++ * Free the host once all references to it have been dropped. ++ */ ++void mmc_free_host(struct mmc_host *host) ++{ ++ put_device(&host->class_dev); ++} ++ ++EXPORT_SYMBOL(mmc_free_host); ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.h linux-2.6.22-591/drivers/mmc/core/host.h +--- linux-2.6.22-570/drivers/mmc/core/host.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/host.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,18 @@ ++/* ++ * linux/drivers/mmc/core/host.h ++ * ++ * Copyright (C) 2003 Russell King, All Rights Reserved. ++ * Copyright 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++#ifndef _MMC_CORE_HOST_H ++#define _MMC_CORE_HOST_H ++ ++int mmc_register_host_class(void); ++void mmc_unregister_host_class(void); ++ ++#endif ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.c linux-2.6.22-591/drivers/mmc/core/lock.c +--- linux-2.6.22-570/drivers/mmc/core/lock.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/lock.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,199 @@ ++/* ++ * linux/drivers/mmc/core/lock.h ++ * ++ * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved. 
++ * Copyright 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * MMC password key handling. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include "sysfs.h" ++#include "mmc_ops.h" ++#include "lock.h" ++ ++#define MMC_KEYLEN_MAXBYTES 32 ++ ++#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) ++ ++static int mmc_key_instantiate(struct key *key, const void *data, size_t datalen) ++{ ++ struct mmc_key_payload *mpayload; ++ int ret; ++ ++ ret = -EINVAL; ++ if (datalen <= 0 || datalen > MMC_KEYLEN_MAXBYTES || !data) { ++ pr_debug("Invalid data\n"); ++ goto error; ++ } ++ ++ ret = key_payload_reserve(key, datalen); ++ if (ret < 0) { ++ pr_debug("ret = %d\n", ret); ++ goto error; ++ } ++ ++ ret = -ENOMEM; ++ mpayload = kmalloc(sizeof(*mpayload) + datalen, GFP_KERNEL); ++ if (!mpayload) { ++ pr_debug("Unable to allocate mpayload structure\n"); ++ goto error; ++ } ++ mpayload->datalen = datalen; ++ memcpy(mpayload->data, data, datalen); ++ ++ rcu_assign_pointer(key->payload.data, mpayload); ++ ++ /* ret = 0 if there is no error */ ++ ret = 0; ++ ++error: ++ return ret; ++} ++ ++static int mmc_key_match(const struct key *key, const void *description) ++{ ++ return strcmp(key->description, description) == 0; ++} ++ ++/* ++ * dispose of the data dangling from the corpse of a mmc key ++ */ ++static void mmc_key_destroy(struct key *key) ++{ ++ struct mmc_key_payload *mpayload = key->payload.data; ++ ++ kfree(mpayload); ++} ++ ++static struct key_type mmc_key_type = { ++ .name = "mmc", ++ .def_datalen = MMC_KEYLEN_MAXBYTES, ++ .instantiate = mmc_key_instantiate, ++ .match = mmc_key_match, ++ .destroy = mmc_key_destroy, ++}; ++ ++int mmc_register_key_type(void) ++{ ++ return register_key_type(&mmc_key_type); ++} ++ ++void mmc_unregister_key_type(void) ++{ ++ unregister_key_type(&mmc_key_type); ++} ++ ++static ssize_t ++mmc_lockable_show(struct device *dev, struct device_attribute *att, char *buf) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ ++ return sprintf(buf, "%slocked\n", mmc_card_locked(card) ? "" : "un"); ++} ++ ++/* ++ * implement MMC password functions: force erase, remove password, change ++ * password, unlock card and assign password. 
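++ *
++ * A sketch of the intended userspace flow (an illustration resting on
++ * the keyutils tooling, not something this code mandates): stage the
++ * password as a key of type "mmc" with description "mmc:key", then
++ * write the operation name to the card's "lockable" sysfs attribute:
++ *
++ *   keyctl add mmc "mmc:key" "$PASSWORD" @s
++ *   echo unlock > /sys/bus/mmc/devices/<card>/lockable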
++ */ ++static ssize_t ++mmc_lockable_store(struct device *dev, struct device_attribute *att, ++ const char *data, size_t len) ++{ ++ struct mmc_card *card = dev_to_mmc_card(dev); ++ int ret; ++ struct key *mmc_key; ++ ++ if(!mmc_card_lockable(card)) ++ return -EINVAL; ++ ++ mmc_claim_host(card->host); ++ ++ ret = -EINVAL; ++ if (mmc_card_locked(card) && !strncmp(data, "erase", 5)) { ++ /* forced erase only works while card is locked */ ++ mmc_lock_unlock(card, NULL, MMC_LOCK_MODE_ERASE); ++ ret = len; ++ } else if (!mmc_card_locked(card) && !strncmp(data, "remove", 6)) { ++ /* remove password only works while card is unlocked */ ++ mmc_key = request_key(&mmc_key_type, "mmc:key", "remove"); ++ ++ if (!IS_ERR(mmc_key)) { ++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_CLR_PWD); ++ if (!ret) ++ ret = len; ++ } else ++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); ++ } else if (!mmc_card_locked(card) && ((!strncmp(data, "assign", 6)) || ++ (!strncmp(data, "change", 6)))) { ++ /* assign or change */ ++ if(!(strncmp(data, "assign", 6))) ++ mmc_key = request_key(&mmc_key_type, "mmc:key", "assign"); ++ else ++ mmc_key = request_key(&mmc_key_type, "mmc:key", "change"); ++ ++ if (!IS_ERR(mmc_key)) { ++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_SET_PWD); ++ if (!ret) ++ ret = len; ++ } else ++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); ++ } else if (mmc_card_locked(card) && !strncmp(data, "unlock", 6)) { ++ /* unlock */ ++ mmc_key = request_key(&mmc_key_type, "mmc:key", "unlock"); ++ if (!IS_ERR(mmc_key)) { ++ ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_UNLOCK); ++ if (ret) { ++ dev_dbg(&card->dev, "Wrong password\n"); ++ ret = -EINVAL; ++ } ++ else { ++ mmc_release_host(card->host); ++ device_release_driver(dev); ++ ret = device_attach(dev); ++ if(!ret) ++ return -EINVAL; ++ else ++ return len; ++ } ++ } else ++ dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key)); ++ } ++ ++ mmc_release_host(card->host); ++ return ret; ++} ++ ++static struct device_attribute mmc_dev_attr_lockable[] = { ++ __ATTR(lockable, S_IWUSR | S_IRUGO, ++ mmc_lockable_show, mmc_lockable_store), ++ __ATTR_NULL, ++}; ++ ++int mmc_lock_add_sysfs(struct mmc_card *card) ++{ ++ if (!mmc_card_lockable(card)) ++ return 0; ++ ++ return mmc_add_attrs(card, mmc_dev_attr_lockable); ++} ++ ++void mmc_lock_remove_sysfs(struct mmc_card *card) ++{ ++ if (!mmc_card_lockable(card)) ++ return; ++ ++ mmc_remove_attrs(card, mmc_dev_attr_lockable); ++} ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.h linux-2.6.22-591/drivers/mmc/core/lock.h +--- linux-2.6.22-570/drivers/mmc/core/lock.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/mmc/core/lock.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,51 @@ ++/* ++ * linux/drivers/mmc/core/lock.h ++ * ++ * Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved. ++ * Copyright 2007 Pierre Ossman ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++#ifndef _MMC_CORE_LOCK_H ++#define _MMC_CORE_LOCK_H ++ ++#ifdef CONFIG_MMC_PASSWORDS ++ ++/* core-internal data */ ++struct mmc_key_payload { ++ struct rcu_head rcu; /* RCU destructor */ ++ unsigned short datalen; /* length of this data */ ++ char data[0]; /* actual data */ ++}; ++ ++int mmc_register_key_type(void); ++void mmc_unregister_key_type(void); ++ ++int mmc_lock_add_sysfs(struct mmc_card *card); ++void mmc_lock_remove_sysfs(struct mmc_card *card); ++ ++#else ++ ++static inline int mmc_register_key_type(void) ++{ ++ return 0; ++} ++ ++static inline void mmc_unregister_key_type(void) ++{ ++} ++ ++static inline int mmc_lock_add_sysfs(struct mmc_card *card) ++{ ++ return 0; ++} ++ ++static inline void mmc_lock_remove_sysfs(struct mmc_card *card) ++{ ++} ++ ++#endif ++ ++#endif +diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc.c linux-2.6.22-591/drivers/mmc/core/mmc.c +--- linux-2.6.22-570/drivers/mmc/core/mmc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/mmc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -18,6 +18,8 @@ + + #include "core.h" + #include "sysfs.h" ++#include "bus.h" ++#include "lock.h" + #include "mmc_ops.h" + + static const unsigned int tran_exp[] = { +@@ -230,19 +232,74 @@ + return err; + } + ++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], ++ card->raw_cid[2], card->raw_cid[3]); ++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], ++ card->raw_csd[2], card->raw_csd[3]); ++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year); ++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev); ++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev); ++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid); ++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name); ++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid); ++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial); ++ ++static struct device_attribute mmc_dev_attrs[] = { ++ MMC_ATTR_RO(cid), ++ MMC_ATTR_RO(csd), ++ MMC_ATTR_RO(date), ++ MMC_ATTR_RO(fwrev), ++ MMC_ATTR_RO(hwrev), ++ MMC_ATTR_RO(manfid), ++ MMC_ATTR_RO(name), ++ MMC_ATTR_RO(oemid), ++ MMC_ATTR_RO(serial), ++ __ATTR_NULL, ++}; ++ ++/* ++ * Adds sysfs entries as relevant. ++ */ ++static int mmc_sysfs_add(struct mmc_card *card) ++{ ++ int ret; ++ ++ ret = mmc_add_attrs(card, mmc_dev_attrs); ++ if (ret < 0) ++ return ret; ++ ++ ret = mmc_lock_add_sysfs(card); ++ if (ret < 0) { ++ mmc_remove_attrs(card, mmc_dev_attrs); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Removes the sysfs entries added by mmc_sysfs_add(). ++ */ ++static void mmc_sysfs_remove(struct mmc_card *card) ++{ ++ mmc_lock_remove_sysfs(card); ++ mmc_remove_attrs(card, mmc_dev_attrs); ++} ++ + /* + * Handle the detection and initialisation of a card. + * + * In the case of a resume, "curcard" will contain the card + * we're trying to reinitialise. + */ +-static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, ++static int mmc_init_card(struct mmc_host *host, u32 ocr, + struct mmc_card *oldcard) + { + struct mmc_card *card; + int err; + u32 cid[4]; + unsigned int max_dtr; ++ u32 status; + + BUG_ON(!host); + BUG_ON(!host->claimed); +@@ -294,6 +351,15 @@ + + mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); + ++ /* ++ * Check if card is locked. ++ */ ++ err = mmc_send_status(card, &status); ++ if (err != MMC_ERR_NONE) ++ goto free_card; ++ if (status & R1_CARD_IS_LOCKED) ++ mmc_card_set_locked(card); ++ + if (!oldcard) { + /* + * Fetch CSD from card. 
+@@ -389,6 +455,8 @@ + BUG_ON(!host); + BUG_ON(!host->card); + ++ mmc_sysfs_remove(host->card); ++ + mmc_remove_card(host->card); + host->card = NULL; + } +@@ -413,8 +481,7 @@ + mmc_release_host(host); + + if (err != MMC_ERR_NONE) { +- mmc_remove_card(host->card); +- host->card = NULL; ++ mmc_remove(host); + + mmc_claim_host(host); + mmc_detach_bus(host); +@@ -434,7 +501,7 @@ + + mmc_claim_host(host); + mmc_deselect_cards(host); +- host->card->state &= ~MMC_STATE_HIGHSPEED; ++ host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED); + mmc_release_host(host); + } + +@@ -453,11 +520,9 @@ + + mmc_claim_host(host); + +- err = mmc_sd_init_card(host, host->ocr, host->card); ++ err = mmc_init_card(host, host->ocr, host->card); + if (err != MMC_ERR_NONE) { +- mmc_remove_card(host->card); +- host->card = NULL; +- ++ mmc_remove(host); + mmc_detach_bus(host); + } + +@@ -512,13 +577,17 @@ + /* + * Detect and init the card. + */ +- err = mmc_sd_init_card(host, host->ocr, NULL); ++ err = mmc_init_card(host, host->ocr, NULL); + if (err != MMC_ERR_NONE) + goto err; + + mmc_release_host(host); + +- err = mmc_register_card(host->card); ++ err = mmc_add_card(host->card); ++ if (err) ++ goto reclaim_host; ++ ++ err = mmc_sysfs_add(host->card); + if (err) + goto reclaim_host; + +diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.c linux-2.6.22-591/drivers/mmc/core/mmc_ops.c +--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2,6 +2,8 @@ + * linux/drivers/mmc/mmc_ops.h + * + * Copyright 2006-2007 Pierre Ossman ++ * MMC password protection (C) 2006 Instituto Nokia de Tecnologia (INdT), ++ * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -12,12 +14,14 @@ + #include + #include + #include ++#include + + #include + #include + #include + + #include "core.h" ++#include "lock.h" + #include "mmc_ops.h" + + static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card) +@@ -274,3 +278,114 @@ + return MMC_ERR_NONE; + } + ++#ifdef CONFIG_MMC_PASSWORDS ++ ++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode) ++{ ++ struct mmc_request mrq; ++ struct mmc_command cmd; ++ struct mmc_data data; ++ struct scatterlist sg; ++ struct mmc_key_payload *mpayload; ++ unsigned long erase_timeout; ++ int err, data_size; ++ u8 *data_buf; ++ ++ mpayload = NULL; ++ data_size = 1; ++ if (!(mode & MMC_LOCK_MODE_ERASE)) { ++ mpayload = rcu_dereference(key->payload.data); ++ data_size = 2 + mpayload->datalen; ++ } ++ ++ data_buf = kmalloc(data_size, GFP_KERNEL); ++ if (!data_buf) ++ return -ENOMEM; ++ memset(data_buf, 0, data_size); ++ ++ data_buf[0] |= mode; ++ if (mode & MMC_LOCK_MODE_UNLOCK) ++ data_buf[0] &= ~MMC_LOCK_MODE_UNLOCK; ++ ++ if (!(mode & MMC_LOCK_MODE_ERASE)) { ++ data_buf[1] = mpayload->datalen; ++ memcpy(data_buf + 2, mpayload->data, mpayload->datalen); ++ } ++ ++ memset(&cmd, 0, sizeof(struct mmc_command)); ++ ++ cmd.opcode = MMC_SET_BLOCKLEN; ++ cmd.arg = data_size; ++ cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; ++ err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES); ++ if (err != MMC_ERR_NONE) ++ goto out; ++ ++ memset(&cmd, 0, sizeof(struct mmc_command)); ++ ++ cmd.opcode = MMC_LOCK_UNLOCK; ++ cmd.arg = 0; ++ cmd.flags = MMC_RSP_R1B | MMC_CMD_ADTC; ++ ++ memset(&data, 0, sizeof(struct mmc_data)); ++ ++ mmc_set_data_timeout(&data, card, 1); 
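++
++ /*
++ * CMD42 (LOCK_UNLOCK) is an ADTC command: the mode byte (plus, for the
++ * password operations, the password length and the password itself)
++ * reaches the card as a single written data block of data_size bytes.
++ */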
++ ++ data.blksz = data_size; ++ data.blocks = 1; ++ data.flags = MMC_DATA_WRITE; ++ data.sg = &sg; ++ data.sg_len = 1; ++ ++ memset(&mrq, 0, sizeof(struct mmc_request)); ++ ++ mrq.cmd = &cmd; ++ mrq.data = &data; ++ ++ sg_init_one(&sg, data_buf, data_size); ++ err = mmc_wait_for_req(card->host, &mrq); ++ if (err != MMC_ERR_NONE) ++ goto out; ++ ++ memset(&cmd, 0, sizeof(struct mmc_command)); ++ ++ cmd.opcode = MMC_SEND_STATUS; ++ cmd.arg = card->rca << 16; ++ cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; ++ ++ /* set timeout for forced erase operation to 3 min. (see MMC spec) */ ++ erase_timeout = jiffies + 180 * HZ; ++ do { ++ /* we cannot use "retries" here because the ++ * R1_LOCK_UNLOCK_FAILED bit is cleared by subsequent reads to ++ * the status register, hiding the error condition */ ++ err = mmc_wait_for_cmd(card->host, &cmd, 0); ++ if (err != MMC_ERR_NONE) ++ break; ++ /* the other modes don't need timeout checking */ ++ if (!(mode & MMC_LOCK_MODE_ERASE)) ++ continue; ++ if (time_after(jiffies, erase_timeout)) { ++ dev_dbg(&card->dev, "forced erase timed out\n"); ++ err = MMC_ERR_TIMEOUT; ++ break; ++ } ++ } while (!(cmd.resp[0] & R1_READY_FOR_DATA)); ++ if (cmd.resp[0] & R1_LOCK_UNLOCK_FAILED) { ++ dev_dbg(&card->dev, "LOCK_UNLOCK operation failed\n"); ++ err = MMC_ERR_FAILED; ++ } ++ ++ if (cmd.resp[0] & R1_CARD_IS_LOCKED) ++ mmc_card_set_locked(card); ++ else ++ card->state &= ~MMC_STATE_LOCKED; ++ ++out: ++ kfree(data_buf); ++ ++ return err; ++} ++ ++#endif /* CONFIG_MMC_PASSWORDS */ ++ +diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.h linux-2.6.22-591/drivers/mmc/core/mmc_ops.h +--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/mmc_ops.h 2007-12-21 15:36:12.000000000 -0500 +@@ -12,6 +12,8 @@ + #ifndef _MMC_MMC_OPS_H + #define _MMC_MMC_OPS_H + ++struct key; ++ + int mmc_select_card(struct mmc_card *card); + int mmc_deselect_cards(struct mmc_host *host); + int mmc_go_idle(struct mmc_host *host); +@@ -22,6 +24,7 @@ + int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); + int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value); + int mmc_send_status(struct mmc_card *card, u32 *status); ++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode); + + #endif + +diff -Nurb linux-2.6.22-570/drivers/mmc/core/sd.c linux-2.6.22-591/drivers/mmc/core/sd.c +--- linux-2.6.22-570/drivers/mmc/core/sd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/sd.c 2007-12-21 15:36:12.000000000 -0500 +@@ -19,11 +19,11 @@ + + #include "core.h" + #include "sysfs.h" ++#include "bus.h" ++#include "lock.h" + #include "mmc_ops.h" + #include "sd_ops.h" + +-#include "core.h" +- + static const unsigned int tran_exp[] = { + 10000, 100000, 1000000, 10000000, + 0, 0, 0, 0 +@@ -280,6 +280,62 @@ + return err; + } + ++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], ++ card->raw_cid[2], card->raw_cid[3]); ++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], ++ card->raw_csd[2], card->raw_csd[3]); ++MMC_ATTR_FN(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]); ++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year); ++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev); ++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev); ++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid); ++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name); ++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid); ++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial); ++ ++static 
struct device_attribute mmc_sd_dev_attrs[] = { ++ MMC_ATTR_RO(cid), ++ MMC_ATTR_RO(csd), ++ MMC_ATTR_RO(scr), ++ MMC_ATTR_RO(date), ++ MMC_ATTR_RO(fwrev), ++ MMC_ATTR_RO(hwrev), ++ MMC_ATTR_RO(manfid), ++ MMC_ATTR_RO(name), ++ MMC_ATTR_RO(oemid), ++ MMC_ATTR_RO(serial), ++ __ATTR_NULL, ++}; ++ ++/* ++ * Adds sysfs entries as relevant. ++ */ ++static int mmc_sd_sysfs_add(struct mmc_card *card) ++{ ++ int ret; ++ ++ ret = mmc_add_attrs(card, mmc_sd_dev_attrs); ++ if (ret < 0) ++ return ret; ++ ++ ret = mmc_lock_add_sysfs(card); ++ if (ret < 0) { ++ mmc_remove_attrs(card, mmc_sd_dev_attrs); ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Removes the sysfs entries added by mmc_sysfs_add(). ++ */ ++static void mmc_sd_sysfs_remove(struct mmc_card *card) ++{ ++ mmc_lock_remove_sysfs(card); ++ mmc_remove_attrs(card, mmc_sd_dev_attrs); ++} ++ + /* + * Handle the detection and initialisation of a card. + * +@@ -293,6 +349,7 @@ + int err; + u32 cid[4]; + unsigned int max_dtr; ++ u32 status; + + BUG_ON(!host); + BUG_ON(!host->claimed); +@@ -352,6 +409,15 @@ + + mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL); + ++ /* ++ * Check if card is locked. ++ */ ++ err = mmc_send_status(card, &status); ++ if (err != MMC_ERR_NONE) ++ goto free_card; ++ if (status & R1_CARD_IS_LOCKED) ++ mmc_card_set_locked(card); ++ + if (!oldcard) { + /* + * Fetch CSD from card. +@@ -463,6 +529,8 @@ + BUG_ON(!host); + BUG_ON(!host->card); + ++ mmc_sd_sysfs_remove(host->card); ++ + mmc_remove_card(host->card); + host->card = NULL; + } +@@ -487,8 +555,7 @@ + mmc_release_host(host); + + if (err != MMC_ERR_NONE) { +- mmc_remove_card(host->card); +- host->card = NULL; ++ mmc_sd_remove(host); + + mmc_claim_host(host); + mmc_detach_bus(host); +@@ -508,7 +575,7 @@ + + mmc_claim_host(host); + mmc_deselect_cards(host); +- host->card->state &= ~MMC_STATE_HIGHSPEED; ++ host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED); + mmc_release_host(host); + } + +@@ -529,9 +596,7 @@ + + err = mmc_sd_init_card(host, host->ocr, host->card); + if (err != MMC_ERR_NONE) { +- mmc_remove_card(host->card); +- host->card = NULL; +- ++ mmc_sd_remove(host); + mmc_detach_bus(host); + } + +@@ -599,7 +664,11 @@ + + mmc_release_host(host); + +- err = mmc_register_card(host->card); ++ err = mmc_add_card(host->card); ++ if (err) ++ goto reclaim_host; ++ ++ err = mmc_sd_sysfs_add(host->card); + if (err) + goto reclaim_host; + +diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.c linux-2.6.22-591/drivers/mmc/core/sysfs.c +--- linux-2.6.22-570/drivers/mmc/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/sysfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2,6 +2,7 @@ + * linux/drivers/mmc/core/sysfs.c + * + * Copyright (C) 2003 Russell King, All Rights Reserved. ++ * Copyright 2007 Pierre Ossman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as +@@ -9,352 +10,34 @@ + * + * MMC sysfs/driver model support. + */ +-#include +-#include + #include +-#include +-#include + + #include +-#include + + #include "sysfs.h" + +-#define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) +-#define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) +-#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) +- +-#define MMC_ATTR(name, fmt, args...) 
\ +-static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ +-{ \ +- struct mmc_card *card = dev_to_mmc_card(dev); \ +- return sprintf(buf, fmt, args); \ +-} +- +-MMC_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1], +- card->raw_cid[2], card->raw_cid[3]); +-MMC_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1], +- card->raw_csd[2], card->raw_csd[3]); +-MMC_ATTR(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]); +-MMC_ATTR(date, "%02d/%04d\n", card->cid.month, card->cid.year); +-MMC_ATTR(fwrev, "0x%x\n", card->cid.fwrev); +-MMC_ATTR(hwrev, "0x%x\n", card->cid.hwrev); +-MMC_ATTR(manfid, "0x%06x\n", card->cid.manfid); +-MMC_ATTR(name, "%s\n", card->cid.prod_name); +-MMC_ATTR(oemid, "0x%04x\n", card->cid.oemid); +-MMC_ATTR(serial, "0x%08x\n", card->cid.serial); +- +-#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL) +- +-static struct device_attribute mmc_dev_attrs[] = { +- MMC_ATTR_RO(cid), +- MMC_ATTR_RO(csd), +- MMC_ATTR_RO(date), +- MMC_ATTR_RO(fwrev), +- MMC_ATTR_RO(hwrev), +- MMC_ATTR_RO(manfid), +- MMC_ATTR_RO(name), +- MMC_ATTR_RO(oemid), +- MMC_ATTR_RO(serial), +- __ATTR_NULL +-}; +- +-static struct device_attribute mmc_dev_attr_scr = MMC_ATTR_RO(scr); +- +- +-static void mmc_release_card(struct device *dev) +-{ +- struct mmc_card *card = dev_to_mmc_card(dev); +- +- kfree(card); +-} +- +-/* +- * This currently matches any MMC driver to any MMC card - drivers +- * themselves make the decision whether to drive this card in their +- * probe method. +- */ +-static int mmc_bus_match(struct device *dev, struct device_driver *drv) +-{ +- return 1; +-} +- +-static int +-mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf, +- int buf_size) +-{ +- struct mmc_card *card = dev_to_mmc_card(dev); +- char ccc[13]; +- int retval = 0, i = 0, length = 0; +- +-#define add_env(fmt,val) do { \ +- retval = add_uevent_var(envp, num_envp, &i, \ +- buf, buf_size, &length, \ +- fmt, val); \ +- if (retval) \ +- return retval; \ +-} while (0); +- +- for (i = 0; i < 12; i++) +- ccc[i] = card->csd.cmdclass & (1 << i) ? 
'1' : '0'; +- ccc[12] = '\0'; +- +- add_env("MMC_CCC=%s", ccc); +- add_env("MMC_MANFID=%06x", card->cid.manfid); +- add_env("MMC_NAME=%s", mmc_card_name(card)); +- add_env("MMC_OEMID=%04x", card->cid.oemid); +-#undef add_env +- envp[i] = NULL; +- +- return 0; +-} +- +-static int mmc_bus_suspend(struct device *dev, pm_message_t state) ++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs) + { +- struct mmc_driver *drv = to_mmc_driver(dev->driver); +- struct mmc_card *card = dev_to_mmc_card(dev); +- int ret = 0; +- +- if (dev->driver && drv->suspend) +- ret = drv->suspend(card, state); +- return ret; +-} ++ int error = 0; ++ int i; + +-static int mmc_bus_resume(struct device *dev) +-{ +- struct mmc_driver *drv = to_mmc_driver(dev->driver); +- struct mmc_card *card = dev_to_mmc_card(dev); +- int ret = 0; +- +- if (dev->driver && drv->resume) +- ret = drv->resume(card); +- return ret; +-} +- +-static int mmc_bus_probe(struct device *dev) +-{ +- struct mmc_driver *drv = to_mmc_driver(dev->driver); +- struct mmc_card *card = dev_to_mmc_card(dev); +- +- return drv->probe(card); +-} +- +-static int mmc_bus_remove(struct device *dev) +-{ +- struct mmc_driver *drv = to_mmc_driver(dev->driver); +- struct mmc_card *card = dev_to_mmc_card(dev); +- +- drv->remove(card); +- +- return 0; +-} +- +-static struct bus_type mmc_bus_type = { +- .name = "mmc", +- .dev_attrs = mmc_dev_attrs, +- .match = mmc_bus_match, +- .uevent = mmc_bus_uevent, +- .probe = mmc_bus_probe, +- .remove = mmc_bus_remove, +- .suspend = mmc_bus_suspend, +- .resume = mmc_bus_resume, +-}; +- +-/** +- * mmc_register_driver - register a media driver +- * @drv: MMC media driver +- */ +-int mmc_register_driver(struct mmc_driver *drv) +-{ +- drv->drv.bus = &mmc_bus_type; +- return driver_register(&drv->drv); +-} +- +-EXPORT_SYMBOL(mmc_register_driver); +- +-/** +- * mmc_unregister_driver - unregister a media driver +- * @drv: MMC media driver +- */ +-void mmc_unregister_driver(struct mmc_driver *drv) +-{ +- drv->drv.bus = &mmc_bus_type; +- driver_unregister(&drv->drv); +-} +- +-EXPORT_SYMBOL(mmc_unregister_driver); +- +- +-/* +- * Internal function. Initialise a MMC card structure. +- */ +-void mmc_init_card(struct mmc_card *card, struct mmc_host *host) +-{ +- memset(card, 0, sizeof(struct mmc_card)); +- card->host = host; +- device_initialize(&card->dev); +- card->dev.parent = mmc_classdev(host); +- card->dev.bus = &mmc_bus_type; +- card->dev.release = mmc_release_card; +-} +- +-/* +- * Internal function. Register a new MMC card with the driver model. +- */ +-int mmc_register_card(struct mmc_card *card) +-{ +- int ret; +- +- snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), +- "%s:%04x", mmc_hostname(card->host), card->rca); +- +- ret = device_add(&card->dev); +- if (ret == 0) { +- if (mmc_card_sd(card)) { +- ret = device_create_file(&card->dev, &mmc_dev_attr_scr); +- if (ret) +- device_del(&card->dev); +- } ++ for (i = 0; attr_name(attrs[i]); i++) { ++ error = device_create_file(&card->dev, &attrs[i]); ++ if (error) { ++ while (--i >= 0) ++ device_remove_file(&card->dev, &attrs[i]); ++ break; + } +- if (ret == 0) +- mmc_card_set_present(card); +- return ret; +-} +- +-/* +- * Internal function. Unregister a new MMC card with the +- * driver model, and (eventually) free it. 
+- */ +-void mmc_remove_card(struct mmc_card *card) +-{ +- if (mmc_card_present(card)) { +- if (mmc_card_sd(card)) +- device_remove_file(&card->dev, &mmc_dev_attr_scr); +- +- device_del(&card->dev); + } + +- put_device(&card->dev); +-} +- +- +-static void mmc_host_classdev_release(struct device *dev) +-{ +- struct mmc_host *host = cls_dev_to_mmc_host(dev); +- kfree(host); +-} +- +-static struct class mmc_host_class = { +- .name = "mmc_host", +- .dev_release = mmc_host_classdev_release, +-}; +- +-static DEFINE_IDR(mmc_host_idr); +-static DEFINE_SPINLOCK(mmc_host_lock); +- +-/* +- * Internal function. Allocate a new MMC host. +- */ +-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) +-{ +- struct mmc_host *host; +- +- host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); +- if (host) { +- memset(host, 0, sizeof(struct mmc_host) + extra); +- +- host->parent = dev; +- host->class_dev.parent = dev; +- host->class_dev.class = &mmc_host_class; +- device_initialize(&host->class_dev); +- } +- +- return host; +-} +- +-/* +- * Internal function. Register a new MMC host with the MMC class. +- */ +-int mmc_add_host_sysfs(struct mmc_host *host) +-{ +- int err; +- +- if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) +- return -ENOMEM; +- +- spin_lock(&mmc_host_lock); +- err = idr_get_new(&mmc_host_idr, host, &host->index); +- spin_unlock(&mmc_host_lock); +- if (err) +- return err; +- +- snprintf(host->class_dev.bus_id, BUS_ID_SIZE, +- "mmc%d", host->index); +- +- return device_add(&host->class_dev); +-} +- +-/* +- * Internal function. Unregister a MMC host with the MMC class. +- */ +-void mmc_remove_host_sysfs(struct mmc_host *host) +-{ +- device_del(&host->class_dev); +- +- spin_lock(&mmc_host_lock); +- idr_remove(&mmc_host_idr, host->index); +- spin_unlock(&mmc_host_lock); ++ return error; + } + +-/* +- * Internal function. Free a MMC host. +- */ +-void mmc_free_host_sysfs(struct mmc_host *host) ++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs) + { +- put_device(&host->class_dev); +-} ++ int i; + +-static struct workqueue_struct *workqueue; +- +-/* +- * Internal function. Schedule delayed work in the MMC work queue. +- */ +-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay) +-{ +- return queue_delayed_work(workqueue, work, delay); +-} +- +-/* +- * Internal function. Flush all scheduled work from the MMC work queue. 
+- */ +-void mmc_flush_scheduled_work(void) +-{ +- flush_workqueue(workqueue); +-} +- +-static int __init mmc_init(void) +-{ +- int ret; +- +- workqueue = create_singlethread_workqueue("kmmcd"); +- if (!workqueue) +- return -ENOMEM; +- +- ret = bus_register(&mmc_bus_type); +- if (ret == 0) { +- ret = class_register(&mmc_host_class); +- if (ret) +- bus_unregister(&mmc_bus_type); +- } +- return ret; +-} +- +-static void __exit mmc_exit(void) +-{ +- class_unregister(&mmc_host_class); +- bus_unregister(&mmc_bus_type); +- destroy_workqueue(workqueue); ++ for (i = 0; attr_name(attrs[i]); i++) ++ device_remove_file(&card->dev, &attrs[i]); + } + +-module_init(mmc_init); +-module_exit(mmc_exit); +diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.h linux-2.6.22-591/drivers/mmc/core/sysfs.h +--- linux-2.6.22-570/drivers/mmc/core/sysfs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/core/sysfs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -11,17 +11,16 @@ + #ifndef _MMC_CORE_SYSFS_H + #define _MMC_CORE_SYSFS_H + +-void mmc_init_card(struct mmc_card *card, struct mmc_host *host); +-int mmc_register_card(struct mmc_card *card); +-void mmc_remove_card(struct mmc_card *card); ++#define MMC_ATTR_FN(name, fmt, args...) \ ++static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ ++{ \ ++ struct mmc_card *card = container_of(dev, struct mmc_card, dev);\ ++ return sprintf(buf, fmt, args); \ ++} + +-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev); +-int mmc_add_host_sysfs(struct mmc_host *host); +-void mmc_remove_host_sysfs(struct mmc_host *host); +-void mmc_free_host_sysfs(struct mmc_host *host); ++#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL) + +-int mmc_schedule_work(struct work_struct *work); +-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay); +-void mmc_flush_scheduled_work(void); ++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs); ++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs); + + #endif +diff -Nurb linux-2.6.22-570/drivers/mmc/host/sdhci.c linux-2.6.22-591/drivers/mmc/host/sdhci.c +--- linux-2.6.22-570/drivers/mmc/host/sdhci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mmc/host/sdhci.c 2007-12-21 15:36:12.000000000 -0500 +@@ -70,6 +70,14 @@ + .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE, + }, + ++ { ++ .vendor = PCI_VENDOR_ID_ENE, ++ .device = PCI_DEVICE_ID_ENE_CB712_SD_2, ++ .subvendor = PCI_ANY_ID, ++ .subdevice = PCI_ANY_ID, ++ .driver_data = SDHCI_QUIRK_SINGLE_POWER_WRITE, ++ }, ++ + { /* Generic SD host controller */ + PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00) + }, +diff -Nurb linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c +--- linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/mtd/mtd_blkdevs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -80,7 +81,7 @@ + struct request_queue *rq = tr->blkcore_priv->rq; + + /* we might get involved when memory gets low, so use PF_MEMALLOC */ +- current->flags |= PF_MEMALLOC | PF_NOFREEZE; ++ current->flags |= PF_MEMALLOC; + + spin_lock_irq(rq->queue_lock); + while (!kthread_should_stop()) { +diff -Nurb linux-2.6.22-570/drivers/mtd/ubi/wl.c linux-2.6.22-591/drivers/mtd/ubi/wl.c +--- linux-2.6.22-570/drivers/mtd/ubi/wl.c 2007-07-08 19:32:17.000000000 
-0400
++++ linux-2.6.22-591/drivers/mtd/ubi/wl.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1346,6 +1346,7 @@
+ ubi_msg("background thread \"%s\" started, PID %d",
+ ubi->bgt_name, current->pid);
+
++ set_freezable();
+ for (;;) {
+ int err;
+
+diff -Nurb linux-2.6.22-570/drivers/net/3c523.c linux-2.6.22-591/drivers/net/3c523.c
+--- linux-2.6.22-570/drivers/net/3c523.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/3c523.c 2007-12-21 15:36:12.000000000 -0500
+@@ -990,7 +990,7 @@
+ if (skb != NULL) {
+ skb_reserve(skb, 2); /* 16 byte alignment */
+ skb_put(skb,totlen);
+- eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
++ skb_copy_to_linear_data(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen);
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/7990.c linux-2.6.22-591/drivers/net/7990.c
+--- linux-2.6.22-570/drivers/net/7990.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/7990.c 2007-12-21 15:36:12.000000000 -0500
+@@ -333,9 +333,9 @@
+
+ skb_reserve (skb, 2); /* 16 byte align */
+ skb_put (skb, len); /* make room */
+- eth_copy_and_sum(skb,
++ skb_copy_to_linear_data(skb,
+ (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
+- len, 0);
++ len);
+ skb->protocol = eth_type_trans (skb, dev);
+ netif_rx (skb);
+ dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/8139too.c linux-2.6.22-591/drivers/net/8139too.c
+--- linux-2.6.22-570/drivers/net/8139too.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/8139too.c 2007-12-21 15:36:12.000000000 -0500
+@@ -2017,7 +2017,7 @@
+ #if RX_BUF_IDX == 3
+ wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
+ #else
+- eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
++ skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
+ #endif
+ skb_put (skb, pkt_size);
+
+diff -Nurb linux-2.6.22-570/drivers/net/Kconfig linux-2.6.22-591/drivers/net/Kconfig
+--- linux-2.6.22-570/drivers/net/Kconfig 2007-12-21 15:35:57.000000000 -0500
++++ linux-2.6.22-591/drivers/net/Kconfig 2007-12-21 15:36:14.000000000 -0500
+@@ -119,6 +119,20 @@
+
+ If you don't know what to use this for, you don't need it.
+
++config ETUN
++ tristate "Ethernet tunnel device driver support"
++ depends on SYSFS
++ ---help---
++ ETUN provides a pair of network devices that can be used for
++ configuring interesting topologies. What one device transmits
++ the other receives and vice versa. The link level framing
++ is ethernet for wide compatibility with network stacks.
++
++ To compile this driver as a module, choose M here: the module
++ will be called etun.
++
++ If you don't know what to use this for, you don't need it.
++
+ config NET_SB1000
+ tristate "General Instruments Surfboard 1000"
+ depends on PNP
+@@ -2555,6 +2569,18 @@
+
+ source "drivers/s390/net/Kconfig"
+
++config XEN_NETDEV_FRONTEND
++ tristate "Xen network device frontend driver"
++ depends on XEN
++ default y
++ help
++ The network device frontend driver allows the kernel to
++ access network devices exported by a virtual
++ machine containing a physical network device driver. The
++ frontend driver is intended for unprivileged guest domains;
++ if you are compiling a kernel for a Xen guest, you almost
++ certainly want to enable this.
++ + config ISERIES_VETH + tristate "iSeries Virtual Ethernet driver support" + depends on PPC_ISERIES +diff -Nurb linux-2.6.22-570/drivers/net/Makefile linux-2.6.22-591/drivers/net/Makefile +--- linux-2.6.22-570/drivers/net/Makefile 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/Makefile 2007-12-21 15:36:14.000000000 -0500 +@@ -186,6 +186,7 @@ + obj-$(CONFIG_MACMACE) += macmace.o + obj-$(CONFIG_MAC89x0) += mac89x0.o + obj-$(CONFIG_TUN) += tun.o ++obj-$(CONFIG_ETUN) += etun.o + obj-$(CONFIG_NET_NETX) += netx-eth.o + obj-$(CONFIG_DL2K) += dl2k.o + obj-$(CONFIG_R8169) += r8169.o +@@ -224,7 +225,10 @@ + obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/ + + obj-$(CONFIG_NETCONSOLE) += netconsole.o ++obj-$(CONFIG_KGDBOE) += kgdboe.o + + obj-$(CONFIG_FS_ENET) += fs_enet/ + + obj-$(CONFIG_NETXEN_NIC) += netxen/ ++obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o ++ +diff -Nurb linux-2.6.22-570/drivers/net/a2065.c linux-2.6.22-591/drivers/net/a2065.c +--- linux-2.6.22-570/drivers/net/a2065.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/a2065.c 2007-12-21 15:36:12.000000000 -0500 +@@ -322,9 +322,9 @@ + + skb_reserve (skb, 2); /* 16 byte align */ + skb_put (skb, len); /* make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)&(ib->rx_buf [lp->rx_new][0]), +- len, 0); ++ len); + skb->protocol = eth_type_trans (skb, dev); + netif_rx (skb); + dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/ariadne.c linux-2.6.22-591/drivers/net/ariadne.c +--- linux-2.6.22-570/drivers/net/ariadne.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ariadne.c 2007-12-21 15:36:12.000000000 -0500 +@@ -746,7 +746,7 @@ + + skb_reserve(skb,2); /* 16 byte align */ + skb_put(skb,pkt_len); /* Make room */ +- eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0); ++ skb_copy_to_linear_data(skb, (char *)priv->rx_buff[entry], pkt_len); + skb->protocol=eth_type_trans(skb,dev); + #if 0 + printk(KERN_DEBUG "RX pkt type 0x%04x from ", +diff -Nurb linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c +--- linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/arm/ep93xx_eth.c 2007-12-21 15:36:12.000000000 -0500 +@@ -258,7 +258,7 @@ + skb_reserve(skb, 2); + dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr, + length, DMA_FROM_DEVICE); +- eth_copy_and_sum(skb, ep->rx_buf[entry], length, 0); ++ skb_copy_to_linear_data(skb, ep->rx_buf[entry], length); + skb_put(skb, length); + skb->protocol = eth_type_trans(skb, dev); + +diff -Nurb linux-2.6.22-570/drivers/net/au1000_eth.c linux-2.6.22-591/drivers/net/au1000_eth.c +--- linux-2.6.22-570/drivers/net/au1000_eth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/au1000_eth.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1205,8 +1205,8 @@ + continue; + } + skb_reserve(skb, 2); /* 16 byte IP header align */ +- eth_copy_and_sum(skb, +- (unsigned char *)pDB->vaddr, frmlen, 0); ++ skb_copy_to_linear_data(skb, ++ (unsigned char *)pDB->vaddr, frmlen); + skb_put(skb, frmlen); + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); /* pass the packet to upper layers */ +diff -Nurb linux-2.6.22-570/drivers/net/bnx2.c linux-2.6.22-591/drivers/net/bnx2.c +--- linux-2.6.22-570/drivers/net/bnx2.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/bnx2.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6490,10 +6490,10 @@ + memcpy(dev->perm_addr, 
bp->mac_addr, 6); + bp->name = board_info[ent->driver_data].name; + +- if (CHIP_NUM(bp) == CHIP_NUM_5709) +- dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG; +- else + dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; ++ if (CHIP_NUM(bp) == CHIP_NUM_5709) ++ dev->features |= NETIF_F_IPV6_CSUM; ++ + #ifdef BCM_VLAN + dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; + #endif +diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_3ad.c linux-2.6.22-591/drivers/net/bonding/bond_3ad.c +--- linux-2.6.22-570/drivers/net/bonding/bond_3ad.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/bonding/bond_3ad.c 2007-12-21 15:36:14.000000000 -0500 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + #include "bonding.h" + #include "bond_3ad.h" + +@@ -2448,6 +2449,9 @@ + struct slave *slave = NULL; + int ret = NET_RX_DROP; + ++ if (dev->nd_net != &init_net) ++ goto out; ++ + if (!(dev->flags & IFF_MASTER)) + goto out; + +diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_alb.c linux-2.6.22-591/drivers/net/bonding/bond_alb.c +--- linux-2.6.22-570/drivers/net/bonding/bond_alb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/bonding/bond_alb.c 2007-12-21 15:36:14.000000000 -0500 +@@ -345,6 +345,9 @@ + struct arp_pkt *arp = (struct arp_pkt *)skb->data; + int res = NET_RX_DROP; + ++ if (bond_dev->nd_net != &init_net) ++ goto out; ++ + if (!(bond_dev->flags & IFF_MASTER)) + goto out; + +diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_main.c linux-2.6.22-591/drivers/net/bonding/bond_main.c +--- linux-2.6.22-570/drivers/net/bonding/bond_main.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/bonding/bond_main.c 2007-12-21 15:36:14.000000000 -0500 +@@ -75,6 +75,7 @@ + #include + #include + #include ++#include + #include "bonding.h" + #include "bond_3ad.h" + #include "bond_alb.h" +@@ -2376,6 +2377,7 @@ + * can tag the ARP with the proper VLAN tag. + */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.fl4_dst = targets[i]; + fl.fl4_tos = RTO_ONLINK; + +@@ -2485,6 +2487,9 @@ + unsigned char *arp_ptr; + u32 sip, tip; + ++ if (dev->nd_net != &init_net) ++ goto out; ++ + if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER)) + goto out; + +@@ -3172,7 +3177,7 @@ + { + int len = strlen(DRV_NAME); + +- for (bond_proc_dir = proc_net->subdir; bond_proc_dir; ++ for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir; + bond_proc_dir = bond_proc_dir->next) { + if ((bond_proc_dir->namelen == len) && + !memcmp(bond_proc_dir->name, DRV_NAME, len)) { +@@ -3181,7 +3186,7 @@ + } + + if (!bond_proc_dir) { +- bond_proc_dir = proc_mkdir(DRV_NAME, proc_net); ++ bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net); + if (bond_proc_dir) { + bond_proc_dir->owner = THIS_MODULE; + } else { +@@ -3216,7 +3221,7 @@ + bond_proc_dir->owner = NULL; + } + } else { +- remove_proc_entry(DRV_NAME, proc_net); ++ remove_proc_entry(DRV_NAME, init_net.proc_net); + bond_proc_dir = NULL; + } + } +@@ -3323,6 +3328,9 @@ + { + struct net_device *event_dev = (struct net_device *)ptr; + ++ if (event_dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + dprintk("event_dev: %s, event: %lx\n", + (event_dev ? 
event_dev->name : "None"), + event); +@@ -3740,7 +3748,7 @@ + } + + down_write(&(bonding_rwsem)); +- slave_dev = dev_get_by_name(ifr->ifr_slave); ++ slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave); + + dprintk("slave_dev=%p: \n", slave_dev); + +diff -Nurb linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c +--- linux-2.6.22-570/drivers/net/bonding/bond_sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/bonding/bond_sysfs.c 2007-12-21 15:36:14.000000000 -0500 +@@ -35,6 +35,7 @@ + #include + #include + #include ++#include + + /* #define BONDING_DEBUG 1 */ + #include "bonding.h" +@@ -299,7 +300,7 @@ + read_unlock_bh(&bond->lock); + printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n", + bond->dev->name, ifname); +- dev = dev_get_by_name(ifname); ++ dev = dev_get_by_name(&init_net, ifname); + if (!dev) { + printk(KERN_INFO DRV_NAME + ": %s: Interface %s does not exist!\n", +diff -Nurb linux-2.6.22-570/drivers/net/dl2k.c linux-2.6.22-591/drivers/net/dl2k.c +--- linux-2.6.22-570/drivers/net/dl2k.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/dl2k.c 2007-12-21 15:36:12.000000000 -0500 +@@ -866,9 +866,9 @@ + PCI_DMA_FROMDEVICE); + /* 16 byte align the IP header */ + skb_reserve (skb, 2); +- eth_copy_and_sum (skb, ++ skb_copy_to_linear_data (skb, + np->rx_skbuff[entry]->data, +- pkt_len, 0); ++ pkt_len); + skb_put (skb, pkt_len); + pci_dma_sync_single_for_device(np->pdev, + desc->fraginfo & +diff -Nurb linux-2.6.22-570/drivers/net/dummy.c linux-2.6.22-591/drivers/net/dummy.c +--- linux-2.6.22-570/drivers/net/dummy.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/dummy.c 2007-12-21 15:36:12.000000000 -0500 +@@ -34,11 +34,17 @@ + #include + #include + #include ++#include ++#include ++ ++struct dummy_priv { ++ struct net_device *dev; ++ struct list_head list; ++}; + + static int numdummies = 1; + + static int dummy_xmit(struct sk_buff *skb, struct net_device *dev); +-static struct net_device_stats *dummy_get_stats(struct net_device *dev); + + static int dummy_set_address(struct net_device *dev, void *p) + { +@@ -56,13 +62,13 @@ + { + } + +-static void __init dummy_setup(struct net_device *dev) ++static void dummy_setup(struct net_device *dev) + { + /* Initialize the device structure. */ +- dev->get_stats = dummy_get_stats; + dev->hard_start_xmit = dummy_xmit; + dev->set_multicast_list = set_multicast_list; + dev->set_mac_address = dummy_set_address; ++ dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. 
*/ + ether_setup(dev); +@@ -76,77 +82,114 @@ + + static int dummy_xmit(struct sk_buff *skb, struct net_device *dev) + { +- struct net_device_stats *stats = netdev_priv(dev); +- +- stats->tx_packets++; +- stats->tx_bytes+=skb->len; ++ dev->stats.tx_packets++; ++ dev->stats.tx_bytes += skb->len; + + dev_kfree_skb(skb); + return 0; + } + +-static struct net_device_stats *dummy_get_stats(struct net_device *dev) ++static LIST_HEAD(dummies); ++ ++static int dummy_newlink(struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[]) + { +- return netdev_priv(dev); ++ struct dummy_priv *priv = netdev_priv(dev); ++ int err; ++ ++ err = register_netdevice(dev); ++ if (err < 0) ++ return err; ++ ++ priv->dev = dev; ++ list_add_tail(&priv->list, &dummies); ++ return 0; ++} ++ ++static void dummy_dellink(struct net_device *dev) ++{ ++ struct dummy_priv *priv = netdev_priv(dev); ++ ++ list_del(&priv->list); ++ unregister_netdevice(dev); + } + +-static struct net_device **dummies; ++static struct rtnl_link_ops dummy_link_ops __read_mostly = { ++ .kind = "dummy", ++ .priv_size = sizeof(struct dummy_priv), ++ .setup = dummy_setup, ++ .newlink = dummy_newlink, ++ .dellink = dummy_dellink, ++}; + + /* Number of dummy devices to be set up by this module. */ + module_param(numdummies, int, 0); + MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); + +-static int __init dummy_init_one(int index) ++static int __init dummy_init_one(void) + { + struct net_device *dev_dummy; ++ struct dummy_priv *priv; + int err; + +- dev_dummy = alloc_netdev(sizeof(struct net_device_stats), +- "dummy%d", dummy_setup); ++ dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d", ++ dummy_setup); + + if (!dev_dummy) + return -ENOMEM; + +- if ((err = register_netdev(dev_dummy))) { +- free_netdev(dev_dummy); +- dev_dummy = NULL; +- } else { +- dummies[index] = dev_dummy; +- } ++ err = dev_alloc_name(dev_dummy, dev_dummy->name); ++ if (err < 0) ++ goto err; ++ ++ dev_dummy->rtnl_link_ops = &dummy_link_ops; ++ err = register_netdevice(dev_dummy); ++ if (err < 0) ++ goto err; ++ ++ priv = netdev_priv(dev_dummy); ++ priv->dev = dev_dummy; ++ list_add_tail(&priv->list, &dummies); ++ return 0; + ++err: ++ free_netdev(dev_dummy); + return err; + } + +-static void dummy_free_one(int index) +-{ +- unregister_netdev(dummies[index]); +- free_netdev(dummies[index]); +-} +- + static int __init dummy_init_module(void) + { ++ struct dummy_priv *priv, *next; + int i, err = 0; +- dummies = kmalloc(numdummies * sizeof(void *), GFP_KERNEL); +- if (!dummies) +- return -ENOMEM; ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&dummy_link_ops); ++ + for (i = 0; i < numdummies && !err; i++) +- err = dummy_init_one(i); +- if (err) { +- i--; +- while (--i >= 0) +- dummy_free_one(i); ++ err = dummy_init_one(); ++ if (err < 0) { ++ list_for_each_entry_safe(priv, next, &dummies, list) ++ dummy_dellink(priv->dev); ++ __rtnl_link_unregister(&dummy_link_ops); + } ++ rtnl_unlock(); ++ + return err; + } + + static void __exit dummy_cleanup_module(void) + { +- int i; +- for (i = 0; i < numdummies; i++) +- dummy_free_one(i); +- kfree(dummies); ++ struct dummy_priv *priv, *next; ++ ++ rtnl_lock(); ++ list_for_each_entry_safe(priv, next, &dummies, list) ++ dummy_dellink(priv->dev); ++ ++ __rtnl_link_unregister(&dummy_link_ops); ++ rtnl_unlock(); + } + + module_init(dummy_init_module); + module_exit(dummy_cleanup_module); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_RTNL_LINK("dummy"); +diff -Nurb linux-2.6.22-570/drivers/net/eepro100.c 
linux-2.6.22-591/drivers/net/eepro100.c
+--- linux-2.6.22-570/drivers/net/eepro100.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/eepro100.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1801,7 +1801,7 @@
+
+ #if 1 || USE_IP_CSUM
+ /* Packet is in one chunk -- we can copy + cksum. */
+- eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, sp->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ #else
+ skb_copy_from_linear_data(sp->rx_skbuff[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/epic100.c linux-2.6.22-591/drivers/net/epic100.c
+--- linux-2.6.22-570/drivers/net/epic100.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/epic100.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1201,7 +1201,7 @@
+ ep->rx_ring[entry].bufaddr,
+ ep->rx_buf_sz,
+ PCI_DMA_FROMDEVICE);
+- eth_copy_and_sum(skb, ep->rx_skbuff[entry]->data, pkt_len, 0);
++ skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len);
+ skb_put(skb, pkt_len);
+ pci_dma_sync_single_for_device(ep->pci_dev,
+ ep->rx_ring[entry].bufaddr,
+diff -Nurb linux-2.6.22-570/drivers/net/eql.c linux-2.6.22-591/drivers/net/eql.c
+--- linux-2.6.22-570/drivers/net/eql.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/net/eql.c 2007-12-21 15:36:14.000000000 -0500
+@@ -116,6 +116,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -412,7 +413,7 @@
+ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(srq.slave_name);
++ slave_dev = dev_get_by_name(&init_net, srq.slave_name);
+ if (slave_dev) {
+ if ((master_dev->flags & IFF_UP) == IFF_UP) {
+ /* slave is not a master & not already a slave: */
+@@ -460,7 +461,7 @@
+ if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(srq.slave_name);
++ slave_dev = dev_get_by_name(&init_net, srq.slave_name);
+ ret = -EINVAL;
+ if (slave_dev) {
+ spin_lock_bh(&eql->queue.lock);
+@@ -493,7 +494,7 @@
+ if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(sc.slave_name);
++ slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ if (!slave_dev)
+ return -ENODEV;
+
+@@ -528,7 +529,7 @@
+ if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ return -EFAULT;
+
+- slave_dev = dev_get_by_name(sc.slave_name);
++ slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ if (!slave_dev)
+ return -ENODEV;
+
+diff -Nurb linux-2.6.22-570/drivers/net/etun.c linux-2.6.22-591/drivers/net/etun.c
+--- linux-2.6.22-570/drivers/net/etun.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/drivers/net/etun.c 2007-12-21 15:36:14.000000000 -0500
+@@ -0,0 +1,489 @@
++/*
++ * ETUN - Universal ETUN device driver.
++ * Copyright (C) 2006 Linux Networx
++ *
++ */
++
++#define DRV_NAME "etun"
++#define DRV_VERSION "1.0"
++#define DRV_DESCRIPTION "Ethernet pseudo tunnel device driver"
++#define DRV_COPYRIGHT "(C) 2007 Linux Networx"
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++
++/* Device checksum strategy.
++ *
++ * etun is designed to be a pair of virtual devices
++ * connecting two network stack instances.
++ *
++ * Typically it will either be used with ethernet bridging or
++ * it will be used to route packets between the two stacks.
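++ * (Either way the pair behaves like a virtual crossover cable: a frame
++ * transmitted on one device of the pair is delivered as received
++ * traffic on its partner.)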
++ * ++ * The only checksum offloading I can do is to completely ++ * skip the checksumming step altogether. ++ * ++ * When used for ethernet bridging I don't believe any ++ * checksum offloading is safe. ++ * - If my source is an external interface the checksum may be ++ * invalid so I don't want to report I have already checked it. ++ * - If my destination is an external interface I don't want to put ++ * a packet on the wire without someone computing the checksum. ++ * ++ * When used for routing between two stacks checksums should ++ * be as unnecessary as they are on the loopback device. ++ * ++ * So by default I am safe and disable checksumming and ++ * other advanced features like SG and TSO. ++ * ++ * However because I think these features could be useful ++ * I provide the ethtool functions to enable/disable ++ * them at runtime. ++ * ++ * If you think you can correctly enable these go ahead. ++ * For checksums both the transmitter and the receiver must ++ * agree before they are actually disabled. ++ */ ++ ++#define ETUN_NUM_STATS 1 ++static struct { ++ const char string[ETH_GSTRING_LEN]; ++} ethtool_stats_keys[ETUN_NUM_STATS] = { ++ { "partner_ifindex" }, ++}; ++ ++struct etun_info { ++ struct net_device *rx_dev; ++ unsigned ip_summed; ++ struct net_device_stats stats; ++ struct list_head list; ++ struct net_device *dev; ++}; ++ ++/* ++ * I have to hold the rtnl_lock during device delete. ++ * So I use the rtnl_lock to protect my list manipulations ++ * as well. Crude but simple. ++ */ ++static LIST_HEAD(etun_list); ++ ++/* ++ * The higher levels take care of making this non-reentrant (it's ++ * called with bh's disabled). ++ */ ++static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev) ++{ ++ struct etun_info *tx_info = tx_dev->priv; ++ struct net_device *rx_dev = tx_info->rx_dev; ++ struct etun_info *rx_info = rx_dev->priv; ++ ++ tx_info->stats.tx_packets++; ++ tx_info->stats.tx_bytes += skb->len; ++ ++ /* Drop the skb state that was needed to get here */ ++ skb_orphan(skb); ++ if (skb->dst) ++ skb->dst = dst_pop(skb->dst); /* Allow for smart routing */ ++ ++ /* Switch to the receiving device */ ++ skb->pkt_type = PACKET_HOST; ++ skb->protocol = eth_type_trans(skb, rx_dev); ++ skb->dev = rx_dev; ++ skb->ip_summed = CHECKSUM_NONE; ++ ++ /* If both halves agree no checksum is needed */ ++ if (tx_dev->features & NETIF_F_NO_CSUM) ++ skb->ip_summed = rx_info->ip_summed; ++ ++ rx_dev->last_rx = jiffies; ++ rx_info->stats.rx_packets++; ++ rx_info->stats.rx_bytes += skb->len; ++ netif_rx(skb); ++ ++ return 0; ++} ++ ++static struct net_device_stats *etun_get_stats(struct net_device *dev) ++{ ++ struct etun_info *info = dev->priv; ++ return &info->stats; ++} ++ ++/* ethtool interface */ ++static int etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) ++{ ++ cmd->supported = 0; ++ cmd->advertising = 0; ++ cmd->speed = SPEED_10000; /* Memory is fast!
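The checksum handoff in etun_xmit() above is deliberately two-sided: the transmitter's NETIF_F_NO_CSUM feature bit only takes effect if the receiving half has also opted in through its ip_summed policy. A hedged restatement as a standalone helper (the helper itself is illustrative; the patch open-codes this logic):

    static inline unsigned int etun_rx_ip_summed(struct net_device *tx_dev,
                                                 struct etun_info *rx_info)
    {
            /* Skip checksum verification only when the transmitter claims
             * NETIF_F_NO_CSUM and the receiver has agreed by setting its
             * policy to CHECKSUM_UNNECESSARY through ethtool. */
            if (tx_dev->features & NETIF_F_NO_CSUM)
                    return rx_info->ip_summed;
            return CHECKSUM_NONE;
    }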
*/ ++ cmd->duplex = DUPLEX_FULL; ++ cmd->port = PORT_TP; ++ cmd->phy_address = 0; ++ cmd->transceiver = XCVR_INTERNAL; ++ cmd->autoneg = AUTONEG_DISABLE; ++ cmd->maxtxpkt = 0; ++ cmd->maxrxpkt = 0; ++ return 0; ++} ++ ++static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) ++{ ++ strcpy(info->driver, DRV_NAME); ++ strcpy(info->version, DRV_VERSION); ++ strcpy(info->fw_version, "N/A"); ++} ++ ++static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf) ++{ ++ switch(stringset) { ++ case ETH_SS_STATS: ++ memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys)); ++ break; ++ case ETH_SS_TEST: ++ default: ++ break; ++ } ++} ++ ++static int etun_get_stats_count(struct net_device *dev) ++{ ++ return ETUN_NUM_STATS; ++} ++ ++static void etun_get_ethtool_stats(struct net_device *dev, ++ struct ethtool_stats *stats, u64 *data) ++{ ++ struct etun_info *info = dev->priv; ++ ++ data[0] = info->rx_dev->ifindex; ++} ++ ++static u32 etun_get_rx_csum(struct net_device *dev) ++{ ++ struct etun_info *info = dev->priv; ++ return info->ip_summed == CHECKSUM_UNNECESSARY; ++} ++ ++static int etun_set_rx_csum(struct net_device *dev, u32 data) ++{ ++ struct etun_info *info = dev->priv; ++ ++ info->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE; ++ ++ return 0; ++} ++ ++static u32 etun_get_tx_csum(struct net_device *dev) ++{ ++ return (dev->features & NETIF_F_NO_CSUM) != 0; ++} ++ ++static int etun_set_tx_csum(struct net_device *dev, u32 data) ++{ ++ dev->features &= ~NETIF_F_NO_CSUM; ++ if (data) ++ dev->features |= NETIF_F_NO_CSUM; ++ ++ return 0; ++} ++ ++static struct ethtool_ops etun_ethtool_ops = { ++ .get_settings = etun_get_settings, ++ .get_drvinfo = etun_get_drvinfo, ++ .get_link = ethtool_op_get_link, ++ .get_rx_csum = etun_get_rx_csum, ++ .set_rx_csum = etun_set_rx_csum, ++ .get_tx_csum = etun_get_tx_csum, ++ .set_tx_csum = etun_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, ++ .set_sg = ethtool_op_set_sg, ++#if 0 /* Does just setting the bit successfully emulate tso? */ ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = ethtool_op_set_tso, ++#endif ++ .get_strings = etun_get_strings, ++ .get_stats_count = etun_get_stats_count, ++ .get_ethtool_stats = etun_get_ethtool_stats, ++ .get_perm_addr = ethtool_op_get_perm_addr, ++}; ++ ++static int etun_open(struct net_device *tx_dev) ++{ ++ struct etun_info *tx_info = tx_dev->priv; ++ struct net_device *rx_dev = tx_info->rx_dev; ++ /* If we attempt to bring up etun in the small window before ++ * it is connected to its partner, return an error.
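Given the ethtool hooks above, enabling the no-checksum fast path takes one toggle on each half of the pair; flipping only one side still leaves packets marked CHECKSUM_NONE. A hedged in-kernel sketch (dev0/dev1 are illustrative):

    etun_set_tx_csum(dev0, 1);  /* dev0 may transmit without checksumming ... */
    etun_set_rx_csum(dev1, 1);  /* ... once dev1 agrees to treat its receive
                                 * path as CHECKSUM_UNNECESSARY. */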
++ */ ++ if (!rx_dev) ++ return -ENOTCONN; ++ if (rx_dev->flags & IFF_UP) { ++ netif_carrier_on(tx_dev); ++ netif_carrier_on(rx_dev); ++ } ++ netif_start_queue(tx_dev); ++ return 0; ++} ++ ++static int etun_stop(struct net_device *tx_dev) ++{ ++ struct etun_info *tx_info = tx_dev->priv; ++ struct net_device *rx_dev = tx_info->rx_dev; ++ netif_stop_queue(tx_dev); ++ if (netif_carrier_ok(tx_dev)) { ++ netif_carrier_off(tx_dev); ++ netif_carrier_off(rx_dev); ++ } ++ return 0; ++} ++ ++static int etun_change_mtu(struct net_device *dev, int new_mtu) ++{ ++ /* Don't allow ridiculously small mtus */ ++ if (new_mtu < (ETH_ZLEN - ETH_HLEN)) ++ return -EINVAL; ++ dev->mtu = new_mtu; ++ return 0; ++} ++ ++static void etun_set_multicast_list(struct net_device *dev) ++{ ++ /* Nothing sane I can do here */ ++ return; ++} ++ ++static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) ++{ ++ return -EOPNOTSUPP; ++} ++ ++/* Only allow letters and numbers in an etun device name */ ++static int is_valid_name(const char *name) ++{ ++ const char *ptr; ++ for (ptr = name; *ptr; ptr++) { ++ if (!isalnum(*ptr)) ++ return 0; ++ } ++ return 1; ++} ++ ++static struct net_device *etun_alloc(struct net *net, const char *name) ++{ ++ struct net_device *dev; ++ struct etun_info *info; ++ int err; ++ ++ if (!name || !is_valid_name(name)) ++ return ERR_PTR(-EINVAL); ++ ++ dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup); ++ if (!dev) ++ return ERR_PTR(-ENOMEM); ++ ++ info = dev->priv; ++ info->dev = dev; ++ dev->nd_net = net; ++ ++ random_ether_addr(dev->dev_addr); ++ dev->tx_queue_len = 0; /* A queue is silly for a loopback device */ ++ dev->hard_start_xmit = etun_xmit; ++ dev->get_stats = etun_get_stats; ++ dev->open = etun_open; ++ dev->stop = etun_stop; ++ dev->set_multicast_list = etun_set_multicast_list; ++ dev->do_ioctl = etun_ioctl; ++ dev->features = NETIF_F_FRAGLIST ++ | NETIF_F_HIGHDMA ++ | NETIF_F_LLTX; ++ dev->flags = IFF_BROADCAST | IFF_MULTICAST |IFF_PROMISC; ++ dev->ethtool_ops = &etun_ethtool_ops; ++ dev->destructor = free_netdev; ++ dev->change_mtu = etun_change_mtu; ++ err = register_netdev(dev); ++ if (err) { ++ free_netdev(dev); ++ dev = ERR_PTR(err); ++ goto out; ++ } ++ netif_carrier_off(dev); ++out: ++ return dev; ++} ++ ++static int etun_alloc_pair(struct net *net, const char *name0, const char *name1) ++{ ++ struct net_device *dev0, *dev1; ++ struct etun_info *info0, *info1; ++ ++ dev0 = etun_alloc(net, name0); ++ if (IS_ERR(dev0)) { ++ return PTR_ERR(dev0); ++ } ++ info0 = dev0->priv; ++ ++ dev1 = etun_alloc(net, name1); ++ if (IS_ERR(dev1)) { ++ unregister_netdev(dev0); ++ return PTR_ERR(dev1); ++ } ++ info1 = dev1->priv; ++ ++ dev_hold(dev0); ++ dev_hold(dev1); ++ info0->rx_dev = dev1; ++ info1->rx_dev = dev0; ++ ++ /* Only place one member of the pair on the list ++ * so I don't confuse list_for_each_entry_safe, ++ * by deleting two list entries at once. 
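The precaution described just above matters because list_for_each_entry_safe() caches only a single lookahead node: an iteration step may delete its own entry, but deleting the peer's entry as a side effect could free exactly the node the iterator cached as "next". A hedged illustration of the loop that stays safe under this one-node-per-pair scheme (the same shape etun_cleanup() uses later in the file):

    /* Safe: each pass deletes only the current pair's single list node. */
    list_for_each_entry_safe(info, tmp, &etun_list, list)
            etun_unregister_pair(info->dev);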
++ */ ++ rtnl_lock(); ++ list_add(&info0->list, &etun_list); ++ INIT_LIST_HEAD(&info1->list); ++ rtnl_unlock(); ++ ++ return 0; ++} ++ ++static int etun_unregister_pair(struct net_device *dev0) ++{ ++ struct etun_info *info0, *info1; ++ struct net_device *dev1; ++ ++ ASSERT_RTNL(); ++ ++ if (!dev0) ++ return -ENODEV; ++ ++ /* Ensure my network devices are not passing packets */ ++ dev_close(dev0); ++ info0 = dev0->priv; ++ dev1 = info0->rx_dev; ++ info1 = dev1->priv; ++ dev_close(dev1); ++ ++ /* Drop the cross device references */ ++ dev_put(dev0); ++ dev_put(dev1); ++ ++ /* Remove from the etun list */ ++ if (!list_empty(&info0->list)) ++ list_del_init(&info0->list); ++ if (!list_empty(&info1->list)) ++ list_del_init(&info1->list); ++ ++ unregister_netdevice(dev0); ++ unregister_netdevice(dev1); ++ return 0; ++} ++ ++static int etun_noget(char *buffer, struct kernel_param *kp) ++{ ++ return 0; ++} ++ ++static int etun_newif(const char *val, struct kernel_param *kp) ++{ ++ char name0[IFNAMSIZ], name1[IFNAMSIZ]; ++ const char *mid; ++ int len, len0, len1; ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ /* Avoid frustration by removing trailing whitespace */ ++ len = strlen(val); ++ while (isspace(val[len - 1])) ++ len--; ++ ++ /* Split the string into 2 names */ ++ mid = memchr(val, ',', len); ++ if (!mid) ++ return -EINVAL; ++ ++ /* Get the first device name */ ++ len0 = mid - val; ++ if (len0 > sizeof(name0) - 1) ++ len0 = sizeof(name0) - 1; ++ strncpy(name0, val, len0); ++ name0[len0] = '\0'; ++ ++ /* And the second device name */ ++ len1 = len - (len0 + 1); ++ if (len1 > sizeof(name1) - 1) ++ len1 = sizeof(name1) - 1; ++ strncpy(name1, mid + 1, len1); ++ name1[len1] = '\0'; ++ ++ return etun_alloc_pair(current->nsproxy->net_ns, name0, name1); ++} ++ ++static int etun_delif(const char *val, struct kernel_param *kp) ++{ ++ char name[IFNAMSIZ]; ++ int len; ++ struct net_device *dev; ++ int err; ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ /* Avoid frustration by removing trailing whitespace */ ++ len = strlen(val); ++ while (isspace(val[len - 1])) ++ len--; ++ ++ /* Get the device name */ ++ if (len > sizeof(name) - 1) ++ return -EINVAL; ++ strncpy(name, val, len); ++ name[len] = '\0'; ++ ++ /* Double check I don't have strange characters in my device name */ ++ if (!is_valid_name(name)) ++ return -EINVAL; ++ ++ rtnl_lock(); ++ err = -ENODEV; ++ dev = __dev_get_by_name(current->nsproxy->net_ns, name); ++ err = etun_unregister_pair(dev); ++ rtnl_unlock(); ++ return err; ++} ++ ++static int __init etun_init(void) ++{ ++ printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); ++ printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT); ++ ++ return 0; ++} ++ ++static void etun_cleanup(void) ++{ ++ struct etun_info *info, *tmp; ++ rtnl_lock(); ++ list_for_each_entry_safe(info, tmp, &etun_list, list) { ++ etun_unregister_pair(info->dev); ++ } ++ rtnl_unlock(); ++} ++ ++module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR); ++module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR); ++module_init(etun_init); ++module_exit(etun_cleanup); ++MODULE_DESCRIPTION(DRV_DESCRIPTION); ++MODULE_AUTHOR("Eric Biederman "); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/drivers/net/fealnx.c linux-2.6.22-591/drivers/net/fealnx.c +--- linux-2.6.22-570/drivers/net/fealnx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/fealnx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1727,8 +1727,8 @@ + /* Call copy + cksum if available. */ + + #if !
defined(__alpha__) +- eth_copy_and_sum(skb, +- np->cur_rx->skbuff->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, ++ np->cur_rx->skbuff->data, pkt_len); + skb_put(skb, pkt_len); + #else + memcpy(skb_put(skb, pkt_len), +diff -Nurb linux-2.6.22-570/drivers/net/fec.c linux-2.6.22-591/drivers/net/fec.c +--- linux-2.6.22-570/drivers/net/fec.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/fec.c 2007-12-21 15:36:12.000000000 -0500 +@@ -648,7 +648,7 @@ + fep->stats.rx_dropped++; + } else { + skb_put(skb,pkt_len-4); /* Make room */ +- eth_copy_and_sum(skb, data, pkt_len-4, 0); ++ skb_copy_to_linear_data(skb, data, pkt_len-4); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + } +diff -Nurb linux-2.6.22-570/drivers/net/hamachi.c linux-2.6.22-591/drivers/net/hamachi.c +--- linux-2.6.22-570/drivers/net/hamachi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/hamachi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1575,8 +1575,8 @@ + PCI_DMA_FROMDEVICE); + /* Call copy + cksum if available. */ + #if 1 || USE_IP_COPYSUM +- eth_copy_and_sum(skb, +- hmp->rx_skbuff[entry]->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, ++ hmp->rx_skbuff[entry]->data, pkt_len); + skb_put(skb, pkt_len); + #else + memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma +diff -Nurb linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c +--- linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/hamradio/baycom_epp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -320,7 +320,7 @@ + sprintf(portarg, "%ld", bc->pdev->port->base); + printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg); + +- return call_usermodehelper(eppconfig_path, argv, envp, 1); ++ return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC); + } + + /* ---------------------------------------------------------------------- */ +diff -Nurb linux-2.6.22-570/drivers/net/hamradio/bpqether.c linux-2.6.22-591/drivers/net/hamradio/bpqether.c +--- linux-2.6.22-570/drivers/net/hamradio/bpqether.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/hamradio/bpqether.c 2007-12-21 15:36:14.000000000 -0500 +@@ -83,6 +83,7 @@ + + #include + #include ++#include + + #include + +@@ -172,6 +173,9 @@ + struct ethhdr *eth; + struct bpqdev *bpq; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NET_RX_DROP; + +@@ -559,6 +563,9 @@ + { + struct net_device *dev = (struct net_device *)ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (!dev_is_ethdev(dev)) + return NOTIFY_DONE; + +@@ -594,7 +601,7 @@ + static int __init bpq_init_driver(void) + { + #ifdef CONFIG_PROC_FS +- if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) { ++ if (!proc_net_fops_create(&init_net, "bpqether", S_IRUGO, &bpq_info_fops)) { + printk(KERN_ERR + "bpq: cannot create /proc/net/bpqether entry.\n"); + return -ENOENT; +@@ -618,7 +625,7 @@ + + unregister_netdevice_notifier(&bpq_dev_notifier); + +- proc_net_remove("bpqether"); ++ proc_net_remove(&init_net, "bpqether"); + + rtnl_lock(); + while (!list_empty(&bpq_devices)) { +diff -Nurb linux-2.6.22-570/drivers/net/hamradio/scc.c linux-2.6.22-591/drivers/net/hamradio/scc.c +--- linux-2.6.22-570/drivers/net/hamradio/scc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/hamradio/scc.c 2007-12-21 15:36:14.000000000 -0500 +@@ -174,6 
+174,7 @@ + #include + #include + ++#include + #include + + #include +@@ -2114,7 +2115,7 @@ + } + rtnl_unlock(); + +- proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops); ++ proc_net_fops_create(&init_net, "z8530drv", 0, &scc_net_seq_fops); + + return 0; + } +@@ -2169,7 +2170,7 @@ + if (Vector_Latch) + release_region(Vector_Latch, 1); + +- proc_net_remove("z8530drv"); ++ proc_net_remove(&init_net, "z8530drv"); + } + + MODULE_AUTHOR("Joerg Reuter "); +diff -Nurb linux-2.6.22-570/drivers/net/hamradio/yam.c linux-2.6.22-591/drivers/net/hamradio/yam.c +--- linux-2.6.22-570/drivers/net/hamradio/yam.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/hamradio/yam.c 2007-12-21 15:36:14.000000000 -0500 +@@ -61,6 +61,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1142,7 +1143,7 @@ + yam_timer.expires = jiffies + HZ / 100; + add_timer(&yam_timer); + +- proc_net_fops_create("yam", S_IRUGO, &yam_info_fops); ++ proc_net_fops_create(&init_net, "yam", S_IRUGO, &yam_info_fops); + return 0; + error: + while (--i >= 0) { +@@ -1174,7 +1175,7 @@ + kfree(p); + } + +- proc_net_remove("yam"); ++ proc_net_remove(&init_net, "yam"); + } + + /* --------------------------------------------------------------------- */ +diff -Nurb linux-2.6.22-570/drivers/net/ibmveth.c linux-2.6.22-591/drivers/net/ibmveth.c +--- linux-2.6.22-570/drivers/net/ibmveth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ibmveth.c 2007-12-21 15:36:14.000000000 -0500 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -97,7 +98,7 @@ + static struct kobj_type ktype_veth_pool; + + #ifdef CONFIG_PROC_FS +-#define IBMVETH_PROC_DIR "net/ibmveth" ++#define IBMVETH_PROC_DIR "ibmveth" + static struct proc_dir_entry *ibmveth_proc_dir; + #endif + +@@ -1093,7 +1094,7 @@ + #ifdef CONFIG_PROC_FS + static void ibmveth_proc_register_driver(void) + { +- ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL); ++ ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, init_net.proc_net); + if (ibmveth_proc_dir) { + SET_MODULE_OWNER(ibmveth_proc_dir); + } +@@ -1101,7 +1102,7 @@ + + static void ibmveth_proc_unregister_driver(void) + { +- remove_proc_entry(IBMVETH_PROC_DIR, NULL); ++ remove_proc_entry(IBMVETH_PROC_DIR, init_net.proc_net); + } + + static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos) +@@ -1337,7 +1338,7 @@ + + #define ATTR(_name, _mode) \ + struct attribute veth_##_name##_attr = { \ +- .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \ ++ .name = __stringify(_name), .mode = _mode, \ + }; + + static ATTR(active, 0644); +diff -Nurb linux-2.6.22-570/drivers/net/ifb.c linux-2.6.22-591/drivers/net/ifb.c +--- linux-2.6.22-570/drivers/net/ifb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ifb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -33,12 +33,15 @@ + #include + #include + #include ++#include + #include + + #define TX_TIMEOUT (2*HZ) + + #define TX_Q_LIMIT 32 + struct ifb_private { ++ struct list_head list; ++ struct net_device *dev; + struct net_device_stats stats; + struct tasklet_struct ifb_tasklet; + int tasklet_pending; +@@ -136,13 +139,14 @@ + + } + +-static void __init ifb_setup(struct net_device *dev) ++static void ifb_setup(struct net_device *dev) + { + /* Initialize the device structure. 
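The scc, yam, and ibmveth hunks above all apply one conversion: /proc/net is now per-namespace state, so entries are created against a struct net instead of a global directory. A minimal sketch of the registration pattern (the entry name and fops are illustrative):

    static const struct file_operations example_fops;  /* assumed defined elsewhere */

    static int __init example_proc_init(void)
    {
            /* creates /proc/net/example in the initial namespace */
            if (!proc_net_fops_create(&init_net, "example", S_IRUGO, &example_fops))
                    return -ENOMEM;
            return 0;
    }

    static void __exit example_proc_exit(void)
    {
            proc_net_remove(&init_net, "example");
    }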
*/ + dev->get_stats = ifb_get_stats; + dev->hard_start_xmit = ifb_xmit; + dev->open = &ifb_open; + dev->stop = &ifb_close; ++ dev->destructor = free_netdev; + + /* Fill in device structure with ethernet-generic values. */ + ether_setup(dev); +@@ -197,7 +201,7 @@ + return stats; + } + +-static struct net_device **ifbs; ++static LIST_HEAD(ifbs); + + /* Number of ifb devices to be set up by this module. */ + module_param(numifbs, int, 0); +@@ -226,9 +230,41 @@ + return 0; + } + ++static int ifb_newlink(struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[]) ++{ ++ struct ifb_private *priv = netdev_priv(dev); ++ int err; ++ ++ err = register_netdevice(dev); ++ if (err < 0) ++ return err; ++ ++ priv->dev = dev; ++ list_add_tail(&priv->list, &ifbs); ++ return 0; ++} ++ ++static void ifb_dellink(struct net_device *dev) ++{ ++ struct ifb_private *priv = netdev_priv(dev); ++ ++ list_del(&priv->list); ++ unregister_netdevice(dev); ++} ++ ++static struct rtnl_link_ops ifb_link_ops __read_mostly = { ++ .kind = "ifb", ++ .priv_size = sizeof(struct ifb_private), ++ .setup = ifb_setup, ++ .newlink = ifb_newlink, ++ .dellink = ifb_dellink, ++}; ++ + static int __init ifb_init_one(int index) + { + struct net_device *dev_ifb; ++ struct ifb_private *priv; + int err; + + dev_ifb = alloc_netdev(sizeof(struct ifb_private), +@@ -237,49 +273,59 @@ + if (!dev_ifb) + return -ENOMEM; + +- if ((err = register_netdev(dev_ifb))) { +- free_netdev(dev_ifb); +- dev_ifb = NULL; +- } else { +- ifbs[index] = dev_ifb; +- } ++ err = dev_alloc_name(dev_ifb, dev_ifb->name); ++ if (err < 0) ++ goto err; ++ ++ dev_ifb->rtnl_link_ops = &ifb_link_ops; ++ err = register_netdevice(dev_ifb); ++ if (err < 0) ++ goto err; ++ ++ priv = netdev_priv(dev_ifb); ++ priv->dev = dev_ifb; ++ list_add_tail(&priv->list, &ifbs); ++ return 0; + ++err: ++ free_netdev(dev_ifb); + return err; + } + +-static void ifb_free_one(int index) +-{ +- unregister_netdev(ifbs[index]); +- free_netdev(ifbs[index]); +-} +- + static int __init ifb_init_module(void) + { +- int i, err = 0; +- ifbs = kmalloc(numifbs * sizeof(void *), GFP_KERNEL); +- if (!ifbs) +- return -ENOMEM; ++ struct ifb_private *priv, *next; ++ int i, err; ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(&ifb_link_ops); ++ + for (i = 0; i < numifbs && !err; i++) + err = ifb_init_one(i); + if (err) { +- i--; +- while (--i >= 0) +- ifb_free_one(i); ++ list_for_each_entry_safe(priv, next, &ifbs, list) ++ ifb_dellink(priv->dev); ++ __rtnl_link_unregister(&ifb_link_ops); + } ++ rtnl_unlock(); + + return err; + } + + static void __exit ifb_cleanup_module(void) + { +- int i; ++ struct ifb_private *priv, *next; ++ ++ rtnl_lock(); ++ list_for_each_entry_safe(priv, next, &ifbs, list) ++ ifb_dellink(priv->dev); + +- for (i = 0; i < numifbs; i++) +- ifb_free_one(i); +- kfree(ifbs); ++ __rtnl_link_unregister(&ifb_link_ops); ++ rtnl_unlock(); + } + + module_init(ifb_init_module); + module_exit(ifb_cleanup_module); + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Jamal Hadi Salim"); ++MODULE_ALIAS_RTNL_LINK("ifb"); +diff -Nurb linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c +--- linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ixp2000/ixpdev.c 2007-12-21 15:36:12.000000000 -0500 +@@ -111,7 +111,7 @@ + skb = dev_alloc_skb(desc->pkt_length + 2); + if (likely(skb != NULL)) { + skb_reserve(skb, 2); +- eth_copy_and_sum(skb, buf, desc->pkt_length, 0); ++ skb_copy_to_linear_data(skb, buf, 
desc->pkt_length); + skb_put(skb, desc->pkt_length); + skb->protocol = eth_type_trans(skb, nds[desc->channel]); + +diff -Nurb linux-2.6.22-570/drivers/net/kgdboe.c linux-2.6.22-591/drivers/net/kgdboe.c +--- linux-2.6.22-570/drivers/net/kgdboe.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/kgdboe.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,294 @@ ++/* ++ * drivers/net/kgdboe.c ++ * ++ * A network interface for GDB. ++ * Based upon 'gdbserial' by David Grothe ++ * and Scott Foehner ++ * ++ * Maintainers: Amit S. Kale and ++ * Tom Rini ++ * ++ * 2004 (c) Amit S. Kale ++ * 2004-2005 (c) MontaVista Software, Inc. ++ * 2005 (c) Wind River Systems, Inc. ++ * ++ * Contributors at various stages not listed above: ++ * San Mehat , Robert Walsh , ++ * wangdi , Matt Mackall , ++ * Pavel Machek , Jason Wessel ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define IN_BUF_SIZE 512 /* power of 2, please */ ++#define NOT_CONFIGURED_STRING "not_configured" ++#define OUT_BUF_SIZE 30 /* We don't want to send too big of a packet. */ ++#define MAX_KGDBOE_CONFIG_STR 256 ++ ++static char in_buf[IN_BUF_SIZE], out_buf[OUT_BUF_SIZE]; ++static int in_head, in_tail, out_count; ++static atomic_t in_count; ++/* 0 = unconfigured, 1 = netpoll options parsed, 2 = fully configured. */ ++static int configured; ++static struct kgdb_io local_kgdb_io_ops; ++static int use_dynamic_mac; ++ ++MODULE_DESCRIPTION("KGDB driver for network interfaces"); ++MODULE_LICENSE("GPL"); ++static char config[MAX_KGDBOE_CONFIG_STR] = NOT_CONFIGURED_STRING; ++static struct kparam_string kps = { ++ .string = config, ++ .maxlen = MAX_KGDBOE_CONFIG_STR, ++}; ++ ++static void rx_hook(struct netpoll *np, int port, char *msg, int len, ++ struct sk_buff *skb) ++{ ++ int i; ++ ++ np->remote_port = port; ++ ++ /* Copy the MAC address if we need to. */ ++ if (use_dynamic_mac) { ++ memcpy(np->remote_mac, eth_hdr(skb)->h_source, ++ sizeof(np->remote_mac)); ++ use_dynamic_mac = 0; ++ } ++ ++ /* ++ * This could be GDB trying to attach. But it could also be GDB ++ * finishing up a session, with kgdb_connected=0 but GDB sending ++ * an ACK for the final packet. To make sure we don't try and ++ * make a breakpoint when GDB is leaving, make sure that if ++ * !kgdb_connected the only len == 1 packet we allow is ^C. 
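The "power of 2, please" note on IN_BUF_SIZE above exists because the driver wraps its ring indices with a bitmask rather than a modulo, as the receive loop below does. A hedged distillation of the invariant:

    /* Valid only when IN_BUF_SIZE is a power of two: (IN_BUF_SIZE - 1)
     * is then a contiguous low-bit mask, so 'head & mask' equals
     * 'head % IN_BUF_SIZE' without a division. */
    in_buf[in_head++] = c;
    in_head &= (IN_BUF_SIZE - 1);
    atomic_inc(&in_count);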
++ */ ++ if (!kgdb_connected && (len != 1 || msg[0] == 3) && ++ !atomic_read(&kgdb_setting_breakpoint)) { ++ tasklet_schedule(&kgdb_tasklet_breakpoint); ++ } ++ ++ for (i = 0; i < len; i++) { ++ if (msg[i] == 3) ++ tasklet_schedule(&kgdb_tasklet_breakpoint); ++ ++ if (atomic_read(&in_count) >= IN_BUF_SIZE) { ++ /* buffer overflow, clear it */ ++ in_head = in_tail = 0; ++ atomic_set(&in_count, 0); ++ break; ++ } ++ in_buf[in_head++] = msg[i]; ++ in_head &= (IN_BUF_SIZE - 1); ++ atomic_inc(&in_count); ++ } ++} ++ ++static struct netpoll np = { ++ .dev_name = "eth0", ++ .name = "kgdboe", ++ .rx_hook = rx_hook, ++ .local_port = 6443, ++ .remote_port = 6442, ++ .remote_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, ++}; ++ ++static void eth_pre_exception_handler(void) ++{ ++ /* Increment the module count when the debugger is active */ ++ if (!kgdb_connected) ++ try_module_get(THIS_MODULE); ++ netpoll_set_trap(1); ++} ++ ++static void eth_post_exception_handler(void) ++{ ++ /* decrement the module count when the debugger detaches */ ++ if (!kgdb_connected) ++ module_put(THIS_MODULE); ++ netpoll_set_trap(0); ++} ++ ++static int eth_get_char(void) ++{ ++ int chr; ++ ++ while (atomic_read(&in_count) == 0) ++ netpoll_poll(&np); ++ ++ chr = in_buf[in_tail++]; ++ in_tail &= (IN_BUF_SIZE - 1); ++ atomic_dec(&in_count); ++ return chr; ++} ++ ++static void eth_flush_buf(void) ++{ ++ if (out_count && np.dev) { ++ netpoll_send_udp(&np, out_buf, out_count); ++ memset(out_buf, 0, sizeof(out_buf)); ++ out_count = 0; ++ } ++} ++ ++static void eth_put_char(u8 chr) ++{ ++ out_buf[out_count++] = chr; ++ if (out_count == OUT_BUF_SIZE) ++ eth_flush_buf(); ++} ++ ++static int option_setup(char *opt) ++{ ++ char opt_scratch[MAX_KGDBOE_CONFIG_STR]; ++ ++ /* If we're being given a new configuration, copy it in. */ ++ if (opt != config) ++ strcpy(config, opt); ++ /* But work on a copy as netpoll_parse_options will eat it. */ ++ strcpy(opt_scratch, opt); ++ configured = !netpoll_parse_options(&np, opt_scratch); ++ ++ use_dynamic_mac = 1; ++ ++ return 0; ++} ++__setup("kgdboe=", option_setup); ++ ++/* With our config string set by some means, configure kgdboe. */ ++static int configure_kgdboe(void) ++{ ++ /* Try out the string. */ ++ option_setup(config); ++ ++ if (!configured) { ++ printk(KERN_ERR "kgdboe: configuration incorrect - kgdboe not " ++ "loaded.\n"); ++ printk(KERN_ERR " Usage: kgdboe=[src-port]@[src-ip]/[dev]," ++ "[tgt-port]@/\n"); ++ return -EINVAL; ++ } ++ ++ /* Bring it up. */ ++ if (netpoll_setup(&np)) { ++ printk(KERN_ERR "kgdboe: netpoll_setup failed kgdboe failed\n"); ++ return -EINVAL; ++ } ++ ++ if (kgdb_register_io_module(&local_kgdb_io_ops)) { ++ netpoll_cleanup(&np); ++ return -EINVAL; ++ } ++ ++ configured = 2; ++ ++ return 0; ++} ++ ++static int init_kgdboe(void) ++{ ++ int ret; ++ ++ /* Already done? */ ++ if (configured == 2) ++ return 0; ++ ++ /* OK, go ahead and do it. 
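configure_kgdboe() above defers all parsing to netpoll_parse_options(), so the configuration string follows the standard netpoll grammar (the same one netconsole uses), with most fields optional. A hedged example, with illustrative addresses:

    kgdboe=@192.168.0.2/eth0,@192.168.0.1/

This leaves both ports at their defaults, binds the source to eth0, and lets the target MAC fall back to the broadcast address preloaded in the struct netpoll above.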
*/ ++ ret = configure_kgdboe(); ++ ++ if (configured == 2) ++ printk(KERN_INFO "kgdboe: debugging over ethernet enabled\n"); ++ ++ return ret; ++} ++ ++static void cleanup_kgdboe(void) ++{ ++ netpoll_cleanup(&np); ++ configured = 0; ++ kgdb_unregister_io_module(&local_kgdb_io_ops); ++} ++ ++static int param_set_kgdboe_var(const char *kmessage, struct kernel_param *kp) ++{ ++ char kmessage_save[MAX_KGDBOE_CONFIG_STR]; ++ int msg_len = strlen(kmessage); ++ ++ if (msg_len + 1 > MAX_KGDBOE_CONFIG_STR) { ++ printk(KERN_ERR "%s: string doesn't fit in %u chars.\n", ++ kp->name, MAX_KGDBOE_CONFIG_STR - 1); ++ return -ENOSPC; ++ } ++ ++ if (kgdb_connected) { ++ printk(KERN_ERR "kgdboe: Cannot reconfigure while KGDB is " ++ "connected.\n"); ++ return 0; ++ } ++ ++ /* Start the reconfiguration process by saving the old string */ ++ strncpy(kmessage_save, config, sizeof(kmessage_save)); ++ ++ ++ /* Copy in the new param and strip out invalid characters so we ++ * can optionally specify the MAC. ++ */ ++ strncpy(config, kmessage, sizeof(config)); ++ msg_len--; ++ while (msg_len > 0 && ++ (config[msg_len] < ',' || config[msg_len] > 'f')) { ++ config[msg_len] = '\0'; ++ msg_len--; ++ } ++ ++ /* Check to see if we are unconfiguring the io module and that it ++ * was in a fully configured state, as this is the only time that ++ * netpoll_cleanup should get called ++ */ ++ if (configured == 2 && strcmp(config, NOT_CONFIGURED_STRING) == 0) { ++ printk(KERN_INFO "kgdboe: reverting to unconfigured state\n"); ++ cleanup_kgdboe(); ++ return 0; ++ } else ++ /* Go and configure with the new params. */ ++ configure_kgdboe(); ++ ++ if (configured == 2) ++ return 0; ++ ++ /* If the new string was invalid, revert to the previous state, which ++ * is at a minimum not_configured. 
*/ ++ strncpy(config, kmessage_save, sizeof(config)); ++ if (strcmp(kmessage_save, NOT_CONFIGURED_STRING) != 0) { ++ printk(KERN_INFO "kgdboe: reverting to prior configuration\n"); ++ /* revert back to the original config */ ++ strncpy(config, kmessage_save, sizeof(config)); ++ configure_kgdboe(); ++ } ++ return 0; ++} ++ ++static struct kgdb_io local_kgdb_io_ops = { ++ .read_char = eth_get_char, ++ .write_char = eth_put_char, ++ .init = init_kgdboe, ++ .flush = eth_flush_buf, ++ .pre_exception = eth_pre_exception_handler, ++ .post_exception = eth_post_exception_handler ++}; ++ ++module_init(init_kgdboe); ++module_exit(cleanup_kgdboe); ++module_param_call(kgdboe, param_set_kgdboe_var, param_get_string, &kps, 0644); ++MODULE_PARM_DESC(kgdboe, " kgdboe=[src-port]@[src-ip]/[dev]," ++ "[tgt-port]@/\n"); +diff -Nurb linux-2.6.22-570/drivers/net/lance.c linux-2.6.22-591/drivers/net/lance.c +--- linux-2.6.22-570/drivers/net/lance.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/lance.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1186,9 +1186,9 @@ + } + skb_reserve(skb,2); /* 16 byte align */ + skb_put(skb,pkt_len); /* Make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)isa_bus_to_virt((lp->rx_ring[entry].base & 0x00ffffff)), +- pkt_len,0); ++ pkt_len); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/loopback.c linux-2.6.22-591/drivers/net/loopback.c +--- linux-2.6.22-570/drivers/net/loopback.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/loopback.c 2007-12-21 15:36:14.000000000 -0500 +@@ -57,6 +57,7 @@ + #include + #include + #include ++#include + + struct pcpu_lstats { + unsigned long packets; +@@ -199,39 +200,52 @@ + .get_rx_csum = always_on, + }; + ++static int loopback_net_init(struct net *net) ++{ ++ struct net_device *lo = &net->loopback_dev; + /* + * The loopback device is special. There is only one instance and + * it is statically allocated. Don't do this for other devices. 
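The loopback rewrite that follows is the template for making a device per-namespace: the one-time static registration becomes a pernet_operations pair whose callbacks run for every namespace as it is created and torn down. A minimal hedged skeleton (names are illustrative):

    static int example_net_init(struct net *net)
    {
            /* set up this namespace's private instance */
            return 0;
    }

    static void example_net_exit(struct net *net)
    {
            /* unwind example_net_init() for this namespace */
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
    };

    /* module init then reduces to:
     *     return register_pernet_device(&example_net_ops);
     */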
+ */ +-struct net_device loopback_dev = { +- .name = "lo", +- .get_stats = &get_stats, +- .mtu = (16 * 1024) + 20 + 20 + 12, +- .hard_start_xmit = loopback_xmit, +- .hard_header = eth_header, +- .hard_header_cache = eth_header_cache, +- .header_cache_update = eth_header_cache_update, +- .hard_header_len = ETH_HLEN, /* 14 */ +- .addr_len = ETH_ALEN, /* 6 */ +- .tx_queue_len = 0, +- .type = ARPHRD_LOOPBACK, /* 0x0001*/ +- .rebuild_header = eth_rebuild_header, +- .flags = IFF_LOOPBACK, +- .features = NETIF_F_SG | NETIF_F_FRAGLIST ++ strcpy(lo->name, "lo"); ++ lo->get_stats = &get_stats, ++ lo->mtu = (16 * 1024) + 20 + 20 + 12, ++ lo->hard_start_xmit = loopback_xmit, ++ lo->hard_header = eth_header, ++ lo->hard_header_cache = eth_header_cache, ++ lo->header_cache_update = eth_header_cache_update, ++ lo->hard_header_len = ETH_HLEN, /* 14 */ ++ lo->addr_len = ETH_ALEN, /* 6 */ ++ lo->tx_queue_len = 0, ++ lo->type = ARPHRD_LOOPBACK, /* 0x0001*/ ++ lo->rebuild_header = eth_rebuild_header, ++ lo->flags = IFF_LOOPBACK, ++ lo->features = NETIF_F_SG | NETIF_F_FRAGLIST + #ifdef LOOPBACK_TSO + | NETIF_F_TSO + #endif + | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA +- | NETIF_F_LLTX, +- .ethtool_ops = &loopback_ethtool_ops, ++ | NETIF_F_LLTX ++ | NETIF_F_NETNS_LOCAL, ++ lo->ethtool_ops = &loopback_ethtool_ops, ++ lo->nd_net = net; ++ return register_netdev(lo); ++} ++ ++static void loopback_net_exit(struct net *net) ++{ ++ unregister_netdev(&net->loopback_dev); ++} ++ ++static struct pernet_operations loopback_net_ops = { ++ .init = loopback_net_init, ++ .exit = loopback_net_exit, + }; + + /* Setup and register the loopback device. */ + static int __init loopback_init(void) + { +- return register_netdev(&loopback_dev); ++ return register_pernet_device(&loopback_net_ops); + }; + + module_init(loopback_init); +- +-EXPORT_SYMBOL(loopback_dev); +diff -Nurb linux-2.6.22-570/drivers/net/natsemi.c linux-2.6.22-591/drivers/net/natsemi.c +--- linux-2.6.22-570/drivers/net/natsemi.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/natsemi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2357,8 +2357,8 @@ + np->rx_dma[entry], + buflen, + PCI_DMA_FROMDEVICE); +- eth_copy_and_sum(skb, +- np->rx_skbuff[entry]->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, ++ np->rx_skbuff[entry]->data, pkt_len); + skb_put(skb, pkt_len); + pci_dma_sync_single_for_device(np->pci_dev, + np->rx_dma[entry], +diff -Nurb linux-2.6.22-570/drivers/net/ni52.c linux-2.6.22-591/drivers/net/ni52.c +--- linux-2.6.22-570/drivers/net/ni52.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ni52.c 2007-12-21 15:36:12.000000000 -0500 +@@ -936,7 +936,7 @@ + { + skb_reserve(skb,2); + skb_put(skb,totlen); +- eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0); ++ skb_copy_to_linear_data(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/ni65.c linux-2.6.22-591/drivers/net/ni65.c +--- linux-2.6.22-570/drivers/net/ni65.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/ni65.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1096,7 +1096,7 @@ + #ifdef RCV_VIA_SKB + if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) { + skb_put(skb,len); +- eth_copy_and_sum(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len,0); ++ skb_copy_to_linear_data(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len); + } + else { + struct sk_buff *skb1 = 
p->recv_skb[p->rmdnum]; +@@ -1108,7 +1108,7 @@ + } + #else + skb_put(skb,len); +- eth_copy_and_sum(skb, (unsigned char *) p->recvbounce[p->rmdnum],len,0); ++ skb_copy_to_linear_data(skb, (unsigned char *) p->recvbounce[p->rmdnum],len); + #endif + p->stats.rx_packets++; + p->stats.rx_bytes += len; +diff -Nurb linux-2.6.22-570/drivers/net/pci-skeleton.c linux-2.6.22-591/drivers/net/pci-skeleton.c +--- linux-2.6.22-570/drivers/net/pci-skeleton.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/pci-skeleton.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1567,7 +1567,7 @@ + if (skb) { + skb_reserve (skb, 2); /* 16 byte align the IP fields. */ + +- eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0); ++ skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size); + skb_put (skb, pkt_size); + + skb->protocol = eth_type_trans (skb, dev); +diff -Nurb linux-2.6.22-570/drivers/net/pcnet32.c linux-2.6.22-591/drivers/net/pcnet32.c +--- linux-2.6.22-570/drivers/net/pcnet32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/pcnet32.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1235,9 +1235,9 @@ + lp->rx_dma_addr[entry], + pkt_len, + PCI_DMA_FROMDEVICE); +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)(lp->rx_skbuff[entry]->data), +- pkt_len, 0); ++ pkt_len); + pci_dma_sync_single_for_device(lp->pci_dev, + lp->rx_dma_addr[entry], + pkt_len, +diff -Nurb linux-2.6.22-570/drivers/net/pppoe.c linux-2.6.22-591/drivers/net/pppoe.c +--- linux-2.6.22-570/drivers/net/pppoe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/pppoe.c 2007-12-21 15:36:14.000000000 -0500 +@@ -78,6 +78,7 @@ + #include + #include + ++#include + #include + + #include +@@ -210,7 +211,7 @@ + struct net_device *dev; + int ifindex; + +- dev = dev_get_by_name(sp->sa_addr.pppoe.dev); ++ dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev); + if(!dev) + return NULL; + ifindex = dev->ifindex; +@@ -295,6 +296,9 @@ + { + struct net_device *dev = (struct net_device *) ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* Only look at sockets that are using this specific device. */ + switch (event) { + case NETDEV_CHANGEMTU: +@@ -380,6 +384,9 @@ + struct pppoe_hdr *ph; + struct pppox_sock *po; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) + goto drop; + +@@ -412,6 +419,9 @@ + struct pppoe_hdr *ph; + struct pppox_sock *po; + ++ if (dev->nd_net != &init_net) ++ goto abort; ++ + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) + goto abort; + +@@ -471,12 +481,12 @@ + * Initialize a new struct sock. 
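The checks added to the pppoe handlers above are the stock guard this patch inserts into every protocol that is not yet namespace-aware: traffic arriving on a device outside the initial namespace is dropped before any processing. The guard's shape, as a hedged standalone handler:

    static int example_rcv(struct sk_buff *skb, struct net_device *dev,
                           struct packet_type *pt, struct net_device *orig_dev)
    {
            if (dev->nd_net != &init_net) {
                    kfree_skb(skb);  /* foreign namespace: drop silently */
                    return 0;
            }
            /* ... normal receive processing ... */
            return 0;
    }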
+ * + **********************************************************************/ +-static int pppoe_create(struct socket *sock) ++static int pppoe_create(struct net *net, struct socket *sock) + { + int error = -ENOMEM; + struct sock *sk; + +- sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); ++ sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1); + if (!sk) + goto out; + +@@ -588,7 +598,7 @@ + + /* Don't re-bind if sid==0 */ + if (sp->sa_addr.pppoe.sid != 0) { +- dev = dev_get_by_name(sp->sa_addr.pppoe.dev); ++ dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev); + + error = -ENODEV; + if (!dev) +@@ -1064,7 +1074,7 @@ + { + struct proc_dir_entry *p; + +- p = create_proc_entry("net/pppoe", S_IRUGO, NULL); ++ p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net); + if (!p) + return -ENOMEM; + +@@ -1135,7 +1145,7 @@ + dev_remove_pack(&pppoes_ptype); + dev_remove_pack(&pppoed_ptype); + unregister_netdevice_notifier(&pppoe_notifier); +- remove_proc_entry("net/pppoe", NULL); ++ remove_proc_entry("pppoe", init_net.proc_net); + proto_unregister(&pppoe_sk_proto); + } + +diff -Nurb linux-2.6.22-570/drivers/net/pppox.c linux-2.6.22-591/drivers/net/pppox.c +--- linux-2.6.22-570/drivers/net/pppox.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/pppox.c 2007-12-21 15:36:14.000000000 -0500 +@@ -107,10 +107,13 @@ + + EXPORT_SYMBOL(pppox_ioctl); + +-static int pppox_create(struct socket *sock, int protocol) ++static int pppox_create(struct net *net, struct socket *sock, int protocol) + { + int rc = -EPROTOTYPE; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (protocol < 0 || protocol > PX_MAX_PROTO) + goto out; + +@@ -126,7 +129,7 @@ + !try_module_get(pppox_protos[protocol]->owner)) + goto out; + +- rc = pppox_protos[protocol]->create(sock); ++ rc = pppox_protos[protocol]->create(net, sock); + + module_put(pppox_protos[protocol]->owner); + out: +diff -Nurb linux-2.6.22-570/drivers/net/r8169.c linux-2.6.22-591/drivers/net/r8169.c +--- linux-2.6.22-570/drivers/net/r8169.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/r8169.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2492,7 +2492,7 @@ + skb = dev_alloc_skb(pkt_size + align); + if (skb) { + skb_reserve(skb, (align - 1) & (unsigned long)skb->data); +- eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); ++ skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); + *sk_buff = skb; + rtl8169_mark_to_asic(desc, rx_buf_sz); + ret = 0; +diff -Nurb linux-2.6.22-570/drivers/net/saa9730.c linux-2.6.22-591/drivers/net/saa9730.c +--- linux-2.6.22-570/drivers/net/saa9730.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/saa9730.c 2007-12-21 15:36:12.000000000 -0500 +@@ -690,9 +690,9 @@ + lp->stats.rx_packets++; + skb_reserve(skb, 2); /* 16 byte align */ + skb_put(skb, len); /* make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *) pData, +- len, 0); ++ len); + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/sgiseeq.c linux-2.6.22-591/drivers/net/sgiseeq.c +--- linux-2.6.22-570/drivers/net/sgiseeq.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sgiseeq.c 2007-12-21 15:36:12.000000000 -0500 +@@ -320,7 +320,7 @@ + skb_put(skb, len); + + /* Copy out of kseg1 to avoid silly cache flush. 
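The r8169 and saa9730 hunks above, like the sgiseeq hunk this comment belongs to, repeat the mechanical substitution running through the whole patch: eth_copy_and_sum() never actually folded a checksum (its final argument was ignored), so it is replaced by an honest plain copy into the skb's linear data area. The before/after shape, hedged and simplified:

    /* was: eth_copy_and_sum(skb, buf, pkt_len, 0); */
    skb_reserve(skb, 2);                         /* keep the IP header aligned */
    skb_copy_to_linear_data(skb, buf, pkt_len);  /* identical copy, honest name */
    skb_put(skb, pkt_len);                       /* account for the new data */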
*/ +- eth_copy_and_sum(skb, pkt_pointer + 2, len, 0); ++ skb_copy_to_linear_data(skb, pkt_pointer + 2, len); + skb->protocol = eth_type_trans(skb, dev); + + /* We don't want to receive our own packets */ +diff -Nurb linux-2.6.22-570/drivers/net/shaper.c linux-2.6.22-591/drivers/net/shaper.c +--- linux-2.6.22-570/drivers/net/shaper.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/shaper.c 2007-12-21 15:36:14.000000000 -0500 +@@ -86,6 +86,7 @@ + + #include + #include ++#include + + struct shaper_cb { + unsigned long shapeclock; /* Time it should go out */ +@@ -488,7 +489,7 @@ + { + case SHAPER_SET_DEV: + { +- struct net_device *them=__dev_get_by_name(ss->ss_name); ++ struct net_device *them=__dev_get_by_name(&init_net, ss->ss_name); + if(them==NULL) + return -ENODEV; + if(sh->dev) +diff -Nurb linux-2.6.22-570/drivers/net/sis190.c linux-2.6.22-591/drivers/net/sis190.c +--- linux-2.6.22-570/drivers/net/sis190.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sis190.c 2007-12-21 15:36:12.000000000 -0500 +@@ -548,7 +548,7 @@ + skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN); + if (skb) { + skb_reserve(skb, NET_IP_ALIGN); +- eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0); ++ skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size); + *sk_buff = skb; + sis190_give_to_asic(desc, rx_buf_sz); + ret = 0; +diff -Nurb linux-2.6.22-570/drivers/net/starfire.c linux-2.6.22-591/drivers/net/starfire.c +--- linux-2.6.22-570/drivers/net/starfire.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/starfire.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1456,7 +1456,7 @@ + pci_dma_sync_single_for_cpu(np->pci_dev, + np->rx_info[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); +- eth_copy_and_sum(skb, np->rx_info[entry].skb->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len); + pci_dma_sync_single_for_device(np->pci_dev, + np->rx_info[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); +diff -Nurb linux-2.6.22-570/drivers/net/sun3_82586.c linux-2.6.22-591/drivers/net/sun3_82586.c +--- linux-2.6.22-570/drivers/net/sun3_82586.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sun3_82586.c 2007-12-21 15:36:12.000000000 -0500 +@@ -777,7 +777,7 @@ + { + skb_reserve(skb,2); + skb_put(skb,totlen); +- eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0); ++ skb_copy_to_linear_data(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen); + skb->protocol=eth_type_trans(skb,dev); + netif_rx(skb); + p->stats.rx_packets++; +diff -Nurb linux-2.6.22-570/drivers/net/sun3lance.c linux-2.6.22-591/drivers/net/sun3lance.c +--- linux-2.6.22-570/drivers/net/sun3lance.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sun3lance.c 2007-12-21 15:36:12.000000000 -0500 +@@ -853,10 +853,9 @@ + + skb_reserve( skb, 2 ); /* 16 byte align */ + skb_put( skb, pkt_len ); /* Make room */ +-// skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len); +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + PKTBUF_ADDR(head), +- pkt_len, 0); ++ pkt_len); + + skb->protocol = eth_type_trans( skb, dev ); + netif_rx( skb ); +diff -Nurb linux-2.6.22-570/drivers/net/sunbmac.c linux-2.6.22-591/drivers/net/sunbmac.c +--- linux-2.6.22-570/drivers/net/sunbmac.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sunbmac.c 2007-12-21 15:36:12.000000000 -0500 +@@ -860,7 +860,7 @@ + sbus_dma_sync_single_for_cpu(bp->bigmac_sdev, + this->rx_addr, len, + 
SBUS_DMA_FROMDEVICE); +- eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0); ++ skb_copy_to_linear_data(copy_skb, (unsigned char *)skb->data, len); + sbus_dma_sync_single_for_device(bp->bigmac_sdev, + this->rx_addr, len, + SBUS_DMA_FROMDEVICE); +diff -Nurb linux-2.6.22-570/drivers/net/sundance.c linux-2.6.22-591/drivers/net/sundance.c +--- linux-2.6.22-570/drivers/net/sundance.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sundance.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1313,7 +1313,7 @@ + np->rx_buf_sz, + PCI_DMA_FROMDEVICE); + +- eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); + pci_dma_sync_single_for_device(np->pci_dev, + desc->frag[0].addr, + np->rx_buf_sz, +diff -Nurb linux-2.6.22-570/drivers/net/sunlance.c linux-2.6.22-591/drivers/net/sunlance.c +--- linux-2.6.22-570/drivers/net/sunlance.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sunlance.c 2007-12-21 15:36:12.000000000 -0500 +@@ -549,9 +549,9 @@ + + skb_reserve(skb, 2); /* 16 byte align */ + skb_put(skb, len); /* make room */ +- eth_copy_and_sum(skb, ++ skb_copy_to_linear_data(skb, + (unsigned char *)&(ib->rx_buf [entry][0]), +- len, 0); ++ len); + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); + dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/sunqe.c linux-2.6.22-591/drivers/net/sunqe.c +--- linux-2.6.22-570/drivers/net/sunqe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/sunqe.c 2007-12-21 15:36:12.000000000 -0500 +@@ -439,8 +439,8 @@ + } else { + skb_reserve(skb, 2); + skb_put(skb, len); +- eth_copy_and_sum(skb, (unsigned char *) this_qbuf, +- len, 0); ++ skb_copy_to_linear_data(skb, (unsigned char *) this_qbuf, ++ len); + skb->protocol = eth_type_trans(skb, qep->dev); + netif_rx(skb); + qep->dev->last_rx = jiffies; +diff -Nurb linux-2.6.22-570/drivers/net/tg3.c linux-2.6.22-591/drivers/net/tg3.c +--- linux-2.6.22-570/drivers/net/tg3.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tg3.c 2007-12-21 15:36:12.000000000 -0500 +@@ -11944,12 +11944,11 @@ + * checksumming. 
+ */ + if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) { ++ dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) +- dev->features |= NETIF_F_HW_CSUM; +- else +- dev->features |= NETIF_F_IP_CSUM; +- dev->features |= NETIF_F_SG; ++ dev->features |= NETIF_F_IPV6_CSUM; ++ + tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS; + } else + tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS; +diff -Nurb linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c +--- linux-2.6.22-570/drivers/net/tokenring/lanstreamer.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tokenring/lanstreamer.c 2007-12-21 15:36:14.000000000 -0500 +@@ -250,7 +250,7 @@ + #if STREAMER_NETWORK_MONITOR + #ifdef CONFIG_PROC_FS + if (!dev_streamer) +- create_proc_read_entry("net/streamer_tr", 0, 0, ++ create_proc_read_entry("streamer_tr", 0, init_net.proc_net, + streamer_proc_info, NULL); + streamer_priv->next = dev_streamer; + dev_streamer = streamer_priv; +@@ -423,7 +423,7 @@ + } + } + if (!dev_streamer) +- remove_proc_entry("net/streamer_tr", NULL); ++ remove_proc_entry("streamer_tr", init_net.proc_net); + } + #endif + #endif +diff -Nurb linux-2.6.22-570/drivers/net/tokenring/olympic.c linux-2.6.22-591/drivers/net/tokenring/olympic.c +--- linux-2.6.22-570/drivers/net/tokenring/olympic.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tokenring/olympic.c 2007-12-21 15:36:14.000000000 -0500 +@@ -101,6 +101,7 @@ + #include + #include + ++#include + #include + + #include +@@ -268,9 +269,9 @@ + printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name); + if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */ + char proc_name[20] ; +- strcpy(proc_name,"net/olympic_") ; ++ strcpy(proc_name,"olympic_") ; + strcat(proc_name,dev->name) ; +- create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ; ++ create_proc_read_entry(proc_name,0,init_net.proc_net,olympic_proc_info,(void *)dev) ; + printk("Olympic: Network Monitor information: /proc/%s\n",proc_name); + } + return 0 ; +@@ -1752,9 +1753,9 @@ + + if (olympic_priv->olympic_network_monitor) { + char proc_name[20] ; +- strcpy(proc_name,"net/olympic_") ; ++ strcpy(proc_name,"olympic_") ; + strcat(proc_name,dev->name) ; +- remove_proc_entry(proc_name,NULL); ++ remove_proc_entry(proc_name,init_net.proc_net); + } + unregister_netdev(dev) ; + iounmap(olympic_priv->olympic_mmio) ; +diff -Nurb linux-2.6.22-570/drivers/net/tulip/interrupt.c linux-2.6.22-591/drivers/net/tulip/interrupt.c +--- linux-2.6.22-570/drivers/net/tulip/interrupt.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tulip/interrupt.c 2007-12-21 15:36:12.000000000 -0500 +@@ -197,8 +197,8 @@ + tp->rx_buffers[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); + #if ! defined(__alpha__) +- eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data, +- pkt_len, 0); ++ skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, ++ pkt_len); + skb_put(skb, pkt_len); + #else + memcpy(skb_put(skb, pkt_len), +@@ -420,8 +420,8 @@ + tp->rx_buffers[entry].mapping, + pkt_len, PCI_DMA_FROMDEVICE); + #if ! 
defined(__alpha__) +- eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data, +- pkt_len, 0); ++ skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data, ++ pkt_len); + skb_put(skb, pkt_len); + #else + memcpy(skb_put(skb, pkt_len), +diff -Nurb linux-2.6.22-570/drivers/net/tulip/winbond-840.c linux-2.6.22-591/drivers/net/tulip/winbond-840.c +--- linux-2.6.22-570/drivers/net/tulip/winbond-840.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tulip/winbond-840.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1232,7 +1232,7 @@ + pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry], + np->rx_skbuff[entry]->len, + PCI_DMA_FROMDEVICE); +- eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); + skb_put(skb, pkt_len); + pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry], + np->rx_skbuff[entry]->len, +diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_cb.c +--- linux-2.6.22-570/drivers/net/tulip/xircom_cb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tulip/xircom_cb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1208,7 +1208,7 @@ + goto out; + } + skb_reserve(skb, 2); +- eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0); ++ skb_copy_to_linear_data(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len); + skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, dev); + netif_rx(skb); +diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c +--- linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tulip/xircom_tulip_cb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1242,8 +1242,8 @@ + && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* 16 byte align the IP header */ + #if ! 
defined(__alpha__) +- eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1), +- pkt_len, 0); ++ skb_copy_to_linear_data(skb, bus_to_virt(tp->rx_ring[entry].buffer1), ++ pkt_len); + skb_put(skb, pkt_len); + #else + memcpy(skb_put(skb, pkt_len), +diff -Nurb linux-2.6.22-570/drivers/net/tun.c linux-2.6.22-591/drivers/net/tun.c +--- linux-2.6.22-570/drivers/net/tun.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/tun.c 2007-12-21 15:36:14.000000000 -0500 +@@ -62,6 +62,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -432,6 +433,7 @@ + init_waitqueue_head(&tun->read_wait); + + tun->owner = -1; ++ tun->group = -1; + + SET_MODULE_OWNER(dev); + dev->open = tun_net_open; +@@ -467,11 +469,14 @@ + return -EBUSY; + + /* Check permissions */ +- if (tun->owner != -1 && +- current->euid != tun->owner && !capable(CAP_NET_ADMIN)) ++ if (((tun->owner != -1 && ++ current->euid != tun->owner) || ++ (tun->group != -1 && ++ current->egid != tun->group)) && ++ !capable(CAP_NET_ADMIN)) + return -EPERM; + } +- else if (__dev_get_by_name(ifr->ifr_name)) ++ else if (__dev_get_by_name(&init_net, ifr->ifr_name)) + return -EINVAL; + else { + char *name; +@@ -610,6 +615,13 @@ + DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner); + break; + ++ case TUNSETGROUP: ++ /* Set group of the device */ ++ tun->group= (gid_t) arg; ++ ++ DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group); ++ break; ++ + case TUNSETLINK: + /* Only allow setting the type when the interface is down */ + if (tun->dev->flags & IFF_UP) { +diff -Nurb linux-2.6.22-570/drivers/net/typhoon.c linux-2.6.22-591/drivers/net/typhoon.c +--- linux-2.6.22-570/drivers/net/typhoon.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/typhoon.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1703,7 +1703,7 @@ + pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, + PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); +- eth_copy_and_sum(new_skb, skb->data, pkt_len, 0); ++ skb_copy_to_linear_data(new_skb, skb->data, pkt_len); + pci_dma_sync_single_for_device(tp->pdev, dma_addr, + PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); +diff -Nurb linux-2.6.22-570/drivers/net/usb/catc.c linux-2.6.22-591/drivers/net/usb/catc.c +--- linux-2.6.22-570/drivers/net/usb/catc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/usb/catc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -255,7 +255,7 @@ + if (!(skb = dev_alloc_skb(pkt_len))) + return; + +- eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0); ++ skb_copy_to_linear_data(skb, pkt_start + pkt_offset, pkt_len); + skb_put(skb, pkt_len); + + skb->protocol = eth_type_trans(skb, catc->netdev); +diff -Nurb linux-2.6.22-570/drivers/net/usb/kaweth.c linux-2.6.22-591/drivers/net/usb/kaweth.c +--- linux-2.6.22-570/drivers/net/usb/kaweth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/usb/kaweth.c 2007-12-21 15:36:12.000000000 -0500 +@@ -635,7 +635,7 @@ + + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + +- eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0); ++ skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len); + + skb_put(skb, pkt_len); + +diff -Nurb linux-2.6.22-570/drivers/net/via-rhine.c linux-2.6.22-591/drivers/net/via-rhine.c +--- linux-2.6.22-570/drivers/net/via-rhine.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/via-rhine.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1492,9 +1492,9 @@ + rp->rx_buf_sz, + PCI_DMA_FROMDEVICE); + +- eth_copy_and_sum(skb, ++ 
skb_copy_to_linear_data(skb, + rp->rx_skbuff[entry]->data, +- pkt_len, 0); ++ pkt_len); + skb_put(skb, pkt_len); + pci_dma_sync_single_for_device(rp->pdev, + rp->rx_skbuff_dma[entry], +diff -Nurb linux-2.6.22-570/drivers/net/wan/dlci.c linux-2.6.22-591/drivers/net/wan/dlci.c +--- linux-2.6.22-570/drivers/net/wan/dlci.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wan/dlci.c 2007-12-21 15:36:14.000000000 -0500 +@@ -361,7 +361,7 @@ + + + /* validate slave device */ +- slave = dev_get_by_name(dlci->devname); ++ slave = dev_get_by_name(&init_net, dlci->devname); + if (!slave) + return -ENODEV; + +@@ -427,7 +427,7 @@ + int err; + + /* validate slave device */ +- master = __dev_get_by_name(dlci->devname); ++ master = __dev_get_by_name(&init_net, dlci->devname); + if (!master) + return(-ENODEV); + +@@ -513,6 +513,9 @@ + { + struct net_device *dev = (struct net_device *) ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_UNREGISTER) { + struct dlci_local *dlp; + +diff -Nurb linux-2.6.22-570/drivers/net/wan/hdlc.c linux-2.6.22-591/drivers/net/wan/hdlc.c +--- linux-2.6.22-570/drivers/net/wan/hdlc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wan/hdlc.c 2007-12-21 15:36:14.000000000 -0500 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + + static const char* version = "HDLC support module revision 1.21"; +@@ -66,6 +67,12 @@ + struct packet_type *p, struct net_device *orig_dev) + { + struct hdlc_device_desc *desc = dev_to_desc(dev); ++ ++ if (dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + if (desc->netif_rx) + return desc->netif_rx(skb); + +@@ -102,6 +109,9 @@ + unsigned long flags; + int on; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (dev->get_stats != hdlc_get_stats) + return NOTIFY_DONE; /* not an HDLC device */ + +diff -Nurb linux-2.6.22-570/drivers/net/wan/lapbether.c linux-2.6.22-591/drivers/net/wan/lapbether.c +--- linux-2.6.22-570/drivers/net/wan/lapbether.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wan/lapbether.c 2007-12-21 15:36:14.000000000 -0500 +@@ -91,6 +91,9 @@ + int len, err; + struct lapbethdev *lapbeth; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NET_RX_DROP; + +@@ -391,6 +394,9 @@ + struct lapbethdev *lapbeth; + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (!dev_is_ethdev(dev)) + return NOTIFY_DONE; + +diff -Nurb linux-2.6.22-570/drivers/net/wan/sbni.c linux-2.6.22-591/drivers/net/wan/sbni.c +--- linux-2.6.22-570/drivers/net/wan/sbni.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wan/sbni.c 2007-12-21 15:36:14.000000000 -0500 +@@ -54,6 +54,7 @@ + #include + #include + ++#include + #include + + #include +@@ -1362,7 +1363,7 @@ + + if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name )) + return -EFAULT; +- slave_dev = dev_get_by_name( slave_name ); ++ slave_dev = dev_get_by_name(&init_net, slave_name ); + if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { + printk( KERN_ERR "%s: trying to enslave non-active " + "device %s\n", dev->name, slave_name ); +diff -Nurb linux-2.6.22-570/drivers/net/wan/syncppp.c linux-2.6.22-591/drivers/net/wan/syncppp.c +--- linux-2.6.22-570/drivers/net/wan/syncppp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wan/syncppp.c 2007-12-21 15:36:14.000000000 -0500 +@@ -51,6 +51,7 @@ + #include + 
#include + ++#include + #include + + #include +@@ -1445,6 +1446,11 @@ + + static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) + { ++ if (dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NET_RX_DROP; + sppp_input(dev,skb); +diff -Nurb linux-2.6.22-570/drivers/net/wireless/airo.c linux-2.6.22-591/drivers/net/wireless/airo.c +--- linux-2.6.22-570/drivers/net/wireless/airo.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wireless/airo.c 2007-12-21 15:36:12.000000000 -0500 +@@ -3079,6 +3079,7 @@ + struct airo_info *ai = dev->priv; + int locked; + ++ set_freezable(); + while(1) { + /* make swsusp happy with our thread */ + try_to_freeze(); +diff -Nurb linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c +--- linux-2.6.22-570/drivers/net/wireless/hostap/hostap_main.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wireless/hostap/hostap_main.c 2007-12-21 15:36:14.000000000 -0500 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1094,8 +1095,8 @@ + + static int __init hostap_init(void) + { +- if (proc_net != NULL) { +- hostap_proc = proc_mkdir("hostap", proc_net); ++ if (init_net.proc_net != NULL) { ++ hostap_proc = proc_mkdir("hostap", init_net.proc_net); + if (!hostap_proc) + printk(KERN_WARNING "Failed to mkdir " + "/proc/net/hostap\n"); +@@ -1110,7 +1111,7 @@ + { + if (hostap_proc != NULL) { + hostap_proc = NULL; +- remove_proc_entry("hostap", proc_net); ++ remove_proc_entry("hostap", init_net.proc_net); + } + } + +diff -Nurb linux-2.6.22-570/drivers/net/wireless/libertas/main.c linux-2.6.22-591/drivers/net/wireless/libertas/main.c +--- linux-2.6.22-570/drivers/net/wireless/libertas/main.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wireless/libertas/main.c 2007-12-21 15:36:12.000000000 -0500 +@@ -613,6 +613,7 @@ + + init_waitqueue_entry(&wait, current); + ++ set_freezable(); + for (;;) { + lbs_deb_thread( "main-thread 111: intcounter=%d " + "currenttxskb=%p dnld_sent=%d\n", +diff -Nurb linux-2.6.22-570/drivers/net/wireless/strip.c linux-2.6.22-591/drivers/net/wireless/strip.c +--- linux-2.6.22-570/drivers/net/wireless/strip.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wireless/strip.c 2007-12-21 15:36:14.000000000 -0500 +@@ -107,6 +107,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1971,7 +1972,7 @@ + sizeof(zero_address))) { + struct net_device *dev; + read_lock_bh(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (dev->type == strip_info->dev->type && + !memcmp(dev->dev_addr, + &strip_info->true_dev_addr, +@@ -2787,7 +2788,7 @@ + /* + * Register the status file with /proc + */ +- proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops); ++ proc_net_fops_create(&init_net, "strip", S_IFREG | S_IRUGO, &strip_seq_fops); + + return status; + } +@@ -2809,7 +2810,7 @@ + } + + /* Unregister with the /proc/net file here. 
*/ +- proc_net_remove("strip"); ++ proc_net_remove(&init_net, "strip"); + + if ((i = tty_unregister_ldisc(N_STRIP))) + printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i); +diff -Nurb linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c +--- linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/wireless/wl3501_cs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1011,7 +1011,7 @@ + } else { + skb->dev = dev; + skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */ +- eth_copy_and_sum(skb, (unsigned char *)&sig.daddr, 12, 0); ++ skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12); + wl3501_receive(this, skb->data, pkt_len); + skb_put(skb, pkt_len); + skb->protocol = eth_type_trans(skb, dev); +diff -Nurb linux-2.6.22-570/drivers/net/xen-netfront.c linux-2.6.22-591/drivers/net/xen-netfront.c +--- linux-2.6.22-570/drivers/net/xen-netfront.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/net/xen-netfront.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,1995 @@ ++/* ++ * Virtual network driver for conversing with remote driver backends. ++ * ++ * Copyright (c) 2002-2005, K A Fraser ++ * Copyright (c) 2005, XenSource Ltd ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static struct ethtool_ops xennet_ethtool_ops; ++ ++struct netfront_cb { ++ struct page *page; ++ unsigned offset; ++}; ++ ++#define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb)) ++ ++/* ++ * Mutually-exclusive module options to select receive data path: ++ * copy : Packets are copied by network backend into local memory ++ * flip : Page containing packet data is transferred to our ownership ++ * For fully-virtualised guests there is no option - copying must be used. ++ * For paravirtualised guests, flipping is the default. 
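++ *
++ * (Usage sketch, hedged: assuming this driver builds as the module
++ * "xen-netfront", the param_set/param_get handlers below would let an
++ * administrator pick the datapath at load time, e.g.
++ *
++ *	modprobe xen-netfront rx_mode=copy
++ *
++ * and the 0400 permission makes the choice readable but not writable
++ * through sysfs. The module name here is an assumption from the file
++ * name, not something this patch states.)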
++ */
++typedef enum rx_mode {
++	RX_COPY = 0,
++	RX_FLIP = 1,
++} rx_mode_t;
++
++static enum rx_mode rx_mode = RX_FLIP;
++
++#define param_check_rx_mode_t(name, p) __param_check(name, p, rx_mode_t)
++
++static int param_set_rx_mode_t(const char *val, struct kernel_param *kp)
++{
++	enum rx_mode *rxmp = kp->arg;
++	int ret = 0;
++
++	if (strcmp(val, "copy") == 0)
++		*rxmp = RX_COPY;
++	else if (strcmp(val, "flip") == 0)
++		*rxmp = RX_FLIP;
++	else
++		ret = -EINVAL;
++
++	return ret;
++}
++
++static int param_get_rx_mode_t(char *buffer, struct kernel_param *kp)
++{
++	enum rx_mode *rxmp = kp->arg;
++
++	return sprintf(buffer, "%s", *rxmp == RX_COPY ? "copy" : "flip");
++}
++
++MODULE_PARM_DESC(rx_mode, "How to get packets from card: \"copy\" or \"flip\"");
++module_param(rx_mode, rx_mode_t, 0400);
++
++#define RX_COPY_THRESHOLD 256
++
++#define GRANT_INVALID_REF 0
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++
++struct netfront_info {
++	struct list_head list;
++	struct net_device *netdev;
++
++	struct net_device_stats stats;
++
++	struct xen_netif_tx_front_ring tx;
++	struct xen_netif_rx_front_ring rx;
++
++	spinlock_t tx_lock;
++	spinlock_t rx_lock;
++
++	unsigned int evtchn;
++	unsigned int copying_receiver;
++
++	/* Receive-ring batched refills. */
++#define RX_MIN_TARGET 8
++#define RX_DFL_MIN_TARGET 64
++#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++	unsigned rx_min_target, rx_max_target, rx_target;
++	struct sk_buff_head rx_batch;
++
++	struct timer_list rx_refill_timer;
++
++	/*
++	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
++	 * are linked from tx_skb_freelist through skb_entry.link.
++	 *
++	 * NB. Freelist index entries are always going to be less than
++	 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
++	 * greater than PAGE_OFFSET: we use this property to distinguish
++	 * them.
++	 */
++	union skb_entry {
++		struct sk_buff *skb;
++		unsigned link;
++	} tx_skbs[NET_TX_RING_SIZE];
++	grant_ref_t gref_tx_head;
++	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
++	unsigned tx_skb_freelist;
++
++	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
++	grant_ref_t gref_rx_head;
++	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
++
++	struct xenbus_device *xbdev;
++	int tx_ring_ref;
++	int rx_ring_ref;
++
++	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
++	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
++	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++};
++
++struct netfront_rx_info {
++	struct xen_netif_rx_response rx;
++	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++};
++
++/*
++ * Access macros for acquiring/freeing slots in tx_skbs[].
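++ *
++ * (Illustrative sketch, not normative: xennet_create_dev() below seeds
++ * the freelist with link i -> i+1 and tx_skb_freelist = 0, so
++ *
++ *	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++ *
++ * pops index 0 and leaves the head at 1, while add_id_to_freelist()
++ * pushes an index back onto the head. Because .skb and .link share
++ * storage, the "< PAGE_OFFSET" test in xennet_release_tx_bufs() is
++ * what tells a small freelist link apart from a live skb pointer.)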
++ */ ++ ++static void add_id_to_freelist(unsigned *head, union skb_entry *list, unsigned short id) ++{ ++ list[id].link = *head; ++ *head = id; ++} ++ ++static unsigned short get_id_from_freelist(unsigned *head, union skb_entry *list) ++{ ++ unsigned int id = *head; ++ *head = list[id].link; ++ return id; ++} ++ ++static int xennet_rxidx(RING_IDX idx) ++{ ++ return idx & (NET_RX_RING_SIZE - 1); ++} ++ ++static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np, ++ RING_IDX ri) ++{ ++ int i = xennet_rxidx(ri); ++ struct sk_buff *skb = np->rx_skbs[i]; ++ np->rx_skbs[i] = NULL; ++ return skb; ++} ++ ++static grant_ref_t xennet_get_rx_ref(struct netfront_info *np, ++ RING_IDX ri) ++{ ++ int i = xennet_rxidx(ri); ++ grant_ref_t ref = np->grant_rx_ref[i]; ++ np->grant_rx_ref[i] = GRANT_INVALID_REF; ++ return ref; ++} ++ ++#ifdef CONFIG_SYSFS ++static int xennet_sysfs_addif(struct net_device *netdev); ++static void xennet_sysfs_delif(struct net_device *netdev); ++#else /* !CONFIG_SYSFS */ ++#define xennet_sysfs_addif(dev) (0) ++#define xennet_sysfs_delif(dev) do { } while(0) ++#endif ++ ++static int xennet_can_sg(struct net_device *dev) ++{ ++ return dev->features & NETIF_F_SG; ++} ++ ++ ++static void rx_refill_timeout(unsigned long data) ++{ ++ struct net_device *dev = (struct net_device *)data; ++ netif_rx_schedule(dev); ++} ++ ++static int netfront_tx_slot_available(struct netfront_info *np) ++{ ++ return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < ++ (TX_MAX_TARGET - MAX_SKB_FRAGS - 2)); ++} ++ ++static void xennet_maybe_wake_tx(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ ++ if (unlikely(netif_queue_stopped(dev)) && ++ netfront_tx_slot_available(np) && ++ likely(netif_running(dev))) ++ netif_wake_queue(dev); ++} ++ ++static void xennet_alloc_rx_buffers(struct net_device *dev) ++{ ++ unsigned short id; ++ struct netfront_info *np = netdev_priv(dev); ++ struct sk_buff *skb; ++ struct page *page; ++ int i, batch_target, notify; ++ RING_IDX req_prod = np->rx.req_prod_pvt; ++ struct xen_memory_reservation reservation; ++ grant_ref_t ref; ++ unsigned long pfn; ++ void *vaddr; ++ int nr_flips; ++ struct xen_netif_rx_request *req; ++ ++ if (unlikely(!netif_carrier_ok(dev))) ++ return; ++ ++ /* ++ * Allocate skbuffs greedily, even though we batch updates to the ++ * receive ring. This creates a less bursty demand on the memory ++ * allocator, so should reduce the chance of failed allocation requests ++ * both for ourself and for other kernel subsystems. ++ */ ++ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons); ++ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) { ++ skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD, ++ GFP_ATOMIC | __GFP_NOWARN); ++ if (unlikely(!skb)) ++ goto no_skb; ++ ++ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN); ++ if (!page) { ++ kfree_skb(skb); ++no_skb: ++ /* Any skbuffs queued for refill? Force them out. */ ++ if (i != 0) ++ goto refill; ++ /* Could not allocate any skbuffs. Try again later. */ ++ mod_timer(&np->rx_refill_timer, ++ jiffies + (HZ/10)); ++ break; ++ } ++ ++ skb_shinfo(skb)->frags[0].page = page; ++ skb_shinfo(skb)->nr_frags = 1; ++ __skb_queue_tail(&np->rx_batch, skb); ++ } ++ ++ /* Is the batch large enough to be worthwhile? */ ++ if (i < (np->rx_target/2)) { ++ if (req_prod > np->rx.sring->req_prod) ++ goto push; ++ return; ++ } ++ ++ /* Adjust our fill target if we risked running out of buffers. 
*/ ++ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) && ++ ((np->rx_target *= 2) > np->rx_max_target)) ++ np->rx_target = np->rx_max_target; ++ ++ refill: ++ for (nr_flips = i = 0; ; i++) { ++ if ((skb = __skb_dequeue(&np->rx_batch)) == NULL) ++ break; ++ ++ skb->dev = dev; ++ ++ id = xennet_rxidx(req_prod + i); ++ ++ BUG_ON(np->rx_skbs[id]); ++ np->rx_skbs[id] = skb; ++ ++ ref = gnttab_claim_grant_reference(&np->gref_rx_head); ++ BUG_ON((signed short)ref < 0); ++ np->grant_rx_ref[id] = ref; ++ ++ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page); ++ vaddr = page_address(skb_shinfo(skb)->frags[0].page); ++ ++ req = RING_GET_REQUEST(&np->rx, req_prod + i); ++ if (!np->copying_receiver) { ++ gnttab_grant_foreign_transfer_ref(ref, ++ np->xbdev->otherend_id, ++ pfn); ++ np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn); ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ /* Remove this page before passing ++ * back to Xen. */ ++ set_phys_to_machine(pfn, INVALID_P2M_ENTRY); ++ MULTI_update_va_mapping(np->rx_mcl+i, ++ (unsigned long)vaddr, ++ __pte(0), 0); ++ } ++ nr_flips++; ++ } else { ++ gnttab_grant_foreign_access_ref(ref, ++ np->xbdev->otherend_id, ++ pfn_to_mfn(pfn), ++ 0); ++ } ++ ++ req->id = id; ++ req->gref = ref; ++ } ++ ++ if (nr_flips != 0) { ++ reservation.extent_start = np->rx_pfn_array; ++ reservation.nr_extents = nr_flips; ++ reservation.extent_order = 0; ++ reservation.address_bits = 0; ++ reservation.domid = DOMID_SELF; ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ /* After all PTEs have been zapped, flush the TLB. */ ++ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = ++ UVMF_TLB_FLUSH|UVMF_ALL; ++ ++ /* Give away a batch of pages. */ ++ np->rx_mcl[i].op = __HYPERVISOR_memory_op; ++ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation; ++ np->rx_mcl[i].args[1] = (unsigned long)&reservation; ++ ++ /* Zap PTEs and give away pages in one big ++ * multicall. */ ++ (void)HYPERVISOR_multicall(np->rx_mcl, i+1); ++ ++ /* Check return status of HYPERVISOR_memory_op(). */ ++ if (unlikely(np->rx_mcl[i].result != i)) ++ panic("Unable to reduce memory reservation\n"); ++ } else { ++ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation, ++ &reservation) != i) ++ panic("Unable to reduce memory reservation\n"); ++ } ++ } else { ++ wmb(); ++ } ++ ++ /* Above is a suitable barrier to ensure backend will see requests. */ ++ np->rx.req_prod_pvt = req_prod + i; ++ push: ++ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify); ++ if (notify) ++ notify_remote_via_irq(np->netdev->irq); ++} ++ ++static int xennet_open(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ ++ memset(&np->stats, 0, sizeof(np->stats)); ++ ++ spin_lock_bh(&np->rx_lock); ++ if (netif_carrier_ok(dev)) { ++ xennet_alloc_rx_buffers(dev); ++ np->rx.sring->rsp_event = np->rx.rsp_cons + 1; ++ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) ++ netif_rx_schedule(dev); ++ } ++ spin_unlock_bh(&np->rx_lock); ++ ++ xennet_maybe_wake_tx(dev); ++ ++ return 0; ++} ++ ++static void xennet_tx_buf_gc(struct net_device *dev) ++{ ++ RING_IDX cons, prod; ++ unsigned short id; ++ struct netfront_info *np = netdev_priv(dev); ++ struct sk_buff *skb; ++ ++ BUG_ON(!netif_carrier_ok(dev)); ++ ++ do { ++ prod = np->tx.sring->rsp_prod; ++ rmb(); /* Ensure we see responses up to 'rp'. 
 */
++
++		for (cons = np->tx.rsp_cons; cons != prod; cons++) {
++			struct xen_netif_tx_response *txrsp;
++
++			txrsp = RING_GET_RESPONSE(&np->tx, cons);
++			if (txrsp->status == NETIF_RSP_NULL)
++				continue;
++
++			id = txrsp->id;
++			skb = np->tx_skbs[id].skb;
++			if (unlikely(gnttab_query_foreign_access(
++				np->grant_tx_ref[id]) != 0)) {
++				printk(KERN_ALERT "xennet_tx_buf_gc: warning "
++				       "-- grant still in use by backend "
++				       "domain.\n");
++				BUG();
++			}
++			gnttab_end_foreign_access_ref(
++				np->grant_tx_ref[id], GNTMAP_readonly);
++			gnttab_release_grant_reference(
++				&np->gref_tx_head, np->grant_tx_ref[id]);
++			np->grant_tx_ref[id] = GRANT_INVALID_REF;
++			add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
++			dev_kfree_skb_irq(skb);
++		}
++
++		np->tx.rsp_cons = prod;
++
++		/*
++		 * Set a new event, then check for race with update of tx_cons.
++		 * Note that it is essential to schedule a callback, no matter
++		 * how few buffers are pending. Even if there is space in the
++		 * transmit ring, higher layers may be blocked because too much
++		 * data is outstanding: in such cases notification from Xen is
++		 * likely to be the only kick that we'll get.
++		 */
++		np->tx.sring->rsp_event =
++			prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
++		mb();
++	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
++
++	xennet_maybe_wake_tx(dev);
++}
++
++static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
++			      struct xen_netif_tx_request *tx)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	char *data = skb->data;
++	unsigned long mfn;
++	RING_IDX prod = np->tx.req_prod_pvt;
++	int frags = skb_shinfo(skb)->nr_frags;
++	unsigned int offset = offset_in_page(data);
++	unsigned int len = skb_headlen(skb);
++	unsigned int id;
++	grant_ref_t ref;
++	int i;
++
++	/* While the header overlaps a page boundary (including being
++	   larger than a page), split it into page-sized chunks. */
++	while (len > PAGE_SIZE - offset) {
++		tx->size = PAGE_SIZE - offset;
++		tx->flags |= NETTXF_more_data;
++		len -= tx->size;
++		data += tx->size;
++		offset = 0;
++
++		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++		np->tx_skbs[id].skb = skb_get(skb);
++		tx = RING_GET_REQUEST(&np->tx, prod++);
++		tx->id = id;
++		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++		BUG_ON((signed short)ref < 0);
++
++		mfn = virt_to_mfn(data);
++		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++						mfn, GNTMAP_readonly);
++
++		tx->gref = np->grant_tx_ref[id] = ref;
++		tx->offset = offset;
++		tx->size = len;
++		tx->flags = 0;
++	}
++
++	/* Grant backend access to each skb fragment page.
*/ ++ for (i = 0; i < frags; i++) { ++ skb_frag_t *frag = skb_shinfo(skb)->frags + i; ++ ++ tx->flags |= NETTXF_more_data; ++ ++ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); ++ np->tx_skbs[id].skb = skb_get(skb); ++ tx = RING_GET_REQUEST(&np->tx, prod++); ++ tx->id = id; ++ ref = gnttab_claim_grant_reference(&np->gref_tx_head); ++ BUG_ON((signed short)ref < 0); ++ ++ mfn = pfn_to_mfn(page_to_pfn(frag->page)); ++ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id, ++ mfn, GNTMAP_readonly); ++ ++ tx->gref = np->grant_tx_ref[id] = ref; ++ tx->offset = frag->page_offset; ++ tx->size = frag->size; ++ tx->flags = 0; ++ } ++ ++ np->tx.req_prod_pvt = prod; ++} ++ ++static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) ++{ ++ unsigned short id; ++ struct netfront_info *np = netdev_priv(dev); ++ struct xen_netif_tx_request *tx; ++ struct xen_netif_extra_info *extra; ++ char *data = skb->data; ++ RING_IDX i; ++ grant_ref_t ref; ++ unsigned long mfn; ++ int notify; ++ int frags = skb_shinfo(skb)->nr_frags; ++ unsigned int offset = offset_in_page(data); ++ unsigned int len = skb_headlen(skb); ++ ++ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; ++ if (unlikely(frags > MAX_SKB_FRAGS + 1)) { ++ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", ++ frags); ++ dump_stack(); ++ goto drop; ++ } ++ ++ spin_lock_irq(&np->tx_lock); ++ ++ if (unlikely(!netif_carrier_ok(dev) || ++ (frags > 1 && !xennet_can_sg(dev)) || ++ netif_needs_gso(dev, skb))) { ++ spin_unlock_irq(&np->tx_lock); ++ goto drop; ++ } ++ ++ i = np->tx.req_prod_pvt; ++ ++ id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs); ++ np->tx_skbs[id].skb = skb; ++ ++ tx = RING_GET_REQUEST(&np->tx, i); ++ ++ tx->id = id; ++ ref = gnttab_claim_grant_reference(&np->gref_tx_head); ++ BUG_ON((signed short)ref < 0); ++ mfn = virt_to_mfn(data); ++ gnttab_grant_foreign_access_ref( ++ ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly); ++ tx->gref = np->grant_tx_ref[id] = ref; ++ tx->offset = offset; ++ tx->size = len; ++ extra = NULL; ++ ++ tx->flags = 0; ++ if (skb->ip_summed == CHECKSUM_PARTIAL) ++ /* local packet? */ ++ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; ++ else if (skb->ip_summed == CHECKSUM_UNNECESSARY) ++ /* remote but checksummed. 
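++	 *
++	 * (Flag-mapping note, as the two branches here encode it:
++	 * CHECKSUM_PARTIAL becomes NETTXF_csum_blank|NETTXF_data_validated,
++	 * i.e. the backend still has to finish the checksum, while
++	 * CHECKSUM_UNNECESSARY becomes NETTXF_data_validated alone,
++	 * i.e. already verified, nothing left to compute.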
*/ ++ tx->flags |= NETTXF_data_validated; ++ ++ if (skb_shinfo(skb)->gso_size) { ++ struct xen_netif_extra_info *gso; ++ ++ gso = (struct xen_netif_extra_info *) ++ RING_GET_REQUEST(&np->tx, ++i); ++ ++ if (extra) ++ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; ++ else ++ tx->flags |= NETTXF_extra_info; ++ ++ gso->u.gso.size = skb_shinfo(skb)->gso_size; ++ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; ++ gso->u.gso.pad = 0; ++ gso->u.gso.features = 0; ++ ++ gso->type = XEN_NETIF_EXTRA_TYPE_GSO; ++ gso->flags = 0; ++ extra = gso; ++ } ++ ++ np->tx.req_prod_pvt = i + 1; ++ ++ xennet_make_frags(skb, dev, tx); ++ tx->size = skb->len; ++ ++ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify); ++ if (notify) ++ notify_remote_via_irq(np->netdev->irq); ++ ++ xennet_tx_buf_gc(dev); ++ ++ if (!netfront_tx_slot_available(np)) ++ netif_stop_queue(dev); ++ ++ spin_unlock_irq(&np->tx_lock); ++ ++ np->stats.tx_bytes += skb->len; ++ np->stats.tx_packets++; ++ ++ return 0; ++ ++ drop: ++ np->stats.tx_dropped++; ++ dev_kfree_skb(skb); ++ return 0; ++} ++ ++static int xennet_close(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ netif_stop_queue(np->netdev); ++ return 0; ++} ++ ++static struct net_device_stats *xennet_get_stats(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ return &np->stats; ++} ++ ++static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb, ++ grant_ref_t ref) ++{ ++ int new = xennet_rxidx(np->rx.req_prod_pvt); ++ ++ BUG_ON(np->rx_skbs[new]); ++ np->rx_skbs[new] = skb; ++ np->grant_rx_ref[new] = ref; ++ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; ++ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; ++ np->rx.req_prod_pvt++; ++} ++ ++static int xennet_get_extras(struct netfront_info *np, ++ struct xen_netif_extra_info *extras, ++ RING_IDX rp) ++ ++{ ++ struct xen_netif_extra_info *extra; ++ struct device *dev = &np->netdev->dev; ++ RING_IDX cons = np->rx.rsp_cons; ++ int err = 0; ++ ++ do { ++ struct sk_buff *skb; ++ grant_ref_t ref; ++ ++ if (unlikely(cons + 1 == rp)) { ++ if (net_ratelimit()) ++ dev_warn(dev, "Missing extra info\n"); ++ err = -EBADR; ++ break; ++ } ++ ++ extra = (struct xen_netif_extra_info *) ++ RING_GET_RESPONSE(&np->rx, ++cons); ++ ++ if (unlikely(!extra->type || ++ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { ++ if (net_ratelimit()) ++ dev_warn(dev, "Invalid extra type: %d\n", ++ extra->type); ++ err = -EINVAL; ++ } else { ++ memcpy(&extras[extra->type - 1], extra, ++ sizeof(*extra)); ++ } ++ ++ skb = xennet_get_rx_skb(np, cons); ++ ref = xennet_get_rx_ref(np, cons); ++ xennet_move_rx_slot(np, skb, ref); ++ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); ++ ++ np->rx.rsp_cons = cons; ++ return err; ++} ++ ++static int xennet_get_responses(struct netfront_info *np, ++ struct netfront_rx_info *rinfo, RING_IDX rp, ++ struct sk_buff_head *list, ++ int *pages_flipped_p) ++{ ++ int pages_flipped = *pages_flipped_p; ++ struct mmu_update *mmu; ++ struct multicall_entry *mcl; ++ struct xen_netif_rx_response *rx = &rinfo->rx; ++ struct xen_netif_extra_info *extras = rinfo->extras; ++ struct device *dev = &np->netdev->dev; ++ RING_IDX cons = np->rx.rsp_cons; ++ struct sk_buff *skb = xennet_get_rx_skb(np, cons); ++ grant_ref_t ref = xennet_get_rx_ref(np, cons); ++ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD); ++ int frags = 1; ++ int err = 0; ++ unsigned long ret; ++ ++ if (rx->flags & NETRXF_extra_info) { ++ err = xennet_get_extras(np, extras, rp); ++ cons = 
np->rx.rsp_cons;
++	}
++
++	for (;;) {
++		unsigned long mfn;
++
++		if (unlikely(rx->status < 0 ||
++			     rx->offset + rx->status > PAGE_SIZE)) {
++			if (net_ratelimit())
++				dev_warn(dev, "rx->offset: %x, size: %u\n",
++					 rx->offset, rx->status);
++			xennet_move_rx_slot(np, skb, ref);
++			err = -EINVAL;
++			goto next;
++		}
++
++		/*
++		 * This definitely indicates a bug, either in this driver or in
++		 * the backend driver. In future this should flag the bad
++		 * situation to the system controller to reboot the backend.
++		 */
++		if (ref == GRANT_INVALID_REF) {
++			if (net_ratelimit())
++				dev_warn(dev, "Bad rx response id %d.\n",
++					 rx->id);
++			err = -EINVAL;
++			goto next;
++		}
++
++		if (!np->copying_receiver) {
++			/* Memory pressure, insufficient buffer
++			 * headroom, ... */
++			mfn = gnttab_end_foreign_transfer_ref(ref);
++			if (!mfn) {
++				if (net_ratelimit())
++					dev_warn(dev, "Unfulfilled rx req "
++						 "(id=%d, st=%d).\n",
++						 rx->id, rx->status);
++				xennet_move_rx_slot(np, skb, ref);
++				err = -ENOMEM;
++				goto next;
++			}
++
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				/* Remap the page. */
++				struct page *page =
++					skb_shinfo(skb)->frags[0].page;
++				unsigned long pfn = page_to_pfn(page);
++				void *vaddr = page_address(page);
++
++				mcl = np->rx_mcl + pages_flipped;
++				mmu = np->rx_mmu + pages_flipped;
++
++				MULTI_update_va_mapping(mcl,
++							(unsigned long)vaddr,
++							mfn_pte(mfn, PAGE_KERNEL),
++							0);
++				mmu->ptr = ((u64)mfn << PAGE_SHIFT)
++					| MMU_MACHPHYS_UPDATE;
++				mmu->val = pfn;
++
++				set_phys_to_machine(pfn, mfn);
++			}
++			pages_flipped++;
++		} else {
++			ret = gnttab_end_foreign_access_ref(ref, 0);
++			BUG_ON(!ret);
++		}
++
++		gnttab_release_grant_reference(&np->gref_rx_head, ref);
++
++		__skb_queue_tail(list, skb);
++
++next:
++		if (!(rx->flags & NETRXF_more_data))
++			break;
++
++		if (cons + frags == rp) {
++			if (net_ratelimit())
++				dev_warn(dev, "Need more frags\n");
++			err = -ENOENT;
++			break;
++		}
++
++		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
++		skb = xennet_get_rx_skb(np, cons + frags);
++		ref = xennet_get_rx_ref(np, cons + frags);
++		frags++;
++	}
++
++	if (unlikely(frags > max)) {
++		if (net_ratelimit())
++			dev_warn(dev, "Too many frags\n");
++		err = -E2BIG;
++	}
++
++	if (unlikely(err))
++		np->rx.rsp_cons = cons + frags;
++
++	*pages_flipped_p = pages_flipped;
++
++	return err;
++}
++
++static int xennet_set_skb_gso(struct sk_buff *skb,
++			      struct xen_netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		if (net_ratelimit())
++			printk(KERN_WARNING "GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		if (net_ratelimit())
++			printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed.
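++	 *
++	 * (Note, hedged: SKB_GSO_DODGY marks the GSO metadata as coming
++	 * from an untrusted source - here the backend domain - so the
++	 * stack re-validates the headers and recomputes gso_segs itself
++	 * before segmenting.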
*/ ++ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; ++ skb_shinfo(skb)->gso_segs = 0; ++ ++ return 0; ++} ++ ++static RING_IDX xennet_fill_frags(struct netfront_info *np, ++ struct sk_buff *skb, ++ struct sk_buff_head *list) ++{ ++ struct skb_shared_info *shinfo = skb_shinfo(skb); ++ int nr_frags = shinfo->nr_frags; ++ RING_IDX cons = np->rx.rsp_cons; ++ skb_frag_t *frag = shinfo->frags + nr_frags; ++ struct sk_buff *nskb; ++ ++ while ((nskb = __skb_dequeue(list))) { ++ struct xen_netif_rx_response *rx = ++ RING_GET_RESPONSE(&np->rx, ++cons); ++ ++ frag->page = skb_shinfo(nskb)->frags[0].page; ++ frag->page_offset = rx->offset; ++ frag->size = rx->status; ++ ++ skb->data_len += rx->status; ++ ++ skb_shinfo(nskb)->nr_frags = 0; ++ kfree_skb(nskb); ++ ++ frag++; ++ nr_frags++; ++ } ++ ++ shinfo->nr_frags = nr_frags; ++ return cons; ++} ++ ++static int skb_checksum_setup(struct sk_buff *skb) ++{ ++ struct iphdr *iph; ++ unsigned char *th; ++ int err = -EPROTO; ++ ++ if (skb->protocol != htons(ETH_P_IP)) ++ goto out; ++ ++ iph = (void *)skb->data; ++ th = skb->data + 4 * iph->ihl; ++ if (th >= skb_tail_pointer(skb)) ++ goto out; ++ ++ skb->csum_start = th - skb->head; ++ switch (iph->protocol) { ++ case IPPROTO_TCP: ++ skb->csum_offset = offsetof(struct tcphdr, check); ++ break; ++ case IPPROTO_UDP: ++ skb->csum_offset = offsetof(struct udphdr, check); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk(KERN_ERR "Attempting to checksum a non-" ++ "TCP/UDP packet, dropping a protocol" ++ " %d packet", iph->protocol); ++ goto out; ++ } ++ ++ if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) ++ goto out; ++ ++ err = 0; ++ ++out: ++ return err; ++} ++ ++static int handle_incoming_queue(struct net_device *dev, ++ struct sk_buff_head *rxq) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ int packets_dropped = 0; ++ struct sk_buff *skb; ++ ++ while ((skb = __skb_dequeue(rxq)) != NULL) { ++ struct page *page = NETFRONT_SKB_CB(skb)->page; ++ void *vaddr = page_address(page); ++ unsigned offset = NETFRONT_SKB_CB(skb)->offset; ++ ++ memcpy(skb->data, vaddr + offset, ++ skb_headlen(skb)); ++ ++ if (page != skb_shinfo(skb)->frags[0].page) ++ __free_page(page); ++ ++ /* Ethernet work: Delayed to here as it peeks the header. */ ++ skb->protocol = eth_type_trans(skb, dev); ++ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { ++ if (skb_checksum_setup(skb)) { ++ kfree_skb(skb); ++ packets_dropped++; ++ np->stats.rx_errors++; ++ continue; ++ } ++ } ++ ++ np->stats.rx_packets++; ++ np->stats.rx_bytes += skb->len; ++ ++ /* Pass it up. 
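++		 * (Worked example of the fixup above, assuming an
++		 * IPv4/TCP frame: skb_checksum_setup() points csum_start
++		 * at the transport header and sets csum_offset to
++		 * offsetof(struct tcphdr, check), i.e. 16, so the missing
++		 * checksum can be folded in before delivery.)
++		 */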
++		netif_receive_skb(skb);
++		dev->last_rx = jiffies;
++	}
++
++	return packets_dropped;
++}
++
++static int xennet_poll(struct net_device *dev, int *pbudget)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++	struct netfront_rx_info rinfo;
++	struct xen_netif_rx_response *rx = &rinfo.rx;
++	struct xen_netif_extra_info *extras = rinfo.extras;
++	RING_IDX i, rp;
++	struct multicall_entry *mcl;
++	int work_done, budget, more_to_do = 1;
++	struct sk_buff_head rxq;
++	struct sk_buff_head errq;
++	struct sk_buff_head tmpq;
++	unsigned long flags;
++	unsigned int len;
++	int pages_flipped = 0;
++	int err;
++
++	spin_lock(&np->rx_lock);
++
++	if (unlikely(!netif_carrier_ok(dev))) {
++		spin_unlock(&np->rx_lock);
++		return 0;
++	}
++
++	skb_queue_head_init(&rxq);
++	skb_queue_head_init(&errq);
++	skb_queue_head_init(&tmpq);
++
++	if ((budget = *pbudget) > dev->quota)
++		budget = dev->quota;
++	rp = np->rx.sring->rsp_prod;
++	rmb(); /* Ensure we see queued responses up to 'rp'. */
++
++	i = np->rx.rsp_cons;
++	work_done = 0;
++	while ((i != rp) && (work_done < budget)) {
++		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
++		memset(extras, 0, sizeof(rinfo.extras));
++
++		err = xennet_get_responses(np, &rinfo, rp, &tmpq,
++					   &pages_flipped);
++
++		if (unlikely(err)) {
++err:
++			while ((skb = __skb_dequeue(&tmpq)))
++				__skb_queue_tail(&errq, skb);
++			np->stats.rx_errors++;
++			i = np->rx.rsp_cons;
++			continue;
++		}
++
++		skb = __skb_dequeue(&tmpq);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct xen_netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (unlikely(xennet_set_skb_gso(skb, gso))) {
++				__skb_queue_head(&tmpq, skb);
++				np->rx.rsp_cons += skb_queue_len(&tmpq);
++				goto err;
++			}
++		}
++
++		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
++		NETFRONT_SKB_CB(skb)->offset = rx->offset;
++
++		len = rx->status;
++		if (len > RX_COPY_THRESHOLD)
++			len = RX_COPY_THRESHOLD;
++		skb_put(skb, len);
++
++		if (rx->status > len) {
++			skb_shinfo(skb)->frags[0].page_offset =
++				rx->offset + len;
++			skb_shinfo(skb)->frags[0].size = rx->status - len;
++			skb->data_len = rx->status - len;
++		} else {
++			skb_shinfo(skb)->frags[0].page = NULL;
++			skb_shinfo(skb)->nr_frags = 0;
++		}
++
++		i = xennet_fill_frags(np, skb, &tmpq);
++
++		/*
++		 * Truesize approximates the size of true data plus
++		 * any supervisor overheads. Adding hypervisor
++		 * overheads has been shown to significantly reduce
++		 * achievable bandwidth with the default receive
++		 * buffer size. It is therefore not wise to account
++		 * for it here.
++		 *
++		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
++		 * to RX_COPY_THRESHOLD + the supervisor
++		 * overheads. Here, we add the size of the data pulled
++		 * in xennet_fill_frags().
++		 *
++		 * We also adjust for any unused space in the main
++		 * data area by subtracting (RX_COPY_THRESHOLD -
++		 * len). This is especially important with drivers
++		 * which split incoming packets into header and data,
++		 * using only 66 bytes of the main data area (see the
++		 * e1000 driver for example.) On such systems,
++		 * without this last adjustment, our achievable
++		 * receive throughput using the standard receive
++		 * buffer size was cut by 25%(!!!).
++ */ ++ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len); ++ skb->len += skb->data_len; ++ ++ if (rx->flags & NETRXF_csum_blank) ++ skb->ip_summed = CHECKSUM_PARTIAL; ++ else if (rx->flags & NETRXF_data_validated) ++ skb->ip_summed = CHECKSUM_UNNECESSARY; ++ ++ __skb_queue_tail(&rxq, skb); ++ ++ np->rx.rsp_cons = ++i; ++ work_done++; ++ } ++ ++ if (pages_flipped) { ++ /* Do all the remapping work, and M2P updates. */ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ mcl = np->rx_mcl + pages_flipped; ++ MULTI_mmu_update(mcl, np->rx_mmu, ++ pages_flipped, 0, DOMID_SELF); ++ (void)HYPERVISOR_multicall(np->rx_mcl, ++ pages_flipped + 1); ++ } ++ } ++ ++ while ((skb = __skb_dequeue(&errq))) ++ kfree_skb(skb); ++ ++ work_done -= handle_incoming_queue(dev, &rxq); ++ ++ /* If we get a callback with very few responses, reduce fill target. */ ++ /* NB. Note exponential increase, linear decrease. */ ++ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > ++ ((3*np->rx_target) / 4)) && ++ (--np->rx_target < np->rx_min_target)) ++ np->rx_target = np->rx_min_target; ++ ++ xennet_alloc_rx_buffers(dev); ++ ++ *pbudget -= work_done; ++ dev->quota -= work_done; ++ ++ if (work_done < budget) { ++ local_irq_save(flags); ++ ++ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do); ++ if (!more_to_do) ++ __netif_rx_complete(dev); ++ ++ local_irq_restore(flags); ++ } ++ ++ spin_unlock(&np->rx_lock); ++ ++ return more_to_do; ++} ++ ++static int xennet_change_mtu(struct net_device *dev, int mtu) ++{ ++ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN; ++ ++ if (mtu > max) ++ return -EINVAL; ++ dev->mtu = mtu; ++ return 0; ++} ++ ++static void xennet_release_tx_bufs(struct netfront_info *np) ++{ ++ struct sk_buff *skb; ++ int i; ++ ++ for (i = 0; i < NET_TX_RING_SIZE; i++) { ++ /* Skip over entries which are actually freelist references */ ++ if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET) ++ continue; ++ ++ skb = np->tx_skbs[i].skb; ++ gnttab_end_foreign_access_ref(np->grant_tx_ref[i], ++ GNTMAP_readonly); ++ gnttab_release_grant_reference(&np->gref_tx_head, ++ np->grant_tx_ref[i]); ++ np->grant_tx_ref[i] = GRANT_INVALID_REF; ++ add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i); ++ dev_kfree_skb_irq(skb); ++ } ++} ++ ++static void xennet_release_rx_bufs(struct netfront_info *np) ++{ ++ struct mmu_update *mmu = np->rx_mmu; ++ struct multicall_entry *mcl = np->rx_mcl; ++ struct sk_buff_head free_list; ++ struct sk_buff *skb; ++ unsigned long mfn; ++ int xfer = 0, noxfer = 0, unused = 0; ++ int id, ref; ++ ++ if (np->copying_receiver) { ++ dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n", ++ __func__); ++ return; ++ } ++ ++ skb_queue_head_init(&free_list); ++ ++ spin_lock_bh(&np->rx_lock); ++ ++ for (id = 0; id < NET_RX_RING_SIZE; id++) { ++ if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) { ++ unused++; ++ continue; ++ } ++ ++ skb = np->rx_skbs[id]; ++ mfn = gnttab_end_foreign_transfer_ref(ref); ++ gnttab_release_grant_reference(&np->gref_rx_head, ref); ++ np->grant_rx_ref[id] = GRANT_INVALID_REF; ++ ++ if (0 == mfn) { ++ skb_shinfo(skb)->nr_frags = 0; ++ dev_kfree_skb(skb); ++ noxfer++; ++ continue; ++ } ++ ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ /* Remap the page. 
*/ ++ struct page *page = skb_shinfo(skb)->frags[0].page; ++ unsigned long pfn = page_to_pfn(page); ++ void *vaddr = page_address(page); ++ ++ MULTI_update_va_mapping(mcl, (unsigned long)vaddr, ++ mfn_pte(mfn, PAGE_KERNEL), ++ 0); ++ mcl++; ++ mmu->ptr = ((u64)mfn << PAGE_SHIFT) ++ | MMU_MACHPHYS_UPDATE; ++ mmu->val = pfn; ++ mmu++; ++ ++ set_phys_to_machine(pfn, mfn); ++ } ++ __skb_queue_tail(&free_list, skb); ++ xfer++; ++ } ++ ++ dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n", ++ __func__, xfer, noxfer, unused); ++ ++ if (xfer) { ++ if (!xen_feature(XENFEAT_auto_translated_physmap)) { ++ /* Do all the remapping work and M2P updates. */ ++ MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu, ++ 0, DOMID_SELF); ++ mcl++; ++ HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl); ++ } ++ } ++ ++ while ((skb = __skb_dequeue(&free_list)) != NULL) ++ dev_kfree_skb(skb); ++ ++ spin_unlock_bh(&np->rx_lock); ++} ++ ++static void xennet_uninit(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ xennet_release_tx_bufs(np); ++ xennet_release_rx_bufs(np); ++ gnttab_free_grant_references(np->gref_tx_head); ++ gnttab_free_grant_references(np->gref_rx_head); ++} ++ ++static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev) ++{ ++ int i, err; ++ struct net_device *netdev; ++ struct netfront_info *np; ++ ++ netdev = alloc_etherdev(sizeof(struct netfront_info)); ++ if (!netdev) { ++ printk(KERN_WARNING "%s> alloc_etherdev failed.\n", ++ __func__); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ np = netdev_priv(netdev); ++ np->xbdev = dev; ++ ++ spin_lock_init(&np->tx_lock); ++ spin_lock_init(&np->rx_lock); ++ ++ skb_queue_head_init(&np->rx_batch); ++ np->rx_target = RX_DFL_MIN_TARGET; ++ np->rx_min_target = RX_DFL_MIN_TARGET; ++ np->rx_max_target = RX_MAX_TARGET; ++ ++ init_timer(&np->rx_refill_timer); ++ np->rx_refill_timer.data = (unsigned long)netdev; ++ np->rx_refill_timer.function = rx_refill_timeout; ++ ++ /* Initialise tx_skbs as a free chain containing every entry. */ ++ np->tx_skb_freelist = 0; ++ for (i = 0; i < NET_TX_RING_SIZE; i++) { ++ np->tx_skbs[i].link = i+1; ++ np->grant_tx_ref[i] = GRANT_INVALID_REF; ++ } ++ ++ /* Clear out rx_skbs */ ++ for (i = 0; i < NET_RX_RING_SIZE; i++) { ++ np->rx_skbs[i] = NULL; ++ np->grant_rx_ref[i] = GRANT_INVALID_REF; ++ } ++ ++ /* A grant for every tx ring slot */ ++ if (gnttab_alloc_grant_references(TX_MAX_TARGET, ++ &np->gref_tx_head) < 0) { ++ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); ++ err = -ENOMEM; ++ goto exit; ++ } ++ /* A grant for every rx ring slot */ ++ if (gnttab_alloc_grant_references(RX_MAX_TARGET, ++ &np->gref_rx_head) < 0) { ++ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); ++ err = -ENOMEM; ++ goto exit_free_tx; ++ } ++ ++ netdev->open = xennet_open; ++ netdev->hard_start_xmit = xennet_start_xmit; ++ netdev->stop = xennet_close; ++ netdev->get_stats = xennet_get_stats; ++ netdev->poll = xennet_poll; ++ netdev->uninit = xennet_uninit; ++ netdev->change_mtu = xennet_change_mtu; ++ netdev->weight = 64; ++ netdev->features = NETIF_F_IP_CSUM; ++ ++ SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops); ++ SET_MODULE_OWNER(netdev); ++ SET_NETDEV_DEV(netdev, &dev->dev); ++ ++ np->netdev = netdev; ++ ++ netif_carrier_off(netdev); ++ ++ return netdev; ++ ++ exit_free_tx: ++ gnttab_free_grant_references(np->gref_tx_head); ++ exit: ++ free_netdev(netdev); ++ return ERR_PTR(err); ++} ++ ++/** ++ * Entry point to this code when a new device is created. 
Allocate the basic ++ * structures and the ring buffers for communication with the backend, and ++ * inform the backend of the appropriate details for those. ++ */ ++static int __devinit netfront_probe(struct xenbus_device *dev, ++ const struct xenbus_device_id *id) ++{ ++ int err; ++ struct net_device *netdev; ++ struct netfront_info *info; ++ ++ netdev = xennet_create_dev(dev); ++ if (IS_ERR(netdev)) { ++ err = PTR_ERR(netdev); ++ xenbus_dev_fatal(dev, err, "creating netdev"); ++ return err; ++ } ++ ++ info = netdev_priv(netdev); ++ dev->dev.driver_data = info; ++ ++ err = register_netdev(info->netdev); ++ if (err) { ++ printk(KERN_WARNING "%s: register_netdev err=%d\n", ++ __func__, err); ++ goto fail; ++ } ++ ++ err = xennet_sysfs_addif(info->netdev); ++ if (err) { ++ unregister_netdev(info->netdev); ++ printk(KERN_WARNING "%s: add sysfs failed err=%d\n", ++ __func__, err); ++ goto fail; ++ } ++ ++ return 0; ++ ++ fail: ++ free_netdev(netdev); ++ dev->dev.driver_data = NULL; ++ return err; ++} ++ ++static void xennet_end_access(int ref, void *page) ++{ ++ /* This frees the page as a side-effect */ ++ if (ref != GRANT_INVALID_REF) ++ gnttab_end_foreign_access(ref, 0, (unsigned long)page); ++} ++ ++static void xennet_disconnect_backend(struct netfront_info *info) ++{ ++ /* Stop old i/f to prevent errors whilst we rebuild the state. */ ++ spin_lock_bh(&info->rx_lock); ++ spin_lock_irq(&info->tx_lock); ++ netif_carrier_off(info->netdev); ++ spin_unlock_irq(&info->tx_lock); ++ spin_unlock_bh(&info->rx_lock); ++ ++ if (info->netdev->irq) ++ unbind_from_irqhandler(info->netdev->irq, info->netdev); ++ info->evtchn = info->netdev->irq = 0; ++ ++ /* End access and free the pages */ ++ xennet_end_access(info->tx_ring_ref, info->tx.sring); ++ xennet_end_access(info->rx_ring_ref, info->rx.sring); ++ ++ info->tx_ring_ref = GRANT_INVALID_REF; ++ info->rx_ring_ref = GRANT_INVALID_REF; ++ info->tx.sring = NULL; ++ info->rx.sring = NULL; ++} ++ ++/** ++ * We are reconnecting to the backend, due to a suspend/resume, or a backend ++ * driver restart. We tear down our netif structure and recreate it, but ++ * leave the device-layer structures intact so that this is transparent to the ++ * rest of the kernel. ++ */ ++static int netfront_resume(struct xenbus_device *dev) ++{ ++ struct netfront_info *info = dev->dev.driver_data; ++ ++ dev_dbg(&dev->dev, "%s\n", dev->nodename); ++ ++ xennet_disconnect_backend(info); ++ return 0; ++} ++ ++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) ++{ ++ char *s, *e, *macstr; ++ int i; ++ ++ macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); ++ if (IS_ERR(macstr)) ++ return PTR_ERR(macstr); ++ ++ for (i = 0; i < ETH_ALEN; i++) { ++ mac[i] = simple_strtoul(s, &e, 16); ++ if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { ++ kfree(macstr); ++ return -ENOENT; ++ } ++ s = e+1; ++ } ++ ++ kfree(macstr); ++ return 0; ++} ++ ++static irqreturn_t xennet_interrupt(int irq, void *dev_id) ++{ ++ struct net_device *dev = dev_id; ++ struct netfront_info *np = netdev_priv(dev); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&np->tx_lock, flags); ++ ++ if (likely(netif_carrier_ok(dev))) { ++ xennet_tx_buf_gc(dev); ++ /* Under tx_lock: protects access to rx shared-ring indexes. 
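++		 * (Design note: the hard-irq path stays short on purpose;
++		 * it only reaps completed transmits via xennet_tx_buf_gc()
++		 * and schedules xennet_poll(), which does the heavy rx
++		 * work later under rx_lock.)
++		 */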
*/ ++ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) ++ netif_rx_schedule(dev); ++ } ++ ++ spin_unlock_irqrestore(&np->tx_lock, flags); ++ ++ return IRQ_HANDLED; ++} ++ ++static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) ++{ ++ struct xen_netif_tx_sring *txs; ++ struct xen_netif_rx_sring *rxs; ++ int err; ++ struct net_device *netdev = info->netdev; ++ ++ info->tx_ring_ref = GRANT_INVALID_REF; ++ info->rx_ring_ref = GRANT_INVALID_REF; ++ info->rx.sring = NULL; ++ info->tx.sring = NULL; ++ netdev->irq = 0; ++ ++ err = xen_net_read_mac(dev, netdev->dev_addr); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); ++ goto fail; ++ } ++ ++ txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL); ++ if (!txs) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(dev, err, "allocating tx ring page"); ++ goto fail; ++ } ++ SHARED_RING_INIT(txs); ++ FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); ++ ++ err = xenbus_grant_ring(dev, virt_to_mfn(txs)); ++ if (err < 0) { ++ free_page((unsigned long)txs); ++ goto fail; ++ } ++ ++ info->tx_ring_ref = err; ++ rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL); ++ if (!rxs) { ++ err = -ENOMEM; ++ xenbus_dev_fatal(dev, err, "allocating rx ring page"); ++ goto fail; ++ } ++ SHARED_RING_INIT(rxs); ++ FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); ++ ++ err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); ++ if (err < 0) { ++ free_page((unsigned long)rxs); ++ goto fail; ++ } ++ info->rx_ring_ref = err; ++ ++ err = xenbus_alloc_evtchn(dev, &info->evtchn); ++ if (err) ++ goto fail; ++ ++ err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt, ++ IRQF_SAMPLE_RANDOM, netdev->name, ++ netdev); ++ if (err < 0) ++ goto fail; ++ netdev->irq = err; ++ return 0; ++ ++ fail: ++ return err; ++} ++ ++/* Common code used when first setting up, and when resuming. */ ++static int talk_to_backend(struct xenbus_device *dev, ++ struct netfront_info *info) ++{ ++ const char *message; ++ struct xenbus_transaction xbt; ++ int err; ++ ++ /* Create shared ring, alloc event channel. 
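++	 *
++	 * (Flow note: the xenbus writes below run inside a transaction;
++	 * on -EAGAIN the code jumps back to "again:" and rewrites every
++	 * key from scratch, so a torn transaction never leaves partial
++	 * state behind.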
*/ ++ err = setup_netfront(dev, info); ++ if (err) ++ goto out; ++ ++again: ++ err = xenbus_transaction_start(&xbt); ++ if (err) { ++ xenbus_dev_fatal(dev, err, "starting transaction"); ++ goto destroy_ring; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u", ++ info->tx_ring_ref); ++ if (err) { ++ message = "writing tx ring-ref"; ++ goto abort_transaction; ++ } ++ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u", ++ info->rx_ring_ref); ++ if (err) { ++ message = "writing rx ring-ref"; ++ goto abort_transaction; ++ } ++ err = xenbus_printf(xbt, dev->nodename, ++ "event-channel", "%u", info->evtchn); ++ if (err) { ++ message = "writing event-channel"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u", ++ info->copying_receiver); ++ if (err) { ++ message = "writing request-rx-copy"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1); ++ if (err) { ++ message = "writing feature-rx-notify"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1); ++ if (err) { ++ message = "writing feature-sg"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1); ++ if (err) { ++ message = "writing feature-gso-tcpv4"; ++ goto abort_transaction; ++ } ++ ++ err = xenbus_transaction_end(xbt, 0); ++ if (err) { ++ if (err == -EAGAIN) ++ goto again; ++ xenbus_dev_fatal(dev, err, "completing transaction"); ++ goto destroy_ring; ++ } ++ ++ return 0; ++ ++ abort_transaction: ++ xenbus_transaction_end(xbt, 1); ++ xenbus_dev_fatal(dev, err, "%s", message); ++ destroy_ring: ++ xennet_disconnect_backend(info); ++ out: ++ return err; ++} ++ ++static int xennet_set_sg(struct net_device *dev, u32 data) ++{ ++ if (data) { ++ struct netfront_info *np = netdev_priv(dev); ++ int val; ++ ++ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg", ++ "%d", &val) < 0) ++ val = 0; ++ if (!val) ++ return -ENOSYS; ++ } else if (dev->mtu > ETH_DATA_LEN) ++ dev->mtu = ETH_DATA_LEN; ++ ++ return ethtool_op_set_sg(dev, data); ++} ++ ++static int xennet_set_tso(struct net_device *dev, u32 data) ++{ ++ if (data) { ++ struct netfront_info *np = netdev_priv(dev); ++ int val; ++ ++ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, ++ "feature-gso-tcpv4", "%d", &val) < 0) ++ val = 0; ++ if (!val) ++ return -ENOSYS; ++ } ++ ++ return ethtool_op_set_tso(dev, data); ++} ++ ++static void xennet_set_features(struct net_device *dev) ++{ ++ /* Turn off all GSO bits except ROBUST. */ ++ dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1; ++ dev->features |= NETIF_F_GSO_ROBUST; ++ xennet_set_sg(dev, 0); ++ ++ /* We need checksum offload to enable scatter/gather and TSO. */ ++ if (!(dev->features & NETIF_F_IP_CSUM)) ++ return; ++ ++ if (!xennet_set_sg(dev, 1)) ++ xennet_set_tso(dev, 1); ++} ++ ++static int xennet_connect(struct net_device *dev) ++{ ++ struct netfront_info *np = netdev_priv(dev); ++ int i, requeue_idx, err; ++ struct sk_buff *skb; ++ grant_ref_t ref; ++ struct xen_netif_rx_request *req; ++ unsigned int feature_rx_copy, feature_rx_flip; ++ ++ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, ++ "feature-rx-copy", "%u", &feature_rx_copy); ++ if (err != 1) ++ feature_rx_copy = 0; ++ ++ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend, ++ "feature-rx-flip", "%u", &feature_rx_flip); ++ /* Flip is the default, since it was once the only mode of ++ operation. 
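++	 *
++	 * (Decision table for the expression below: copy requested and
++	 * feature-rx-copy advertised -> copy; flip requested but
++	 * feature-rx-flip absent -> copy as a fallback; otherwise the
++	 * requested flip path is kept.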
*/ ++ if (err != 1) ++ feature_rx_flip = 1; ++ ++ /* ++ * Copy packets on receive path if: ++ * (a) This was requested by user, and the backend supports it; or ++ * (b) Flipping was requested, but this is unsupported by the backend. ++ */ ++ np->copying_receiver = (((rx_mode == RX_COPY) && feature_rx_copy) || ++ ((rx_mode == RX_FLIP) && !feature_rx_flip)); ++ ++ err = talk_to_backend(np->xbdev, np); ++ if (err) ++ return err; ++ ++ xennet_set_features(dev); ++ ++ dev_info(&dev->dev, "has %s receive path.\n", ++ np->copying_receiver ? "copying" : "flipping"); ++ ++ spin_lock_bh(&np->rx_lock); ++ spin_lock_irq(&np->tx_lock); ++ ++ /* Step 1: Discard all pending TX packet fragments. */ ++ xennet_release_tx_bufs(np); ++ ++ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ ++ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { ++ if (!np->rx_skbs[i]) ++ continue; ++ ++ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i); ++ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); ++ req = RING_GET_REQUEST(&np->rx, requeue_idx); ++ ++ if (!np->copying_receiver) { ++ gnttab_grant_foreign_transfer_ref( ++ ref, np->xbdev->otherend_id, ++ page_to_pfn(skb_shinfo(skb)->frags->page)); ++ } else { ++ gnttab_grant_foreign_access_ref( ++ ref, np->xbdev->otherend_id, ++ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)-> ++ frags->page)), ++ 0); ++ } ++ req->gref = ref; ++ req->id = requeue_idx; ++ ++ requeue_idx++; ++ } ++ ++ np->rx.req_prod_pvt = requeue_idx; ++ ++ /* ++ * Step 3: All public and private state should now be sane. Get ++ * ready to start sending and receiving packets and give the driver ++ * domain a kick because we've probably just requeued some ++ * packets. ++ */ ++ netif_carrier_on(np->netdev); ++ notify_remote_via_irq(np->netdev->irq); ++ xennet_tx_buf_gc(dev); ++ xennet_alloc_rx_buffers(dev); ++ ++ spin_unlock_irq(&np->tx_lock); ++ spin_unlock_bh(&np->rx_lock); ++ ++ return 0; ++} ++ ++/** ++ * Callback received when the backend's state changes. 
++ */ ++static void backend_changed(struct xenbus_device *dev, ++ enum xenbus_state backend_state) ++{ ++ struct netfront_info *np = dev->dev.driver_data; ++ struct net_device *netdev = np->netdev; ++ ++ dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state)); ++ ++ switch (backend_state) { ++ case XenbusStateInitialising: ++ case XenbusStateInitialised: ++ case XenbusStateConnected: ++ case XenbusStateUnknown: ++ case XenbusStateClosed: ++ break; ++ ++ case XenbusStateInitWait: ++ if (dev->state != XenbusStateInitialising) ++ break; ++ if (xennet_connect(netdev) != 0) ++ break; ++ xenbus_switch_state(dev, XenbusStateConnected); ++ break; ++ ++ case XenbusStateClosing: ++ xenbus_frontend_closed(dev); ++ break; ++ } ++} ++ ++static struct ethtool_ops xennet_ethtool_ops = ++{ ++ .get_tx_csum = ethtool_op_get_tx_csum, ++ .set_tx_csum = ethtool_op_set_tx_csum, ++ .get_sg = ethtool_op_get_sg, ++ .set_sg = xennet_set_sg, ++ .get_tso = ethtool_op_get_tso, ++ .set_tso = xennet_set_tso, ++ .get_link = ethtool_op_get_link, ++}; ++ ++#ifdef CONFIG_SYSFS ++static ssize_t show_rxbuf_min(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ struct netfront_info *info = netdev_priv(netdev); ++ ++ return sprintf(buf, "%u\n", info->rx_min_target); ++} ++ ++static ssize_t store_rxbuf_min(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t len) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ struct netfront_info *np = netdev_priv(netdev); ++ char *endp; ++ unsigned long target; ++ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ target = simple_strtoul(buf, &endp, 0); ++ if (endp == buf) ++ return -EBADMSG; ++ ++ if (target < RX_MIN_TARGET) ++ target = RX_MIN_TARGET; ++ if (target > RX_MAX_TARGET) ++ target = RX_MAX_TARGET; ++ ++ spin_lock_bh(&np->rx_lock); ++ if (target > np->rx_max_target) ++ np->rx_max_target = target; ++ np->rx_min_target = target; ++ if (target > np->rx_target) ++ np->rx_target = target; ++ ++ xennet_alloc_rx_buffers(netdev); ++ ++ spin_unlock_bh(&np->rx_lock); ++ return len; ++} ++ ++static ssize_t show_rxbuf_max(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ struct netfront_info *info = netdev_priv(netdev); ++ ++ return sprintf(buf, "%u\n", info->rx_max_target); ++} ++ ++static ssize_t store_rxbuf_max(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t len) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ struct netfront_info *np = netdev_priv(netdev); ++ char *endp; ++ unsigned long target; ++ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ target = simple_strtoul(buf, &endp, 0); ++ if (endp == buf) ++ return -EBADMSG; ++ ++ if (target < RX_MIN_TARGET) ++ target = RX_MIN_TARGET; ++ if (target > RX_MAX_TARGET) ++ target = RX_MAX_TARGET; ++ ++ spin_lock_bh(&np->rx_lock); ++ if (target < np->rx_min_target) ++ np->rx_min_target = target; ++ np->rx_max_target = target; ++ if (target < np->rx_target) ++ np->rx_target = target; ++ ++ xennet_alloc_rx_buffers(netdev); ++ ++ spin_unlock_bh(&np->rx_lock); ++ return len; ++} ++ ++static ssize_t show_rxbuf_cur(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct net_device *netdev = to_net_dev(dev); ++ struct netfront_info *info = netdev_priv(netdev); ++ ++ return sprintf(buf, "%u\n", info->rx_target); ++} ++ ++static struct device_attribute xennet_attrs[] = { ++ __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, 
show_rxbuf_min, store_rxbuf_min), ++ __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max), ++ __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL), ++}; ++ ++static int xennet_sysfs_addif(struct net_device *netdev) ++{ ++ int i; ++ int err; ++ ++ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) { ++ err = device_create_file(&netdev->dev, ++ &xennet_attrs[i]); ++ if (err) ++ goto fail; ++ } ++ return 0; ++ ++ fail: ++ while (--i >= 0) ++ device_remove_file(&netdev->dev, &xennet_attrs[i]); ++ return err; ++} ++ ++static void xennet_sysfs_delif(struct net_device *netdev) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) ++ device_remove_file(&netdev->dev, &xennet_attrs[i]); ++} ++ ++#endif /* CONFIG_SYSFS */ ++ ++static struct xenbus_device_id netfront_ids[] = { ++ { "vif" }, ++ { "" } ++}; ++ ++ ++static int __devexit xennet_remove(struct xenbus_device *dev) ++{ ++ struct netfront_info *info = dev->dev.driver_data; ++ ++ dev_dbg(&dev->dev, "%s\n", dev->nodename); ++ ++ unregister_netdev(info->netdev); ++ ++ xennet_disconnect_backend(info); ++ ++ del_timer_sync(&info->rx_refill_timer); ++ ++ xennet_sysfs_delif(info->netdev); ++ ++ free_netdev(info->netdev); ++ ++ return 0; ++} ++ ++static struct xenbus_driver netfront = { ++ .name = "vif", ++ .owner = THIS_MODULE, ++ .ids = netfront_ids, ++ .probe = netfront_probe, ++ .remove = __devexit_p(xennet_remove), ++ .resume = netfront_resume, ++ .otherend_changed = backend_changed, ++}; ++ ++static int __init netif_init(void) ++{ ++ if (!is_running_on_xen()) ++ return -ENODEV; ++ ++ if (is_initial_xendomain()) ++ return 0; ++ ++ printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); ++ ++ return xenbus_register_frontend(&netfront); ++} ++module_init(netif_init); ++ ++ ++static void __exit netif_exit(void) ++{ ++ if (is_initial_xendomain()) ++ return; ++ ++ return xenbus_unregister_driver(&netfront); ++} ++module_exit(netif_exit); ++ ++MODULE_DESCRIPTION("Xen virtual network device frontend"); ++MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/drivers/net/yellowfin.c linux-2.6.22-591/drivers/net/yellowfin.c +--- linux-2.6.22-570/drivers/net/yellowfin.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/net/yellowfin.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1137,7 +1137,7 @@ + if (skb == NULL) + break; + skb_reserve(skb, 2); /* 16 byte align the IP header */ +- eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0); ++ skb_copy_to_linear_data(skb, rx_skb->data, pkt_len); + skb_put(skb, pkt_len); + pci_dma_sync_single_for_device(yp->pci_dev, desc->addr, + yp->rx_buf_sz, +diff -Nurb linux-2.6.22-570/drivers/parisc/led.c linux-2.6.22-591/drivers/parisc/led.c +--- linux-2.6.22-570/drivers/parisc/led.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/parisc/led.c 2007-12-21 15:36:14.000000000 -0500 +@@ -359,7 +359,7 @@ + * for reading should be OK */ + read_lock(&dev_base_lock); + rcu_read_lock(); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + struct net_device_stats *stats; + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (!in_dev || !in_dev->ifa_list) +diff -Nurb linux-2.6.22-570/drivers/parisc/pdc_stable.c linux-2.6.22-591/drivers/parisc/pdc_stable.c +--- linux-2.6.22-570/drivers/parisc/pdc_stable.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/parisc/pdc_stable.c 2007-12-21 15:36:12.000000000 -0500 +@@ -121,14 +121,14 @@ + + #define PDCS_ATTR(_name, _mode, _show, _store) \ + struct subsys_attribute pdcs_attr_##_name = { \ +- .attr = {.name = 
__stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ ++ .attr = {.name = __stringify(_name), .mode = _mode}, \ + .show = _show, \ + .store = _store, \ + }; + + #define PATHS_ATTR(_name, _mode, _show, _store) \ + struct pdcspath_attribute paths_attr_##_name = { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \ ++ .attr = {.name = __stringify(_name), .mode = _mode}, \ + .show = _show, \ + .store = _store, \ + }; +diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c +--- linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pci/hotplug/acpiphp_ibm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -106,6 +106,7 @@ + static void ibm_handle_events(acpi_handle handle, u32 event, void *context); + static int ibm_get_table_from_acpi(char **bufp); + static ssize_t ibm_read_apci_table(struct kobject *kobj, ++ struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t size); + static acpi_status __init ibm_find_acpi_device(acpi_handle handle, + u32 lvl, void *context, void **rv); +@@ -117,7 +118,6 @@ + static struct bin_attribute ibm_apci_table_attr = { + .attr = { + .name = "apci_table", +- .owner = THIS_MODULE, + .mode = S_IRUGO, + }, + .read = ibm_read_apci_table, +@@ -358,6 +358,7 @@ + * our solution is to only allow reading the table in all at once + **/ + static ssize_t ibm_read_apci_table(struct kobject *kobj, ++ struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t size) + { + int bytes_read = -EINVAL; +diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c +--- linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pci/hotplug/rpadlpar_core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -159,8 +159,8 @@ + /* Claim new bus resources */ + pcibios_claim_one_bus(dev->bus); + +- /* ioremap() for child bus, which may or may not succeed */ +- remap_bus_range(dev->subordinate); ++ /* Map IO space for child bus, which may or may not succeed */ ++ pcibios_map_io_space(dev->subordinate); + + /* Add new devices to global lists. Register in proc, sysfs. */ + pci_bus_add_devices(phb->bus); +@@ -390,7 +390,7 @@ + } else + pcibios_remove_pci_devices(bus); + +- if (unmap_bus_range(bus)) { ++ if (pcibios_unmap_io_space(bus)) { + printk(KERN_ERR "%s: failed to unmap bus range\n", + __FUNCTION__); + return -ERANGE; +diff -Nurb linux-2.6.22-570/drivers/pci/pci-sysfs.c linux-2.6.22-591/drivers/pci/pci-sysfs.c +--- linux-2.6.22-570/drivers/pci/pci-sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pci/pci-sysfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -213,7 +213,8 @@ + }; + + static ssize_t +-pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = 64; +@@ -285,7 +286,8 @@ + } + + static ssize_t +-pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj)); + unsigned int size = count; +@@ -352,7 +354,8 @@ + * callback routine (pci_legacy_read). 
+ */ + ssize_t +-pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_bus *bus = to_pci_bus(container_of(kobj, + struct class_device, +@@ -376,7 +379,8 @@ + * callback routine (pci_legacy_write). + */ + ssize_t +-pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_bus *bus = to_pci_bus(container_of(kobj, + struct class_device, +@@ -499,7 +503,6 @@ + sprintf(res_attr_name, "resource%d", i); + res_attr->attr.name = res_attr_name; + res_attr->attr.mode = S_IRUSR | S_IWUSR; +- res_attr->attr.owner = THIS_MODULE; + res_attr->size = pci_resource_len(pdev, i); + res_attr->mmap = pci_mmap_resource; + res_attr->private = &pdev->resource[i]; +@@ -529,7 +532,8 @@ + * writing anything except 0 enables it + */ + static ssize_t +-pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); + +@@ -552,7 +556,8 @@ + * device corresponding to @kobj. + */ + static ssize_t +-pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count) ++pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); + void __iomem *rom; +@@ -582,7 +587,6 @@ + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 256, + .read = pci_read_config, +@@ -593,7 +597,6 @@ + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 4096, + .read = pci_read_config, +@@ -628,7 +631,6 @@ + rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE); + rom_attr->attr.name = "rom"; + rom_attr->attr.mode = S_IRUSR; +- rom_attr->attr.owner = THIS_MODULE; + rom_attr->read = pci_read_rom; + rom_attr->write = pci_write_rom; + retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr); +diff -Nurb linux-2.6.22-570/drivers/pci/probe.c linux-2.6.22-591/drivers/pci/probe.c +--- linux-2.6.22-570/drivers/pci/probe.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/pci/probe.c 2007-12-21 15:36:12.000000000 -0500 +@@ -39,7 +39,6 @@ + b->legacy_io->attr.name = "legacy_io"; + b->legacy_io->size = 0xffff; + b->legacy_io->attr.mode = S_IRUSR | S_IWUSR; +- b->legacy_io->attr.owner = THIS_MODULE; + b->legacy_io->read = pci_read_legacy_io; + b->legacy_io->write = pci_write_legacy_io; + class_device_create_bin_file(&b->class_dev, b->legacy_io); +@@ -49,7 +48,6 @@ + b->legacy_mem->attr.name = "legacy_mem"; + b->legacy_mem->size = 1024*1024; + b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR; +- b->legacy_mem->attr.owner = THIS_MODULE; + b->legacy_mem->mmap = pci_mmap_legacy_mem; + class_device_create_bin_file(&b->class_dev, b->legacy_mem); + } +diff -Nurb linux-2.6.22-570/drivers/pcmcia/cs.c linux-2.6.22-591/drivers/pcmcia/cs.c +--- linux-2.6.22-570/drivers/pcmcia/cs.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/pcmcia/cs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -654,6 +654,7 @@ + add_wait_queue(&skt->thread_wait, &wait); + complete(&skt->thread_done); + ++ set_freezable(); + for (;;) { + unsigned 
long flags; + unsigned int events; +diff -Nurb linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c +--- linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pcmcia/socket_sysfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -283,7 +283,9 @@ + return (ret); + } + +-static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t pccard_show_cis(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + unsigned int size = 0x200; + +@@ -311,7 +313,9 @@ + return (count); + } + +-static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t pccard_store_cis(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj)); + cisdump_t *cis; +@@ -366,7 +370,7 @@ + }; + + static struct bin_attribute pccard_cis_attr = { +- .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE}, ++ .attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR }, + .size = 0x200, + .read = pccard_show_cis, + .write = pccard_store_cis, +diff -Nurb linux-2.6.22-570/drivers/pnp/driver.c linux-2.6.22-591/drivers/pnp/driver.c +--- linux-2.6.22-570/drivers/pnp/driver.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pnp/driver.c 2007-12-21 15:36:12.000000000 -0500 +@@ -167,6 +167,8 @@ + return error; + } + ++ if (pnp_dev->protocol && pnp_dev->protocol->suspend) ++ pnp_dev->protocol->suspend(pnp_dev, state); + return 0; + } + +@@ -179,6 +181,9 @@ + if (!pnp_drv) + return 0; + ++ if (pnp_dev->protocol && pnp_dev->protocol->resume) ++ pnp_dev->protocol->resume(pnp_dev); ++ + if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) { + error = pnp_start_dev(pnp_dev); + if (error) +diff -Nurb linux-2.6.22-570/drivers/pnp/pnpacpi/core.c linux-2.6.22-591/drivers/pnp/pnpacpi/core.c +--- linux-2.6.22-570/drivers/pnp/pnpacpi/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pnp/pnpacpi/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -119,11 +119,23 @@ + return ACPI_FAILURE(status) ? 
-ENODEV : 0; + } + ++static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state) ++{ ++ return acpi_bus_set_power((acpi_handle)dev->data, 3); ++} ++ ++static int pnpacpi_resume(struct pnp_dev *dev) ++{ ++ return acpi_bus_set_power((acpi_handle)dev->data, 0); ++} ++ + static struct pnp_protocol pnpacpi_protocol = { + .name = "Plug and Play ACPI", + .get = pnpacpi_get_resources, + .set = pnpacpi_set_resources, + .disable = pnpacpi_disable_resources, ++ .suspend = pnpacpi_suspend, ++ .resume = pnpacpi_resume, + }; + + static int __init pnpacpi_add_device(struct acpi_device *device) +diff -Nurb linux-2.6.22-570/drivers/pnp/pnpbios/core.c linux-2.6.22-591/drivers/pnp/pnpbios/core.c +--- linux-2.6.22-570/drivers/pnp/pnpbios/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/pnp/pnpbios/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -147,7 +147,7 @@ + info->location_id, info->serial, info->capabilities); + envp[i] = NULL; + +- value = call_usermodehelper (argv [0], argv, envp, 0); ++ value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC); + kfree (buf); + kfree (envp); + return 0; +@@ -160,6 +160,7 @@ + { + static struct pnp_docking_station_info now; + int docked = -1, d = 0; ++ set_freezable(); + while (!unloading) + { + int status; +diff -Nurb linux-2.6.22-570/drivers/rapidio/rio-sysfs.c linux-2.6.22-591/drivers/rapidio/rio-sysfs.c +--- linux-2.6.22-570/drivers/rapidio/rio-sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/rapidio/rio-sysfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -67,7 +67,8 @@ + }; + + static ssize_t +-rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count) ++rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct rio_dev *dev = + to_rio_dev(container_of(kobj, struct device, kobj)); +@@ -137,7 +138,8 @@ + } + + static ssize_t +-rio_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count) ++rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct rio_dev *dev = + to_rio_dev(container_of(kobj, struct device, kobj)); +@@ -197,7 +199,6 @@ + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 0x200000, + .read = rio_read_config, +diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1553.c linux-2.6.22-591/drivers/rtc/rtc-ds1553.c +--- linux-2.6.22-570/drivers/rtc/rtc-ds1553.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/rtc/rtc-ds1553.c 2007-12-21 15:36:12.000000000 -0500 +@@ -258,8 +258,9 @@ + .ioctl = ds1553_rtc_ioctl, + }; + +-static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf, +- loff_t pos, size_t size) ++static ssize_t ds1553_nvram_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t size) + { + struct platform_device *pdev = + to_platform_device(container_of(kobj, struct device, kobj)); +@@ -272,8 +273,9 @@ + return count; + } + +-static ssize_t ds1553_nvram_write(struct kobject *kobj, char *buf, +- loff_t pos, size_t size) ++static ssize_t ds1553_nvram_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t size) + { + struct platform_device *pdev = + to_platform_device(container_of(kobj, struct device, kobj)); +@@ -290,7 +292,6 @@ + .attr = { + .name = "nvram", + .mode = S_IRUGO | S_IWUGO, +- .owner = THIS_MODULE, + }, + .size = RTC_OFFSET, + .read = ds1553_nvram_read, +diff -Nurb 
linux-2.6.22-570/drivers/rtc/rtc-ds1742.c linux-2.6.22-591/drivers/rtc/rtc-ds1742.c +--- linux-2.6.22-570/drivers/rtc/rtc-ds1742.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/rtc/rtc-ds1742.c 2007-12-21 15:36:12.000000000 -0500 +@@ -127,8 +127,9 @@ + .set_time = ds1742_rtc_set_time, + }; + +-static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf, +- loff_t pos, size_t size) ++static ssize_t ds1742_nvram_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t size) + { + struct platform_device *pdev = + to_platform_device(container_of(kobj, struct device, kobj)); +@@ -141,8 +142,9 @@ + return count; + } + +-static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf, +- loff_t pos, size_t size) ++static ssize_t ds1742_nvram_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t pos, size_t size) + { + struct platform_device *pdev = + to_platform_device(container_of(kobj, struct device, kobj)); +@@ -159,7 +161,6 @@ + .attr = { + .name = "nvram", + .mode = S_IRUGO | S_IWUGO, +- .owner = THIS_MODULE, + }, + .read = ds1742_nvram_read, + .write = ds1742_nvram_write, +diff -Nurb linux-2.6.22-570/drivers/s390/cio/chp.c linux-2.6.22-591/drivers/s390/cio/chp.c +--- linux-2.6.22-570/drivers/s390/cio/chp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/s390/cio/chp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -141,8 +141,9 @@ + /* + * Channel measurement related functions + */ +-static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t chp_measurement_chars_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct channel_path *chp; + unsigned int size; +@@ -165,7 +166,6 @@ + .attr = { + .name = "measurement_chars", + .mode = S_IRUSR, +- .owner = THIS_MODULE, + }, + .size = sizeof(struct cmg_chars), + .read = chp_measurement_chars_read, +@@ -193,8 +193,9 @@ + } while (reference_buf.values[0] != buf->values[0]); + } + +-static ssize_t chp_measurement_read(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t chp_measurement_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct channel_path *chp; + struct channel_subsystem *css; +@@ -217,7 +218,6 @@ + .attr = { + .name = "measurement", + .mode = S_IRUSR, +- .owner = THIS_MODULE, + }, + .size = sizeof(struct cmg_entry), + .read = chp_measurement_read, +diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_main.c linux-2.6.22-591/drivers/s390/net/qeth_main.c +--- linux-2.6.22-570/drivers/s390/net/qeth_main.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/s390/net/qeth_main.c 2007-12-21 15:36:14.000000000 -0500 +@@ -8127,7 +8127,7 @@ + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); + +- neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key); ++ neigh->type = inet_addr_type(&init_net, *(__be32 *) neigh->primary_key); + neigh->nud_state = NUD_NOARP; + neigh->ops = arp_direct_ops; + neigh->output = neigh->ops->queue_xmit; +diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_sys.c linux-2.6.22-591/drivers/s390/net/qeth_sys.c +--- linux-2.6.22-570/drivers/s390/net/qeth_sys.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/s390/net/qeth_sys.c 2007-12-21 15:36:12.000000000 -0500 +@@ -991,7 +991,7 @@ + + #define QETH_DEVICE_ATTR(_id,_name,_mode,_show,_store) \ + struct device_attribute 
dev_attr_##_id = { \ +- .attr = {.name=__stringify(_name), .mode=_mode, .owner=THIS_MODULE },\ ++ .attr = {.name=__stringify(_name), .mode=_mode, },\ + .show = _show, \ + .store = _store, \ + }; +diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c +--- linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/s390/scsi/zfcp_aux.c 2007-12-21 15:36:12.000000000 -0500 +@@ -815,9 +815,7 @@ + struct zfcp_unit * + zfcp_unit_enqueue(struct zfcp_port *port, fcp_lun_t fcp_lun) + { +- struct zfcp_unit *unit, *tmp_unit; +- unsigned int scsi_lun; +- int found; ++ struct zfcp_unit *unit; + + /* + * check that there is no unit with this FCP_LUN already in list +@@ -863,21 +861,9 @@ + } + + zfcp_unit_get(unit); ++ unit->scsi_lun = scsilun_to_int((struct scsi_lun *)&unit->fcp_lun); + +- scsi_lun = 0; +- found = 0; + write_lock_irq(&zfcp_data.config_lock); +- list_for_each_entry(tmp_unit, &port->unit_list_head, list) { +- if (tmp_unit->scsi_lun != scsi_lun) { +- found = 1; +- break; +- } +- scsi_lun++; +- } +- unit->scsi_lun = scsi_lun; +- if (found) +- list_add_tail(&unit->list, &tmp_unit->list); +- else + list_add_tail(&unit->list, &port->unit_list_head); + atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status); + atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status); +diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c +--- linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/s390/scsi/zfcp_erp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1986,6 +1986,10 @@ + failed_openfcp: + zfcp_close_fsf(erp_action->adapter); + failed_qdio: ++ atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK | ++ ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED | ++ ZFCP_STATUS_ADAPTER_XPORT_OK, ++ &erp_action->adapter->status); + out: + return retval; + } +@@ -2167,6 +2171,9 @@ + sleep *= 2; + } + ++ atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, ++ &adapter->status); ++ + if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, + &adapter->status)) { + ZFCP_LOG_INFO("error: exchange of configuration data for " +diff -Nurb linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c +--- linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/sbus/char/bbc_envctrl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -7,6 +7,7 @@ + #include + #include + #include ++#include <linux/reboot.h> + #include + #include + +@@ -170,8 +171,6 @@ + static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) + { + static int shutting_down = 0; +- static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; +- char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; + char *type = "???"; + s8 val = -1; + +@@ -195,7 +194,7 @@ + printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); + + shutting_down = 1; +- if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0) ++ if (orderly_poweroff(true) < 0) + printk(KERN_CRIT "envctrl: shutdown execution failed\n"); + } + +diff -Nurb linux-2.6.22-570/drivers/sbus/char/envctrl.c linux-2.6.22-591/drivers/sbus/char/envctrl.c +--- linux-2.6.22-570/drivers/sbus/char/envctrl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/sbus/char/envctrl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include <linux/reboot.h> + + #include + #include
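Both sbus/char environmental-monitoring drivers in the hunks above and below stop forking /sbin/shutdown through call_usermodehelper() with a hand-rolled argv/envp, and instead call the shared orderly_poweroff() helper from <linux/reboot.h>. As this patch series uses it, orderly_poweroff() runs the system's configured userspace poweroff command from process context; called with force=true it falls back to an immediate kernel_power_off() if that command cannot be started. A minimal sketch of the resulting call pattern; the overtemperature handler below is hypothetical and not taken from this patch:

    #include <linux/kernel.h>
    #include <linux/reboot.h>

    static void example_overtemp_shutdown(void)
    {
            static int in_progress;         /* fire only once */

            if (in_progress)
                    return;
            in_progress = 1;

            printk(KERN_CRIT "thermal: critical temperature, powering off\n");

            /* force=true: fall back to kernel_power_off() if the
             * userspace poweroff helper cannot be started. */
            if (orderly_poweroff(true) < 0)
                    printk(KERN_CRIT "thermal: orderly_poweroff() failed\n");
    }

+@@ 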
-966,10 +967,6 @@ + static void envctrl_do_shutdown(void) + { + static int inprog = 0; +- static char *envp[] = { +- "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; +- char *argv[] = { +- "/sbin/shutdown", "-h", "now", NULL }; + int ret; + + if (inprog != 0) +@@ -977,7 +974,7 @@ + + inprog = 1; + printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n"); +- ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0); ++ ret = orderly_poweroff(true); + if (ret < 0) { + printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); + inprog = 0; /* unlikely to succeed, but we could try again */ +diff -Nurb linux-2.6.22-570/drivers/scsi/3w-9xxx.c linux-2.6.22-591/drivers/scsi/3w-9xxx.c +--- linux-2.6.22-570/drivers/scsi/3w-9xxx.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/3w-9xxx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1307,22 +1307,26 @@ + wake_up(&tw_dev->ioctl_wqueue); + } + } else { ++ struct scsi_cmnd *cmd; ++ ++ cmd = tw_dev->srb[request_id]; ++ + twa_scsiop_execute_scsi_complete(tw_dev, request_id); + /* If no error command was a success */ + if (error == 0) { +- tw_dev->srb[request_id]->result = (DID_OK << 16); ++ cmd->result = (DID_OK << 16); + } + + /* If error, command failed */ + if (error == 1) { + /* Ask for a host reset */ +- tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1); ++ cmd->result = (DID_OK << 16) | (CHECK_CONDITION << 1); + } + + /* Report residual bytes for single sgl */ +- if ((tw_dev->srb[request_id]->use_sg <= 1) && (full_command_packet->command.newcommand.status == 0)) { +- if (full_command_packet->command.newcommand.sg_list[0].length < tw_dev->srb[request_id]->request_bufflen) +- tw_dev->srb[request_id]->resid = tw_dev->srb[request_id]->request_bufflen - full_command_packet->command.newcommand.sg_list[0].length; ++ if ((scsi_sg_count(cmd) <= 1) && (full_command_packet->command.newcommand.status == 0)) { ++ if (full_command_packet->command.newcommand.sg_list[0].length < scsi_bufflen(tw_dev->srb[request_id])) ++ scsi_set_resid(cmd, scsi_bufflen(cmd) - full_command_packet->command.newcommand.sg_list[0].length); + } + + /* Now complete the io */ +@@ -1385,52 +1389,20 @@ + { + int use_sg; + struct scsi_cmnd *cmd = tw_dev->srb[request_id]; +- struct pci_dev *pdev = tw_dev->tw_pci_dev; +- int retval = 0; +- +- if (cmd->use_sg == 0) +- goto out; +- +- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); + +- if (use_sg == 0) { ++ use_sg = scsi_dma_map(cmd); ++ if (!use_sg) ++ return 0; ++ else if (use_sg < 0) { + TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to map scatter gather list"); +- goto out; ++ return 0; + } + + cmd->SCp.phase = TW_PHASE_SGLIST; + cmd->SCp.have_data_in = use_sg; +- retval = use_sg; +-out: +- return retval; +-} /* End twa_map_scsi_sg_data() */ +- +-/* This function will perform a pci-dma map for a single buffer */ +-static dma_addr_t twa_map_scsi_single_data(TW_Device_Extension *tw_dev, int request_id) +-{ +- dma_addr_t mapping; +- struct scsi_cmnd *cmd = tw_dev->srb[request_id]; +- struct pci_dev *pdev = tw_dev->tw_pci_dev; +- dma_addr_t retval = 0; +- +- if (cmd->request_bufflen == 0) { +- retval = 0; +- goto out; +- } +- +- mapping = pci_map_single(pdev, cmd->request_buffer, cmd->request_bufflen, DMA_BIDIRECTIONAL); +- +- if (mapping == 0) { +- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Failed to map page"); +- goto out; +- } + +- cmd->SCp.phase = TW_PHASE_SINGLE; +- cmd->SCp.have_data_in = mapping; +- retval = mapping; 
+-out: +- return retval; +-} /* End twa_map_scsi_single_data() */ ++ return use_sg; ++} /* End twa_map_scsi_sg_data() */ + + /* This function will poll for a response interrupt of a request */ + static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds) +@@ -1816,15 +1788,13 @@ + u32 num_sectors = 0x0; + int i, sg_count; + struct scsi_cmnd *srb = NULL; +- struct scatterlist *sglist = NULL; +- dma_addr_t buffaddr = 0x0; ++ struct scatterlist *sglist = NULL, *sg; + int retval = 1; + + if (tw_dev->srb[request_id]) { +- if (tw_dev->srb[request_id]->request_buffer) { +- sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; +- } + srb = tw_dev->srb[request_id]; ++ if (scsi_sglist(srb)) ++ sglist = scsi_sglist(srb); + } + + /* Initialize command packet */ +@@ -1857,32 +1827,12 @@ + + if (!sglistarg) { + /* Map sglist from scsi layer to cmd packet */ +- if (tw_dev->srb[request_id]->use_sg == 0) { +- if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH) { +- command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]); +- command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH); +- if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) +- memcpy(tw_dev->generic_buffer_virt[request_id], tw_dev->srb[request_id]->request_buffer, tw_dev->srb[request_id]->request_bufflen); +- } else { +- buffaddr = twa_map_scsi_single_data(tw_dev, request_id); +- if (buffaddr == 0) +- goto out; +- +- command_packet->sg_list[0].address = TW_CPU_TO_SGL(buffaddr); +- command_packet->sg_list[0].length = cpu_to_le32(tw_dev->srb[request_id]->request_bufflen); +- } +- command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), 1)); + +- if (command_packet->sg_list[0].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) { +- TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2d, "Found unaligned address during execute scsi"); +- goto out; +- } +- } +- +- if (tw_dev->srb[request_id]->use_sg > 0) { +- if ((tw_dev->srb[request_id]->use_sg == 1) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) { +- if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) { +- struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; ++ if (scsi_sg_count(srb)) { ++ if ((scsi_sg_count(srb) == 1) && ++ (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) { ++ if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) { ++ struct scatterlist *sg = scsi_sglist(srb); + char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length); + kunmap_atomic(buf - sg->offset, KM_IRQ0); +@@ -1894,16 +1844,16 @@ + if (sg_count == 0) + goto out; + +- for (i = 0; i < sg_count; i++) { +- command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(&sglist[i])); +- command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(&sglist[i])); ++ scsi_for_each_sg(srb, sg, sg_count, i) { ++ command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(sg)); ++ command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(sg)); + if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) { + TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi"); + goto out; + } + } + } +- command_packet->sgl_entries__lunh = 
cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), tw_dev->srb[request_id]->use_sg)); ++ command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), scsi_sg_count(tw_dev->srb[request_id]))); + } + } else { + /* Internal cdb post */ +@@ -1933,7 +1883,7 @@ + + /* Update SG statistics */ + if (srb) { +- tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg; ++ tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]); + if (tw_dev->sgl_entries > tw_dev->max_sgl_entries) + tw_dev->max_sgl_entries = tw_dev->sgl_entries; + } +@@ -1952,16 +1902,13 @@ + /* This function completes an execute scsi operation */ + static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id) + { +- if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH && +- (tw_dev->srb[request_id]->sc_data_direction == DMA_FROM_DEVICE || +- tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)) { +- if (tw_dev->srb[request_id]->use_sg == 0) { +- memcpy(tw_dev->srb[request_id]->request_buffer, +- tw_dev->generic_buffer_virt[request_id], +- tw_dev->srb[request_id]->request_bufflen); +- } +- if (tw_dev->srb[request_id]->use_sg == 1) { +- struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; ++ struct scsi_cmnd *cmd = tw_dev->srb[request_id]; ++ ++ if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH && ++ (cmd->sc_data_direction == DMA_FROM_DEVICE || ++ cmd->sc_data_direction == DMA_BIDIRECTIONAL)) { ++ if (scsi_sg_count(cmd) == 1) { ++ struct scatterlist *sg = scsi_sglist(tw_dev->srb[request_id]); + char *buf; + unsigned long flags = 0; + local_irq_save(flags); +@@ -2018,16 +1965,8 @@ + static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id) + { + struct scsi_cmnd *cmd = tw_dev->srb[request_id]; +- struct pci_dev *pdev = tw_dev->tw_pci_dev; + +- switch(cmd->SCp.phase) { +- case TW_PHASE_SINGLE: +- pci_unmap_single(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL); +- break; +- case TW_PHASE_SGLIST: +- pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); +- break; +- } ++ scsi_dma_unmap(cmd); + } /* End twa_unmap_scsi_data() */ + + /* scsi_host_template initializer */ +diff -Nurb linux-2.6.22-570/drivers/scsi/3w-xxxx.c linux-2.6.22-591/drivers/scsi/3w-xxxx.c +--- linux-2.6.22-570/drivers/scsi/3w-xxxx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/3w-xxxx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1274,12 +1274,8 @@ + + dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data()\n"); + +- if (cmd->use_sg == 0) +- return 0; +- +- use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); +- +- if (use_sg == 0) { ++ use_sg = scsi_dma_map(cmd); ++ if (use_sg < 0) { + printk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data(): pci_map_sg() failed.\n"); + return 0; + } +@@ -1290,40 +1286,11 @@ + return use_sg; + } /* End tw_map_scsi_sg_data() */ + +-static u32 tw_map_scsi_single_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) +-{ +- dma_addr_t mapping; +- +- dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data()\n"); +- +- if (cmd->request_bufflen == 0) +- return 0; +- +- mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), offset_in_page(cmd->request_buffer), cmd->request_bufflen, DMA_BIDIRECTIONAL); +- +- if (mapping == 0) { +- printk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data(): pci_map_page() failed.\n"); +- return 0; +- } +- +- cmd->SCp.phase = TW_PHASE_SINGLE; +- cmd->SCp.have_data_in = mapping; +- +- return mapping; +-} /* 
End tw_map_scsi_single_data() */ +- + static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd) + { + dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n"); + +- switch(cmd->SCp.phase) { +- case TW_PHASE_SINGLE: +- pci_unmap_page(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL); +- break; +- case TW_PHASE_SGLIST: +- pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL); +- break; +- } ++ scsi_dma_unmap(cmd); + } /* End tw_unmap_scsi_data() */ + + /* This function will reset a device extension */ +@@ -1499,27 +1466,16 @@ + void *buf; + unsigned int transfer_len; + unsigned long flags = 0; ++ struct scatterlist *sg = scsi_sglist(cmd); + +- if (cmd->use_sg) { +- struct scatterlist *sg = +- (struct scatterlist *)cmd->request_buffer; + local_irq_save(flags); + buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + transfer_len = min(sg->length, len); +- } else { +- buf = cmd->request_buffer; +- transfer_len = min(cmd->request_bufflen, len); +- } + + memcpy(buf, data, transfer_len); + +- if (cmd->use_sg) { +- struct scatterlist *sg; +- +- sg = (struct scatterlist *)cmd->request_buffer; + kunmap_atomic(buf - sg->offset, KM_IRQ0); + local_irq_restore(flags); +- } + } + + /* This function is called by the isr to complete an inquiry command */ +@@ -1764,19 +1720,20 @@ + { + TW_Command *command_packet; + unsigned long command_que_value; +- u32 lba = 0x0, num_sectors = 0x0, buffaddr = 0x0; ++ u32 lba = 0x0, num_sectors = 0x0; + int i, use_sg; + struct scsi_cmnd *srb; +- struct scatterlist *sglist; ++ struct scatterlist *sglist, *sg; + + dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write()\n"); + +- if (tw_dev->srb[request_id]->request_buffer == NULL) { ++ srb = tw_dev->srb[request_id]; ++ ++ sglist = scsi_sglist(srb); ++ if (!sglist) { + printk(KERN_WARNING "3w-xxxx: tw_scsiop_read_write(): Request buffer NULL.\n"); + return 1; + } +- sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer; +- srb = tw_dev->srb[request_id]; + + /* Initialize command packet */ + command_packet = (TW_Command *)tw_dev->command_packet_virtual_address[request_id]; +@@ -1819,33 +1776,18 @@ + command_packet->byte8.io.lba = lba; + command_packet->byte6.block_count = num_sectors; + +- /* Do this if there are no sg list entries */ +- if (tw_dev->srb[request_id]->use_sg == 0) { +- dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write(): SG = 0\n"); +- buffaddr = tw_map_scsi_single_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); +- if (buffaddr == 0) +- return 1; +- +- command_packet->byte8.io.sgl[0].address = buffaddr; +- command_packet->byte8.io.sgl[0].length = tw_dev->srb[request_id]->request_bufflen; +- command_packet->size+=2; +- } +- +- /* Do this if we have multiple sg list entries */ +- if (tw_dev->srb[request_id]->use_sg > 0) { + use_sg = tw_map_scsi_sg_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]); +- if (use_sg == 0) ++ if (!use_sg) + return 1; + +- for (i=0;i<use_sg;i++) { +- command_packet->byte8.io.sgl[i].address = sg_dma_address(&sglist[i]); +- command_packet->byte8.io.sgl[i].length = sg_dma_len(&sglist[i]); ++ scsi_for_each_sg(tw_dev->srb[request_id], sg, use_sg, i) { ++ command_packet->byte8.io.sgl[i].address = sg_dma_address(sg); ++ command_packet->byte8.io.sgl[i].length = sg_dma_len(sg); + command_packet->size+=2; + } +- } + + /* Update SG statistics */ +- tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg; ++ tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]); + if (tw_dev->sgl_entries > tw_dev->max_sgl_entries) + tw_dev->max_sgl_entries = 
tw_dev->sgl_entries; + +diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.c linux-2.6.22-591/drivers/scsi/53c700.c +--- linux-2.6.22-570/drivers/scsi/53c700.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c700.c 2007-12-21 15:36:12.000000000 -0500 +@@ -585,16 +585,8 @@ + struct NCR_700_command_slot *slot) + { + if(SCp->sc_data_direction != DMA_NONE && +- SCp->sc_data_direction != DMA_BIDIRECTIONAL) { +- if(SCp->use_sg) { +- dma_unmap_sg(hostdata->dev, SCp->request_buffer, +- SCp->use_sg, SCp->sc_data_direction); +- } else { +- dma_unmap_single(hostdata->dev, slot->dma_handle, +- SCp->request_bufflen, +- SCp->sc_data_direction); +- } +- } ++ SCp->sc_data_direction != DMA_BIDIRECTIONAL) ++ scsi_dma_unmap(SCp); + } + + STATIC inline void +@@ -661,7 +653,6 @@ + { + struct NCR_700_Host_Parameters *hostdata = + (struct NCR_700_Host_Parameters *)host->hostdata[0]; +- __u32 dcntl_extra = 0; + __u8 min_period; + __u8 min_xferp = (hostdata->chip710 ? NCR_710_MIN_XFERP : NCR_700_MIN_XFERP); + +@@ -686,13 +677,14 @@ + burst_disable = BURST_DISABLE; + break; + } +- dcntl_extra = COMPAT_700_MODE; ++ hostdata->dcntl_extra |= COMPAT_700_MODE; + +- NCR_700_writeb(dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(hostdata->dcntl_extra, host, DCNTL_REG); + NCR_700_writeb(burst_length | hostdata->dmode_extra, + host, DMODE_710_REG); +- NCR_700_writeb(burst_disable | (hostdata->differential ? +- DIFF : 0), host, CTEST7_REG); ++ NCR_700_writeb(burst_disable | hostdata->ctest7_extra | ++ (hostdata->differential ? DIFF : 0), ++ host, CTEST7_REG); + NCR_700_writeb(BTB_TIMER_DISABLE, host, CTEST0_REG); + NCR_700_writeb(FULL_ARBITRATION | ENABLE_PARITY | PARITY + | AUTO_ATN, host, SCNTL0_REG); +@@ -727,13 +719,13 @@ + * of spec: sync divider 2, async divider 3 */ + DEBUG(("53c700: sync 2 async 3\n")); + NCR_700_writeb(SYNC_DIV_2_0, host, SBCL_REG); +- NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock/2; + } else if(hostdata->clock > 50 && hostdata->clock <= 75) { + /* sync divider 1.5, async divider 3 */ + DEBUG(("53c700: sync 1.5 async 3\n")); + NCR_700_writeb(SYNC_DIV_1_5, host, SBCL_REG); +- NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock*2; + hostdata->sync_clock /= 3; + +@@ -741,18 +733,18 @@ + /* sync divider 1, async divider 2 */ + DEBUG(("53c700: sync 1 async 2\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); +- NCR_700_writeb(ASYNC_DIV_2_0 | dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(ASYNC_DIV_2_0 | hostdata->dcntl_extra, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock; + } else if(hostdata->clock > 25 && hostdata->clock <=37) { + /* sync divider 1, async divider 1.5 */ + DEBUG(("53c700: sync 1 async 1.5\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); +- NCR_700_writeb(ASYNC_DIV_1_5 | dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(ASYNC_DIV_1_5 | hostdata->dcntl_extra, host, DCNTL_REG); + hostdata->sync_clock = hostdata->clock; + } else { + DEBUG(("53c700: sync 1 async 1\n")); + NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG); +- NCR_700_writeb(ASYNC_DIV_1_0 | dcntl_extra, host, DCNTL_REG); ++ NCR_700_writeb(ASYNC_DIV_1_0 | hostdata->dcntl_extra, host, DCNTL_REG); + /* sync divider 1, async divider 1 */ + hostdata->sync_clock = hostdata->clock; + } +@@ -1263,12 +1255,11 @@ + host->host_no, pun, lun, 
NCR_700_condition[i], + NCR_700_phase[j], dsp - hostdata->pScript); + if(SCp != NULL) { +- scsi_print_command(SCp); ++ struct scatterlist *sg; + +- if(SCp->use_sg) { +- for(i = 0; i < SCp->use_sg + 1; i++) { +- printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, ((struct scatterlist *)SCp->request_buffer)[i].length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr); +- } ++ scsi_print_command(SCp); ++ scsi_for_each_sg(SCp, sg, scsi_sg_count(SCp) + 1, i) { ++ printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, sg->length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr); + } + } + NCR_700_internal_bus_reset(host); +@@ -1844,8 +1835,8 @@ + } + /* sanity check: some of the commands generated by the mid-layer + * have an eccentric idea of their sc_data_direction */ +- if(!SCp->use_sg && !SCp->request_bufflen +- && SCp->sc_data_direction != DMA_NONE) { ++ if(!scsi_sg_count(SCp) && !scsi_bufflen(SCp) && ++ SCp->sc_data_direction != DMA_NONE) { + #ifdef NCR_700_DEBUG + printk("53c700: Command"); + scsi_print_command(SCp); +@@ -1887,31 +1878,15 @@ + int i; + int sg_count; + dma_addr_t vPtr = 0; ++ struct scatterlist *sg; + __u32 count = 0; + +- if(SCp->use_sg) { +- sg_count = dma_map_sg(hostdata->dev, +- SCp->request_buffer, SCp->use_sg, +- direction); +- } else { +- vPtr = dma_map_single(hostdata->dev, +- SCp->request_buffer, +- SCp->request_bufflen, +- direction); +- count = SCp->request_bufflen; +- slot->dma_handle = vPtr; +- sg_count = 1; +- } +- +- +- for(i = 0; i < sg_count; i++) { ++ sg_count = scsi_dma_map(SCp); ++ BUG_ON(sg_count < 0); + +- if(SCp->use_sg) { +- struct scatterlist *sg = SCp->request_buffer; +- +- vPtr = sg_dma_address(&sg[i]); +- count = sg_dma_len(&sg[i]); +- } ++ scsi_for_each_sg(SCp, sg, sg_count, i) { ++ vPtr = sg_dma_address(sg); ++ count = sg_dma_len(sg); + + slot->SG[i].ins = bS_to_host(move_ins | count); + DEBUG((" scatter block %d: move %d[%08x] from 0x%lx\n", +diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.h linux-2.6.22-591/drivers/scsi/53c700.h +--- linux-2.6.22-570/drivers/scsi/53c700.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c700.h 2007-12-21 15:36:12.000000000 -0500 +@@ -177,6 +177,7 @@ + __u8 state; + #define NCR_700_FLAG_AUTOSENSE 0x01 + __u8 flags; ++ __u8 pad1[2]; /* Needed for m68k where min alignment is 2 bytes */ + int tag; + __u32 resume_offset; + struct scsi_cmnd *cmnd; +@@ -196,6 +197,8 @@ + void __iomem *base; /* the base for the port (copied to host) */ + struct device *dev; + __u32 dmode_extra; /* adjustable bus settings */ ++ __u32 dcntl_extra; /* adjustable bus settings */ ++ __u32 ctest7_extra; /* adjustable bus settings */ + __u32 differential:1; /* if we are differential */ + #ifdef CONFIG_53C700_LE_ON_BE + /* This option is for HP only. 
Set it if your chip is wired for +@@ -352,6 +355,7 @@ + #define SEL_TIMEOUT_DISABLE 0x10 /* 710 only */ + #define DFP 0x08 + #define EVP 0x04 ++#define CTEST7_TT1 0x02 + #define DIFF 0x01 + #define CTEST6_REG 0x1A + #define TEMP_REG 0x1C +@@ -385,6 +389,7 @@ + #define SOFTWARE_RESET 0x01 + #define COMPAT_700_MODE 0x01 + #define SCRPTS_16BITS 0x20 ++#define EA_710 0x20 + #define ASYNC_DIV_2_0 0x00 + #define ASYNC_DIV_1_5 0x40 + #define ASYNC_DIV_1_0 0x80 +diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.c linux-2.6.22-591/drivers/scsi/53c7xx.c +--- linux-2.6.22-570/drivers/scsi/53c7xx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c7xx.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,6102 +0,0 @@ +-/* +- * 53c710 driver. Modified from Drew Eckhardts driver +- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] +- * Check out PERM_OPTIONS and EXPECTED_CLOCK, which may be defined in the +- * relevant machine specific file (eg. mvme16x.[ch], amiga7xx.[ch]). +- * There are also currently some defines at the top of 53c7xx.scr. +- * The chip type is #defined in script_asm.pl, as well as the Makefile. +- * Host scsi ID expected to be 7 - see NCR53c7x0_init(). +- * +- * I have removed the PCI code and some of the 53c8xx specific code - +- * simply to make this file smaller and easier to manage. +- * +- * MVME16x issues: +- * Problems trying to read any chip registers in NCR53c7x0_init(), as they +- * may never have been set by 16xBug (eg. If kernel has come in over tftp). +- */ +- +-/* +- * Adapted for Linux/m68k Amiga platforms for the A4000T/A4091 and +- * WarpEngine SCSI controllers. +- * By Alan Hourihane +- * Thanks to Richard Hirst for making it possible with the MVME additions +- */ +- +-/* +- * 53c710 rev 0 doesn't support add with carry. Rev 1 and 2 does. To +- * overcome this problem you can define FORCE_DSA_ALIGNMENT, which ensures +- * that the DSA address is always xxxxxx00. If disconnection is not allowed, +- * then the script only ever tries to add small (< 256) positive offsets to +- * DSA, so lack of carry isn't a problem. FORCE_DSA_ALIGNMENT can, of course, +- * be defined for all chip revisions at a small cost in memory usage. +- */ +- +-#define FORCE_DSA_ALIGNMENT +- +-/* +- * Selection timer does not always work on the 53c710, depending on the +- * timing at the last disconnect, if this is a problem for you, try +- * using validids as detailed below. +- * +- * Options for the NCR7xx driver +- * +- * noasync:0 - disables sync and asynchronous negotiation +- * nosync:0 - disables synchronous negotiation (does async) +- * nodisconnect:0 - disables disconnection +- * validids:0x?? - Bitmask field that disallows certain ID's. +- * - e.g. 0x03 allows ID 0,1 +- * - 0x1F allows ID 0,1,2,3,4 +- * opthi:n - replace top word of options with 'n' +- * optlo:n - replace bottom word of options with 'n' +- * - ALWAYS SPECIFY opthi THEN optlo <<<<<<<<<< +- */ +- +-/* +- * PERM_OPTIONS are driver options which will be enabled for all NCR boards +- * in the system at driver initialization time. +- * +- * Don't THINK about touching these in PERM_OPTIONS : +- * OPTION_MEMORY_MAPPED +- * 680x0 doesn't have an IO map! +- * +- * OPTION_DEBUG_TEST1 +- * Test 1 does bus mastering and interrupt tests, which will help weed +- * out brain damaged main boards. +- * +- * Other PERM_OPTIONS settings are listed below. 
Note the actual options +- * required are set in the relevant file (mvme16x.c, amiga7xx.c, etc): +- * +- * OPTION_NO_ASYNC +- * Don't negotiate for asynchronous transfers on the first command +- * when OPTION_ALWAYS_SYNCHRONOUS is set. Useful for dain bramaged +- * devices which do something bad rather than sending a MESSAGE +- * REJECT back to us like they should if they can't cope. +- * +- * OPTION_SYNCHRONOUS +- * Enable support for synchronous transfers. Target negotiated +- * synchronous transfers will be responded to. To initiate +- * a synchronous transfer request, call +- * +- * request_synchronous (hostno, target) +- * +- * from within KGDB. +- * +- * OPTION_ALWAYS_SYNCHRONOUS +- * Negotiate for synchronous transfers with every target after +- * driver initialization or a SCSI bus reset. This is a bit dangerous, +- * since there are some dain bramaged SCSI devices which will accept +- * SDTR messages but keep talking asynchronously. +- * +- * OPTION_DISCONNECT +- * Enable support for disconnect/reconnect. To change the +- * default setting on a given host adapter, call +- * +- * request_disconnect (hostno, allow) +- * +- * where allow is non-zero to allow, 0 to disallow. +- * +- * If you really want to run 10MHz FAST SCSI-II transfers, you should +- * know that the NCR driver currently ignores parity information. Most +- * systems do 5MHz SCSI fine. I've seen a lot that have problems faster +- * than 8MHz. To play it safe, we only request 5MHz transfers. +- * +- * If you'd rather get 10MHz transfers, edit sdtr_message and change +- * the fourth byte from 50 to 25. +- */ +- +-/* +- * Sponsored by +- * iX Multiuser Multitasking Magazine +- * Hannover, Germany +- * hm@ix.de +- * +- * Copyright 1993, 1994, 1995 Drew Eckhardt +- * Visionary Computing +- * (Unix and Linux consulting and custom programming) +- * drew@PoohSticks.ORG +- * +1 (303) 786-7975 +- * +- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. +- * +- * For more information, please consult +- * +- * NCR53C810 +- * SCSI I/O Processor +- * Programmer's Guide +- * +- * NCR 53C810 +- * PCI-SCSI I/O Processor +- * Data Manual +- * +- * NCR 53C810/53C820 +- * PCI-SCSI I/O Processor Design In Guide +- * +- * For literature on Symbios Logic Inc. formerly NCR, SCSI, +- * and Communication products please call (800) 334-5454 or +- * (719) 536-3300. +- * +- * PCI BIOS Specification Revision +- * PCI Local Bus Specification +- * PCI System Design Guide +- * +- * PCI Special Interest Group +- * M/S HF3-15A +- * 5200 N.E. Elam Young Parkway +- * Hillsboro, Oregon 97124-6497 +- * +1 (503) 696-2000 +- * +1 (800) 433-5177 +- */ +- +-/* +- * Design issues : +- * The cumulative latency needed to propagate a read/write request +- * through the file system, buffer cache, driver stacks, SCSI host, and +- * SCSI device is ultimately the limiting factor in throughput once we +- * have a sufficiently fast host adapter. +- * +- * So, to maximize performance we want to keep the ratio of latency to data +- * transfer time to a minimum by +- * 1. Minimizing the total number of commands sent (typical command latency +- * including drive and bus mastering host overhead is as high as 4.5ms) +- * to transfer a given amount of data. +- * +- * This is accomplished by placing no arbitrary limit on the number +- * of scatter/gather buffers supported, since we can transfer 1K +- * per scatter/gather buffer without Eric's cluster patches, +- * 4K with. +- * +- * 2. 
Minimizing the number of fatal interrupts serviced, since +- * fatal interrupts halt the SCSI I/O processor. Basically, +- * this means offloading the practical maximum amount of processing +- * to the SCSI chip. +- * +- * On the NCR53c810/820/720, this is accomplished by using +- * interrupt-on-the-fly signals when commands complete, +- * and only handling fatal errors and SDTR / WDTR messages +- * in the host code. +- * +- * On the NCR53c710, interrupts are generated as on the NCR53c8x0, +- * only the lack of a interrupt-on-the-fly facility complicates +- * things. Also, SCSI ID registers and commands are +- * bit fielded rather than binary encoded. +- * +- * On the NCR53c700 and NCR53c700-66, operations that are done via +- * indirect, table mode on the more advanced chips must be +- * replaced by calls through a jump table which +- * acts as a surrogate for the DSA. Unfortunately, this +- * will mean that we must service an interrupt for each +- * disconnect/reconnect. +- * +- * 3. Eliminating latency by pipelining operations at the different levels. +- * +- * This driver allows a configurable number of commands to be enqueued +- * for each target/lun combination (experimentally, I have discovered +- * that two seems to work best) and will ultimately allow for +- * SCSI-II tagged queuing. +- * +- * +- * Architecture : +- * This driver is built around a Linux queue of commands waiting to +- * be executed, and a shared Linux/NCR array of commands to start. Commands +- * are transferred to the array by the run_process_issue_queue() function +- * which is called whenever a command completes. +- * +- * As commands are completed, the interrupt routine is triggered, +- * looks for commands in the linked list of completed commands with +- * valid status, removes these commands from a list of running commands, +- * calls the done routine, and flags their target/luns as not busy. +- * +- * Due to limitations in the intelligence of the NCR chips, certain +- * concessions are made. In many cases, it is easier to dynamically +- * generate/fix-up code rather than calculate on the NCR at run time. +- * So, code is generated or fixed up for +- * +- * - Handling data transfers, using a variable number of MOVE instructions +- * interspersed with CALL MSG_IN, WHEN MSGIN instructions. +- * +- * The DATAIN and DATAOUT routines are separate, so that an incorrect +- * direction can be trapped, and space isn't wasted. +- * +- * It may turn out that we're better off using some sort +- * of table indirect instruction in a loop with a variable +- * sized table on the NCR53c710 and newer chips. +- * +- * - Checking for reselection (NCR53c710 and better) +- * +- * - Handling the details of SCSI context switches (NCR53c710 and better), +- * such as reprogramming appropriate synchronous parameters, +- * removing the dsa structure from the NCR's queue of outstanding +- * commands, etc. 
+- * +- */ +- +-#include +- +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_AMIGA +-#include +-#include +-#include +- +-#define BIG_ENDIAN +-#define NO_IO_SPACE +-#endif +- +-#ifdef CONFIG_MVME16x +-#include +- +-#define BIG_ENDIAN +-#define NO_IO_SPACE +-#define VALID_IDS +-#endif +- +-#ifdef CONFIG_BVME6000 +-#include +- +-#define BIG_ENDIAN +-#define NO_IO_SPACE +-#define VALID_IDS +-#endif +- +-#include "scsi.h" +-#include +-#include +-#include +-#include "53c7xx.h" +-#include +-#include +- +-#ifdef NO_IO_SPACE +-/* +- * The following make the definitions in 53c7xx.h (write8, etc) smaller, +- * we don't have separate i/o space anyway. +- */ +-#undef inb +-#undef outb +-#undef inw +-#undef outw +-#undef inl +-#undef outl +-#define inb(x) 1 +-#define inw(x) 1 +-#define inl(x) 1 +-#define outb(x,y) 1 +-#define outw(x,y) 1 +-#define outl(x,y) 1 +-#endif +- +-static int check_address (unsigned long addr, int size); +-static void dump_events (struct Scsi_Host *host, int count); +-static Scsi_Cmnd * return_outstanding_commands (struct Scsi_Host *host, +- int free, int issue); +-static void hard_reset (struct Scsi_Host *host); +-static void ncr_scsi_reset (struct Scsi_Host *host); +-static void print_lots (struct Scsi_Host *host); +-static void set_synchronous (struct Scsi_Host *host, int target, int sxfer, +- int scntl3, int now_connected); +-static int datapath_residual (struct Scsi_Host *host); +-static const char * sbcl_to_phase (int sbcl); +-static void print_progress (Scsi_Cmnd *cmd); +-static void print_queues (struct Scsi_Host *host); +-static void process_issue_queue (unsigned long flags); +-static int shutdown (struct Scsi_Host *host); +-static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int result); +-static int disable (struct Scsi_Host *host); +-static int NCR53c7xx_run_tests (struct Scsi_Host *host); +-static irqreturn_t NCR53c7x0_intr(int irq, void *dev_id); +-static void NCR53c7x0_intfly (struct Scsi_Host *host); +-static int ncr_halt (struct Scsi_Host *host); +-static void intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd +- *cmd); +-static void intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd); +-static void print_dsa (struct Scsi_Host *host, u32 *dsa, +- const char *prefix); +-static int print_insn (struct Scsi_Host *host, const u32 *insn, +- const char *prefix, int kernel); +- +-static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd); +-static void NCR53c7x0_init_fixup (struct Scsi_Host *host); +-static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct +- NCR53c7x0_cmd *cmd); +-static void NCR53c7x0_soft_reset (struct Scsi_Host *host); +- +-/* Size of event list (per host adapter) */ +-static int track_events = 0; +-static struct Scsi_Host *first_host = NULL; /* Head of list of NCR boards */ +-static struct scsi_host_template *the_template = NULL; +- +-/* NCR53c710 script handling code */ +- +-#include "53c7xx_d.h" +-#ifdef A_int_debug_sync +-#define DEBUG_SYNC_INTR A_int_debug_sync +-#endif +-int NCR53c7xx_script_len = sizeof (SCRIPT); +-int NCR53c7xx_dsa_len = A_dsa_end + Ent_dsa_zero - Ent_dsa_code_template; +-#ifdef FORCE_DSA_ALIGNMENT +-int CmdPageStart = (0 - Ent_dsa_zero - sizeof(struct NCR53c7x0_cmd)) & 0xff; +-#endif +- +-static char *setup_strings[] = +- {"","","","","","","",""}; +- +-#define MAX_SETUP_STRINGS ARRAY_SIZE(setup_strings) 
+-#define SETUP_BUFFER_SIZE 200 +-static char setup_buffer[SETUP_BUFFER_SIZE]; +-static char setup_used[MAX_SETUP_STRINGS]; +- +-void ncr53c7xx_setup (char *str, int *ints) +-{ +- int i; +- char *p1, *p2; +- +- p1 = setup_buffer; +- *p1 = '\0'; +- if (str) +- strncpy(p1, str, SETUP_BUFFER_SIZE - strlen(setup_buffer)); +- setup_buffer[SETUP_BUFFER_SIZE - 1] = '\0'; +- p1 = setup_buffer; +- i = 0; +- while (*p1 && (i < MAX_SETUP_STRINGS)) { +- p2 = strchr(p1, ','); +- if (p2) { +- *p2 = '\0'; +- if (p1 != p2) +- setup_strings[i] = p1; +- p1 = p2 + 1; +- i++; +- } +- else { +- setup_strings[i] = p1; +- break; +- } +- } +- for (i=0; i<MAX_SETUP_STRINGS; i++) +- setup_used[i] = 0; +-} +- +-/* check_setup_strings() returns index if key found, 0 if not */ +- +-static int check_setup_strings(char *key, int *flags, int *val, char *buf) +-{ +-int x; +-char *cp; +- +- for (x=0; x<MAX_SETUP_STRINGS; x++) { +- if (setup_used[x]) +- continue; +- if (!strncmp(setup_strings[x], key, strlen(key))) +- break; +- if (!strncmp(setup_strings[x], "next", strlen("next"))) +- return 0; +- } +- if (x == MAX_SETUP_STRINGS) +- return 0; +- setup_used[x] = 1; +- cp = setup_strings[x] + strlen(key); +- *flags = 0; +- if (*cp == ':') { +- cp++; +- *flags |= 1; +- } +- if (*cp == '=') { +- cp++; +- *flags |= 2; +- } +- *val = -1; +- if ((*cp >= '0') && (*cp <= '9')) { +- *val = simple_strtoul(cp,NULL,0); +- } +- return ++x; +-} +- +- +- +-/* +- * KNOWN BUGS : +- * - There is some sort of conflict when the PPP driver is compiled with +- * support for 16 channels? +- * +- * - On systems which predate the 1.3.x initialization order change, +- * the NCR driver will cause Cannot get free page messages to appear. +- * These are harmless, but I don't know of an easy way to avoid them. +- * +- * - With OPTION_DISCONNECT, on two systems under unknown circumstances, +- * we get a PHASE MISMATCH with DSA set to zero (suggests that we +- * are occurring somewhere in the reselection code) where +- * DSP=some value DCMD|DBC=same value. +- * +- * Closer inspection suggests that we may be trying to execute +- * some portion of the DSA? +- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO) +- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO) +- * scsi0 : no current command : unexpected phase MSGIN. +- * DSP=0x1c46cc, DCMD|DBC=0x1c46ac, DSA=0x0 +- * DSPS=0x0, TEMP=0x1c3e70, DMODE=0x80 +- * scsi0 : DSP-> +- * 001c46cc : 0x001c46cc 0x00000000 +- * 001c46d4 : 0x001c5ea0 0x000011f8 +- * +- * Changed the print code in the phase_mismatch handler so +- * that we call print_lots to try to diagnose this. +- * +- */ +- +-/* +- * Possible future direction of architecture for max performance : +- * +- * We're using a single start array for the NCR chip. This is +- * sub-optimal, because we cannot add a command which would conflict with +- * an executing command to this start queue, and therefore must insert the +- * next command for a given I/T/L combination after the first has completed; +- * incurring our interrupt latency between SCSI commands. +- * +- * To allow further pipelining of the NCR and host CPU operation, we want +- * to set things up so that immediately on termination of a command destined +- * for a given LUN, we get that LUN busy again. +- * +- * To do this, we need to add a 32 bit pointer to which is jumped to +- * on completion of a command. If no new command is available, this +- * would point to the usual DSA issue queue select routine. +- * +- * If one were, it would point to a per-NCR53c7x0_cmd select routine +- * which starts execution immediately, inserting the command at the head +- * of the start queue if the NCR chip is selected or reselected. +- * +- * We would change so that we keep a list of outstanding commands +- * for each unit, rather than a single running_list. We'd insert +- * a new command into the right running list; if the NCR didn't +- * have something running for that yet, we'd put it in the +- * start queue as well. Some magic needs to happen to handle the +- * race condition between the first command terminating before the +- * new one is written. +- * +- * Potential for profiling : +- * Call do_gettimeofday(struct timeval *tv) to get 800ns resolution. 
+- */
+-
+-
+-/*
+- * TODO :
+- * 1.  To support WIDE transfers, not much needs to happen.  We
+- *	should do CHMOVE instructions instead of MOVEs when
+- *	we have scatter/gather segments of uneven length.  When
+- *	we do this, we need to handle the case where we disconnect
+- *	between segments.
+- *
+- * 2.  Currently, when Icky things happen we do a FATAL().  Instead,
+- *	we want to do an integrity check on the parts of the NCR hostdata
+- *	structure which were initialized at boot time; FATAL() if that
+- *	fails, and otherwise try to recover.  Keep track of how many
+- *	times this has happened within a single SCSI command; if it
+- *	gets excessive, then FATAL().
+- *
+- * 3.  Parity checking is currently disabled, and a few things should
+- *	happen here now that we support synchronous SCSI transfers :
+- *	1.  On soft-reset, we should set the EPC (Enable Parity Checking)
+- *	    and AAP (Assert SATN/ on parity error) bits in SCNTL0.
+- *
+- *	2.  We should enable the parity interrupt in the SIEN0 register.
+- *
+- *	3.  intr_phase_mismatch() needs to believe that message out is
+- *	    always an "acceptable" phase to have a mismatch in.  If
+- *	    the old phase was MSG_IN, we should send a MESSAGE PARITY
+- *	    error.  If the old phase was something else, we should send
+- *	    an INITIATOR_DETECTED_ERROR message.  Note that this could
+- *	    cause a RESTORE POINTERS message; so we should handle that
+- *	    correctly first.  Instead, we should probably do an
+- *	    initiator_abort.
+- *
+- * 4.  MPEE bit of CTEST4 should be set so we get interrupted if
+- *	we detect an error.
+- *
+- *
+- * 5.  The initial code has been tested on the NCR53c810.  I don't
+- *	have access to NCR53c700, 700-66 (Forex boards), NCR53c710
+- *	(NCR Pentium systems), NCR53c720, NCR53c820, or NCR53c825 boards to
+- *	finish development on those platforms.
+- *
+- *	NCR53c820/825/720 - need to add wide transfer support, including WDTR
+- *		negotiation, programming of wide transfer capabilities
+- *		on reselection and table indirect selection.
+- *
+- *	NCR53c710 - need to add fatal interrupt or GEN code for
+- *		command completion signaling.  Need to modify all
+- *		SDID, SCID, etc. registers, and table indirect select code
+- *		since these use bit fielded (ie 1<<target) instead of
+- *		binary (ie target) values.
+- */
+-
+-static inline struct NCR53c7x0_cmd *
+-issue_to_cmd (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+-	u32 *issue)
+-{
+-    return (issue[0] != hostdata->NOP_insn) ?
+-    /*
+-     * If the IF TRUE bit is set, it's a JUMP instruction.  The
+-     * operand is a bus pointer to the dsa_begin routine for this DSA.  The
+-     * dsa field of the NCR53c7x0_cmd structure starts with the
+-     * DSA code template.  By converting to a virtual address,
+-     * subtracting the code template size, and offset of the
+-     * dsa field, we end up with a pointer to the start of the
+-     * structure (alternatively, we could use the
+-     * dsa_cmnd field, an anachronism from when we weren't
+-     * sure what the relationship between the NCR structures
+-     * and host structures were going to be.
+-     */
+-	(struct NCR53c7x0_cmd *) ((char *) bus_to_virt (issue[1]) -
+-	    (hostdata->E_dsa_code_begin - hostdata->E_dsa_code_template) -
+-	    offsetof(struct NCR53c7x0_cmd, dsa))
+-	/* If the IF TRUE bit is not set, it's a NOP */
+-	: NULL;
+-}
+-
+-
+-/*
+- * FIXME: we should junk these, in favor of synchronous_want and
+- * wide_want in the NCR53c7x0_hostdata structure.
+- */
+-
+-/* Template for "preferred" synchronous transfer parameters.
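
issue_to_cmd() recovers a structure pointer from an address that points into the middle of the structure, which is the usual container-of arithmetic. A toy, self-contained reconstruction (struct toy_cmd, TEMPLATE_OFFSET, and plain pointers are invented stand-ins for the real structure, the template offsets, and bus_to_virt):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct toy_cmd {
    int flags;
    uint32_t dsa[16];       /* dsa code template copy + data fields */
};

#define TEMPLATE_OFFSET 8   /* pretend dsa_begin is 8 bytes into dsa[] */

int main(void)
{
    struct toy_cmd cmd;
    /* The JUMP operand the chip sees: address of dsa_begin inside dsa[]. */
    char *operand = (char *) cmd.dsa + TEMPLATE_OFFSET;

    /* Recover the containing structure exactly as issue_to_cmd() does:
     * subtract the code offset, then the offset of the dsa field. */
    struct toy_cmd *recovered = (struct toy_cmd *)
        (operand - TEMPLATE_OFFSET - offsetof(struct toy_cmd, dsa));

    printf("%s\n", recovered == &cmd ? "recovered" : "wrong");
    return 0;
}
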
*/ +- +-static const unsigned char sdtr_message[] = { +-#ifdef CONFIG_SCSI_NCR53C7xx_FAST +- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 25 /* *4ns */, 8 /* off */ +-#else +- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 50 /* *4ns */, 8 /* off */ +-#endif +-}; +- +-/* Template to request asynchronous transfers */ +- +-static const unsigned char async_message[] = { +- EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 0, 0 /* asynchronous */ +-}; +- +-/* Template for "preferred" WIDE transfer parameters */ +- +-static const unsigned char wdtr_message[] = { +- EXTENDED_MESSAGE, 2 /* length */, EXTENDED_WDTR, 1 /* 2^1 bytes */ +-}; +- +-#if 0 +-/* +- * Function : struct Scsi_Host *find_host (int host) +- * +- * Purpose : KGDB support function which translates a host number +- * to a host structure. +- * +- * Inputs : host - number of SCSI host +- * +- * Returns : NULL on failure, pointer to host structure on success. +- */ +- +-static struct Scsi_Host * +-find_host (int host) { +- struct Scsi_Host *h; +- for (h = first_host; h && h->host_no != host; h = h->next); +- if (!h) { +- printk (KERN_ALERT "scsi%d not found\n", host); +- return NULL; +- } else if (h->hostt != the_template) { +- printk (KERN_ALERT "scsi%d is not a NCR board\n", host); +- return NULL; +- } +- return h; +-} +- +-#if 0 +-/* +- * Function : request_synchronous (int host, int target) +- * +- * Purpose : KGDB interface which will allow us to negotiate for +- * synchronous transfers. This ill be replaced with a more +- * integrated function; perhaps a new entry in the scsi_host +- * structure, accessible via an ioctl() or perhaps /proc/scsi. +- * +- * Inputs : host - number of SCSI host; target - number of target. +- * +- * Returns : 0 when negotiation has been setup for next SCSI command, +- * -1 on failure. +- */ +- +-static int +-request_synchronous (int host, int target) { +- struct Scsi_Host *h; +- struct NCR53c7x0_hostdata *hostdata; +- unsigned long flags; +- if (target < 0) { +- printk (KERN_ALERT "target %d is bogus\n", target); +- return -1; +- } +- if (!(h = find_host (host))) +- return -1; +- else if (h->this_id == target) { +- printk (KERN_ALERT "target %d is host ID\n", target); +- return -1; +- } +- else if (target >= h->max_id) { +- printk (KERN_ALERT "target %d exceeds maximum of %d\n", target, +- h->max_id); +- return -1; +- } +- hostdata = (struct NCR53c7x0_hostdata *)h->hostdata[0]; +- +- local_irq_save(flags); +- if (hostdata->initiate_sdtr & (1 << target)) { +- local_irq_restore(flags); +- printk (KERN_ALERT "target %d already doing SDTR\n", target); +- return -1; +- } +- hostdata->initiate_sdtr |= (1 << target); +- local_irq_restore(flags); +- return 0; +-} +-#endif +- +-/* +- * Function : request_disconnect (int host, int on_or_off) +- * +- * Purpose : KGDB support function, tells us to allow or disallow +- * disconnections. +- * +- * Inputs : host - number of SCSI host; on_or_off - non-zero to allow, +- * zero to disallow. +- * +- * Returns : 0 on success, * -1 on failure. 
+- */ +- +-static int +-request_disconnect (int host, int on_or_off) { +- struct Scsi_Host *h; +- struct NCR53c7x0_hostdata *hostdata; +- if (!(h = find_host (host))) +- return -1; +- hostdata = (struct NCR53c7x0_hostdata *) h->hostdata[0]; +- if (on_or_off) +- hostdata->options |= OPTION_DISCONNECT; +- else +- hostdata->options &= ~OPTION_DISCONNECT; +- return 0; +-} +-#endif +- +-/* +- * Function : static void NCR53c7x0_driver_init (struct Scsi_Host *host) +- * +- * Purpose : Initialize internal structures, as required on startup, or +- * after a SCSI bus reset. +- * +- * Inputs : host - pointer to this host adapter's structure +- */ +- +-static void +-NCR53c7x0_driver_init (struct Scsi_Host *host) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int i, j; +- u32 *ncrcurrent; +- +- for (i = 0; i < 16; ++i) { +- hostdata->request_sense[i] = 0; +- for (j = 0; j < 8; ++j) +- hostdata->busy[i][j] = 0; +- set_synchronous (host, i, /* sxfer */ 0, hostdata->saved_scntl3, 0); +- } +- hostdata->issue_queue = NULL; +- hostdata->running_list = hostdata->finished_queue = +- hostdata->ncrcurrent = NULL; +- for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; +- i < host->can_queue; ++i, ncrcurrent += 2) { +- ncrcurrent[0] = hostdata->NOP_insn; +- ncrcurrent[1] = 0xdeadbeef; +- } +- ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | DBC_TCI_TRUE; +- ncrcurrent[1] = (u32) virt_to_bus (hostdata->script) + +- hostdata->E_wait_reselect; +- hostdata->reconnect_dsa_head = 0; +- hostdata->addr_reconnect_dsa_head = (u32) +- virt_to_bus((void *) &(hostdata->reconnect_dsa_head)); +- hostdata->expecting_iid = 0; +- hostdata->expecting_sto = 0; +- if (hostdata->options & OPTION_ALWAYS_SYNCHRONOUS) +- hostdata->initiate_sdtr = 0xffff; +- else +- hostdata->initiate_sdtr = 0; +- hostdata->talked_to = 0; +- hostdata->idle = 1; +-} +- +-/* +- * Function : static int clock_to_ccf_710 (int clock) +- * +- * Purpose : Return the clock conversion factor for a given SCSI clock. +- * +- * Inputs : clock - SCSI clock expressed in Hz. +- * +- * Returns : ccf on success, -1 on failure. +- */ +- +-static int +-clock_to_ccf_710 (int clock) { +- if (clock <= 16666666) +- return -1; +- if (clock <= 25000000) +- return 2; /* Divide by 1.0 */ +- else if (clock <= 37500000) +- return 1; /* Divide by 1.5 */ +- else if (clock <= 50000000) +- return 0; /* Divide by 2.0 */ +- else if (clock <= 66000000) +- return 3; /* Divide by 3.0 */ +- else +- return -1; +-} +- +-/* +- * Function : static int NCR53c7x0_init (struct Scsi_Host *host) +- * +- * Purpose : initialize the internal structures for a given SCSI host +- * +- * Inputs : host - pointer to this host adapter's structure +- * +- * Preconditions : when this function is called, the chip_type +- * field of the hostdata structure MUST have been set. +- * +- * Returns : 0 on success, -1 on failure. +- */ +- +-int +-NCR53c7x0_init (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- int i, ccf; +- unsigned char revision; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- /* +- * There are some things which we need to know about in order to provide +- * a semblance of support. Print 'em if they aren't what we expect, +- * otherwise don't add to the noise. +- * +- * -1 means we don't know what to expect. 
+- */
+-    int val, flags;
+-    char buf[32];
+-    int expected_id = -1;
+-    int expected_clock = -1;
+-    int uninitialized = 0;
+-#ifdef NO_IO_SPACE
+-    int expected_mapping = OPTION_MEMORY_MAPPED;
+-#else
+-    int expected_mapping = OPTION_IO_MAPPED;
+-#endif
+-    for (i=0;i<7;i++)
+-	hostdata->valid_ids[i] = 1;	/* Default all ID's to scan */
+-
+-    /* Parse commandline flags */
+-    if (check_setup_strings("noasync",&flags,&val,buf))
+-    {
+-	hostdata->options |= OPTION_NO_ASYNC;
+-	hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+-    }
+-
+-    if (check_setup_strings("nosync",&flags,&val,buf))
+-    {
+-	hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+-    }
+-
+-    if (check_setup_strings("nodisconnect",&flags,&val,buf))
+-	hostdata->options &= ~OPTION_DISCONNECT;
+-
+-    if (check_setup_strings("validids",&flags,&val,buf))
+-    {
+-	for (i=0;i<7;i++)
+-	    hostdata->valid_ids[i] = val & (1<<i);
+-    }
+-
+-    if (check_setup_strings("opthi",&flags,&val,buf))
+-	hostdata->options = (long long)val << 32;
+-    if (check_setup_strings("optlo",&flags,&val,buf))
+-	hostdata->options |= val;
+-
+-    NCR53c7x0_local_setup(host);
+-    switch (hostdata->chip) {
+-    case 710:
+-    case 770:
+-	hostdata->dstat_sir_intr = NCR53c7x0_dstat_sir_intr;
+-	hostdata->init_save_regs = NULL;
+-	hostdata->dsa_fixup = NCR53c7xx_dsa_fixup;
+-	hostdata->init_fixup = NCR53c7x0_init_fixup;
+-	hostdata->soft_reset = NCR53c7x0_soft_reset;
+-	hostdata->run_tests = NCR53c7xx_run_tests;
+-	expected_clock = hostdata->scsi_clock;
+-	expected_id = 7;
+-	break;
+-    default:
+-	printk ("scsi%d : chip type of %d is not supported yet, detaching.\n",
+-	    host->host_no, hostdata->chip);
+-	scsi_unregister (host);
+-	return -1;
+-    }
+-
+-    /* Assign constants accessed by NCR */
+-    hostdata->NCR53c7xx_zero = 0;
+-    hostdata->NCR53c7xx_msg_reject = MESSAGE_REJECT;
+-    hostdata->NCR53c7xx_msg_abort = ABORT;
+-    hostdata->NCR53c7xx_msg_nop = NOP;
+-    hostdata->NOP_insn = (DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24;
+-    if (expected_mapping == -1 ||
+-	(hostdata->options & (OPTION_MEMORY_MAPPED)) !=
+-	(expected_mapping & OPTION_MEMORY_MAPPED))
+-	printk ("scsi%d : using %s mapped access\n", host->host_no,
+-	    (hostdata->options & OPTION_MEMORY_MAPPED) ? "memory" :
+-	    "io");
+-
+-    hostdata->dmode = (hostdata->chip == 700 || hostdata->chip == 70066) ?
+-	DMODE_REG_00 : DMODE_REG_10;
+-    hostdata->istat = ((hostdata->chip / 100) == 8) ?
+-	ISTAT_REG_800 : ISTAT_REG_700;
+-
+-/* We have to assume that this may be the first access to the chip, so
+- * we must set EA in DCNTL. */
+-
+-    NCR53c7x0_write8 (DCNTL_REG, DCNTL_10_EA|DCNTL_10_COM);
+-
+-
+-/* Only the ISTAT register is readable when the NCR is running, so make
+-   sure it's halted. */
+-    ncr_halt(host);
+-
+-/*
+- * XXX - the NCR53c700 uses bitfielded registers for SCID, SDID, etc,
+- *	as does the 710 with one bit per SCSI ID.  Conversely, the NCR
+- *	uses a normal, 3 bit binary representation of these values.
+- *
+- * Get the rest of the NCR documentation, and FIND OUT where the change
+- * was.
+- */
+-
+-#if 0
+-	/* May not be able to do this - chip may not have been set up yet */
+-	tmp = hostdata->this_id_mask = NCR53c7x0_read8(SCID_REG);
+-	for (host->this_id = 0; tmp != 1; tmp >>=1, ++host->this_id);
+-#else
+-	host->this_id = 7;
+-#endif
+-
+-/*
+- * Note : we should never encounter a board setup for ID0.  So,
+- * if we see ID0, assume that it was uninitialized and set it
+- * to the industry standard 7.
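
Because the option word is 64 bits wide while each boot-line value parses as a 32-bit integer, the opthi/optlo pair above assembles the word in two halves. A minimal sketch of that reassembly (combine_options is an invented name):

#include <stdint.h>
#include <stdio.h>

/* Rebuild a 64-bit option word from two 32-bit boot values, the way
 * the opthi/optlo handling does with (long long)val << 32. */
static uint64_t combine_options(uint32_t opthi, uint32_t optlo)
{
    return ((uint64_t) opthi << 32) | optlo;
}

int main(void)
{
    /* e.g. a boot line of "...,opthi:0x1,optlo:0x80000000" */
    printf("options = 0x%016llx\n",
           (unsigned long long) combine_options(0x1, 0x80000000u));
    return 0;
}
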
+- */ +- if (!host->this_id) { +- printk("scsi%d : initiator ID was %d, changing to 7\n", +- host->host_no, host->this_id); +- host->this_id = 7; +- hostdata->this_id_mask = 1 << 7; +- uninitialized = 1; +- }; +- +- if (expected_id == -1 || host->this_id != expected_id) +- printk("scsi%d : using initiator ID %d\n", host->host_no, +- host->this_id); +- +- /* +- * Save important registers to allow a soft reset. +- */ +- +- /* +- * CTEST7 controls cache snooping, burst mode, and support for +- * external differential drivers. This isn't currently used - the +- * default value may not be optimal anyway. +- * Even worse, it may never have been set up since reset. +- */ +- hostdata->saved_ctest7 = NCR53c7x0_read8(CTEST7_REG) & CTEST7_SAVE; +- revision = (NCR53c7x0_read8(CTEST8_REG) & 0xF0) >> 4; +- switch (revision) { +- case 1: revision = 0; break; +- case 2: revision = 1; break; +- case 4: revision = 2; break; +- case 8: revision = 3; break; +- default: revision = 255; break; +- } +- printk("scsi%d: Revision 0x%x\n",host->host_no,revision); +- +- if ((revision == 0 || revision == 255) && (hostdata->options & (OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS))) +- { +- printk ("scsi%d: Disabling sync working and disconnect/reselect\n", +- host->host_no); +- hostdata->options &= ~(OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS); +- } +- +- /* +- * On NCR53c700 series chips, DCNTL controls the SCSI clock divisor, +- * on 800 series chips, it allows for a totem-pole IRQ driver. +- * NOTE saved_dcntl currently overwritten in init function. +- * The value read here may be garbage anyway, MVME16x board at least +- * does not initialise chip if kernel arrived via tftp. +- */ +- +- hostdata->saved_dcntl = NCR53c7x0_read8(DCNTL_REG); +- +- /* +- * DMODE controls DMA burst length, and on 700 series chips, +- * 286 mode and bus width +- * NOTE: On MVME16x, chip may have been reset, so this could be a +- * power-on/reset default value. +- */ +- hostdata->saved_dmode = NCR53c7x0_read8(hostdata->dmode); +- +- /* +- * Now that burst length and enabled/disabled status is known, +- * clue the user in on it. +- */ +- +- ccf = clock_to_ccf_710 (expected_clock); +- +- for (i = 0; i < 16; ++i) +- hostdata->cmd_allocated[i] = 0; +- +- if (hostdata->init_save_regs) +- hostdata->init_save_regs (host); +- if (hostdata->init_fixup) +- hostdata->init_fixup (host); +- +- if (!the_template) { +- the_template = host->hostt; +- first_host = host; +- } +- +- /* +- * Linux SCSI drivers have always been plagued with initialization +- * problems - some didn't work with the BIOS disabled since they expected +- * initialization from it, some didn't work when the networking code +- * was enabled and registers got scrambled, etc. +- * +- * To avoid problems like this, in the future, we will do a soft +- * reset on the SCSI chip, taking it back to a sane state. 
+- */ +- +- hostdata->soft_reset (host); +- +-#if 1 +- hostdata->debug_count_limit = -1; +-#else +- hostdata->debug_count_limit = 1; +-#endif +- hostdata->intrs = -1; +- hostdata->resets = -1; +- memcpy ((void *) hostdata->synchronous_want, (void *) sdtr_message, +- sizeof (hostdata->synchronous_want)); +- +- NCR53c7x0_driver_init (host); +- +- if (request_irq(host->irq, NCR53c7x0_intr, IRQF_SHARED, "53c7xx", host)) +- { +- printk("scsi%d : IRQ%d not free, detaching\n", +- host->host_no, host->irq); +- goto err_unregister; +- } +- +- if ((hostdata->run_tests && hostdata->run_tests(host) == -1) || +- (hostdata->options & OPTION_DEBUG_TESTS_ONLY)) { +- /* XXX Should disable interrupts, etc. here */ +- goto err_free_irq; +- } else { +- if (host->io_port) { +- host->n_io_port = 128; +- if (!request_region (host->io_port, host->n_io_port, "ncr53c7xx")) +- goto err_free_irq; +- } +- } +- +- if (NCR53c7x0_read8 (SBCL_REG) & SBCL_BSY) { +- printk ("scsi%d : bus wedge, doing SCSI reset\n", host->host_no); +- hard_reset (host); +- } +- return 0; +- +- err_free_irq: +- free_irq(host->irq, NCR53c7x0_intr); +- err_unregister: +- scsi_unregister(host); +- return -1; +-} +- +-/* +- * Function : int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip, +- * unsigned long base, int io_port, int irq, int dma, long long options, +- * int clock); +- * +- * Purpose : initializes a NCR53c7,8x0 based on base addresses, +- * IRQ, and DMA channel. +- * +- * Inputs : tpnt - Template for this SCSI adapter, board - board level +- * product, chip - 710 +- * +- * Returns : 0 on success, -1 on failure. +- * +- */ +- +-int +-ncr53c7xx_init (struct scsi_host_template *tpnt, int board, int chip, +- unsigned long base, int io_port, int irq, int dma, +- long long options, int clock) +-{ +- struct Scsi_Host *instance; +- struct NCR53c7x0_hostdata *hostdata; +- char chip_str[80]; +- int script_len = 0, dsa_len = 0, size = 0, max_cmd_size = 0, +- schedule_size = 0, ok = 0; +- void *tmp; +- unsigned long page; +- +- switch (chip) { +- case 710: +- case 770: +- schedule_size = (tpnt->can_queue + 1) * 8 /* JUMP instruction size */; +- script_len = NCR53c7xx_script_len; +- dsa_len = NCR53c7xx_dsa_len; +- options |= OPTION_INTFLY; +- sprintf (chip_str, "NCR53c%d", chip); +- break; +- default: +- printk("scsi-ncr53c7xx : unsupported SCSI chip %d\n", chip); +- return -1; +- } +- +- printk("scsi-ncr53c7xx : %s at memory 0x%lx, io 0x%x, irq %d", +- chip_str, base, io_port, irq); +- if (dma == DMA_NONE) +- printk("\n"); +- else +- printk(", dma %d\n", dma); +- +- if (options & OPTION_DEBUG_PROBE_ONLY) { +- printk ("scsi-ncr53c7xx : probe only enabled, aborting initialization\n"); +- return -1; +- } +- +- max_cmd_size = sizeof(struct NCR53c7x0_cmd) + dsa_len + +- /* Size of dynamic part of command structure : */ +- 2 * /* Worst case : we don't know if we need DATA IN or DATA out */ +- ( 2 * /* Current instructions per scatter/gather segment */ +- tpnt->sg_tablesize + +- 3 /* Current startup / termination required per phase */ +- ) * +- 8 /* Each instruction is eight bytes */; +- +- /* Allocate fixed part of hostdata, dynamic part to hold appropriate +- SCSI SCRIPT(tm) plus a single, maximum-sized NCR53c7x0_cmd structure. 
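
To make the max_cmd_size computation above concrete, here is a worked instance with invented sizes for the fixed parts (the real values depend on the generated SCRIPTS and structure layout):

#include <stdio.h>

int main(void)
{
    int sizeof_cmd   = 148;  /* hypothetical sizeof(struct NCR53c7x0_cmd) */
    int dsa_len      = 120;  /* hypothetical NCR53c7xx_dsa_len            */
    int sg_tablesize = 16;   /* scatter/gather segments per command       */

    /* 2 directions (DATA IN and DATA OUT are both provisioned) times
     * (2 instructions per segment + 3 startup/termination per phase),
     * 8 bytes per SCRIPTS instruction: */
    int dynamic = 2 * (2 * sg_tablesize + 3) * 8;

    printf("dynamic part: %d bytes, total: %d bytes\n",
           dynamic, sizeof_cmd + dsa_len + dynamic);   /* 560 and 828 */
    return 0;
}
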
+- +- We need a NCR53c7x0_cmd structure for scan_scsis() when we are +- not loaded as a module, and when we're loaded as a module, we +- can't use a non-dynamically allocated structure because modules +- are vmalloc()'d, which can allow structures to cross page +- boundaries and breaks our physical/virtual address assumptions +- for DMA. +- +- So, we stick it past the end of our hostdata structure. +- +- ASSUMPTION : +- Regardless of how many simultaneous SCSI commands we allow, +- the probe code only executes a _single_ instruction at a time, +- so we only need one here, and don't need to allocate NCR53c7x0_cmd +- structures for each target until we are no longer in scan_scsis +- and kmalloc() has become functional (memory_init() happens +- after all device driver initialization). +- */ +- +- size = sizeof(struct NCR53c7x0_hostdata) + script_len + +- /* Note that alignment will be guaranteed, since we put the command +- allocated at probe time after the fixed-up SCSI script, which +- consists of 32 bit words, aligned on a 32 bit boundary. But +- on a 64bit machine we need 8 byte alignment for hostdata->free, so +- we add in another 4 bytes to take care of potential misalignment +- */ +- (sizeof(void *) - sizeof(u32)) + max_cmd_size + schedule_size; +- +- page = __get_free_pages(GFP_ATOMIC,1); +- if(page==0) +- { +- printk(KERN_ERR "53c7xx: out of memory.\n"); +- return -ENOMEM; +- } +-#ifdef FORCE_DSA_ALIGNMENT +- /* +- * 53c710 rev.0 doesn't have an add-with-carry instruction. +- * Ensure we allocate enough memory to force DSA alignment. +- */ +- size += 256; +-#endif +- /* Size should be < 8K, so we can fit it in two pages. */ +- if (size > 8192) { +- printk(KERN_ERR "53c7xx: hostdata > 8K\n"); +- return -1; +- } +- +- instance = scsi_register (tpnt, 4); +- if (!instance) +- { +- free_page(page); +- return -1; +- } +- instance->hostdata[0] = page; +- memset((void *)instance->hostdata[0], 0, 8192); +- cache_push(virt_to_phys((void *)(instance->hostdata[0])), 8192); +- cache_clear(virt_to_phys((void *)(instance->hostdata[0])), 8192); +- kernel_set_cachemode((void *)instance->hostdata[0], 8192, IOMAP_NOCACHE_SER); +- +- /* FIXME : if we ever support an ISA NCR53c7xx based board, we +- need to check if the chip is running in a 16 bit mode, and if so +- unregister it if it is past the 16M (0x1000000) mark */ +- +- hostdata = (struct NCR53c7x0_hostdata *)instance->hostdata[0]; +- hostdata->size = size; +- hostdata->script_count = script_len / sizeof(u32); +- hostdata->board = board; +- hostdata->chip = chip; +- +- /* +- * Being memory mapped is more desirable, since +- * +- * - Memory accesses may be faster. +- * +- * - The destination and source address spaces are the same for +- * all instructions, meaning we don't have to twiddle dmode or +- * any other registers. +- * +- * So, we try for memory mapped, and if we don't get it, +- * we go for port mapped, and that failing we tell the user +- * it can't work. 
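
The extra 256 bytes requested under FORCE_DSA_ALIGNMENT exist so the first command can be slid to an address whose low byte equals CmdPageStart, keeping the DSA area from carrying out of an 8-bit offset on rev.0 53c710 chips (which lack add-with-carry). A sketch of the rounding applied just below, with invented numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t cmd_page_start = 0x48;     /* hypothetical CmdPageStart    */
    uint32_t t = 0x100234f0;            /* hypothetical candidate addr  */

    if ((t & 0xff) > cmd_page_start)    /* already past this page's slot? */
        t += 255;                       /* move into the next 256B page   */
    t = (t & ~0xffu) + cmd_page_start;  /* snap the low byte              */

    printf("aligned to 0x%08x (low byte 0x%02x)\n",
           (unsigned) t, (unsigned) (t & 0xff));
    return 0;
}
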
+- */ +- +- if (base) { +- instance->base = base; +- /* Check for forced I/O mapping */ +- if (!(options & OPTION_IO_MAPPED)) { +- options |= OPTION_MEMORY_MAPPED; +- ok = 1; +- } +- } else { +- options &= ~OPTION_MEMORY_MAPPED; +- } +- +- if (io_port) { +- instance->io_port = io_port; +- options |= OPTION_IO_MAPPED; +- ok = 1; +- } else { +- options &= ~OPTION_IO_MAPPED; +- } +- +- if (!ok) { +- printk ("scsi%d : not initializing, no I/O or memory mapping known \n", +- instance->host_no); +- scsi_unregister (instance); +- return -1; +- } +- instance->irq = irq; +- instance->dma_channel = dma; +- +- hostdata->options = options; +- hostdata->dsa_len = dsa_len; +- hostdata->max_cmd_size = max_cmd_size; +- hostdata->num_cmds = 1; +- hostdata->scsi_clock = clock; +- /* Initialize single command */ +- tmp = (hostdata->script + hostdata->script_count); +-#ifdef FORCE_DSA_ALIGNMENT +- { +- void *t = ROUNDUP(tmp, void *); +- if (((u32)t & 0xff) > CmdPageStart) +- t = (void *)((u32)t + 255); +- t = (void *)(((u32)t & ~0xff) + CmdPageStart); +- hostdata->free = t; +-#if 0 +- printk ("scsi: Registered size increased by 256 to %d\n", size); +- printk ("scsi: CmdPageStart = 0x%02x\n", CmdPageStart); +- printk ("scsi: tmp = 0x%08x, hostdata->free set to 0x%08x\n", +- (u32)tmp, (u32)t); +-#endif +- } +-#else +- hostdata->free = ROUNDUP(tmp, void *); +-#endif +- hostdata->free->real = tmp; +- hostdata->free->size = max_cmd_size; +- hostdata->free->free = NULL; +- hostdata->free->next = NULL; +- hostdata->extra_allocate = 0; +- +- /* Allocate command start code space */ +- hostdata->schedule = (chip == 700 || chip == 70066) ? +- NULL : (u32 *) ((char *)hostdata->free + max_cmd_size); +- +-/* +- * For diagnostic purposes, we don't really care how fast things blaze. +- * For profiling, we want to access the 800ns resolution system clock, +- * using a 'C' call on the host processor. +- * +- * Therefore, there's no need for the NCR chip to directly manipulate +- * this data, and we should put it wherever is most convenient for +- * Linux. +- */ +- if (track_events) +- hostdata->events = (struct NCR53c7x0_event *) (track_events ? +- vmalloc (sizeof (struct NCR53c7x0_event) * track_events) : NULL); +- else +- hostdata->events = NULL; +- +- if (hostdata->events) { +- memset ((void *) hostdata->events, 0, sizeof(struct NCR53c7x0_event) * +- track_events); +- hostdata->event_size = track_events; +- hostdata->event_index = 0; +- } else +- hostdata->event_size = 0; +- +- return NCR53c7x0_init(instance); +-} +- +- +-/* +- * Function : static void NCR53c7x0_init_fixup (struct Scsi_Host *host) +- * +- * Purpose : copy and fixup the SCSI SCRIPTS(tm) code for this device. +- * +- * Inputs : host - pointer to this host adapter's structure +- * +- */ +- +-static void +-NCR53c7x0_init_fixup (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned char tmp; +- int i, ncr_to_memory, memory_to_ncr; +- u32 base; +- NCR53c7x0_local_setup(host); +- +- +- /* XXX - NOTE : this code MUST be made endian aware */ +- /* Copy code into buffer that was allocated at detection time. 
*/ +- memcpy ((void *) hostdata->script, (void *) SCRIPT, +- sizeof(SCRIPT)); +- /* Fixup labels */ +- for (i = 0; i < PATCHES; ++i) +- hostdata->script[LABELPATCHES[i]] += +- virt_to_bus(hostdata->script); +- /* Fixup addresses of constants that used to be EXTERNAL */ +- +- patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_abort, +- virt_to_bus(&(hostdata->NCR53c7xx_msg_abort))); +- patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_reject, +- virt_to_bus(&(hostdata->NCR53c7xx_msg_reject))); +- patch_abs_32 (hostdata->script, 0, NCR53c7xx_zero, +- virt_to_bus(&(hostdata->NCR53c7xx_zero))); +- patch_abs_32 (hostdata->script, 0, NCR53c7xx_sink, +- virt_to_bus(&(hostdata->NCR53c7xx_sink))); +- patch_abs_32 (hostdata->script, 0, NOP_insn, +- virt_to_bus(&(hostdata->NOP_insn))); +- patch_abs_32 (hostdata->script, 0, schedule, +- virt_to_bus((void *) hostdata->schedule)); +- +- /* Fixup references to external variables: */ +- for (i = 0; i < EXTERNAL_PATCHES_LEN; ++i) +- hostdata->script[EXTERNAL_PATCHES[i].offset] += +- virt_to_bus(EXTERNAL_PATCHES[i].address); +- +- /* +- * Fixup absolutes set at boot-time. +- * +- * All non-code absolute variables suffixed with "dsa_" and "int_" +- * are constants, and need no fixup provided the assembler has done +- * it for us (I don't know what the "real" NCR assembler does in +- * this case, my assembler does the right magic). +- */ +- +- patch_abs_rwri_data (hostdata->script, 0, dsa_save_data_pointer, +- Ent_dsa_code_save_data_pointer - Ent_dsa_zero); +- patch_abs_rwri_data (hostdata->script, 0, dsa_restore_pointers, +- Ent_dsa_code_restore_pointers - Ent_dsa_zero); +- patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect, +- Ent_dsa_code_check_reselect - Ent_dsa_zero); +- +- /* +- * Just for the hell of it, preserve the settings of +- * Burst Length and Enable Read Line bits from the DMODE +- * register. Make sure SCRIPTS start automagically. +- */ +- +-#if defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000) +- /* We know better what we want than 16xBug does! */ +- tmp = DMODE_10_BL_8 | DMODE_10_FC2; +-#else +- tmp = NCR53c7x0_read8(DMODE_REG_10); +- tmp &= (DMODE_BL_MASK | DMODE_10_FC2 | DMODE_10_FC1 | DMODE_710_PD | +- DMODE_710_UO); +-#endif +- +- if (!(hostdata->options & OPTION_MEMORY_MAPPED)) { +- base = (u32) host->io_port; +- memory_to_ncr = tmp|DMODE_800_DIOM; +- ncr_to_memory = tmp|DMODE_800_SIOM; +- } else { +- base = virt_to_bus((void *)host->base); +- memory_to_ncr = ncr_to_memory = tmp; +- } +- +- /* SCRATCHB_REG_10 == SCRATCHA_REG_800, as it happens */ +- patch_abs_32 (hostdata->script, 0, addr_scratch, base + SCRATCHA_REG_800); +- patch_abs_32 (hostdata->script, 0, addr_temp, base + TEMP_REG); +- patch_abs_32 (hostdata->script, 0, addr_dsa, base + DSA_REG); +- +- /* +- * I needed some variables in the script to be accessible to +- * both the NCR chip and the host processor. For these variables, +- * I made the arbitrary decision to store them directly in the +- * hostdata structure rather than in the RELATIVE area of the +- * SCRIPTS. 
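
The LABELPATCHES loop above is a tiny relocation pass: the assembler leaves label operands as byte offsets from the script start, plus a table of the word indices that hold them, and init adds the script's bus address to each listed word. A self-contained sketch with invented opcodes and addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t script[6] = {
        0x80080000, 0x00000010,   /* JUMP to offset 0x10 (word 4)  */
        0x98080000, 0x00000000,   /* INT instruction               */
        0x80080000, 0x00000000,   /* JUMP to offset 0 (script top) */
    };
    const unsigned labelpatches[] = { 1, 5 };  /* words holding labels */
    uint32_t script_bus = 0x00200000;          /* pretend bus address  */
    unsigned i;

    for (i = 0; i < 2; i++)
        script[labelpatches[i]] += script_bus;

    printf("patched operands: 0x%08x 0x%08x\n",
           (unsigned) script[1], (unsigned) script[5]);
    return 0;
}
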
+- */ +- +- +- patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_memory, tmp); +- patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_ncr, memory_to_ncr); +- patch_abs_rwri_data (hostdata->script, 0, dmode_ncr_to_memory, ncr_to_memory); +- +- patch_abs_32 (hostdata->script, 0, msg_buf, +- virt_to_bus((void *)&(hostdata->msg_buf))); +- patch_abs_32 (hostdata->script, 0, reconnect_dsa_head, +- virt_to_bus((void *)&(hostdata->reconnect_dsa_head))); +- patch_abs_32 (hostdata->script, 0, addr_reconnect_dsa_head, +- virt_to_bus((void *)&(hostdata->addr_reconnect_dsa_head))); +- patch_abs_32 (hostdata->script, 0, reselected_identify, +- virt_to_bus((void *)&(hostdata->reselected_identify))); +-/* reselected_tag is currently unused */ +-#if 0 +- patch_abs_32 (hostdata->script, 0, reselected_tag, +- virt_to_bus((void *)&(hostdata->reselected_tag))); +-#endif +- +- patch_abs_32 (hostdata->script, 0, test_dest, +- virt_to_bus((void*)&hostdata->test_dest)); +- patch_abs_32 (hostdata->script, 0, test_src, +- virt_to_bus(&hostdata->test_source)); +- patch_abs_32 (hostdata->script, 0, saved_dsa, +- virt_to_bus((void *)&hostdata->saved2_dsa)); +- patch_abs_32 (hostdata->script, 0, emulfly, +- virt_to_bus((void *)&hostdata->emulated_intfly)); +- +- patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect, +- (unsigned char)(Ent_dsa_code_check_reselect - Ent_dsa_zero)); +- +-/* These are for event logging; the ncr_event enum contains the +- actual interrupt numbers. */ +-#ifdef A_int_EVENT_SELECT +- patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT, (u32) EVENT_SELECT); +-#endif +-#ifdef A_int_EVENT_DISCONNECT +- patch_abs_32 (hostdata->script, 0, int_EVENT_DISCONNECT, (u32) EVENT_DISCONNECT); +-#endif +-#ifdef A_int_EVENT_RESELECT +- patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT, (u32) EVENT_RESELECT); +-#endif +-#ifdef A_int_EVENT_COMPLETE +- patch_abs_32 (hostdata->script, 0, int_EVENT_COMPLETE, (u32) EVENT_COMPLETE); +-#endif +-#ifdef A_int_EVENT_IDLE +- patch_abs_32 (hostdata->script, 0, int_EVENT_IDLE, (u32) EVENT_IDLE); +-#endif +-#ifdef A_int_EVENT_SELECT_FAILED +- patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT_FAILED, +- (u32) EVENT_SELECT_FAILED); +-#endif +-#ifdef A_int_EVENT_BEFORE_SELECT +- patch_abs_32 (hostdata->script, 0, int_EVENT_BEFORE_SELECT, +- (u32) EVENT_BEFORE_SELECT); +-#endif +-#ifdef A_int_EVENT_RESELECT_FAILED +- patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT_FAILED, +- (u32) EVENT_RESELECT_FAILED); +-#endif +- +- /* +- * Make sure the NCR and Linux code agree on the location of +- * certain fields. 
+- */ +- +- hostdata->E_accept_message = Ent_accept_message; +- hostdata->E_command_complete = Ent_command_complete; +- hostdata->E_cmdout_cmdout = Ent_cmdout_cmdout; +- hostdata->E_data_transfer = Ent_data_transfer; +- hostdata->E_debug_break = Ent_debug_break; +- hostdata->E_dsa_code_template = Ent_dsa_code_template; +- hostdata->E_dsa_code_template_end = Ent_dsa_code_template_end; +- hostdata->E_end_data_transfer = Ent_end_data_transfer; +- hostdata->E_initiator_abort = Ent_initiator_abort; +- hostdata->E_msg_in = Ent_msg_in; +- hostdata->E_other_transfer = Ent_other_transfer; +- hostdata->E_other_in = Ent_other_in; +- hostdata->E_other_out = Ent_other_out; +- hostdata->E_reject_message = Ent_reject_message; +- hostdata->E_respond_message = Ent_respond_message; +- hostdata->E_select = Ent_select; +- hostdata->E_select_msgout = Ent_select_msgout; +- hostdata->E_target_abort = Ent_target_abort; +-#ifdef Ent_test_0 +- hostdata->E_test_0 = Ent_test_0; +-#endif +- hostdata->E_test_1 = Ent_test_1; +- hostdata->E_test_2 = Ent_test_2; +-#ifdef Ent_test_3 +- hostdata->E_test_3 = Ent_test_3; +-#endif +- hostdata->E_wait_reselect = Ent_wait_reselect; +- hostdata->E_dsa_code_begin = Ent_dsa_code_begin; +- +- hostdata->dsa_cmdout = A_dsa_cmdout; +- hostdata->dsa_cmnd = A_dsa_cmnd; +- hostdata->dsa_datain = A_dsa_datain; +- hostdata->dsa_dataout = A_dsa_dataout; +- hostdata->dsa_end = A_dsa_end; +- hostdata->dsa_msgin = A_dsa_msgin; +- hostdata->dsa_msgout = A_dsa_msgout; +- hostdata->dsa_msgout_other = A_dsa_msgout_other; +- hostdata->dsa_next = A_dsa_next; +- hostdata->dsa_select = A_dsa_select; +- hostdata->dsa_start = Ent_dsa_code_template - Ent_dsa_zero; +- hostdata->dsa_status = A_dsa_status; +- hostdata->dsa_jump_dest = Ent_dsa_code_fix_jump - Ent_dsa_zero + +- 8 /* destination operand */; +- +- /* sanity check */ +- if (A_dsa_fields_start != Ent_dsa_code_template_end - +- Ent_dsa_zero) +- printk("scsi%d : NCR dsa_fields start is %d not %d\n", +- host->host_no, A_dsa_fields_start, Ent_dsa_code_template_end - +- Ent_dsa_zero); +- +- printk("scsi%d : NCR code relocated to 0x%lx (virt 0x%p)\n", host->host_no, +- virt_to_bus(hostdata->script), hostdata->script); +-} +- +-/* +- * Function : static int NCR53c7xx_run_tests (struct Scsi_Host *host) +- * +- * Purpose : run various verification tests on the NCR chip, +- * including interrupt generation, and proper bus mastering +- * operation. +- * +- * Inputs : host - a properly initialized Scsi_Host structure +- * +- * Preconditions : the NCR chip must be in a halted state. +- * +- * Returns : 0 if all tests were successful, -1 on error. +- * +- */ +- +-static int +-NCR53c7xx_run_tests (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned long timeout; +- u32 start; +- int failed, i; +- unsigned long flags; +- NCR53c7x0_local_setup(host); +- +- /* The NCR chip _must_ be idle to run the test scripts */ +- +- local_irq_save(flags); +- if (!hostdata->idle) { +- printk ("scsi%d : chip not idle, aborting tests\n", host->host_no); +- local_irq_restore(flags); +- return -1; +- } +- +- /* +- * Check for functional interrupts, this could work as an +- * autoprobe routine. 
+- */ +- +- if ((hostdata->options & OPTION_DEBUG_TEST1) && +- hostdata->state != STATE_DISABLED) { +- hostdata->idle = 0; +- hostdata->test_running = 1; +- hostdata->test_completed = -1; +- hostdata->test_dest = 0; +- hostdata->test_source = 0xdeadbeef; +- start = virt_to_bus (hostdata->script) + hostdata->E_test_1; +- hostdata->state = STATE_RUNNING; +- printk ("scsi%d : test 1", host->host_no); +- NCR53c7x0_write32 (DSP_REG, start); +- if (hostdata->options & OPTION_DEBUG_TRACE) +- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM | +- DCNTL_STD); +- printk (" started\n"); +- local_irq_restore(flags); +- +- /* +- * This is currently a .5 second timeout, since (in theory) no slow +- * board will take that long. In practice, we've seen one +- * pentium which occassionally fails with this, but works with +- * 10 times as much? +- */ +- +- timeout = jiffies + 5 * HZ / 10; +- while ((hostdata->test_completed == -1) && time_before(jiffies, timeout)) +- barrier(); +- +- failed = 1; +- if (hostdata->test_completed == -1) +- printk ("scsi%d : driver test 1 timed out%s\n",host->host_no , +- (hostdata->test_dest == 0xdeadbeef) ? +- " due to lost interrupt.\n" +- " Please verify that the correct IRQ is being used for your board,\n" +- : ""); +- else if (hostdata->test_completed != 1) +- printk ("scsi%d : test 1 bad interrupt value (%d)\n", +- host->host_no, hostdata->test_completed); +- else +- failed = (hostdata->test_dest != 0xdeadbeef); +- +- if (hostdata->test_dest != 0xdeadbeef) { +- printk ("scsi%d : driver test 1 read 0x%x instead of 0xdeadbeef indicating a\n" +- " probable cache invalidation problem. Please configure caching\n" +- " as write-through or disabled\n", +- host->host_no, hostdata->test_dest); +- } +- +- if (failed) { +- printk ("scsi%d : DSP = 0x%p (script at 0x%p, start at 0x%x)\n", +- host->host_no, bus_to_virt(NCR53c7x0_read32(DSP_REG)), +- hostdata->script, start); +- printk ("scsi%d : DSPS = 0x%x\n", host->host_no, +- NCR53c7x0_read32(DSPS_REG)); +- local_irq_restore(flags); +- return -1; +- } +- hostdata->test_running = 0; +- } +- +- if ((hostdata->options & OPTION_DEBUG_TEST2) && +- hostdata->state != STATE_DISABLED) { +- u32 dsa[48]; +- unsigned char identify = IDENTIFY(0, 0); +- unsigned char cmd[6]; +- unsigned char data[36]; +- unsigned char status = 0xff; +- unsigned char msg = 0xff; +- +- cmd[0] = INQUIRY; +- cmd[1] = cmd[2] = cmd[3] = cmd[5] = 0; +- cmd[4] = sizeof(data); +- +- dsa[2] = 1; +- dsa[3] = virt_to_bus(&identify); +- dsa[4] = 6; +- dsa[5] = virt_to_bus(&cmd); +- dsa[6] = sizeof(data); +- dsa[7] = virt_to_bus(&data); +- dsa[8] = 1; +- dsa[9] = virt_to_bus(&status); +- dsa[10] = 1; +- dsa[11] = virt_to_bus(&msg); +- +- for (i = 0; i < 6; ++i) { +-#ifdef VALID_IDS +- if (!hostdata->valid_ids[i]) +- continue; +-#endif +- local_irq_disable(); +- if (!hostdata->idle) { +- printk ("scsi%d : chip not idle, aborting tests\n", host->host_no); +- local_irq_restore(flags); +- return -1; +- } +- +- /* 710: bit mapped scsi ID, async */ +- dsa[0] = (1 << i) << 16; +- hostdata->idle = 0; +- hostdata->test_running = 2; +- hostdata->test_completed = -1; +- start = virt_to_bus(hostdata->script) + hostdata->E_test_2; +- hostdata->state = STATE_RUNNING; +- NCR53c7x0_write32 (DSA_REG, virt_to_bus(dsa)); +- NCR53c7x0_write32 (DSP_REG, start); +- if (hostdata->options & OPTION_DEBUG_TRACE) +- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | +- DCNTL_SSM | DCNTL_STD); +- local_irq_restore(flags); +- +- timeout = jiffies + 5 * HZ; /* arbitrary */ +- while 
((hostdata->test_completed == -1) && time_before(jiffies, timeout)) +- barrier(); +- +- NCR53c7x0_write32 (DSA_REG, 0); +- +- if (hostdata->test_completed == 2) { +- data[35] = 0; +- printk ("scsi%d : test 2 INQUIRY to target %d, lun 0 : %s\n", +- host->host_no, i, data + 8); +- printk ("scsi%d : status ", host->host_no); +- scsi_print_status (status); +- printk ("\nscsi%d : message ", host->host_no); +- spi_print_msg(&msg); +- printk ("\n"); +- } else if (hostdata->test_completed == 3) { +- printk("scsi%d : test 2 no connection with target %d\n", +- host->host_no, i); +- if (!hostdata->idle) { +- printk("scsi%d : not idle\n", host->host_no); +- local_irq_restore(flags); +- return -1; +- } +- } else if (hostdata->test_completed == -1) { +- printk ("scsi%d : test 2 timed out\n", host->host_no); +- local_irq_restore(flags); +- return -1; +- } +- hostdata->test_running = 0; +- } +- } +- +- local_irq_restore(flags); +- return 0; +-} +- +-/* +- * Function : static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : copy the NCR53c8xx dsa structure into cmd's dsa buffer, +- * performing all necessary relocation. +- * +- * Inputs : cmd, a NCR53c7x0_cmd structure with a dsa area large +- * enough to hold the NCR53c8xx dsa. +- */ +- +-static void +-NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) { +- Scsi_Cmnd *c = cmd->cmd; +- struct Scsi_Host *host = c->device->host; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int i; +- +- memcpy (cmd->dsa, hostdata->script + (hostdata->E_dsa_code_template / 4), +- hostdata->E_dsa_code_template_end - hostdata->E_dsa_code_template); +- +- /* +- * Note : within the NCR 'C' code, dsa points to the _start_ +- * of the DSA structure, and _not_ the offset of dsa_zero within +- * that structure used to facilitate shorter signed offsets +- * for the 8 bit ALU. +- * +- * The implications of this are that +- * +- * - 32 bit A_dsa_* absolute values require an additional +- * dsa_zero added to their value to be correct, since they are +- * relative to dsa_zero which is in essentially a separate +- * space from the code symbols. +- * +- * - All other symbols require no special treatment. 
+- */ +- +- patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_lun, c->device->lun); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_addr_next, virt_to_bus(&cmd->dsa_next_addr)); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_next, virt_to_bus(cmd->dsa) + Ent_dsa_zero - +- Ent_dsa_code_template + A_dsa_next); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_sync, virt_to_bus((void *)hostdata->sync[c->device->id].script)); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_sscf_710, virt_to_bus((void *)&hostdata->sync[c->device->id].sscf_710)); +- patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_target, 1 << c->device->id); +- /* XXX - new pointer stuff */ +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_addr_saved_pointer, virt_to_bus(&cmd->saved_data_pointer)); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_addr_saved_residual, virt_to_bus(&cmd->saved_residual)); +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_addr_residual, virt_to_bus(&cmd->residual)); +- +- /* XXX - new start stuff */ +- +- patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), +- dsa_temp_addr_dsa_value, virt_to_bus(&cmd->dsa_addr)); +-} +- +-/* +- * Function : run_process_issue_queue (void) +- * +- * Purpose : insure that the coroutine is running and will process our +- * request. process_issue_queue_running is checked/set here (in an +- * inline function) rather than in process_issue_queue itself to reduce +- * the chances of stack overflow. +- * +- */ +- +-static volatile int process_issue_queue_running = 0; +- +-static __inline__ void +-run_process_issue_queue(void) { +- unsigned long flags; +- local_irq_save(flags); +- if (!process_issue_queue_running) { +- process_issue_queue_running = 1; +- process_issue_queue(flags); +- /* +- * process_issue_queue_running is cleared in process_issue_queue +- * once it can't do more work, and process_issue_queue exits with +- * interrupts disabled. +- */ +- } +- local_irq_restore(flags); +-} +- +-/* +- * Function : static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int +- * result) +- * +- * Purpose : mark SCSI command as finished, OR'ing the host portion +- * of the result word into the result field of the corresponding +- * Scsi_Cmnd structure, and removing it from the internal queues. +- * +- * Inputs : cmd - command, result - entire result field +- * +- * Preconditions : the NCR chip should be in a halted state when +- * abnormal_finished is run, since it modifies structures which +- * the NCR expects to have exclusive access to. +- */ +- +-static void +-abnormal_finished (struct NCR53c7x0_cmd *cmd, int result) { +- Scsi_Cmnd *c = cmd->cmd; +- struct Scsi_Host *host = c->device->host; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned long flags; +- int left, found; +- volatile struct NCR53c7x0_cmd * linux_search; +- volatile struct NCR53c7x0_cmd * volatile *linux_prev; +- volatile u32 *ncr_prev, *ncrcurrent, ncr_search; +- +-#if 0 +- printk ("scsi%d: abnormal finished\n", host->host_no); +-#endif +- +- local_irq_save(flags); +- found = 0; +- /* +- * Traverse the NCR issue array until we find a match or run out +- * of instructions. Instructions in the NCR issue array are +- * either JUMP or NOP instructions, which are 2 words in length. 
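
The scan that follows treats the schedule as an array of two-word slots, each a NOP (free) or a JUMP owned by one command. A minimal sketch of finding and reclaiming a slot, with an integer tag standing in for the bus-address comparison done via issue_to_cmd():

#include <stdint.h>
#include <stdio.h>

#define NOP_INSN  0x80000000u   /* stand-in opcodes, not the real DCMD bits */
#define JUMP_INSN 0x80080000u

int main(void)
{
    uint32_t sched[4 * 2] = {
        NOP_INSN,  0xdeadbeef,
        JUMP_INSN, 42,           /* slot owned by command tag 42 */
        NOP_INSN,  0xdeadbeef,
        JUMP_INSN, 7,
    };
    uint32_t target = 42;
    int slot;

    for (slot = 0; slot < 4; slot++) {
        uint32_t *insn = &sched[slot * 2];
        if (insn[0] != NOP_INSN && insn[1] == target) {
            insn[0] = NOP_INSN;     /* reclaim the slot, as       */
            insn[1] = 0xdeadbeef;   /* abnormal_finished() does   */
            printf("freed slot %d\n", slot);
            break;
        }
    }
    return 0;
}
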
+- */ +- +- +- for (found = 0, left = host->can_queue, ncrcurrent = hostdata->schedule; +- left > 0; --left, ncrcurrent += 2) +- { +- if (issue_to_cmd (host, hostdata, (u32 *) ncrcurrent) == cmd) +- { +- ncrcurrent[0] = hostdata->NOP_insn; +- ncrcurrent[1] = 0xdeadbeef; +- ++found; +- break; +- } +- } +- +- /* +- * Traverse the NCR reconnect list of DSA structures until we find +- * a pointer to this dsa or have found too many command structures. +- * We let prev point at the next field of the previous element or +- * head of the list, so we don't do anything different for removing +- * the head element. +- */ +- +- for (left = host->can_queue, +- ncr_search = hostdata->reconnect_dsa_head, +- ncr_prev = &hostdata->reconnect_dsa_head; +- left >= 0 && ncr_search && +- ((char*)bus_to_virt(ncr_search) + hostdata->dsa_start) +- != (char *) cmd->dsa; +- ncr_prev = (u32*) ((char*)bus_to_virt(ncr_search) + +- hostdata->dsa_next), ncr_search = *ncr_prev, --left); +- +- if (left < 0) +- printk("scsi%d: loop detected in ncr reconncect list\n", +- host->host_no); +- else if (ncr_search) { +- if (found) +- printk("scsi%d: scsi %ld in ncr issue array and reconnect lists\n", +- host->host_no, c->pid); +- else { +- volatile u32 * next = (u32 *) +- ((char *)bus_to_virt(ncr_search) + hostdata->dsa_next); +- *ncr_prev = *next; +-/* If we're at the tail end of the issue queue, update that pointer too. */ +- found = 1; +- } +- } +- +- /* +- * Traverse the host running list until we find this command or discover +- * we have too many elements, pointing linux_prev at the next field of the +- * linux_previous element or head of the list, search at this element. +- */ +- +- for (left = host->can_queue, linux_search = hostdata->running_list, +- linux_prev = &hostdata->running_list; +- left >= 0 && linux_search && linux_search != cmd; +- linux_prev = &(linux_search->next), +- linux_search = linux_search->next, --left); +- +- if (left < 0) +- printk ("scsi%d: loop detected in host running list for scsi pid %ld\n", +- host->host_no, c->pid); +- else if (linux_search) { +- *linux_prev = linux_search->next; +- --hostdata->busy[c->device->id][c->device->lun]; +- } +- +- /* Return the NCR command structure to the free list */ +- cmd->next = hostdata->free; +- hostdata->free = cmd; +- c->host_scribble = NULL; +- +- /* And return */ +- c->result = result; +- c->scsi_done(c); +- +- local_irq_restore(flags); +- run_process_issue_queue(); +-} +- +-/* +- * Function : static void intr_break (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : Handler for breakpoint interrupts from a SCSI script +- * +- * Inputs : host - pointer to this host adapter's structure, +- * cmd - pointer to the command (if any) dsa was pointing +- * to. +- * +- */ +- +-static void +-intr_break (struct Scsi_Host *host, struct +- NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_break *bp; +-#if 0 +- Scsi_Cmnd *c = cmd ? cmd->cmd : NULL; +-#endif +- u32 *dsp; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned long flags; +- NCR53c7x0_local_setup(host); +- +- /* +- * Find the break point corresponding to this address, and +- * dump the appropriate debugging information to standard +- * output. 
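
The traversals above use the indirect-pointer idiom the comments describe: prev always addresses the location that holds the current link, whether that is the list head or a next field, so unlinking the head needs no special case. A self-contained sketch:

#include <stdio.h>

struct node { int id; struct node *next; };

static void unlink_node(struct node **head, struct node *victim)
{
    struct node **prev;

    /* prev walks over link *locations*, never over nodes themselves. */
    for (prev = head; *prev && *prev != victim; prev = &(*prev)->next)
        ;
    if (*prev)
        *prev = victim->next;   /* same store for head and interior */
}

int main(void)
{
    struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    struct node *head = &a;

    unlink_node(&head, &a);     /* removing the head itself */
    for (struct node *n = head; n; n = n->next)
        printf("%d ", n->id);
    printf("\n");               /* prints: 2 3 */
    return 0;
}

The payoff is exactly what the driver notes: one loop and one store cover every position in the list.
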
+- */ +- local_irq_save(flags); +- dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG)); +- for (bp = hostdata->breakpoints; bp && bp->address != dsp; +- bp = bp->next); +- if (!bp) +- panic("scsi%d : break point interrupt from %p with no breakpoint!", +- host->host_no, dsp); +- +- /* +- * Configure the NCR chip for manual start mode, so that we can +- * point the DSP register at the instruction that follows the +- * INT int_debug_break instruction. +- */ +- +- NCR53c7x0_write8 (hostdata->dmode, +- NCR53c7x0_read8(hostdata->dmode)|DMODE_MAN); +- +- /* +- * And update the DSP register, using the size of the old +- * instruction in bytes. +- */ +- +- local_irq_restore(flags); +-} +-/* +- * Function : static void print_synchronous (const char *prefix, +- * const unsigned char *msg) +- * +- * Purpose : print a pretty, user and machine parsable representation +- * of a SDTR message, including the "real" parameters, data +- * clock so we can tell transfer rate at a glance. +- * +- * Inputs ; prefix - text to prepend, msg - SDTR message (5 bytes) +- */ +- +-static void +-print_synchronous (const char *prefix, const unsigned char *msg) { +- if (msg[4]) { +- int Hz = 1000000000 / (msg[3] * 4); +- int integer = Hz / 1000000; +- int fraction = (Hz - (integer * 1000000)) / 10000; +- printk ("%speriod %dns offset %d %d.%02dMHz %s SCSI%s\n", +- prefix, (int) msg[3] * 4, (int) msg[4], integer, fraction, +- (((msg[3] * 4) < 200) ? "FAST" : "synchronous"), +- (((msg[3] * 4) < 200) ? "-II" : "")); +- } else +- printk ("%sasynchronous SCSI\n", prefix); +-} +- +-/* +- * Function : static void set_synchronous (struct Scsi_Host *host, +- * int target, int sxfer, int scntl3, int now_connected) +- * +- * Purpose : reprogram transfers between the selected SCSI initiator and +- * target with the given register values; in the indirect +- * select operand, reselection script, and chip registers. +- * +- * Inputs : host - NCR53c7,8xx SCSI host, target - number SCSI target id, +- * sxfer and scntl3 - NCR registers. now_connected - if non-zero, +- * we should reprogram the registers now too. +- * +- * NOTE: For 53c710, scntl3 is actually used for SCF bits from +- * SBCL, as we don't have a SCNTL3. +- */ +- +-static void +-set_synchronous (struct Scsi_Host *host, int target, int sxfer, int scntl3, +- int now_connected) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- u32 *script; +- NCR53c7x0_local_setup(host); +- +- /* These are eight bit registers */ +- sxfer &= 0xff; +- scntl3 &= 0xff; +- +- hostdata->sync[target].sxfer_sanity = sxfer; +- hostdata->sync[target].scntl3_sanity = scntl3; +- +-/* +- * HARD CODED : synchronous script is EIGHT words long. 
This +- * must agree with 53c7.8xx.h +- */ +- +- if ((hostdata->chip != 700) && (hostdata->chip != 70066)) { +- hostdata->sync[target].select_indirect = (1 << target) << 16 | +- (sxfer << 8); +- hostdata->sync[target].sscf_710 = scntl3; +- +- script = (u32 *) hostdata->sync[target].script; +- +- /* XXX - add NCR53c7x0 code to reprogram SCF bits if we want to */ +- script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY | +- DCMD_RWRI_OP_MOVE) << 24) | +- (SBCL_REG << 16) | (scntl3 << 8); +- script[1] = 0; +- script += 2; +- +- script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY | +- DCMD_RWRI_OP_MOVE) << 24) | +- (SXFER_REG << 16) | (sxfer << 8); +- script[1] = 0; +- script += 2; +- +-#ifdef DEBUG_SYNC_INTR +- if (hostdata->options & OPTION_DEBUG_DISCONNECT) { +- script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_INT) << 24) | DBC_TCI_TRUE; +- script[1] = DEBUG_SYNC_INTR; +- script += 2; +- } +-#endif +- +- script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_RETURN) << 24) | DBC_TCI_TRUE; +- script[1] = 0; +- script += 2; +- } +- +- if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) +- printk ("scsi%d : target %d sync parameters are sxfer=0x%x, scntl3=0x%x\n", +- host->host_no, target, sxfer, scntl3); +- +- if (now_connected) { +- NCR53c7x0_write8(SBCL_REG, scntl3); +- NCR53c7x0_write8(SXFER_REG, sxfer); +- } +-} +- +- +-/* +- * Function : static int asynchronous (struct Scsi_Host *host, int target) +- * +- * Purpose : reprogram between the selected SCSI Host adapter and target +- * (assumed to be currently connected) for asynchronous transfers. +- * +- * Inputs : host - SCSI host structure, target - numeric target ID. +- * +- * Preconditions : the NCR chip should be in one of the halted states +- */ +- +-static void +-asynchronous (struct Scsi_Host *host, int target) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- NCR53c7x0_local_setup(host); +- set_synchronous (host, target, /* no offset */ 0, hostdata->saved_scntl3, +- 1); +- printk ("scsi%d : setting target %d to asynchronous SCSI\n", +- host->host_no, target); +-} +- +-/* +- * XXX - do we want to go out of our way (ie, add extra code to selection +- * in the NCR53c710/NCR53c720 script) to reprogram the synchronous +- * conversion bits, or can we be content in just setting the +- * sxfer bits? I chose to do so [richard@sleepie.demon.co.uk] +- */ +- +-/* Table for NCR53c8xx synchronous values */ +- +-/* This table is also correct for 710, allowing that scf=4 is equivalent +- * of SSCF=0 (ie use DCNTL, divide by 3) for a 50.01-66.00MHz clock. +- * For any other clock values, we cannot use entries with SCF values of +- * 4. I guess that for a 66MHz clock, the slowest it will set is 2MHz, +- * and for a 50MHz clock, the slowest will be 2.27Mhz. Should check +- * that a device doesn't try and negotiate sync below these limits! 
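
For a worked instance of the arithmetic in synchronous() below: the SDTR period byte is in units of 4 ns, and the divisor is computed in tenths before walking the syncs[] table. The 50 MHz clock and 200 ns request here are example values only:

#include <stdio.h>

int main(void)
{
    int msg3 = 50;                  /* SDTR period byte: 50 * 4 = 200 ns */
    int scsi_clock = 50000000;      /* example 50 MHz chip clock         */

    int desire  = 1000000000 / (msg3 * 4);    /* desired clock: 5 MHz    */
    int divisor = (scsi_clock * 10) / desire; /* tenths: 100 -> 10.0     */

    printf("desired %d Hz, divisor %d.%d\n",
           desire, divisor / 10, divisor % 10);
    /* The table walk then picks the first entry with div >= 100, here
     * { 100, scf 1, tp 6 }, which yields the SXFER/SBCL settings. */
    return 0;
}
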
+- */ +- +-static const struct { +- int div; /* Total clock divisor * 10 */ +- unsigned char scf; /* */ +- unsigned char tp; /* 4 + tp = xferp divisor */ +-} syncs[] = { +-/* div scf tp div scf tp div scf tp */ +- { 40, 1, 0}, { 50, 1, 1}, { 60, 1, 2}, +- { 70, 1, 3}, { 75, 2, 1}, { 80, 1, 4}, +- { 90, 1, 5}, { 100, 1, 6}, { 105, 2, 3}, +- { 110, 1, 7}, { 120, 2, 4}, { 135, 2, 5}, +- { 140, 3, 3}, { 150, 2, 6}, { 160, 3, 4}, +- { 165, 2, 7}, { 180, 3, 5}, { 200, 3, 6}, +- { 210, 4, 3}, { 220, 3, 7}, { 240, 4, 4}, +- { 270, 4, 5}, { 300, 4, 6}, { 330, 4, 7} +-}; +- +-/* +- * Function : static void synchronous (struct Scsi_Host *host, int target, +- * char *msg) +- * +- * Purpose : reprogram transfers between the selected SCSI initiator and +- * target for synchronous SCSI transfers such that the synchronous +- * offset is less than that requested and period at least as long +- * as that requested. Also modify *msg such that it contains +- * an appropriate response. +- * +- * Inputs : host - NCR53c7,8xx SCSI host, target - number SCSI target id, +- * msg - synchronous transfer request. +- */ +- +- +-static void +-synchronous (struct Scsi_Host *host, int target, char *msg) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int desire, divisor, i, limit; +- unsigned char scntl3, sxfer; +-/* The diagnostic message fits on one line, even with max. width integers */ +- char buf[80]; +- +-/* Desired transfer clock in Hz */ +- desire = 1000000000L / (msg[3] * 4); +-/* Scale the available SCSI clock by 10 so we get tenths */ +- divisor = (hostdata->scsi_clock * 10) / desire; +- +-/* NCR chips can handle at most an offset of 8 */ +- if (msg[4] > 8) +- msg[4] = 8; +- +- if (hostdata->options & OPTION_DEBUG_SDTR) +- printk("scsi%d : optimal synchronous divisor of %d.%01d\n", +- host->host_no, divisor / 10, divisor % 10); +- +- limit = ARRAY_SIZE(syncs) - 1; +- for (i = 0; (i < limit) && (divisor > syncs[i].div); ++i); +- +- if (hostdata->options & OPTION_DEBUG_SDTR) +- printk("scsi%d : selected synchronous divisor of %d.%01d\n", +- host->host_no, syncs[i].div / 10, syncs[i].div % 10); +- +- msg[3] = ((1000000000L / hostdata->scsi_clock) * syncs[i].div / 10 / 4); +- +- if (hostdata->options & OPTION_DEBUG_SDTR) +- printk("scsi%d : selected synchronous period of %dns\n", host->host_no, +- msg[3] * 4); +- +- scntl3 = syncs[i].scf; +- sxfer = (msg[4] << SXFER_MO_SHIFT) | (syncs[i].tp << 4); +- if (hostdata->options & OPTION_DEBUG_SDTR) +- printk ("scsi%d : sxfer=0x%x scntl3=0x%x\n", +- host->host_no, (int) sxfer, (int) scntl3); +- set_synchronous (host, target, sxfer, scntl3, 1); +- sprintf (buf, "scsi%d : setting target %d to ", host->host_no, target); +- print_synchronous (buf, msg); +-} +- +-/* +- * Function : static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : Handler for INT generated instructions for the +- * NCR53c810/820 SCSI SCRIPT +- * +- * Inputs : host - pointer to this host adapter's structure, +- * cmd - pointer to the command (if any) dsa was pointing +- * to. +- * +- */ +- +-static int +-NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct +- NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- int print; +- Scsi_Cmnd *c = cmd ? 
cmd->cmd : NULL; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- u32 dsps,*dsp; /* Argument of the INT instruction */ +- +- NCR53c7x0_local_setup(host); +- dsps = NCR53c7x0_read32(DSPS_REG); +- dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG)); +- +- /* RGH 150597: Frig. Commands which fail with Check Condition are +- * Flagged as successful - hack dsps to indicate check condition */ +-#if 0 +- /* RGH 200597: Need to disable for BVME6000, as it gets Check Conditions +- * and then dies. Seems to handle Check Condition at startup, but +- * not mid kernel build. */ +- if (dsps == A_int_norm_emulateintfly && cmd && cmd->result == 2) +- dsps = A_int_err_check_condition; +-#endif +- +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : DSPS = 0x%x\n", host->host_no, dsps); +- +- switch (dsps) { +- case A_int_msg_1: +- print = 1; +- switch (hostdata->msg_buf[0]) { +- /* +- * Unless we've initiated synchronous negotiation, I don't +- * think that this should happen. +- */ +- case MESSAGE_REJECT: +- hostdata->dsp = hostdata->script + hostdata->E_accept_message / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- if (cmd && (cmd->flags & CMD_FLAG_SDTR)) { +- printk ("scsi%d : target %d rejected SDTR\n", host->host_no, +- c->device->id); +- cmd->flags &= ~CMD_FLAG_SDTR; +- asynchronous (host, c->device->id); +- print = 0; +- } +- break; +- case INITIATE_RECOVERY: +- printk ("scsi%d : extended contingent allegiance not supported yet, rejecting\n", +- host->host_no); +- /* Fall through to default */ +- hostdata->dsp = hostdata->script + hostdata->E_reject_message / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- break; +- default: +- printk ("scsi%d : unsupported message, rejecting\n", +- host->host_no); +- hostdata->dsp = hostdata->script + hostdata->E_reject_message / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- } +- if (print) { +- printk ("scsi%d : received message", host->host_no); +- if (c) +- printk (" from target %d lun %d ", c->device->id, c->device->lun); +- spi_print_msg((unsigned char *) hostdata->msg_buf); +- printk("\n"); +- } +- +- return SPECIFIC_INT_NOTHING; +- +- +- case A_int_msg_sdtr: +-/* +- * At this point, hostdata->msg_buf contains +- * 0 EXTENDED MESSAGE +- * 1 length +- * 2 SDTR +- * 3 period * 4ns +- * 4 offset +- */ +- +- if (cmd) { +- char buf[80]; +- sprintf (buf, "scsi%d : target %d %s ", host->host_no, c->device->id, +- (cmd->flags & CMD_FLAG_SDTR) ? "accepting" : "requesting"); +- print_synchronous (buf, (unsigned char *) hostdata->msg_buf); +- +- /* +- * Initiator initiated, won't happen unless synchronous +- * transfers are enabled. 
If we get a SDTR message in +- * response to our SDTR, we should program our parameters +- * such that +- * offset <= requested offset +- * period >= requested period +- */ +- if (cmd->flags & CMD_FLAG_SDTR) { +- cmd->flags &= ~CMD_FLAG_SDTR; +- if (hostdata->msg_buf[4]) +- synchronous (host, c->device->id, (unsigned char *) +- hostdata->msg_buf); +- else +- asynchronous (host, c->device->id); +- hostdata->dsp = hostdata->script + hostdata->E_accept_message / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- return SPECIFIC_INT_NOTHING; +- } else { +- if (hostdata->options & OPTION_SYNCHRONOUS) { +- cmd->flags |= CMD_FLAG_DID_SDTR; +- synchronous (host, c->device->id, (unsigned char *) +- hostdata->msg_buf); +- } else { +- hostdata->msg_buf[4] = 0; /* 0 offset = async */ +- asynchronous (host, c->device->id); +- } +- patch_dsa_32 (cmd->dsa, dsa_msgout_other, 0, 5); +- patch_dsa_32 (cmd->dsa, dsa_msgout_other, 1, (u32) +- virt_to_bus ((void *)&hostdata->msg_buf)); +- hostdata->dsp = hostdata->script + +- hostdata->E_respond_message / sizeof(u32); +- hostdata->dsp_changed = 1; +- } +- return SPECIFIC_INT_NOTHING; +- } +- /* Fall through to abort if we couldn't find a cmd, and +- therefore a dsa structure to twiddle */ +- case A_int_msg_wdtr: +- hostdata->dsp = hostdata->script + hostdata->E_reject_message / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- return SPECIFIC_INT_NOTHING; +- case A_int_err_unexpected_phase: +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : unexpected phase\n", host->host_no); +- return SPECIFIC_INT_ABORT; +- case A_int_err_selected: +- if ((hostdata->chip / 100) == 8) +- printk ("scsi%d : selected by target %d\n", host->host_no, +- (int) NCR53c7x0_read8(SDID_REG_800) &7); +- else +- printk ("scsi%d : selected by target LCRC=0x%02x\n", host->host_no, +- (int) NCR53c7x0_read8(LCRC_REG_10)); +- hostdata->dsp = hostdata->script + hostdata->E_target_abort / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- return SPECIFIC_INT_NOTHING; +- case A_int_err_unexpected_reselect: +- if ((hostdata->chip / 100) == 8) +- printk ("scsi%d : unexpected reselect by target %d lun %d\n", +- host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & 7, +- hostdata->reselected_identify & 7); +- else +- printk ("scsi%d : unexpected reselect LCRC=0x%02x\n", host->host_no, +- (int) NCR53c7x0_read8(LCRC_REG_10)); +- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- return SPECIFIC_INT_NOTHING; +-/* +- * Since contingent allegiance conditions are cleared by the next +- * command issued to a target, we must issue a REQUEST SENSE +- * command after receiving a CHECK CONDITION status, before +- * another command is issued. +- * +- * Since this NCR53c7x0_cmd will be freed after use, we don't +- * care if we step on the various fields, so modify a few things. +- */ +- case A_int_err_check_condition: +-#if 0 +- if (hostdata->options & OPTION_DEBUG_INTR) +-#endif +- printk ("scsi%d : CHECK CONDITION\n", host->host_no); +- if (!c) { +- printk("scsi%d : CHECK CONDITION with no SCSI command\n", +- host->host_no); +- return SPECIFIC_INT_PANIC; +- } +- +- /* +- * FIXME : this uses the normal one-byte selection message. +- * We may want to renegotiate for synchronous & WIDE transfers +- * since these could be the crux of our problem. +- * +- hostdata->NOP_insn* FIXME : once SCSI-II tagged queuing is implemented, we'll +- * have to set this up so that the rest of the DSA +- * agrees with this being an untagged queue'd command. 
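+- *
+- * For reference, the six-byte REQUEST SENSE CDB assembled below is
+- * (a sketch; only the LUN bits of byte 1 survive from the original
+- * command):
+- *
+- *     cdb[0] = REQUEST_SENSE;
+- *     cdb[1] = old_cdb[1] & 0xe0;     ... keep LUN, clear the rest
+- *     cdb[2] = cdb[3] = 0;            ... reserved
+- *     cdb[4] = sizeof(sense_buffer);  ... allocation length
+- *     cdb[5] = 0;                     ... control byte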
+- */ +- +- patch_dsa_32 (cmd->dsa, dsa_msgout, 0, 1); +- +- /* +- * Modify the table indirect for COMMAND OUT phase, since +- * Request Sense is a six byte command. +- */ +- +- patch_dsa_32 (cmd->dsa, dsa_cmdout, 0, 6); +- +- /* +- * The CDB is now mirrored in our local non-cached +- * structure, but keep the old structure up to date as well, +- * just in case anyone looks at it. +- */ +- +- /* +- * XXX Need to worry about data buffer alignment/cache state +- * XXX here, but currently never get A_int_err_check_condition, +- * XXX so ignore problem for now. +- */ +- cmd->cmnd[0] = c->cmnd[0] = REQUEST_SENSE; +- cmd->cmnd[0] = c->cmnd[1] &= 0xe0; /* Zero all but LUN */ +- cmd->cmnd[0] = c->cmnd[2] = 0; +- cmd->cmnd[0] = c->cmnd[3] = 0; +- cmd->cmnd[0] = c->cmnd[4] = sizeof(c->sense_buffer); +- cmd->cmnd[0] = c->cmnd[5] = 0; +- +- /* +- * Disable dataout phase, and program datain to transfer to the +- * sense buffer, and add a jump to other_transfer after the +- * command so overflow/underrun conditions are detected. +- */ +- +- patch_dsa_32 (cmd->dsa, dsa_dataout, 0, +- virt_to_bus(hostdata->script) + hostdata->E_other_transfer); +- patch_dsa_32 (cmd->dsa, dsa_datain, 0, +- virt_to_bus(cmd->data_transfer_start)); +- cmd->data_transfer_start[0] = (((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | +- DCMD_BMI_IO)) << 24) | sizeof(c->sense_buffer); +- cmd->data_transfer_start[1] = (u32) virt_to_bus(c->sense_buffer); +- +- cmd->data_transfer_start[2] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) +- << 24) | DBC_TCI_TRUE; +- cmd->data_transfer_start[3] = (u32) virt_to_bus(hostdata->script) + +- hostdata->E_other_transfer; +- +- /* +- * Currently, this command is flagged as completed, ie +- * it has valid status and message data. Reflag it as +- * incomplete. Q - need to do something so that original +- * status, etc are used. +- */ +- +- cmd->result = cmd->cmd->result = 0xffff; +- +- /* +- * Restart command as a REQUEST SENSE. 
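+- *
+- * The two dynamic instructions patched into data_transfer_start
+- * above decode, in SCRIPTS assembler terms, roughly as
+- *
+- *     MOVE sizeof(sense_buffer), sense_buffer, WHEN DATA_IN
+- *     JUMP other_transfer
+- *
+- * (a sketch of the encoding built from the DCMD_ and DBC_ constants
+- * used throughout this file).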
+- */ +- hostdata->dsp = (u32 *) hostdata->script + hostdata->E_select / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- return SPECIFIC_INT_NOTHING; +- case A_int_debug_break: +- return SPECIFIC_INT_BREAK; +- case A_int_norm_aborted: +- hostdata->dsp = (u32 *) hostdata->schedule; +- hostdata->dsp_changed = 1; +- if (cmd) +- abnormal_finished (cmd, DID_ERROR << 16); +- return SPECIFIC_INT_NOTHING; +- case A_int_norm_emulateintfly: +- NCR53c7x0_intfly(host); +- return SPECIFIC_INT_NOTHING; +- case A_int_test_1: +- case A_int_test_2: +- hostdata->idle = 1; +- hostdata->test_completed = (dsps - A_int_test_1) / 0x00010000 + 1; +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk("scsi%d : test%d complete\n", host->host_no, +- hostdata->test_completed); +- return SPECIFIC_INT_NOTHING; +-#ifdef A_int_debug_reselected_ok +- case A_int_debug_reselected_ok: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT)) { +- /* +- * Note - this dsa is not based on location relative to +- * the command structure, but to location relative to the +- * DSA register +- */ +- u32 *dsa; +- dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG)); +- +- printk("scsi%d : reselected_ok (DSA = 0x%x (virt 0x%p)\n", +- host->host_no, NCR53c7x0_read32(DSA_REG), dsa); +- printk("scsi%d : resume address is 0x%x (virt 0x%p)\n", +- host->host_no, cmd->saved_data_pointer, +- bus_to_virt(cmd->saved_data_pointer)); +- print_insn (host, hostdata->script + Ent_reselected_ok / +- sizeof(u32), "", 1); +- if ((hostdata->chip / 100) == 8) +- printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n", +- host->host_no, NCR53c7x0_read8(SXFER_REG), +- NCR53c7x0_read8(SCNTL3_REG_800)); +- else +- printk ("scsi%d : sxfer=0x%x, cannot read SBCL\n", +- host->host_no, NCR53c7x0_read8(SXFER_REG)); +- if (c) { +- print_insn (host, (u32 *) +- hostdata->sync[c->device->id].script, "", 1); +- print_insn (host, (u32 *) +- hostdata->sync[c->device->id].script + 2, "", 1); +- } +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_reselect_check +- case A_int_debug_reselect_check: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- u32 *dsa; +-#if 0 +- u32 *code; +-#endif +- /* +- * Note - this dsa is not based on location relative to +- * the command structure, but to location relative to the +- * DSA register +- */ +- dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG)); +- printk("scsi%d : reselected_check_next (DSA = 0x%lx (virt 0x%p))\n", +- host->host_no, virt_to_bus(dsa), dsa); +- if (dsa) { +- printk("scsi%d : resume address is 0x%x (virt 0x%p)\n", +- host->host_no, cmd->saved_data_pointer, +- bus_to_virt (cmd->saved_data_pointer)); +-#if 0 +- printk("scsi%d : template code :\n", host->host_no); +- for (code = dsa + (Ent_dsa_code_check_reselect - Ent_dsa_zero) +- / sizeof(u32); code < (dsa + Ent_dsa_zero / sizeof(u32)); +- code += print_insn (host, code, "", 1)); +-#endif +- } +- print_insn (host, hostdata->script + Ent_reselected_ok / +- sizeof(u32), "", 1); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_dsa_schedule +- case A_int_debug_dsa_schedule: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- u32 *dsa; +- /* +- * Note - this dsa is not based on location relative to +- * the command structure, but to location relative to the +- * DSA register +- */ +- dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG)); +- printk("scsi%d : dsa_schedule (old DSA = 0x%lx (virt 0x%p))\n", +- host->host_no, virt_to_bus(dsa), dsa); +- if (dsa) +- 
printk("scsi%d : resume address is 0x%x (virt 0x%p)\n" +- " (temp was 0x%x (virt 0x%p))\n", +- host->host_no, cmd->saved_data_pointer, +- bus_to_virt (cmd->saved_data_pointer), +- NCR53c7x0_read32 (TEMP_REG), +- bus_to_virt (NCR53c7x0_read32(TEMP_REG))); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_scheduled +- case A_int_debug_scheduled: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- printk("scsi%d : new I/O 0x%x (virt 0x%p) scheduled\n", +- host->host_no, NCR53c7x0_read32(DSA_REG), +- bus_to_virt(NCR53c7x0_read32(DSA_REG))); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_idle +- case A_int_debug_idle: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- printk("scsi%d : idle\n", host->host_no); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_cmd +- case A_int_debug_cmd: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- printk("scsi%d : command sent\n"); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_dsa_loaded +- case A_int_debug_dsa_loaded: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- printk("scsi%d : DSA loaded with 0x%x (virt 0x%p)\n", host->host_no, +- NCR53c7x0_read32(DSA_REG), +- bus_to_virt(NCR53c7x0_read32(DSA_REG))); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_reselected +- case A_int_debug_reselected: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT)) { +- if ((hostdata->chip / 100) == 8) +- printk("scsi%d : reselected by target %d lun %d\n", +- host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & ~0x80, +- (int) hostdata->reselected_identify & 7); +- else +- printk("scsi%d : reselected by LCRC=0x%02x lun %d\n", +- host->host_no, (int) NCR53c7x0_read8(LCRC_REG_10), +- (int) hostdata->reselected_identify & 7); +- print_queues(host); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_disconnect_msg +- case A_int_debug_disconnect_msg: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) { +- if (c) +- printk("scsi%d : target %d lun %d disconnecting\n", +- host->host_no, c->device->id, c->device->lun); +- else +- printk("scsi%d : unknown target disconnecting\n", +- host->host_no); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_disconnected +- case A_int_debug_disconnected: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT)) { +- printk ("scsi%d : disconnected, new queues are\n", +- host->host_no); +- print_queues(host); +-#if 0 +- /* Not valid on ncr53c710! 
*/ +- printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n", +- host->host_no, NCR53c7x0_read8(SXFER_REG), +- NCR53c7x0_read8(SCNTL3_REG_800)); +-#endif +- if (c) { +- print_insn (host, (u32 *) +- hostdata->sync[c->device->id].script, "", 1); +- print_insn (host, (u32 *) +- hostdata->sync[c->device->id].script + 2, "", 1); +- } +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_panic +- case A_int_debug_panic: +- printk("scsi%d : int_debug_panic received\n", host->host_no); +- print_lots (host); +- return SPECIFIC_INT_PANIC; +-#endif +-#ifdef A_int_debug_saved +- case A_int_debug_saved: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT)) { +- printk ("scsi%d : saved data pointer 0x%x (virt 0x%p)\n", +- host->host_no, cmd->saved_data_pointer, +- bus_to_virt (cmd->saved_data_pointer)); +- print_progress (c); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_restored +- case A_int_debug_restored: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT)) { +- if (cmd) { +- int size; +- printk ("scsi%d : restored data pointer 0x%x (virt 0x%p)\n", +- host->host_no, cmd->saved_data_pointer, bus_to_virt ( +- cmd->saved_data_pointer)); +- size = print_insn (host, (u32 *) +- bus_to_virt(cmd->saved_data_pointer), "", 1); +- size = print_insn (host, (u32 *) +- bus_to_virt(cmd->saved_data_pointer) + size, "", 1); +- print_progress (c); +- } +-#if 0 +- printk ("scsi%d : datapath residual %d\n", +- host->host_no, datapath_residual (host)) ; +-#endif +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_sync +- case A_int_debug_sync: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) { +- unsigned char sxfer = NCR53c7x0_read8 (SXFER_REG), scntl3; +- if ((hostdata->chip / 100) == 8) { +- scntl3 = NCR53c7x0_read8 (SCNTL3_REG_800); +- if (c) { +- if (sxfer != hostdata->sync[c->device->id].sxfer_sanity || +- scntl3 != hostdata->sync[c->device->id].scntl3_sanity) { +- printk ("scsi%d : sync sanity check failed sxfer=0x%x, scntl3=0x%x", +- host->host_no, sxfer, scntl3); +- NCR53c7x0_write8 (SXFER_REG, sxfer); +- NCR53c7x0_write8 (SCNTL3_REG_800, scntl3); +- } +- } else +- printk ("scsi%d : unknown command sxfer=0x%x, scntl3=0x%x\n", +- host->host_no, (int) sxfer, (int) scntl3); +- } else { +- if (c) { +- if (sxfer != hostdata->sync[c->device->id].sxfer_sanity) { +- printk ("scsi%d : sync sanity check failed sxfer=0x%x", +- host->host_no, sxfer); +- NCR53c7x0_write8 (SXFER_REG, sxfer); +- NCR53c7x0_write8 (SBCL_REG, +- hostdata->sync[c->device->id].sscf_710); +- } +- } else +- printk ("scsi%d : unknown command sxfer=0x%x\n", +- host->host_no, (int) sxfer); +- } +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_datain +- case A_int_debug_datain: +- if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR| +- OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) { +- int size; +- if ((hostdata->chip / 100) == 8) +- printk ("scsi%d : In do_datain (%s) sxfer=0x%x, scntl3=0x%x\n" +- " datapath residual=%d\n", +- host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)), +- (int) NCR53c7x0_read8(SXFER_REG), +- (int) NCR53c7x0_read8(SCNTL3_REG_800), +- datapath_residual (host)) ; +- else +- printk ("scsi%d : In do_datain (%s) sxfer=0x%x\n" +- " datapath residual=%d\n", +- host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)), +- (int) NCR53c7x0_read8(SXFER_REG), +- datapath_residual (host)) ; +- print_insn (host, dsp, "", 
1); +- size = print_insn (host, (u32 *) bus_to_virt(dsp[1]), "", 1); +- print_insn (host, (u32 *) bus_to_virt(dsp[1]) + size, "", 1); +- } +- return SPECIFIC_INT_RESTART; +-#endif +-#ifdef A_int_debug_check_dsa +- case A_int_debug_check_dsa: +- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { +- int sdid; +- int tmp; +- char *where; +- if (hostdata->chip / 100 == 8) +- sdid = NCR53c7x0_read8 (SDID_REG_800) & 15; +- else { +- tmp = NCR53c7x0_read8 (SDID_REG_700); +- if (!tmp) +- panic ("SDID_REG_700 = 0"); +- tmp >>= 1; +- sdid = 0; +- while (tmp) { +- tmp >>= 1; +- sdid++; +- } +- } +- where = dsp - NCR53c7x0_insn_size(NCR53c7x0_read8 +- (DCMD_REG)) == hostdata->script + +- Ent_select_check_dsa / sizeof(u32) ? +- "selection" : "reselection"; +- if (c && sdid != c->device->id) { +- printk ("scsi%d : SDID target %d != DSA target %d at %s\n", +- host->host_no, sdid, c->device->id, where); +- print_lots(host); +- dump_events (host, 20); +- return SPECIFIC_INT_PANIC; +- } +- } +- return SPECIFIC_INT_RESTART; +-#endif +- default: +- if ((dsps & 0xff000000) == 0x03000000) { +- printk ("scsi%d : misc debug interrupt 0x%x\n", +- host->host_no, dsps); +- return SPECIFIC_INT_RESTART; +- } else if ((dsps & 0xff000000) == 0x05000000) { +- if (hostdata->events) { +- struct NCR53c7x0_event *event; +- ++hostdata->event_index; +- if (hostdata->event_index >= hostdata->event_size) +- hostdata->event_index = 0; +- event = (struct NCR53c7x0_event *) hostdata->events + +- hostdata->event_index; +- event->event = (enum ncr_event) dsps; +- event->dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); +- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { +- if (hostdata->chip / 100 == 8) +- event->target = NCR53c7x0_read8(SSID_REG_800); +- else { +- unsigned char tmp, sdid; +- tmp = NCR53c7x0_read8 (SDID_REG_700); +- if (!tmp) +- panic ("SDID_REG_700 = 0"); +- tmp >>= 1; +- sdid = 0; +- while (tmp) { +- tmp >>= 1; +- sdid++; +- } +- event->target = sdid; +- } +- } +- else +- event->target = 255; +- +- if (event->event == EVENT_RESELECT) +- event->lun = hostdata->reselected_identify & 0xf; +- else if (c) +- event->lun = c->device->lun; +- else +- event->lun = 255; +- do_gettimeofday(&(event->time)); +- if (c) { +- event->pid = c->pid; +- memcpy ((void *) event->cmnd, (void *) c->cmnd, +- sizeof (event->cmnd)); +- } else { +- event->pid = -1; +- } +- } +- return SPECIFIC_INT_RESTART; +- } +- +- printk ("scsi%d : unknown user interrupt 0x%x\n", +- host->host_no, (unsigned) dsps); +- return SPECIFIC_INT_PANIC; +- } +-} +- +-/* +- * XXX - the stock NCR assembler won't output the scriptu.h file, +- * which undefine's all #define'd CPP symbols from the script.h +- * file, which will create problems if you use multiple scripts +- * with the same symbol names. +- * +- * If you insist on using NCR's assembler, you could generate +- * scriptu.h from script.h using something like +- * +- * grep #define script.h | \ +- * sed 's/#define[ ][ ]*\([_a-zA-Z][_a-zA-Z0-9]*\).*$/#undefine \1/' \ +- * > scriptu.h +- */ +- +-#include "53c7xx_u.h" +- +-/* XXX - add alternate script handling code here */ +- +- +-/* +- * Function : static void NCR537xx_soft_reset (struct Scsi_Host *host) +- * +- * Purpose : perform a soft reset of the NCR53c7xx chip +- * +- * Inputs : host - pointer to this host adapter's structure +- * +- * Preconditions : NCR53c7x0_init must have been called for this +- * host. 
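+- *
+- * The reset itself is just a pulse on ISTAT (write ISTAT_10_SRST,
+- * then write 0); every chip register then holds its power-on value
+- * and is reprogrammed step by step below.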
+- * +- */ +- +-static void +-NCR53c7x0_soft_reset (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- unsigned long flags; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- NCR53c7x0_local_setup(host); +- +- local_irq_save(flags); +- +- /* Disable scsi chip and s/w level 7 ints */ +- +-#ifdef CONFIG_MVME16x +- if (MACH_IS_MVME16x) +- { +- volatile unsigned long v; +- +- v = *(volatile unsigned long *)0xfff4006c; +- v &= ~0x8000; +- *(volatile unsigned long *)0xfff4006c = v; +- v = *(volatile unsigned long *)0xfff4202c; +- v &= ~0x10; +- *(volatile unsigned long *)0xfff4202c = v; +- } +-#endif +- /* Anything specific for your hardware? */ +- +- /* +- * Do a soft reset of the chip so that everything is +- * reinitialized to the power-on state. +- * +- * Basically follow the procedure outlined in the NCR53c700 +- * data manual under Chapter Six, How to Use, Steps Necessary to +- * Start SCRIPTS, with the exception of actually starting the +- * script and setting up the synchronous transfer gunk. +- */ +- +- /* Should we reset the scsi bus here??????????????????? */ +- +- NCR53c7x0_write8(ISTAT_REG_700, ISTAT_10_SRST); +- NCR53c7x0_write8(ISTAT_REG_700, 0); +- +- /* +- * saved_dcntl is set up in NCR53c7x0_init() before it is overwritten +- * here. We should have some better way of working out the CF bit +- * setting.. +- */ +- +- hostdata->saved_dcntl = DCNTL_10_EA|DCNTL_10_COM; +- if (hostdata->scsi_clock > 50000000) +- hostdata->saved_dcntl |= DCNTL_700_CF_3; +- else +- if (hostdata->scsi_clock > 37500000) +- hostdata->saved_dcntl |= DCNTL_700_CF_2; +-#if 0 +- else +- /* Any clocks less than 37.5MHz? */ +-#endif +- +- if (hostdata->options & OPTION_DEBUG_TRACE) +- NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM); +- else +- NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl); +- /* Following disables snooping - snooping is not required, as non- +- * cached pages are used for shared data, and appropriate use is +- * made of cache_push/cache_clear. Indeed, for 68060 +- * enabling snooping causes disk corruption of ext2fs free block +- * bitmaps and the like. If you have a 68060 with snooping hardwared +- * on, then you need to enable CONFIG_060_WRITETHROUGH. +- */ +- NCR53c7x0_write8(CTEST7_REG, CTEST7_10_TT1|CTEST7_STD); +- /* Actually burst of eight, according to my 53c710 databook */ +- NCR53c7x0_write8(hostdata->dmode, DMODE_10_BL_8 | DMODE_10_FC2); +- NCR53c7x0_write8(SCID_REG, 1 << host->this_id); +- NCR53c7x0_write8(SBCL_REG, 0); +- NCR53c7x0_write8(SCNTL1_REG, SCNTL1_ESR_700); +- NCR53c7x0_write8(SCNTL0_REG, ((hostdata->options & OPTION_PARITY) ? +- SCNTL0_EPC : 0) | SCNTL0_EPG_700 | SCNTL0_ARB1 | SCNTL0_ARB2); +- +- /* +- * Enable all interrupts, except parity which we only want when +- * the user requests it. +- */ +- +- NCR53c7x0_write8(DIEN_REG, DIEN_700_BF | +- DIEN_ABRT | DIEN_SSI | DIEN_SIR | DIEN_700_OPC); +- +- NCR53c7x0_write8(SIEN_REG_700, ((hostdata->options & OPTION_PARITY) ? 
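+- /*
+-  * Clock-factor recap for the DCNTL setup above (a sketch drawn only
+-  * from this code, not from the databook): scsi_clock above 50 MHz
+-  * selects DCNTL_700_CF_3, between 37.5 and 50 MHz DCNTL_700_CF_2,
+-  * and slower clocks fall through to the default divisor.
+-  */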
+- SIEN_PAR : 0) | SIEN_700_STO | SIEN_RST | SIEN_UDC | +- SIEN_SGE | SIEN_MA); +- +-#ifdef CONFIG_MVME16x +- if (MACH_IS_MVME16x) +- { +- volatile unsigned long v; +- +- /* Enable scsi chip and s/w level 7 ints */ +- v = *(volatile unsigned long *)0xfff40080; +- v = (v & ~(0xf << 28)) | (4 << 28); +- *(volatile unsigned long *)0xfff40080 = v; +- v = *(volatile unsigned long *)0xfff4006c; +- v |= 0x8000; +- *(volatile unsigned long *)0xfff4006c = v; +- v = *(volatile unsigned long *)0xfff4202c; +- v = (v & ~0xff) | 0x10 | 4; +- *(volatile unsigned long *)0xfff4202c = v; +- } +-#endif +- /* Anything needed for your hardware? */ +- local_irq_restore(flags); +-} +- +- +-/* +- * Function static struct NCR53c7x0_cmd *allocate_cmd (Scsi_Cmnd *cmd) +- * +- * Purpose : Return the first free NCR53c7x0_cmd structure (which are +- * reused in a LIFO manner to minimize cache thrashing). +- * +- * Side effects : If we haven't yet scheduled allocation of NCR53c7x0_cmd +- * structures for this device, do so. Attempt to complete all scheduled +- * allocations using get_zeroed_page(), putting NCR53c7x0_cmd structures on +- * the free list. Teach programmers not to drink and hack. +- * +- * Inputs : cmd - SCSI command +- * +- * Returns : NCR53c7x0_cmd structure allocated on behalf of cmd; +- * NULL on failure. +- */ +- +-static void +-my_free_page (void *addr, int dummy) +-{ +- /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which +- * XXX may be invalid (CONFIG_060_WRITETHROUGH) +- */ +- kernel_set_cachemode((void *)addr, 4096, IOMAP_FULL_CACHING); +- free_page ((u32)addr); +-} +- +-static struct NCR53c7x0_cmd * +-allocate_cmd (Scsi_Cmnd *cmd) { +- struct Scsi_Host *host = cmd->device->host; +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- u32 real; /* Real address */ +- int size; /* Size of *tmp */ +- struct NCR53c7x0_cmd *tmp; +- unsigned long flags; +- +- if (hostdata->options & OPTION_DEBUG_ALLOCATION) +- printk ("scsi%d : num_cmds = %d, can_queue = %d\n" +- " target = %d, lun = %d, %s\n", +- host->host_no, hostdata->num_cmds, host->can_queue, +- cmd->device->id, cmd->device->lun, (hostdata->cmd_allocated[cmd->device->id] & +- (1 << cmd->device->lun)) ? "already allocated" : "not allocated"); +- +-/* +- * If we have not yet reserved commands for this I_T_L nexus, and +- * the device exists (as indicated by permanent Scsi_Cmnd structures +- * being allocated under 1.3.x, or being outside of scan_scsis in +- * 1.2.x), do so now. +- */ +- if (!(hostdata->cmd_allocated[cmd->device->id] & (1 << cmd->device->lun)) && +- cmd->device && cmd->device->has_cmdblocks) { +- if ((hostdata->extra_allocate + hostdata->num_cmds) < host->can_queue) +- hostdata->extra_allocate += host->cmd_per_lun; +- hostdata->cmd_allocated[cmd->device->id] |= (1 << cmd->device->lun); +- } +- +- for (; hostdata->extra_allocate > 0 ; --hostdata->extra_allocate, +- ++hostdata->num_cmds) { +- /* historically, kmalloc has returned unaligned addresses; pad so we +- have enough room to ROUNDUP */ +- size = hostdata->max_cmd_size + sizeof (void *); +-#ifdef FORCE_DSA_ALIGNMENT +- /* +- * 53c710 rev.0 doesn't have an add-with-carry instruction. +- * Ensure we allocate enough memory to force alignment. 
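+- *
+- * Padding by a further 256 bytes guarantees that an address of the
+- * form (base & ~0xff) + CmdPageStart still lies inside the
+- * allocation, which is what the rounding code below relies on.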
+- */ +- size += 256; +-#endif +-/* FIXME: for ISA bus '7xx chips, we need to or GFP_DMA in here */ +- +- if (size > 4096) { +- printk (KERN_ERR "53c7xx: allocate_cmd size > 4K\n"); +- return NULL; +- } +- real = get_zeroed_page(GFP_ATOMIC); +- if (real == 0) +- return NULL; +- cache_push(virt_to_phys((void *)real), 4096); +- cache_clear(virt_to_phys((void *)real), 4096); +- kernel_set_cachemode((void *)real, 4096, IOMAP_NOCACHE_SER); +- tmp = ROUNDUP(real, void *); +-#ifdef FORCE_DSA_ALIGNMENT +- { +- if (((u32)tmp & 0xff) > CmdPageStart) +- tmp = (struct NCR53c7x0_cmd *)((u32)tmp + 255); +- tmp = (struct NCR53c7x0_cmd *)(((u32)tmp & ~0xff) + CmdPageStart); +-#if 0 +- printk ("scsi: size = %d, real = 0x%08x, tmp set to 0x%08x\n", +- size, real, (u32)tmp); +-#endif +- } +-#endif +- tmp->real = (void *)real; +- tmp->size = size; +- tmp->free = ((void (*)(void *, int)) my_free_page); +- local_irq_save(flags); +- tmp->next = hostdata->free; +- hostdata->free = tmp; +- local_irq_restore(flags); +- } +- local_irq_save(flags); +- tmp = (struct NCR53c7x0_cmd *) hostdata->free; +- if (tmp) { +- hostdata->free = tmp->next; +- } +- local_irq_restore(flags); +- if (!tmp) +- printk ("scsi%d : can't allocate command for target %d lun %d\n", +- host->host_no, cmd->device->id, cmd->device->lun); +- return tmp; +-} +- +-/* +- * Function static struct NCR53c7x0_cmd *create_cmd (Scsi_Cmnd *cmd) +- * +- * +- * Purpose : allocate a NCR53c7x0_cmd structure, initialize it based on the +- * Scsi_Cmnd structure passed in cmd, including dsa and Linux field +- * initialization, and dsa code relocation. +- * +- * Inputs : cmd - SCSI command +- * +- * Returns : NCR53c7x0_cmd structure corresponding to cmd, +- * NULL on failure. +- */ +-static struct NCR53c7x0_cmd * +-create_cmd (Scsi_Cmnd *cmd) { +- NCR53c7x0_local_declare(); +- struct Scsi_Host *host = cmd->device->host; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- struct NCR53c7x0_cmd *tmp; /* NCR53c7x0_cmd structure for this command */ +- int datain, /* Number of instructions per phase */ +- dataout; +- int data_transfer_instructions, /* Count of dynamic instructions */ +- i; /* Counter */ +- u32 *cmd_datain, /* Address of datain/dataout code */ +- *cmd_dataout; /* Incremented as we assemble */ +-#ifdef notyet +- unsigned char *msgptr; /* Current byte in select message */ +- int msglen; /* Length of whole select message */ +-#endif +- unsigned long flags; +- u32 exp_select_indirect; /* Used in sanity check */ +- NCR53c7x0_local_setup(cmd->device->host); +- +- if (!(tmp = allocate_cmd (cmd))) +- return NULL; +- +- /* +- * Copy CDB and initialised result fields from Scsi_Cmnd to NCR53c7x0_cmd. +- * We do this because NCR53c7x0_cmd may have a special cache mode +- * selected to cope with lack of bus snooping, etc. +- */ +- +- memcpy(tmp->cmnd, cmd->cmnd, 12); +- tmp->result = cmd->result; +- +- /* +- * Decide whether we need to generate commands for DATA IN, +- * DATA OUT, neither, or both based on the SCSI command +- */ +- +- switch (cmd->cmnd[0]) { +- /* These commands do DATA IN */ +- case INQUIRY: +- case MODE_SENSE: +- case READ_6: +- case READ_10: +- case READ_CAPACITY: +- case REQUEST_SENSE: +- case READ_BLOCK_LIMITS: +- case READ_TOC: +- datain = 2 * (cmd->use_sg ? 
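+- /*
+-  * Budget sketch (an inference from the code below, not documented):
+-  * each scatter/gather segment costs two dynamic instructions (a
+-  * CALL for non-data phases plus the MOVE itself) and one JUMP
+-  * closes the list; the remaining slack apparently covers the
+-  * debug INTs compiled out below and the two-instruction minimum
+-  * needed when this area is overwritten for REQUEST SENSE.
+-  */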
cmd->use_sg : 1) + 3; +- dataout = 0; +- break; +- /* These commands do DATA OUT */ +- case MODE_SELECT: +- case WRITE_6: +- case WRITE_10: +-#if 0 +- printk("scsi%d : command is ", host->host_no); +- __scsi_print_command(cmd->cmnd); +-#endif +-#if 0 +- printk ("scsi%d : %d scatter/gather segments\n", host->host_no, +- cmd->use_sg); +-#endif +- datain = 0; +- dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3; +-#if 0 +- hostdata->options |= OPTION_DEBUG_INTR; +-#endif +- break; +- /* +- * These commands do no data transfer, we should force an +- * interrupt if a data phase is attempted on them. +- */ +- case TEST_UNIT_READY: +- case ALLOW_MEDIUM_REMOVAL: +- case START_STOP: +- datain = dataout = 0; +- break; +- /* +- * We don't know about these commands, so generate code to handle +- * both DATA IN and DATA OUT phases. More efficient to identify them +- * and add them to the above cases. +- */ +- default: +- printk("scsi%d : datain+dataout for command ", host->host_no); +- __scsi_print_command(cmd->cmnd); +- datain = dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3; +- } +- +- /* +- * New code : so that active pointers work correctly regardless +- * of where the saved data pointer is at, we want to immediately +- * enter the dynamic code after selection, and on a non-data +- * phase perform a CALL to the non-data phase handler, with +- * returns back to this address. +- * +- * If a phase mismatch is encountered in the middle of a +- * Block MOVE instruction, we want to _leave_ that instruction +- * unchanged as the current case is, modify a temporary buffer, +- * and point the active pointer (TEMP) at that. +- * +- * Furthermore, we want to implement a saved data pointer, +- * set by the SAVE_DATA_POINTERs message. +- * +- * So, the data transfer segments will change to +- * CALL data_transfer, WHEN NOT data phase +- * MOVE x, x, WHEN data phase +- * ( repeat ) +- * JUMP other_transfer +- */ +- +- data_transfer_instructions = datain + dataout; +- +- /* +- * When we perform a request sense, we overwrite various things, +- * including the data transfer code. Make sure we have enough +- * space to do that. +- */ +- +- if (data_transfer_instructions < 2) +- data_transfer_instructions = 2; +- +- +- /* +- * The saved data pointer is set up so that a RESTORE POINTERS message +- * will start the data transfer over at the beginning. +- */ +- +- tmp->saved_data_pointer = virt_to_bus (hostdata->script) + +- hostdata->E_data_transfer; +- +- /* +- * Initialize Linux specific fields. +- */ +- +- tmp->cmd = cmd; +- tmp->next = NULL; +- tmp->flags = 0; +- tmp->dsa_next_addr = virt_to_bus(tmp->dsa) + hostdata->dsa_next - +- hostdata->dsa_start; +- tmp->dsa_addr = virt_to_bus(tmp->dsa) - hostdata->dsa_start; +- +- /* +- * Calculate addresses of dynamic code to fill in DSA +- */ +- +- tmp->data_transfer_start = tmp->dsa + (hostdata->dsa_end - +- hostdata->dsa_start) / sizeof(u32); +- tmp->data_transfer_end = tmp->data_transfer_start + +- 2 * data_transfer_instructions; +- +- cmd_datain = datain ? tmp->data_transfer_start : NULL; +- cmd_dataout = dataout ? (datain ? cmd_datain + 2 * datain : tmp-> +- data_transfer_start) : NULL; +- +- /* +- * Fill in the NCR53c7x0_cmd structure as follows +- * dsa, with fixed up DSA code +- * datain code +- * dataout code +- */ +- +- /* Copy template code into dsa and perform all necessary fixups */ +- if (hostdata->dsa_fixup) +- hostdata->dsa_fixup(tmp); +- +- patch_dsa_32(tmp->dsa, dsa_next, 0, 0); +- /* +- * XXX is this giving 53c710 access to the Scsi_Cmnd in some way? 
+- * Do we need to change it for caching reasons? +- */ +- patch_dsa_32(tmp->dsa, dsa_cmnd, 0, virt_to_bus(cmd)); +- +- if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) { +- +- exp_select_indirect = ((1 << cmd->device->id) << 16) | +- (hostdata->sync[cmd->device->id].sxfer_sanity << 8); +- +- if (hostdata->sync[cmd->device->id].select_indirect != +- exp_select_indirect) { +- printk ("scsi%d : sanity check failed select_indirect=0x%x\n", +- host->host_no, hostdata->sync[cmd->device->id].select_indirect); +- FATAL(host); +- +- } +- } +- +- patch_dsa_32(tmp->dsa, dsa_select, 0, +- hostdata->sync[cmd->device->id].select_indirect); +- +- /* +- * Right now, we'll do the WIDE and SYNCHRONOUS negotiations on +- * different commands; although it should be trivial to do them +- * both at the same time. +- */ +- if (hostdata->initiate_wdtr & (1 << cmd->device->id)) { +- memcpy ((void *) (tmp->select + 1), (void *) wdtr_message, +- sizeof(wdtr_message)); +- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(wdtr_message)); +- local_irq_save(flags); +- hostdata->initiate_wdtr &= ~(1 << cmd->device->id); +- local_irq_restore(flags); +- } else if (hostdata->initiate_sdtr & (1 << cmd->device->id)) { +- memcpy ((void *) (tmp->select + 1), (void *) sdtr_message, +- sizeof(sdtr_message)); +- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(sdtr_message)); +- tmp->flags |= CMD_FLAG_SDTR; +- local_irq_save(flags); +- hostdata->initiate_sdtr &= ~(1 << cmd->device->id); +- local_irq_restore(flags); +- +- } +-#if 1 +- else if (!(hostdata->talked_to & (1 << cmd->device->id)) && +- !(hostdata->options & OPTION_NO_ASYNC)) { +- +- memcpy ((void *) (tmp->select + 1), (void *) async_message, +- sizeof(async_message)); +- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(async_message)); +- tmp->flags |= CMD_FLAG_SDTR; +- } +-#endif +- else +- patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1); +- +- hostdata->talked_to |= (1 << cmd->device->id); +- tmp->select[0] = (hostdata->options & OPTION_DISCONNECT) ? +- IDENTIFY (1, cmd->device->lun) : IDENTIFY (0, cmd->device->lun); +- patch_dsa_32(tmp->dsa, dsa_msgout, 1, virt_to_bus(tmp->select)); +- patch_dsa_32(tmp->dsa, dsa_cmdout, 0, cmd->cmd_len); +- patch_dsa_32(tmp->dsa, dsa_cmdout, 1, virt_to_bus(tmp->cmnd)); +- patch_dsa_32(tmp->dsa, dsa_dataout, 0, cmd_dataout ? +- virt_to_bus (cmd_dataout) +- : virt_to_bus (hostdata->script) + hostdata->E_other_transfer); +- patch_dsa_32(tmp->dsa, dsa_datain, 0, cmd_datain ? +- virt_to_bus (cmd_datain) +- : virt_to_bus (hostdata->script) + hostdata->E_other_transfer); +- /* +- * XXX - need to make endian aware, should use separate variables +- * for both status and message bytes. +- */ +- patch_dsa_32(tmp->dsa, dsa_msgin, 0, 1); +-/* +- * FIXME : these only works for little endian. We probably want to +- * provide message and status fields in the NCR53c7x0_cmd +- * structure, and assign them to cmd->result when we're done. 
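+- *
+- * Layout assumed by the little-endian patches below: the status
+- * byte lands in bits 0-7 of tmp->result and the message byte in
+- * bits 8-15, hence dsa_status pointing at &tmp->result and
+- * dsa_msgin at &tmp->result + 1 (offsets 3 and 2 respectively on
+- * big-endian).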
+- */ +-#ifdef BIG_ENDIAN +- patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 2); +- patch_dsa_32(tmp->dsa, dsa_status, 0, 1); +- patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result) + 3); +-#else +- patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 1); +- patch_dsa_32(tmp->dsa, dsa_status, 0, 1); +- patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result)); +-#endif +- patch_dsa_32(tmp->dsa, dsa_msgout_other, 0, 1); +- patch_dsa_32(tmp->dsa, dsa_msgout_other, 1, +- virt_to_bus(&(hostdata->NCR53c7xx_msg_nop))); +- +- /* +- * Generate code for zero or more of the DATA IN, DATA OUT phases +- * in the format +- * +- * CALL data_transfer, WHEN NOT phase +- * MOVE first buffer length, first buffer address, WHEN phase +- * ... +- * MOVE last buffer length, last buffer address, WHEN phase +- * JUMP other_transfer +- */ +- +-/* +- * See if we're getting to data transfer by generating an unconditional +- * interrupt. +- */ +-#if 0 +- if (datain) { +- cmd_datain[0] = 0x98080000; +- cmd_datain[1] = 0x03ffd00d; +- cmd_datain += 2; +- } +-#endif +- +-/* +- * XXX - I'm undecided whether all of this nonsense is faster +- * in the long run, or whether I should just go and implement a loop +- * on the NCR chip using table indirect mode? +- * +- * In any case, this is how it _must_ be done for 53c700/700-66 chips, +- * so this stays even when we come up with something better. +- * +- * When we're limited to 1 simultaneous command, no overlapping processing, +- * we're seeing 630K/sec, with 7% CPU usage on a slow Syquest 45M +- * drive. +- * +- * Not bad, not good. We'll see. +- */ +- +- tmp->bounce.len = 0; /* Assume aligned buffer */ +- +- for (i = 0; cmd->use_sg ? (i < cmd->use_sg) : !i; cmd_datain += 4, +- cmd_dataout += 4, ++i) { +- u32 vbuf = cmd->use_sg +- ? (u32)page_address(((struct scatterlist *)cmd->request_buffer)[i].page)+ +- ((struct scatterlist *)cmd->request_buffer)[i].offset +- : (u32)(cmd->request_buffer); +- u32 bbuf = virt_to_bus((void *)vbuf); +- u32 count = cmd->use_sg ? +- ((struct scatterlist *)cmd->request_buffer)[i].length : +- cmd->request_bufflen; +- +- /* +- * If we have buffers which are not aligned with 16 byte cache +- * lines, then we just hope nothing accesses the other parts of +- * those cache lines while the transfer is in progress. That would +- * fill the cache, and subsequent reads of the dma data would pick +- * up the wrong thing. +- * XXX We need a bounce buffer to handle that correctly. 
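+- *
+- * Bounce flow used below, single-buffer case only: DATA OUT copies
+- * the payload into bounce.buf up front, while DATA IN poisons the
+- * buffer (ff fe fd fc) and relies on NCR53c7x0_intfly() to copy
+- * bounce.buf back to bounce.addr once the command completes.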
+- */ +- +- if (((bbuf & 15) || (count & 15)) && (datain || dataout)) +- { +- /* Bounce buffer needed */ +- if (cmd->use_sg) +- printk ("53c7xx: Non-aligned buffer with use_sg\n"); +- else if (datain && dataout) +- printk ("53c7xx: Non-aligned buffer with datain && dataout\n"); +- else if (count > 256) +- printk ("53c7xx: Non-aligned transfer > 256 bytes\n"); +- else +- { +- if (datain) +- { +- tmp->bounce.len = count; +- tmp->bounce.addr = vbuf; +- bbuf = virt_to_bus(tmp->bounce.buf); +- tmp->bounce.buf[0] = 0xff; +- tmp->bounce.buf[1] = 0xfe; +- tmp->bounce.buf[2] = 0xfd; +- tmp->bounce.buf[3] = 0xfc; +- } +- if (dataout) +- { +- memcpy ((void *)tmp->bounce.buf, (void *)vbuf, count); +- bbuf = virt_to_bus(tmp->bounce.buf); +- } +- } +- } +- +- if (datain) { +- cache_clear(virt_to_phys((void *)vbuf), count); +- /* CALL other_in, WHEN NOT DATA_IN */ +- cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL | +- DCMD_TCI_IO) << 24) | +- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; +- cmd_datain[1] = virt_to_bus (hostdata->script) + +- hostdata->E_other_in; +- /* MOVE count, buf, WHEN DATA_IN */ +- cmd_datain[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | DCMD_BMI_IO) +- << 24) | count; +- cmd_datain[3] = bbuf; +-#if 0 +- print_insn (host, cmd_datain, "dynamic ", 1); +- print_insn (host, cmd_datain + 2, "dynamic ", 1); +-#endif +- } +- if (dataout) { +- cache_push(virt_to_phys((void *)vbuf), count); +- /* CALL other_out, WHEN NOT DATA_OUT */ +- cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL) << 24) | +- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; +- cmd_dataout[1] = virt_to_bus(hostdata->script) + +- hostdata->E_other_out; +- /* MOVE count, buf, WHEN DATA+OUT */ +- cmd_dataout[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I) << 24) +- | count; +- cmd_dataout[3] = bbuf; +-#if 0 +- print_insn (host, cmd_dataout, "dynamic ", 1); +- print_insn (host, cmd_dataout + 2, "dynamic ", 1); +-#endif +- } +- } +- +- /* +- * Install JUMP instructions after the data transfer routines to return +- * control to the do_other_transfer routines. +- */ +- +- +- if (datain) { +- cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) | +- DBC_TCI_TRUE; +- cmd_datain[1] = virt_to_bus(hostdata->script) + +- hostdata->E_other_transfer; +-#if 0 +- print_insn (host, cmd_datain, "dynamic jump ", 1); +-#endif +- cmd_datain += 2; +- } +-#if 0 +- if (datain) { +- cmd_datain[0] = 0x98080000; +- cmd_datain[1] = 0x03ffdeed; +- cmd_datain += 2; +- } +-#endif +- if (dataout) { +- cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) | +- DBC_TCI_TRUE; +- cmd_dataout[1] = virt_to_bus(hostdata->script) + +- hostdata->E_other_transfer; +-#if 0 +- print_insn (host, cmd_dataout, "dynamic jump ", 1); +-#endif +- cmd_dataout += 2; +- } +- +- return tmp; +-} +- +-/* +- * Function : int NCR53c7xx_queue_command (Scsi_Cmnd *cmd, +- * void (*done)(Scsi_Cmnd *)) +- * +- * Purpose : enqueues a SCSI command +- * +- * Inputs : cmd - SCSI command, done - function called on completion, with +- * a pointer to the command descriptor. +- * +- * Returns : 0 +- * +- * Side effects : +- * cmd is added to the per instance driver issue_queue, with major +- * twiddling done to the host specific fields of cmd. If the +- * process_issue_queue coroutine isn't running, it is restarted. +- * +- * NOTE : we use the host_scribble field of the Scsi_Cmnd structure to +- * hold our own data, and pervert the ptr field of the SCp field +- * to create a linked list. 
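+- *
+- * List discipline, sketched: issue_queue -> cmd -> cmd -> NULL,
+- * linked through SCp.ptr, with REQUEST SENSE commands pushed at the
+- * head so an outstanding contingent allegiance condition is not
+- * cleared by an unrelated command slipping in first.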
+- */ +- +-int +-NCR53c7xx_queue_command (Scsi_Cmnd *cmd, void (* done)(Scsi_Cmnd *)) { +- struct Scsi_Host *host = cmd->device->host; +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- unsigned long flags; +- Scsi_Cmnd *tmp; +- +- cmd->scsi_done = done; +- cmd->host_scribble = NULL; +- cmd->SCp.ptr = NULL; +- cmd->SCp.buffer = NULL; +- +-#ifdef VALID_IDS +- /* Ignore commands on invalid IDs */ +- if (!hostdata->valid_ids[cmd->device->id]) { +- printk("scsi%d : ignoring target %d lun %d\n", host->host_no, +- cmd->device->id, cmd->device->lun); +- cmd->result = (DID_BAD_TARGET << 16); +- done(cmd); +- return 0; +- } +-#endif +- +- local_irq_save(flags); +- if ((hostdata->options & (OPTION_DEBUG_INIT_ONLY|OPTION_DEBUG_PROBE_ONLY)) +- || ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) && +- !(hostdata->debug_lun_limit[cmd->device->id] & (1 << cmd->device->lun))) +-#ifdef LINUX_1_2 +- || cmd->device->id > 7 +-#else +- || cmd->device->id >= host->max_id +-#endif +- || cmd->device->id == host->this_id +- || hostdata->state == STATE_DISABLED) { +- printk("scsi%d : disabled or bad target %d lun %d\n", host->host_no, +- cmd->device->id, cmd->device->lun); +- cmd->result = (DID_BAD_TARGET << 16); +- done(cmd); +- local_irq_restore(flags); +- return 0; +- } +- +- if ((hostdata->options & OPTION_DEBUG_NCOMMANDS_LIMIT) && +- (hostdata->debug_count_limit == 0)) { +- printk("scsi%d : maximum commands exceeded\n", host->host_no); +- cmd->result = (DID_BAD_TARGET << 16); +- done(cmd); +- local_irq_restore(flags); +- return 0; +- } +- +- if (hostdata->options & OPTION_DEBUG_READ_ONLY) { +- switch (cmd->cmnd[0]) { +- case WRITE_6: +- case WRITE_10: +- printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n", +- host->host_no); +- cmd->result = (DID_BAD_TARGET << 16); +- done(cmd); +- local_irq_restore(flags); +- return 0; +- } +- } +- +- if ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) && +- hostdata->debug_count_limit != -1) +- --hostdata->debug_count_limit; +- +- cmd->result = 0xffff; /* The NCR will overwrite message +- and status with valid data */ +- cmd->host_scribble = (unsigned char *) tmp = create_cmd (cmd); +- +- /* +- * REQUEST SENSE commands are inserted at the head of the queue +- * so that we do not clear the contingent allegiance condition +- * they may be looking at. +- */ +- +- if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) { +- cmd->SCp.ptr = (unsigned char *) hostdata->issue_queue; +- hostdata->issue_queue = cmd; +- } else { +- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp->SCp.ptr; +- tmp = (Scsi_Cmnd *) tmp->SCp.ptr); +- tmp->SCp.ptr = (unsigned char *) cmd; +- } +- local_irq_restore(flags); +- run_process_issue_queue(); +- return 0; +-} +- +-/* +- * Function : void to_schedule_list (struct Scsi_Host *host, +- * struct NCR53c7x0_hostdata * hostdata, Scsi_Cmnd *cmd) +- * +- * Purpose : takes a SCSI command which was just removed from the +- * issue queue, and deals with it by inserting it in the first +- * free slot in the schedule list or by terminating it immediately. +- * +- * Inputs : +- * host - SCSI host adapter; hostdata - hostdata structure for +- * this adapter; cmd - a pointer to the command; should have +- * the host_scribble field initialized to point to a valid +- * +- * Side effects : +- * cmd is added to the per instance schedule list, with minor +- * twiddling done to the host specific fields of cmd. 
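+- *
+- * Mechanics, sketched: the schedule is host->can_queue two-word
+- * slots, a free slot holding a NOP; the first NOP found has its
+- * operand pointed at the command's dsa code and its opcode
+- * rewritten to a JUMP, after which the chip is either started (if
+- * idle) or signalled via ISTAT SIGP.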
+- * +- */ +- +-static __inline__ void +-to_schedule_list (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata, +- struct NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- Scsi_Cmnd *tmp = cmd->cmd; +- unsigned long flags; +- /* dsa start is negative, so subtraction is used */ +- volatile u32 *ncrcurrent; +- +- int i; +- NCR53c7x0_local_setup(host); +-#if 0 +- printk("scsi%d : new dsa is 0x%lx (virt 0x%p)\n", host->host_no, +- virt_to_bus(hostdata->dsa), hostdata->dsa); +-#endif +- +- local_irq_save(flags); +- +- /* +- * Work around race condition : if an interrupt fired and we +- * got disabled forget about this command. +- */ +- +- if (hostdata->state == STATE_DISABLED) { +- printk("scsi%d : driver disabled\n", host->host_no); +- tmp->result = (DID_BAD_TARGET << 16); +- cmd->next = (struct NCR53c7x0_cmd *) hostdata->free; +- hostdata->free = cmd; +- tmp->scsi_done(tmp); +- local_irq_restore(flags); +- return; +- } +- +- for (i = host->can_queue, ncrcurrent = hostdata->schedule; +- i > 0 && ncrcurrent[0] != hostdata->NOP_insn; +- --i, ncrcurrent += 2 /* JUMP instructions are two words */); +- +- if (i > 0) { +- ++hostdata->busy[tmp->device->id][tmp->device->lun]; +- cmd->next = hostdata->running_list; +- hostdata->running_list = cmd; +- +- /* Restore this instruction to a NOP once the command starts */ +- cmd->dsa [(hostdata->dsa_jump_dest - hostdata->dsa_start) / +- sizeof(u32)] = (u32) virt_to_bus ((void *)ncrcurrent); +- /* Replace the current jump operand. */ +- ncrcurrent[1] = +- virt_to_bus ((void *) cmd->dsa) + hostdata->E_dsa_code_begin - +- hostdata->E_dsa_code_template; +- /* Replace the NOP instruction with a JUMP */ +- ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | +- DBC_TCI_TRUE; +- } else { +- printk ("scsi%d: no free slot\n", host->host_no); +- disable(host); +- tmp->result = (DID_ERROR << 16); +- cmd->next = (struct NCR53c7x0_cmd *) hostdata->free; +- hostdata->free = cmd; +- tmp->scsi_done(tmp); +- local_irq_restore(flags); +- return; +- } +- +- /* +- * If the NCR chip is in an idle state, start it running the scheduler +- * immediately. Otherwise, signal the chip to jump to schedule as +- * soon as it is idle. +- */ +- +- if (hostdata->idle) { +- hostdata->idle = 0; +- hostdata->state = STATE_RUNNING; +- NCR53c7x0_write32 (DSP_REG, virt_to_bus ((void *)hostdata->schedule)); +- if (hostdata->options & OPTION_DEBUG_TRACE) +- NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | +- DCNTL_SSM | DCNTL_STD); +- } else { +- NCR53c7x0_write8(hostdata->istat, ISTAT_10_SIGP); +- } +- +- local_irq_restore(flags); +-} +- +-/* +- * Function : busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata +- * *hostdata, Scsi_Cmnd *cmd) +- * +- * Purpose : decide if we can pass the given SCSI command on to the +- * device in question or not. +- * +- * Returns : non-zero when we're busy, 0 when we aren't. +- */ +- +-static __inline__ int +-busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata, +- Scsi_Cmnd *cmd) { +- /* FIXME : in the future, this needs to accommodate SCSI-II tagged +- queuing, and we may be able to play with fairness here a bit. +- */ +- return hostdata->busy[cmd->device->id][cmd->device->lun]; +-} +- +-/* +- * Function : process_issue_queue (void) +- * +- * Purpose : transfer commands from the issue queue to NCR start queue +- * of each NCR53c7/8xx in the system, avoiding kernel stack +- * overflows when the scsi_done() function is invoked recursively. 
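+- *
+- * Reentrancy is handled with a flag: process_issue_queue_running
+- * (cleared on exit below) presumably lets run_process_issue_queue()
+- * avoid nesting when a scsi_done() callback queues fresh commands.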
+- * +- * NOTE : process_issue_queue exits with interrupts *disabled*, so the +- * caller must reenable them if it desires. +- * +- * NOTE : process_issue_queue should be called from both +- * NCR53c7x0_queue_command() and from the interrupt handler +- * after command completion in case NCR53c7x0_queue_command() +- * isn't invoked again but we've freed up resources that are +- * needed. +- */ +- +-static void +-process_issue_queue (unsigned long flags) { +- Scsi_Cmnd *tmp, *prev; +- struct Scsi_Host *host; +- struct NCR53c7x0_hostdata *hostdata; +- int done; +- +- /* +- * We run (with interrupts disabled) until we're sure that none of +- * the host adapters have anything that can be done, at which point +- * we set process_issue_queue_running to 0 and exit. +- * +- * Interrupts are enabled before doing various other internal +- * instructions, after we've decided that we need to run through +- * the loop again. +- * +- */ +- +- do { +- local_irq_disable(); /* Freeze request queues */ +- done = 1; +- for (host = first_host; host && host->hostt == the_template; +- host = host->next) { +- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- local_irq_disable(); +- if (hostdata->issue_queue) { +- if (hostdata->state == STATE_DISABLED) { +- tmp = (Scsi_Cmnd *) hostdata->issue_queue; +- hostdata->issue_queue = (Scsi_Cmnd *) tmp->SCp.ptr; +- tmp->result = (DID_BAD_TARGET << 16); +- if (tmp->host_scribble) { +- ((struct NCR53c7x0_cmd *)tmp->host_scribble)->next = +- hostdata->free; +- hostdata->free = +- (struct NCR53c7x0_cmd *)tmp->host_scribble; +- tmp->host_scribble = NULL; +- } +- tmp->scsi_done (tmp); +- done = 0; +- } else +- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue, +- prev = NULL; tmp; prev = tmp, tmp = (Scsi_Cmnd *) +- tmp->SCp.ptr) +- if (!tmp->host_scribble || +- !busyp (host, hostdata, tmp)) { +- if (prev) +- prev->SCp.ptr = tmp->SCp.ptr; +- else +- hostdata->issue_queue = (Scsi_Cmnd *) +- tmp->SCp.ptr; +- tmp->SCp.ptr = NULL; +- if (tmp->host_scribble) { +- if (hostdata->options & OPTION_DEBUG_QUEUES) +- printk ("scsi%d : moving command for target %d lun %d to start list\n", +- host->host_no, tmp->device->id, tmp->device->lun); +- +- +- to_schedule_list (host, hostdata, +- (struct NCR53c7x0_cmd *) +- tmp->host_scribble); +- } else { +- if (((tmp->result & 0xff) == 0xff) || +- ((tmp->result & 0xff00) == 0xff00)) { +- printk ("scsi%d : danger Will Robinson!\n", +- host->host_no); +- tmp->result = DID_ERROR << 16; +- disable (host); +- } +- tmp->scsi_done(tmp); +- } +- done = 0; +- } /* if target/lun is not busy */ +- } /* if hostdata->issue_queue */ +- if (!done) +- local_irq_restore(flags); +- } /* for host */ +- } while (!done); +- process_issue_queue_running = 0; +-} +- +-/* +- * Function : static void intr_scsi (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : handle all SCSI interrupts, indicated by the setting +- * of the SIP bit in the ISTAT register. +- * +- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd +- * may be NULL. +- */ +- +-static void +-intr_scsi (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- unsigned char sstat0_sist0, sist1, /* Registers */ +- fatal; /* Did a fatal interrupt +- occur ? 
*/ +- +- NCR53c7x0_local_setup(host); +- +- fatal = 0; +- +- sstat0_sist0 = NCR53c7x0_read8(SSTAT0_REG); +- sist1 = 0; +- +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : SIST0 0x%0x, SIST1 0x%0x\n", host->host_no, +- sstat0_sist0, sist1); +- +- /* 250ms selection timeout */ +- if (sstat0_sist0 & SSTAT0_700_STO) { +- fatal = 1; +- if (hostdata->options & OPTION_DEBUG_INTR) { +- printk ("scsi%d : Selection Timeout\n", host->host_no); +- if (cmd) { +- printk("scsi%d : target %d, lun %d, command ", +- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); +- __scsi_print_command (cmd->cmd->cmnd); +- printk("scsi%d : dsp = 0x%x (virt 0x%p)\n", host->host_no, +- NCR53c7x0_read32(DSP_REG), +- bus_to_virt(NCR53c7x0_read32(DSP_REG))); +- } else { +- printk("scsi%d : no command\n", host->host_no); +- } +- } +-/* +- * XXX - question : how do we want to handle the Illegal Instruction +- * interrupt, which may occur before or after the Selection Timeout +- * interrupt? +- */ +- +- if (1) { +- hostdata->idle = 1; +- hostdata->expecting_sto = 0; +- +- if (hostdata->test_running) { +- hostdata->test_running = 0; +- hostdata->test_completed = 3; +- } else if (cmd) { +- abnormal_finished(cmd, DID_BAD_TARGET << 16); +- } +-#if 0 +- hostdata->intrs = 0; +-#endif +- } +- } +- +-/* +- * FIXME : in theory, we can also get a UDC when a STO occurs. +- */ +- if (sstat0_sist0 & SSTAT0_UDC) { +- fatal = 1; +- if (cmd) { +- printk("scsi%d : target %d lun %d unexpected disconnect\n", +- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); +- print_lots (host); +- abnormal_finished(cmd, DID_ERROR << 16); +- } else +- printk("scsi%d : unexpected disconnect (no command)\n", +- host->host_no); +- +- hostdata->dsp = (u32 *) hostdata->schedule; +- hostdata->dsp_changed = 1; +- } +- +- /* SCSI PARITY error */ +- if (sstat0_sist0 & SSTAT0_PAR) { +- fatal = 1; +- if (cmd && cmd->cmd) { +- printk("scsi%d : target %d lun %d parity error.\n", +- host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun); +- abnormal_finished (cmd, DID_PARITY << 16); +- } else +- printk("scsi%d : parity error\n", host->host_no); +- /* Should send message out, parity error */ +- +- /* XXX - Reduce synchronous transfer rate! */ +- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- /* SCSI GROSS error */ +- } +- +- if (sstat0_sist0 & SSTAT0_SGE) { +- fatal = 1; +- printk("scsi%d : gross error, saved2_dsa = 0x%x\n", host->host_no, +- (unsigned int)hostdata->saved2_dsa); +- print_lots (host); +- +- /* +- * A SCSI gross error may occur when we have +- * +- * - A synchronous offset which causes the SCSI FIFO to be overwritten. +- * +- * - A REQ which causes the maximum synchronous offset programmed in +- * the SXFER register to be exceeded. +- * +- * - A phase change with an outstanding synchronous offset. +- * +- * - Residual data in the synchronous data FIFO, with a transfer +- * other than a synchronous receive is started.$# +- */ +- +- +- /* XXX Should deduce synchronous transfer rate! */ +- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- /* Phase mismatch */ +- } +- +- if (sstat0_sist0 & SSTAT0_MA) { +- fatal = 1; +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : SSTAT0_MA\n", host->host_no); +- intr_phase_mismatch (host, cmd); +- } +- +-#if 0 +- if (sstat0_sist0 & SIST0_800_RSL) +- printk ("scsi%d : Oh no Mr. 
Bill!\n", host->host_no); +-#endif +- +-/* +- * If a fatal SCSI interrupt occurs, we must insure that the DMA and +- * SCSI FIFOs were flushed. +- */ +- +- if (fatal) { +- if (!hostdata->dstat_valid) { +- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); +- hostdata->dstat_valid = 1; +- } +- +- if (!(hostdata->dstat & DSTAT_DFE)) { +- printk ("scsi%d : DMA FIFO not empty\n", host->host_no); +- /* +- * Really need to check this code for 710 RGH. +- * Havn't seen any problems, but maybe we should FLUSH before +- * clearing sometimes. +- */ +- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); +- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF) +- ; +- hostdata->dstat |= DSTAT_DFE; +- } +- } +-} +- +-#ifdef CYCLIC_TRACE +- +-/* +- * The following implements a cyclic log of instructions executed, if you turn +- * TRACE on. It will also print the log for you. Very useful when debugging +- * 53c710 support, possibly not really needed any more. +- */ +- +-u32 insn_log[4096]; +-u32 insn_log_index = 0; +- +-void log1 (u32 i) +-{ +- insn_log[insn_log_index++] = i; +- if (insn_log_index == 4096) +- insn_log_index = 0; +-} +- +-void log_insn (u32 *ip) +-{ +- log1 ((u32)ip); +- log1 (*ip); +- log1 (*(ip+1)); +- if (((*ip >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) +- log1 (*(ip+2)); +-} +- +-void dump_log(void) +-{ +- int cnt = 0; +- int i = insn_log_index; +- int size; +- struct Scsi_Host *host = first_host; +- +- while (cnt < 4096) { +- printk ("%08x (+%6x): ", insn_log[i], (insn_log[i] - (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4); +- if (++i == 4096) +- i = 0; +- cnt++; +- if (((insn_log[i] >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) +- size = 3; +- else +- size = 2; +- while (size--) { +- printk ("%08x ", insn_log[i]); +- if (++i == 4096) +- i = 0; +- cnt++; +- } +- printk ("\n"); +- } +-} +-#endif +- +- +-/* +- * Function : static void NCR53c7x0_intfly (struct Scsi_Host *host) +- * +- * Purpose : Scan command queue for specified host, looking for completed +- * commands. +- * +- * Inputs : Scsi_Host pointer. +- * +- * This is called from the interrupt handler, when a simulated INTFLY +- * interrupt occurs. +- */ +- +-static void +-NCR53c7x0_intfly (struct Scsi_Host *host) +-{ +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */ +- struct NCR53c7x0_cmd *cmd, /* command which halted */ +- **cmd_prev_ptr; +- unsigned long flags; +- char search_found = 0; /* Got at least one ? */ +- +- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- NCR53c7x0_local_setup(host); +- +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : INTFLY\n", host->host_no); +- +- /* +- * Traverse our list of running commands, and look +- * for those with valid (non-0xff ff) status and message +- * bytes encoded in the result which signify command +- * completion. +- */ +- +- local_irq_save(flags); +-restart: +- for (cmd_prev_ptr = (struct NCR53c7x0_cmd **)&(hostdata->running_list), +- cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; cmd ; +- cmd_prev_ptr = (struct NCR53c7x0_cmd **) &(cmd->next), +- cmd = (struct NCR53c7x0_cmd *) cmd->next) +- { +- Scsi_Cmnd *tmp; +- +- if (!cmd) { +- printk("scsi%d : very weird.\n", host->host_no); +- break; +- } +- +- if (!(tmp = cmd->cmd)) { +- printk("scsi%d : weird. NCR53c7x0_cmd has no Scsi_Cmnd\n", +- host->host_no); +- continue; +- } +- /* Copy the result over now; may not be complete, +- * but subsequent tests may as well be done on +- * cached memory. 
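+- *
+- * (tmp->result was seeded with 0xffff at queue time, so a status or
+- * message byte still reading 0xff here means the SCRIPTS code has
+- * not yet stored real completion bytes for this command.)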
+- */ +- tmp->result = cmd->result; +- +- if (((tmp->result & 0xff) == 0xff) || +- ((tmp->result & 0xff00) == 0xff00)) +- continue; +- +- search_found = 1; +- +- if (cmd->bounce.len) +- memcpy ((void *)cmd->bounce.addr, +- (void *)cmd->bounce.buf, cmd->bounce.len); +- +- /* Important - remove from list _before_ done is called */ +- if (cmd_prev_ptr) +- *cmd_prev_ptr = (struct NCR53c7x0_cmd *) cmd->next; +- +- --hostdata->busy[tmp->device->id][tmp->device->lun]; +- cmd->next = hostdata->free; +- hostdata->free = cmd; +- +- tmp->host_scribble = NULL; +- +- if (hostdata->options & OPTION_DEBUG_INTR) { +- printk ("scsi%d : command complete : pid %lu, id %d,lun %d result 0x%x ", +- host->host_no, tmp->pid, tmp->device->id, tmp->device->lun, tmp->result); +- __scsi_print_command (tmp->cmnd); +- } +- +- tmp->scsi_done(tmp); +- goto restart; +- } +- local_irq_restore(flags); +- +- if (!search_found) { +- printk ("scsi%d : WARNING : INTFLY with no completed commands.\n", +- host->host_no); +- } else { +- run_process_issue_queue(); +- } +- return; +-} +- +-/* +- * Function : static irqreturn_t NCR53c7x0_intr (int irq, void *dev_id) +- * +- * Purpose : handle NCR53c7x0 interrupts for all NCR devices sharing +- * the same IRQ line. +- * +- * Inputs : Since we're using the IRQF_DISABLED interrupt handler +- * semantics, irq indicates the interrupt which invoked +- * this handler. +- * +- * On the 710 we simualte an INTFLY with a script interrupt, and the +- * script interrupt handler will call back to this function. +- */ +- +-static irqreturn_t +-NCR53c7x0_intr (int irq, void *dev_id) +-{ +- NCR53c7x0_local_declare(); +- struct Scsi_Host *host; /* Host we are looking at */ +- unsigned char istat; /* Values of interrupt regs */ +- struct NCR53c7x0_hostdata *hostdata; /* host->hostdata[0] */ +- struct NCR53c7x0_cmd *cmd; /* command which halted */ +- u32 *dsa; /* DSA */ +- int handled = 0; +- +-#ifdef NCR_DEBUG +- char buf[80]; /* Debugging sprintf buffer */ +- size_t buflen; /* Length of same */ +-#endif +- +- host = (struct Scsi_Host *)dev_id; +- hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- NCR53c7x0_local_setup(host); +- +- /* +- * Only read istat once per loop, since reading it again will unstack +- * interrupts +- */ +- +- while ((istat = NCR53c7x0_read8(hostdata->istat)) & (ISTAT_SIP|ISTAT_DIP)) { +- handled = 1; +- hostdata->dsp_changed = 0; +- hostdata->dstat_valid = 0; +- hostdata->state = STATE_HALTED; +- +- if (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) +- printk ("scsi%d : SCSI FIFO not empty\n", host->host_no); +- +- /* +- * NCR53c700 and NCR53c700-66 change the current SCSI +- * process, hostdata->ncrcurrent, in the Linux driver so +- * cmd = hostdata->ncrcurrent. +- * +- * With other chips, we must look through the commands +- * executing and find the command structure which +- * corresponds to the DSA register. 
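On chips with a real DSA register, the lookup that follows recovers the driver's per-command structure from the hardware pointer by walking the running list. The same walk, reduced to a standalone sketch with stand-in types (none of these names are the driver's):

#include <stddef.h>
#include <stdint.h>

struct ncmd {                      /* stand-in for struct NCR53c7x0_cmd */
    uint32_t *dsa;
    struct ncmd *next;
};

/* hw_dsa is the chip's DSA pointer, here taken as already adjusted by
 * the dsa_start offset the way the driver does it; NULL means no
 * matching command was found. */
static struct ncmd *find_by_dsa(struct ncmd *running, uint32_t *hw_dsa)
{
    struct ncmd *c;

    for (c = running; c && c->dsa != hw_dsa; c = c->next)
        ;
    return c;
}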
+- */ +- +- if (hostdata->options & OPTION_700) { +- cmd = (struct NCR53c7x0_cmd *) hostdata->ncrcurrent; +- } else { +- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); +- for (cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; +- cmd && (dsa + (hostdata->dsa_start / sizeof(u32))) != cmd->dsa; +- cmd = (struct NCR53c7x0_cmd *)(cmd->next)) +- ; +- } +- if (hostdata->options & OPTION_DEBUG_INTR) { +- if (cmd) { +- printk("scsi%d : interrupt for pid %lu, id %d, lun %d ", +- host->host_no, cmd->cmd->pid, (int) cmd->cmd->device->id, +- (int) cmd->cmd->device->lun); +- __scsi_print_command (cmd->cmd->cmnd); +- } else { +- printk("scsi%d : no active command\n", host->host_no); +- } +- } +- +- if (istat & ISTAT_SIP) { +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : ISTAT_SIP\n", host->host_no); +- intr_scsi (host, cmd); +- } +- +- if (istat & ISTAT_DIP) { +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : ISTAT_DIP\n", host->host_no); +- intr_dma (host, cmd); +- } +- +- if (!hostdata->dstat_valid) { +- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); +- hostdata->dstat_valid = 1; +- } +- +- if (!(hostdata->dstat & DSTAT_DFE)) { +- printk ("scsi%d : DMA FIFO not empty\n", host->host_no); +- /* Really need to check this out for 710 RGH */ +- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); +- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF) +- ; +- hostdata->dstat |= DSTAT_DFE; +- } +- +- if (!hostdata->idle && hostdata->state == STATE_HALTED) { +- if (!hostdata->dsp_changed) +- hostdata->dsp = (u32 *)bus_to_virt(NCR53c7x0_read32(DSP_REG)); +-#if 0 +- printk("scsi%d : new dsp is 0x%lx (virt 0x%p)\n", +- host->host_no, virt_to_bus(hostdata->dsp), hostdata->dsp); +-#endif +- +- hostdata->state = STATE_RUNNING; +- NCR53c7x0_write32 (DSP_REG, virt_to_bus(hostdata->dsp)); +- if (hostdata->options & OPTION_DEBUG_TRACE) { +-#ifdef CYCLIC_TRACE +- log_insn (hostdata->dsp); +-#else +- print_insn (host, hostdata->dsp, "t ", 1); +-#endif +- NCR53c7x0_write8 (DCNTL_REG, +- hostdata->saved_dcntl | DCNTL_SSM | DCNTL_STD); +- } +- } +- } +- return IRQ_HANDLED; +-} +- +- +-/* +- * Function : static int abort_connected (struct Scsi_Host *host) +- * +- * Purpose : Assuming that the NCR SCSI processor is currently +- * halted, break the currently established nexus. Clean +- * up of the NCR53c7x0_cmd and Scsi_Cmnd structures should +- * be done on receipt of the abort interrupt. +- * +- * Inputs : host - SCSI host +- * +- */ +- +-static int +-abort_connected (struct Scsi_Host *host) { +-#ifdef NEW_ABORT +- NCR53c7x0_local_declare(); +-#endif +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +-/* FIXME : this probably should change for production kernels; at the +- least, counter should move to a per-host structure. 
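The FIXME here refers to the static counter just below: abort_connected() allows itself a fixed number of invocations and disables the host once they are exhausted, and because the counter is static it is shared by every host in the system. A sketch of the same guard moved to per-host state, as the FIXME suggests (struct and names are invented):

struct host_state {
    int abort_budget;              /* e.g. initialised to 5 per host */
};

/* Returns 1 when the caller may attempt another abort, 0 once the
 * budget is spent and the host should be disabled instead. */
static int may_abort(struct host_state *h)
{
    return --h->abort_budget > 0;
}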
*/ +- static int counter = 5; +-#ifdef NEW_ABORT +- int sstat, phase, offset; +- u32 *script; +- NCR53c7x0_local_setup(host); +-#endif +- +- if (--counter <= 0) { +- disable(host); +- return 0; +- } +- +- printk ("scsi%d : DANGER : abort_connected() called \n", +- host->host_no); +- +-#ifdef NEW_ABORT +- +-/* +- * New strategy : Rather than using a generic abort routine, +- * we'll specifically try to source or sink the appropriate +- * amount of data for the phase we're currently in (taking into +- * account the current synchronous offset) +- */ +- +- sstat = NCR53c8x0_read8 (SSTAT2_REG); +- offset = OFFSET (sstat & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT; +- phase = sstat & SSTAT2_PHASE_MASK; +- +-/* +- * SET ATN +- * MOVE source_or_sink, WHEN CURRENT PHASE +- * < repeat for each outstanding byte > +- * JUMP send_abort_message +- */ +- +- script = hostdata->abort_script = kmalloc ( +- 8 /* instruction size */ * ( +- 1 /* set ATN */ + +- (!offset ? 1 : offset) /* One transfer per outstanding byte */ + +- 1 /* send abort message */), +- GFP_ATOMIC); +- +- +-#else /* def NEW_ABORT */ +- hostdata->dsp = hostdata->script + hostdata->E_initiator_abort / +- sizeof(u32); +-#endif /* def NEW_ABORT */ +- hostdata->dsp_changed = 1; +- +-/* XXX - need to flag the command as aborted after the abort_connected +- code runs +- */ +- return 0; +-} +- +-/* +- * Function : static int datapath_residual (Scsi_Host *host) +- * +- * Purpose : return residual data count of what's in the chip. +- * +- * Inputs : host - SCSI host +- */ +- +-static int +-datapath_residual (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- int count, synchronous, sstat; +- unsigned int ddir; +- +- NCR53c7x0_local_setup(host); +- /* COMPAT : the 700 and 700-66 need to use DFIFO_00_BO_MASK */ +- count = ((NCR53c7x0_read8 (DFIFO_REG) & DFIFO_10_BO_MASK) - +- (NCR53c7x0_read32 (DBC_REG) & DFIFO_10_BO_MASK)) & DFIFO_10_BO_MASK; +- synchronous = NCR53c7x0_read8 (SXFER_REG) & SXFER_MO_MASK; +- /* COMPAT : DDIR is elsewhere on non-'8xx chips. 
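The count computation a few lines up is modular arithmetic on two hardware counters: the DMA FIFO byte offset and the low bits of DBC both wrap within the same mask, so their difference taken modulo that mask is the number of bytes still sitting in the FIFO. In standalone form (the 7-bit mask value is an assumption standing in for DFIFO_10_BO_MASK):

#include <stdint.h>

#define BO_MASK 0x7fu   /* byte-offset counter width assumed here; the
                           700/700-66 parts use a DFIFO_00-style mask */

static unsigned dma_fifo_residual(uint8_t dfifo, uint32_t dbc)
{
    return ((dfifo & BO_MASK) - (dbc & BO_MASK)) & BO_MASK;
}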
*/ +- ddir = NCR53c7x0_read8 (CTEST0_REG_700) & CTEST0_700_DDIR; +- +- if (ddir) { +- /* Receive */ +- if (synchronous) +- count += (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT; +- else +- if (NCR53c7x0_read8 (SSTAT1_REG) & SSTAT1_ILF) +- ++count; +- } else { +- /* Send */ +- sstat = NCR53c7x0_read8 (SSTAT1_REG); +- if (sstat & SSTAT1_OLF) +- ++count; +- if (synchronous && (sstat & SSTAT1_ORF)) +- ++count; +- } +- return count; +-} +- +-/* +- * Function : static const char * sbcl_to_phase (int sbcl)_ +- * +- * Purpose : Convert SBCL register to user-parsable phase representation +- * +- * Inputs : sbcl - value of sbcl register +- */ +- +- +-static const char * +-sbcl_to_phase (int sbcl) { +- switch (sbcl & SBCL_PHASE_MASK) { +- case SBCL_PHASE_DATAIN: +- return "DATAIN"; +- case SBCL_PHASE_DATAOUT: +- return "DATAOUT"; +- case SBCL_PHASE_MSGIN: +- return "MSGIN"; +- case SBCL_PHASE_MSGOUT: +- return "MSGOUT"; +- case SBCL_PHASE_CMDOUT: +- return "CMDOUT"; +- case SBCL_PHASE_STATIN: +- return "STATUSIN"; +- default: +- return "unknown"; +- } +-} +- +-/* +- * Function : static const char * sstat2_to_phase (int sstat)_ +- * +- * Purpose : Convert SSTAT2 register to user-parsable phase representation +- * +- * Inputs : sstat - value of sstat register +- */ +- +- +-static const char * +-sstat2_to_phase (int sstat) { +- switch (sstat & SSTAT2_PHASE_MASK) { +- case SSTAT2_PHASE_DATAIN: +- return "DATAIN"; +- case SSTAT2_PHASE_DATAOUT: +- return "DATAOUT"; +- case SSTAT2_PHASE_MSGIN: +- return "MSGIN"; +- case SSTAT2_PHASE_MSGOUT: +- return "MSGOUT"; +- case SSTAT2_PHASE_CMDOUT: +- return "CMDOUT"; +- case SSTAT2_PHASE_STATIN: +- return "STATUSIN"; +- default: +- return "unknown"; +- } +-} +- +-/* +- * Function : static void intr_phase_mismatch (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : Handle phase mismatch interrupts +- * +- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd +- * may be NULL. +- * +- * Side effects : The abort_connected() routine is called or the NCR chip +- * is restarted, jumping to the command_complete entry point, or +- * patching the address and transfer count of the current instruction +- * and calling the msg_in entry point as appropriate. +- */ +- +-static void +-intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- u32 dbc_dcmd, *dsp, *dsp_next; +- unsigned char dcmd, sbcl; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int residual; +- enum {ACTION_ABORT, ACTION_ABORT_PRINT, ACTION_CONTINUE} action = +- ACTION_ABORT_PRINT; +- const char *where = NULL; +- +- NCR53c7x0_local_setup(host); +- +- /* +- * Corrective action is based on where in the SCSI SCRIPT(tm) the error +- * occurred, as well as which SCSI phase we are currently in. +- */ +- dsp_next = bus_to_virt(NCR53c7x0_read32(DSP_REG)); +- +- /* +- * Fetch the current instruction, and remove the operands for easier +- * interpretation. +- */ +- dbc_dcmd = NCR53c7x0_read32(DBC_REG); +- dcmd = (dbc_dcmd & 0xff000000) >> 24; +- /* +- * Like other processors, the NCR adjusts the instruction pointer before +- * instruction decode. Set the DSP address back to what it should +- * be for this instruction based on its size (2 or 3 32 bit words). +- */ +- dsp = dsp_next - NCR53c7x0_insn_size(dcmd); +- +- +- /* +- * Read new SCSI phase from the SBCL lines. 
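The two phase decoders below switch on the same three SCSI phase lines (MSG, C/D, I/O). Using the standard SCSI bus encoding of those bits, the pair of switch statements collapses to one table; the bit ordering here is the SCSI-spec one and is assumed rather than taken from the register headers:

/* index = MSG<<2 | C/D<<1 | I/O, per the SCSI bus phase encoding */
static const char *phase_name(unsigned phase_bits)
{
    static const char *const names[8] = {
        "DATAOUT", "DATAIN", "CMDOUT", "STATUSIN",
        "reserved", "reserved", "MSGOUT", "MSGIN",
    };
    return names[phase_bits & 7];
}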
Since all of our code uses +- * a WHEN conditional instead of an IF conditional, we don't need to +- * wait for a new REQ. +- */ +- sbcl = NCR53c7x0_read8(SBCL_REG) & SBCL_PHASE_MASK; +- +- if (!cmd) { +- action = ACTION_ABORT_PRINT; +- where = "no current command"; +- /* +- * The way my SCSI SCRIPTS(tm) are architected, recoverable phase +- * mismatches should only occur where we're doing a multi-byte +- * BMI instruction. Specifically, this means +- * +- * - select messages (a SCSI-I target may ignore additional messages +- * after the IDENTIFY; any target may reject a SDTR or WDTR) +- * +- * - command out (targets may send a message to signal an error +- * condition, or go into STATUSIN after they've decided +- * they don't like the command. +- * +- * - reply_message (targets may reject a multi-byte message in the +- * middle) +- * +- * - data transfer routines (command completion with buffer space +- * left, disconnect message, or error message) +- */ +- } else if (((dsp >= cmd->data_transfer_start && +- dsp < cmd->data_transfer_end)) || dsp == (cmd->residual + 2)) { +- if ((dcmd & (DCMD_TYPE_MASK|DCMD_BMI_OP_MASK|DCMD_BMI_INDIRECT| +- DCMD_BMI_MSG|DCMD_BMI_CD)) == (DCMD_TYPE_BMI| +- DCMD_BMI_OP_MOVE_I)) { +- residual = datapath_residual (host); +- if (hostdata->options & OPTION_DEBUG_DISCONNECT) +- printk ("scsi%d : handling residual transfer (+ %d bytes from DMA FIFO)\n", +- host->host_no, residual); +- +- /* +- * The first instruction is a CALL to the alternate handler for +- * this data transfer phase, so we can do calls to +- * munge_msg_restart as we would if control were passed +- * from normal dynamic code. +- */ +- if (dsp != cmd->residual + 2) { +- cmd->residual[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL | +- ((dcmd & DCMD_BMI_IO) ? DCMD_TCI_IO : 0)) << 24) | +- DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE; +- cmd->residual[1] = virt_to_bus(hostdata->script) +- + ((dcmd & DCMD_BMI_IO) +- ? hostdata->E_other_in : hostdata->E_other_out); +- } +- +- /* +- * The second instruction is the a data transfer block +- * move instruction, reflecting the pointer and count at the +- * time of the phase mismatch. +- */ +- cmd->residual[2] = dbc_dcmd + residual; +- cmd->residual[3] = NCR53c7x0_read32(DNAD_REG) - residual; +- +- /* +- * The third and final instruction is a jump to the instruction +- * which follows the instruction which had to be 'split' +- */ +- if (dsp != cmd->residual + 2) { +- cmd->residual[4] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) +- << 24) | DBC_TCI_TRUE; +- cmd->residual[5] = virt_to_bus(dsp_next); +- } +- +- /* +- * For the sake of simplicity, transfer control to the +- * conditional CALL at the start of the residual buffer. +- */ +- hostdata->dsp = cmd->residual; +- hostdata->dsp_changed = 1; +- action = ACTION_CONTINUE; +- } else { +- where = "non-BMI dynamic DSA code"; +- action = ACTION_ABORT_PRINT; +- } +- } else if (dsp == (hostdata->script + hostdata->E_select_msgout / 4 + 2)) { +- /* RGH 290697: Added +2 above, to compensate for the script +- * instruction which disables the selection timer. */ +- /* Release ATN */ +- NCR53c7x0_write8 (SOCL_REG, 0); +- switch (sbcl) { +- /* +- * Some devices (SQ555 come to mind) grab the IDENTIFY message +- * sent on selection, and decide to go into COMMAND OUT phase +- * rather than accepting the rest of the messages or rejecting +- * them. Handle these devices gracefully. 
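For the recoverable mismatch handled above, the driver rewrites cmd->residual as a three-instruction SCRIPTS fragment: a conditional CALL into the per-phase handler, a block move carrying the patched byte count and data address, and a JUMP back to the instruction after the one that was split. Its layout, reduced to a sketch (the opcode words are passed in as opaque parameters because the real DCMD encodings come from driver macros):

#include <stdint.h>

/* Six u32s = three two-word SCRIPTS instructions, as in cmd->residual. */
static void build_residual(uint32_t res[6],
                           uint32_t call_op, uint32_t handler_pa,
                           uint32_t move_op_count, uint32_t data_pa,
                           uint32_t jump_op, uint32_t resume_pa)
{
    res[0] = call_op;        res[1] = handler_pa;  /* CALL other_in/out  */
    res[2] = move_op_count;  res[3] = data_pa;     /* MOVE residual data */
    res[4] = jump_op;        res[5] = resume_pa;   /* JUMP past split    */
}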
+- */ +- case SBCL_PHASE_CMDOUT: +- hostdata->dsp = dsp + 2 /* two _words_ */; +- hostdata->dsp_changed = 1; +- printk ("scsi%d : target %d ignored SDTR and went into COMMAND OUT\n", +- host->host_no, cmd->cmd->device->id); +- cmd->flags &= ~CMD_FLAG_SDTR; +- action = ACTION_CONTINUE; +- break; +- case SBCL_PHASE_MSGIN: +- hostdata->dsp = hostdata->script + hostdata->E_msg_in / +- sizeof(u32); +- hostdata->dsp_changed = 1; +- action = ACTION_CONTINUE; +- break; +- default: +- where="select message out"; +- action = ACTION_ABORT_PRINT; +- } +- /* +- * Some SCSI devices will interpret a command as they read the bytes +- * off the SCSI bus, and may decide that the command is Bogus before +- * they've read the entire command off the bus. +- */ +- } else if (dsp == hostdata->script + hostdata->E_cmdout_cmdout / sizeof +- (u32)) { +- hostdata->dsp = hostdata->script + hostdata->E_data_transfer / +- sizeof (u32); +- hostdata->dsp_changed = 1; +- action = ACTION_CONTINUE; +- /* FIXME : we need to handle message reject, etc. within msg_respond. */ +-#ifdef notyet +- } else if (dsp == hostdata->script + hostdata->E_reply_message) { +- switch (sbcl) { +- /* Any other phase mismatches abort the currently executing command. */ +-#endif +- } else { +- where = "unknown location"; +- action = ACTION_ABORT_PRINT; +- } +- +- /* Flush DMA FIFO */ +- if (!hostdata->dstat_valid) { +- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); +- hostdata->dstat_valid = 1; +- } +- if (!(hostdata->dstat & DSTAT_DFE)) { +- /* Really need to check this out for 710 RGH */ +- NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF); +- while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF); +- hostdata->dstat |= DSTAT_DFE; +- } +- +- switch (action) { +- case ACTION_ABORT_PRINT: +- printk("scsi%d : %s : unexpected phase %s.\n", +- host->host_no, where ? where : "unknown location", +- sbcl_to_phase(sbcl)); +- print_lots (host); +- /* Fall through to ACTION_ABORT */ +- case ACTION_ABORT: +- abort_connected (host); +- break; +- case ACTION_CONTINUE: +- break; +- } +- +-#if 0 +- if (hostdata->dsp_changed) { +- printk("scsi%d: new dsp 0x%p\n", host->host_no, hostdata->dsp); +- print_insn (host, hostdata->dsp, "", 1); +- } +-#endif +-} +- +-/* +- * Function : static void intr_bf (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : handle BUS FAULT interrupts +- * +- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd +- * may be NULL. +- */ +- +-static void +-intr_bf (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- u32 *dsp, +- *next_dsp, /* Current dsp */ +- *dsa, +- dbc_dcmd; /* DCMD (high eight bits) + DBC */ +- char *reason = NULL; +- /* Default behavior is for a silent error, with a retry until we've +- exhausted retries. */ +- enum {MAYBE, ALWAYS, NEVER} retry = MAYBE; +- int report = 0; +- NCR53c7x0_local_setup(host); +- +- dbc_dcmd = NCR53c7x0_read32 (DBC_REG); +- next_dsp = bus_to_virt (NCR53c7x0_read32(DSP_REG)); +- dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff); +-/* FIXME - check chip type */ +- dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG)); +- +- /* +- * Bus faults can be caused by either a Bad Address or +- * Target Abort. We should check the Received Target Abort +- * bit of the PCI status register and Master Abort Bit. 
+- * +- * - Master Abort bit indicates that no device claimed +- * the address with DEVSEL within five clocks +- * +- * - Target Abort bit indicates that a target claimed it, +- * but changed its mind once it saw the byte enables. +- * +- */ +- +- /* 53c710, not PCI system */ +- report = 1; +- reason = "Unknown"; +- +-#ifndef notyet +- report = 1; +-#endif +- if (report && reason) +- { +- printk(KERN_ALERT "scsi%d : BUS FAULT reason = %s\n", +- host->host_no, reason ? reason : "unknown"); +- print_lots (host); +- } +- +-#ifndef notyet +- retry = NEVER; +-#endif +- +- /* +- * TODO : we should attempt to recover from any spurious bus +- * faults. After X retries, we should figure that things are +- * sufficiently wedged, and call NCR53c7xx_reset. +- * +- * This code should only get executed once we've decided that we +- * cannot retry. +- */ +- +- if (retry == NEVER) { +- printk(KERN_ALERT " mail richard@sleepie.demon.co.uk\n"); +- FATAL (host); +- } +-} +- +-/* +- * Function : static void intr_dma (struct Scsi_Host *host, +- * struct NCR53c7x0_cmd *cmd) +- * +- * Purpose : handle all DMA interrupts, indicated by the setting +- * of the DIP bit in the ISTAT register. +- * +- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd +- * may be NULL. +- */ +- +-static void +-intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned char dstat; /* DSTAT */ +- u32 *dsp, +- *next_dsp, /* Current dsp */ +- *dsa, +- dbc_dcmd; /* DCMD (high eight bits) + DBC */ +- int tmp; +- unsigned long flags; +- NCR53c7x0_local_setup(host); +- +- if (!hostdata->dstat_valid) { +- hostdata->dstat = NCR53c7x0_read8(DSTAT_REG); +- hostdata->dstat_valid = 1; +- } +- +- dstat = hostdata->dstat; +- +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk("scsi%d : DSTAT=0x%x\n", host->host_no, (int) dstat); +- +- dbc_dcmd = NCR53c7x0_read32 (DBC_REG); +- next_dsp = bus_to_virt(NCR53c7x0_read32(DSP_REG)); +- dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff); +-/* XXX - check chip type */ +- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); +- +- /* +- * DSTAT_ABRT is the aborted interrupt. This is set whenever the +- * SCSI chip is aborted. +- * +- * With NCR53c700 and NCR53c700-66 style chips, we should only +- * get this when the chip is currently running the accept +- * reselect/select code and we have set the abort bit in the +- * ISTAT register. +- * +- */ +- +- if (dstat & DSTAT_ABRT) { +-#if 0 +- /* XXX - add code here to deal with normal abort */ +- if ((hostdata->options & OPTION_700) && (hostdata->state == +- STATE_ABORTING)) { +- } else +-#endif +- { +- printk(KERN_ALERT "scsi%d : unexpected abort interrupt at\n" +- " ", host->host_no); +- print_insn (host, dsp, KERN_ALERT "s ", 1); +- FATAL (host); +- } +- } +- +- /* +- * DSTAT_SSI is the single step interrupt. Should be generated +- * whenever we have single stepped or are tracing. +- */ +- +- if (dstat & DSTAT_SSI) { +- if (hostdata->options & OPTION_DEBUG_TRACE) { +- /* Don't print instr. until we write DSP at end of intr function */ +- } else if (hostdata->options & OPTION_DEBUG_SINGLE) { +- print_insn (host, dsp, "s ", 0); +- local_irq_save(flags); +-/* XXX - should we do this, or can we get away with writing dsp? 
*/ +- +- NCR53c7x0_write8 (DCNTL_REG, (NCR53c7x0_read8(DCNTL_REG) & +- ~DCNTL_SSM) | DCNTL_STD); +- local_irq_restore(flags); +- } else { +- printk(KERN_ALERT "scsi%d : unexpected single step interrupt at\n" +- " ", host->host_no); +- print_insn (host, dsp, KERN_ALERT "", 1); +- printk(KERN_ALERT " mail drew@PoohSticks.ORG\n"); +- FATAL (host); +- } +- } +- +- /* +- * DSTAT_IID / DSTAT_OPC (same bit, same meaning, only the name +- * is different) is generated whenever an illegal instruction is +- * encountered. +- * +- * XXX - we may want to emulate INTFLY here, so we can use +- * the same SCSI SCRIPT (tm) for NCR53c710 through NCR53c810 +- * chips. +- */ +- +- if (dstat & DSTAT_OPC) { +- /* +- * Ascertain if this IID interrupt occurred before or after a STO +- * interrupt. Since the interrupt handling code now leaves +- * DSP unmodified until _after_ all stacked interrupts have been +- * processed, reading the DSP returns the original DSP register. +- * This means that if dsp lies between the select code and +- * message out following the selection code (where the IID interrupt +- * would have to have occurred due to the implicit wait for REQ), +- * we have an IID interrupt resulting from a STO condition and +- * can ignore it. +- */ +- +- if (((dsp >= (hostdata->script + hostdata->E_select / sizeof(u32))) && +- (dsp <= (hostdata->script + hostdata->E_select_msgout / +- sizeof(u32) + 8))) || (hostdata->test_running == 2)) { +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : ignoring DSTAT_IID for SSTAT_STO\n", +- host->host_no); +- if (hostdata->expecting_iid) { +- hostdata->expecting_iid = 0; +- hostdata->idle = 1; +- if (hostdata->test_running == 2) { +- hostdata->test_running = 0; +- hostdata->test_completed = 3; +- } else if (cmd) +- abnormal_finished (cmd, DID_BAD_TARGET << 16); +- } else { +- hostdata->expecting_sto = 1; +- } +- /* +- * We can't guarantee we'll be able to execute the WAIT DISCONNECT +- * instruction within the 3.4us of bus free and arbitration delay +- * that a target can RESELECT in and assert REQ after we've dropped +- * ACK. If this happens, we'll get an illegal instruction interrupt. +- * Doing away with the WAIT DISCONNECT instructions broke everything, +- * so instead I'll settle for moving one WAIT DISCONNECT a few +- * instructions closer to the CLEAR ACK before it to minimize the +- * chances of this happening, and handle it if it occurs anyway. +- * +- * Simply continue with what we were doing, and control should +- * be transferred to the schedule routine which will ultimately +- * pass control onto the reselection or selection (not yet) +- * code. +- */ +- } else if (dbc_dcmd == 0x48000000 && (NCR53c7x0_read8 (SBCL_REG) & +- SBCL_REQ)) { +- if (!(hostdata->options & OPTION_NO_PRINT_RACE)) +- { +- printk("scsi%d: REQ before WAIT DISCONNECT IID\n", +- host->host_no); +- hostdata->options |= OPTION_NO_PRINT_RACE; +- } +- } else { +- printk(KERN_ALERT "scsi%d : invalid instruction\n", host->host_no); +- print_lots (host); +- printk(KERN_ALERT " mail Richard@sleepie.demon.co.uk with ALL\n" +- " boot messages and diagnostic output\n"); +- FATAL (host); +- } +- } +- +- /* +- * DSTAT_BF are bus fault errors. DSTAT_800_BF is valid for 710 also. +- */ +- +- if (dstat & DSTAT_800_BF) { +- intr_bf (host, cmd); +- } +- +- +- /* +- * DSTAT_SIR interrupts are generated by the execution of +- * the INT instruction.
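The window test in the DSTAT_OPC branch above is worth isolating: an illegal-instruction interrupt is treated as a side effect of a selection timeout exactly when the stalled DSP lies inside the select to select_msgout stretch of the script, where the chip was implicitly waiting for REQ. As a standalone predicate (the bounds are passed in; the driver derives them from its script entry points):

#include <stdint.h>

static int iid_is_sto_artifact(const uint32_t *dsp,
                               const uint32_t *select_start,
                               const uint32_t *select_msgout_end)
{
    return dsp >= select_start && dsp <= select_msgout_end;
}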
Since the exact values available +- * are determined entirely by the SCSI script running, +- * and are local to a particular script, a unique handler +- * is called for each script. +- */ +- +- if (dstat & DSTAT_SIR) { +- if (hostdata->options & OPTION_DEBUG_INTR) +- printk ("scsi%d : DSTAT_SIR\n", host->host_no); +- switch ((tmp = hostdata->dstat_sir_intr (host, cmd))) { +- case SPECIFIC_INT_NOTHING: +- case SPECIFIC_INT_RESTART: +- break; +- case SPECIFIC_INT_ABORT: +- abort_connected(host); +- break; +- case SPECIFIC_INT_PANIC: +- printk(KERN_ALERT "scsi%d : failure at ", host->host_no); +- print_insn (host, dsp, KERN_ALERT "", 1); +- printk(KERN_ALERT " dstat_sir_intr() returned SPECIFIC_INT_PANIC\n"); +- FATAL (host); +- break; +- case SPECIFIC_INT_BREAK: +- intr_break (host, cmd); +- break; +- default: +- printk(KERN_ALERT "scsi%d : failure at ", host->host_no); +- print_insn (host, dsp, KERN_ALERT "", 1); +- printk(KERN_ALERT" dstat_sir_intr() returned unknown value %d\n", +- tmp); +- FATAL (host); +- } +- } +-} +- +-/* +- * Function : static int print_insn (struct Scsi_Host *host, +- * u32 *insn, int kernel) +- * +- * Purpose : print numeric representation of the instruction pointed +- * to by insn to the debugging or kernel message buffer +- * as appropriate. +- * +- * If desired, a user level program can interpret this +- * information. +- * +- * Inputs : host, insn - host, pointer to instruction, prefix - +- * string to prepend, kernel - use printk instead of debugging buffer. +- * +- * Returns : size, in u32s, of instruction printed. +- */ +- +-/* +- * FIXME: should change kernel parameter so that it takes an ENUM +- * specifying severity - either KERN_ALERT or KERN_PANIC so +- * all panic messages are output with the same severity. +- */ +- +-static int +-print_insn (struct Scsi_Host *host, const u32 *insn, +- const char *prefix, int kernel) { +- char buf[160], /* Temporary buffer and pointer. ICKY +- arbitrary length. */ +- +- +- *tmp; +- unsigned char dcmd; /* dcmd register for *insn */ +- int size; +- +- /* +- * Check to see if the instruction pointer is not bogus before +- * indirecting through it; avoiding red-zone at start of +- * memory. +- * +- * FIXME: icky magic needs to happen here on non-intel boxes which +- * don't have kernel memory mapped in like this. Might be reasonable +- * to use vverify()? +- */ +- +- if (virt_to_phys((void *)insn) < PAGE_SIZE || +- virt_to_phys((void *)(insn + 8)) > virt_to_phys(high_memory) || +- ((((dcmd = (insn[0] >> 24) & 0xff) & DCMD_TYPE_MMI) == DCMD_TYPE_MMI) && +- virt_to_phys((void *)(insn + 12)) > virt_to_phys(high_memory))) { +- size = 0; +- sprintf (buf, "%s%p: address out of range\n", +- prefix, insn); +- } else { +-/* +- * FIXME : (void *) cast in virt_to_bus should be unnecessary, because +- * it should take const void * as argument. +- */ +-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000) +- sprintf(buf, "%s0x%lx (virt 0x%p) : 0x%08x 0x%08x (virt 0x%p)", +- (prefix ? prefix : ""), virt_to_bus((void *) insn), insn, +- insn[0], insn[1], bus_to_virt (insn[1])); +-#else +- /* Remove virtual addresses to reduce output, as they are the same */ +- sprintf(buf, "%s0x%x (+%x) : 0x%08x 0x%08x", +- (prefix ? 
prefix : ""), (u32)insn, ((u32)insn - +- (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4, +- insn[0], insn[1]); +-#endif +- tmp = buf + strlen(buf); +- if ((dcmd & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) { +-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000) +- sprintf (tmp, " 0x%08x (virt 0x%p)\n", insn[2], +- bus_to_virt(insn[2])); +-#else +- /* Remove virtual addr to reduce output, as it is the same */ +- sprintf (tmp, " 0x%08x\n", insn[2]); +-#endif +- size = 3; +- } else { +- sprintf (tmp, "\n"); +- size = 2; +- } +- } +- +- if (kernel) +- printk ("%s", buf); +-#ifdef NCR_DEBUG +- else { +- size_t len = strlen(buf); +- debugger_kernel_write(host, buf, len); +- } +-#endif +- return size; +-} +- +-/* +- * Function : int NCR53c7xx_abort (Scsi_Cmnd *cmd) +- * +- * Purpose : Abort an errant SCSI command, doing all necessary +- * cleanup of the issue_queue, running_list, shared Linux/NCR +- * dsa issue and reconnect queues. +- * +- * Inputs : cmd - command to abort, code - entire result field +- * +- * Returns : 0 on success, -1 on failure. +- */ +- +-int +-NCR53c7xx_abort (Scsi_Cmnd *cmd) { +- NCR53c7x0_local_declare(); +- struct Scsi_Host *host = cmd->device->host; +- struct NCR53c7x0_hostdata *hostdata = host ? (struct NCR53c7x0_hostdata *) +- host->hostdata[0] : NULL; +- unsigned long flags; +- struct NCR53c7x0_cmd *curr, **prev; +- Scsi_Cmnd *me, **last; +-#if 0 +- static long cache_pid = -1; +-#endif +- +- +- if (!host) { +- printk ("Bogus SCSI command pid %ld; no host structure\n", +- cmd->pid); +- return SCSI_ABORT_ERROR; +- } else if (!hostdata) { +- printk ("Bogus SCSI host %d; no hostdata\n", host->host_no); +- return SCSI_ABORT_ERROR; +- } +- NCR53c7x0_local_setup(host); +- +-/* +- * CHECK : I don't think that reading ISTAT will unstack any interrupts, +- * since we need to write the INTF bit to clear it, and SCSI/DMA +- * interrupts don't clear until we read SSTAT/SIST and DSTAT registers. +- * +- * See that this is the case. Appears to be correct on the 710, at least. +- * +- * I suspect that several of our failures may be coming from a new fatal +- * interrupt (possibly due to a phase mismatch) happening after we've left +- * the interrupt handler, but before the PIC has had the interrupt condition +- * cleared. +- */ +- +- if (NCR53c7x0_read8(hostdata->istat) & (ISTAT_DIP|ISTAT_SIP)) { +- printk ("scsi%d : dropped interrupt for command %ld\n", host->host_no, +- cmd->pid); +- NCR53c7x0_intr (host->irq, NULL, NULL); +- return SCSI_ABORT_BUSY; +- } +- +- local_irq_save(flags); +-#if 0 +- if (cache_pid == cmd->pid) +- panic ("scsi%d : bloody fetus %d\n", host->host_no, cmd->pid); +- else +- cache_pid = cmd->pid; +-#endif +- +- +-/* +- * The command could be hiding in the issue_queue. This would be very +- * nice, as commands can't be moved from the high level driver's issue queue +- * into the shared queue until an interrupt routine is serviced, and this +- * moving is atomic. +- * +- * If this is the case, we don't have to worry about anything - we simply +- * pull the command out of the old queue, and call it aborted. 
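Pulling a command out of the issue queue, as just described, is the classic singly linked list unlink: walk the list holding the address of the previous link so that removal is a single store. A generic sketch (the driver chains Scsi_Cmnd structures through SCp.ptr; a plain next field stands in here):

struct qent { struct qent *next; };

/* Returns 1 and unlinks victim if present, 0 if it was not queued. */
static int unlink_qent(struct qent **head, struct qent *victim)
{
    struct qent **last, *me;

    for (last = head, me = *head; me && me != victim;
         last = &me->next, me = me->next)
        ;
    if (!me)
        return 0;
    *last = me->next;
    return 1;
}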
+- */ +- +- for (me = (Scsi_Cmnd *) hostdata->issue_queue, +- last = (Scsi_Cmnd **) &(hostdata->issue_queue); +- me && me != cmd; last = (Scsi_Cmnd **)&(me->SCp.ptr), +- me = (Scsi_Cmnd *)me->SCp.ptr); +- +- if (me) { +- *last = (Scsi_Cmnd *) me->SCp.ptr; +- if (me->host_scribble) { +- ((struct NCR53c7x0_cmd *)me->host_scribble)->next = hostdata->free; +- hostdata->free = (struct NCR53c7x0_cmd *) me->host_scribble; +- me->host_scribble = NULL; +- } +- cmd->result = DID_ABORT << 16; +- cmd->scsi_done(cmd); +- printk ("scsi%d : found command %ld in Linux issue queue\n", +- host->host_no, me->pid); +- local_irq_restore(flags); +- run_process_issue_queue(); +- return SCSI_ABORT_SUCCESS; +- } +- +-/* +- * That failing, the command could be in our list of already executing +- * commands. If this is the case, drastic measures are called for. +- */ +- +- for (curr = (struct NCR53c7x0_cmd *) hostdata->running_list, +- prev = (struct NCR53c7x0_cmd **) &(hostdata->running_list); +- curr && curr->cmd != cmd; prev = (struct NCR53c7x0_cmd **) +- &(curr->next), curr = (struct NCR53c7x0_cmd *) curr->next); +- +- if (curr) { +- if ((curr->result & 0xff) != 0xff && (curr->result & 0xff00) != 0xff00) { +- cmd->result = curr->result; +- if (prev) +- *prev = (struct NCR53c7x0_cmd *) curr->next; +- curr->next = (struct NCR53c7x0_cmd *) hostdata->free; +- cmd->host_scribble = NULL; +- hostdata->free = curr; +- cmd->scsi_done(cmd); +- printk ("scsi%d : found finished command %ld in running list\n", +- host->host_no, cmd->pid); +- local_irq_restore(flags); +- return SCSI_ABORT_NOT_RUNNING; +- } else { +- printk ("scsi%d : DANGER : command running, can not abort.\n", +- cmd->device->host->host_no); +- local_irq_restore(flags); +- return SCSI_ABORT_BUSY; +- } +- } +- +-/* +- * And if we couldn't find it in any of our queues, it must have been +- * a dropped interrupt. +- */ +- +- curr = (struct NCR53c7x0_cmd *) cmd->host_scribble; +- if (curr) { +- curr->next = hostdata->free; +- hostdata->free = curr; +- cmd->host_scribble = NULL; +- } +- +- if (curr == NULL || ((curr->result & 0xff00) == 0xff00) || +- ((curr->result & 0xff) == 0xff)) { +- printk ("scsi%d : did this command ever run?\n", host->host_no); +- cmd->result = DID_ABORT << 16; +- } else { +- printk ("scsi%d : probably lost INTFLY, normal completion\n", +- host->host_no); +- cmd->result = curr->result; +-/* +- * FIXME : We need to add an additional flag which indicates if a +- * command was ever counted as BUSY, so if we end up here we can +- * decrement the busy count if and only if it is necessary. +- */ +- --hostdata->busy[cmd->device->id][cmd->device->lun]; +- } +- local_irq_restore(flags); +- cmd->scsi_done(cmd); +- +-/* +- * We need to run process_issue_queue since termination of this command +- * may allow another queued command to execute first? +- */ +- return SCSI_ABORT_NOT_RUNNING; +-} +- +-/* +- * Function : int NCR53c7xx_reset (Scsi_Cmnd *cmd) +- * +- * Purpose : perform a hard reset of the SCSI bus and NCR +- * chip. +- * +- * Inputs : cmd - command which caused the SCSI RESET +- * +- * Returns : 0 on success. +- */ +- +-int +-NCR53c7xx_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) { +- NCR53c7x0_local_declare(); +- unsigned long flags; +- int found = 0; +- struct NCR53c7x0_cmd * c; +- Scsi_Cmnd *tmp; +- /* +- * When we call scsi_done(), it's going to wake up anything sleeping on the +- * resources which were in use by the aborted commands, and we'll start to +- * get new commands. 
+- * +- * We can't let this happen until after we've re-initialized the driver +- * structures, and can't reinitialize those structures until after we've +- * dealt with their contents. +- * +- * So, we need to find all of the commands which were running, stick +- * them on a linked list of completed commands (we'll use the host_scribble +- * pointer), do our reinitialization, and then call the done function for +- * each command. +- */ +- Scsi_Cmnd *nuke_list = NULL; +- struct Scsi_Host *host = cmd->device->host; +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- +- NCR53c7x0_local_setup(host); +- local_irq_save(flags); +- ncr_halt (host); +- print_lots (host); +- dump_events (host, 30); +- ncr_scsi_reset (host); +- for (tmp = nuke_list = return_outstanding_commands (host, 1 /* free */, +- 0 /* issue */ ); tmp; tmp = (Scsi_Cmnd *) tmp->SCp.buffer) +- if (tmp == cmd) { +- found = 1; +- break; +- } +- +- /* +- * If we didn't find the command which caused this reset in our running +- * list, then we've lost it. See that it terminates normally anyway. +- */ +- if (!found) { +- c = (struct NCR53c7x0_cmd *) cmd->host_scribble; +- if (c) { +- cmd->host_scribble = NULL; +- c->next = hostdata->free; +- hostdata->free = c; +- } else +- printk ("scsi%d: lost command %ld\n", host->host_no, cmd->pid); +- cmd->SCp.buffer = (struct scatterlist *) nuke_list; +- nuke_list = cmd; +- } +- +- NCR53c7x0_driver_init (host); +- hostdata->soft_reset (host); +- if (hostdata->resets == 0) +- disable(host); +- else if (hostdata->resets != -1) +- --hostdata->resets; +- local_irq_restore(flags); +- for (; nuke_list; nuke_list = tmp) { +- tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer; +- nuke_list->result = DID_RESET << 16; +- nuke_list->scsi_done (nuke_list); +- } +- local_irq_restore(flags); +- return SCSI_RESET_SUCCESS; +-} +- +-/* +- * The NCR SDMS bios follows Annex A of the SCSI-CAM draft, and +- * therefore shares the scsicam_bios_param function. +- */ +- +-/* +- * Function : int insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) +- * +- * Purpose : convert instructions stored at NCR pointer into data +- * pointer offset. +- * +- * Inputs : cmd - SCSI command; insn - pointer to instruction. Either current +- * DSP, or saved data pointer. +- * +- * Returns : offset on success, -1 on failure. +- */ +- +- +-static int +-insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) { +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) cmd->device->host->hostdata[0]; +- struct NCR53c7x0_cmd *ncmd = +- (struct NCR53c7x0_cmd *) cmd->host_scribble; +- int offset = 0, buffers; +- struct scatterlist *segment; +- char *ptr; +- int found = 0; +- +-/* +- * With the current code implementation, if the insn is inside dynamically +- * generated code, the data pointer will be the instruction preceding +- * the next transfer segment. 
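The offset recovery insn_to_offset() performs below boils down to locating the saved data pointer within the scatter/gather segments: every whole segment before the match contributes its full length, and the matching one contributes the pointer's offset into it. In isolation, with a stand-in segment type:

#include <stddef.h>

struct seg { char *base; size_t len; };   /* stand-in scatterlist entry */

/* Returns -1 when ptr falls inside none of the nseg segments. */
static long ptr_to_offset(const struct seg *sg, int nseg, const char *ptr)
{
    long off = 0;
    int i;

    for (i = 0; i < nseg; ++i) {
        if (ptr >= sg[i].base && ptr < sg[i].base + sg[i].len)
            return off + (ptr - sg[i].base);
        off += sg[i].len;
    }
    return -1;
}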
+- */ +- +- if (!check_address ((unsigned long) ncmd, sizeof (struct NCR53c7x0_cmd)) && +- ((insn >= ncmd->data_transfer_start && +- insn < ncmd->data_transfer_end) || +- (insn >= ncmd->residual && +- insn < (ncmd->residual + +- sizeof(ncmd->residual))))) { +- ptr = bus_to_virt(insn[3]); +- +- if ((buffers = cmd->use_sg)) { +- for (offset = 0, +- segment = (struct scatterlist *) cmd->request_buffer; +- buffers && !((found = ((ptr >= (char *)page_address(segment->page)+segment->offset) && +- (ptr < ((char *)page_address(segment->page)+segment->offset+segment->length))))); +- --buffers, offset += segment->length, ++segment) +-#if 0 +- printk("scsi%d: comparing 0x%p to 0x%p\n", +- cmd->device->host->host_no, saved, page_address(segment->page+segment->offset)); +-#else +- ; +-#endif +- offset += ptr - ((char *)page_address(segment->page)+segment->offset); +- } else { +- found = 1; +- offset = ptr - (char *) (cmd->request_buffer); +- } +- } else if ((insn >= hostdata->script + +- hostdata->E_data_transfer / sizeof(u32)) && +- (insn <= hostdata->script + +- hostdata->E_end_data_transfer / sizeof(u32))) { +- found = 1; +- offset = 0; +- } +- return found ? offset : -1; +-} +- +- +- +-/* +- * Function : void print_progress (Scsi_Cmnd *cmd) +- * +- * Purpose : print the current location of the saved data pointer +- * +- * Inputs : cmd - command we are interested in +- * +- */ +- +-static void +-print_progress (Scsi_Cmnd *cmd) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_cmd *ncmd = +- (struct NCR53c7x0_cmd *) cmd->host_scribble; +- int offset, i; +- char *where; +- u32 *ptr; +- NCR53c7x0_local_setup (cmd->device->host); +- +- if (check_address ((unsigned long) ncmd,sizeof (struct NCR53c7x0_cmd)) == 0) +- { +- printk("\nNCR53c7x0_cmd fields:\n"); +- printk(" bounce.len=0x%x, addr=0x%0x, buf[]=0x%02x %02x %02x %02x\n", +- ncmd->bounce.len, ncmd->bounce.addr, ncmd->bounce.buf[0], +- ncmd->bounce.buf[1], ncmd->bounce.buf[2], ncmd->bounce.buf[3]); +- printk(" result=%04x, cdb[0]=0x%02x\n", ncmd->result, ncmd->cmnd[0]); +- } +- +- for (i = 0; i < 2; ++i) { +- if (check_address ((unsigned long) ncmd, +- sizeof (struct NCR53c7x0_cmd)) == -1) +- continue; +- if (!i) { +- where = "saved"; +- ptr = bus_to_virt(ncmd->saved_data_pointer); +- } else { +- where = "active"; +- ptr = bus_to_virt (NCR53c7x0_read32 (DSP_REG) - +- NCR53c7x0_insn_size (NCR53c7x0_read8 (DCMD_REG)) * +- sizeof(u32)); +- } +- offset = insn_to_offset (cmd, ptr); +- +- if (offset != -1) +- printk ("scsi%d : %s data pointer at offset %d\n", +- cmd->device->host->host_no, where, offset); +- else { +- int size; +- printk ("scsi%d : can't determine %s data pointer offset\n", +- cmd->device->host->host_no, where); +- if (ncmd) { +- size = print_insn (cmd->device->host, +- bus_to_virt(ncmd->saved_data_pointer), "", 1); +- print_insn (cmd->device->host, +- bus_to_virt(ncmd->saved_data_pointer) + size * sizeof(u32), +- "", 1); +- } +- } +- } +-} +- +- +-static void +-print_dsa (struct Scsi_Host *host, u32 *dsa, const char *prefix) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int i, len; +- char *ptr; +- Scsi_Cmnd *cmd; +- +- if (check_address ((unsigned long) dsa, hostdata->dsa_end - +- hostdata->dsa_start) == -1) { +- printk("scsi%d : bad dsa virt 0x%p\n", host->host_no, dsa); +- return; +- } +- printk("%sscsi%d : dsa at phys 0x%lx (virt 0x%p)\n" +- " + %d : dsa_msgout length = %u, data = 0x%x (virt 0x%p)\n" , +- prefix ? 
prefix : "", +- host->host_no, virt_to_bus (dsa), dsa, hostdata->dsa_msgout, +- dsa[hostdata->dsa_msgout / sizeof(u32)], +- dsa[hostdata->dsa_msgout / sizeof(u32) + 1], +- bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1])); +- +- /* +- * Only print messages if they're sane in length so we don't +- * blow the kernel printk buffer on something which won't buy us +- * anything. +- */ +- +- if (dsa[hostdata->dsa_msgout / sizeof(u32)] < +- sizeof (hostdata->free->select)) +- for (i = dsa[hostdata->dsa_msgout / sizeof(u32)], +- ptr = bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]); +- i > 0 && !check_address ((unsigned long) ptr, 1); +- ptr += len, i -= len) { +- printk(" "); +- len = spi_print_msg(ptr); +- printk("\n"); +- if (!len) +- break; +- } +- +- printk(" + %d : select_indirect = 0x%x\n", +- hostdata->dsa_select, dsa[hostdata->dsa_select / sizeof(u32)]); +- cmd = (Scsi_Cmnd *) bus_to_virt(dsa[hostdata->dsa_cmnd / sizeof(u32)]); +- printk(" + %d : dsa_cmnd = 0x%x ", hostdata->dsa_cmnd, +- (u32) virt_to_bus(cmd)); +- /* XXX Maybe we should access cmd->host_scribble->result here. RGH */ +- if (cmd) { +- printk(" result = 0x%x, target = %d, lun = %d, cmd = ", +- cmd->result, cmd->device->id, cmd->device->lun); +- __scsi_print_command(cmd->cmnd); +- } else +- printk("\n"); +- printk(" + %d : dsa_next = 0x%x\n", hostdata->dsa_next, +- dsa[hostdata->dsa_next / sizeof(u32)]); +- if (cmd) { +- printk("scsi%d target %d : sxfer_sanity = 0x%x, scntl3_sanity = 0x%x\n" +- " script : ", +- host->host_no, cmd->device->id, +- hostdata->sync[cmd->device->id].sxfer_sanity, +- hostdata->sync[cmd->device->id].scntl3_sanity); +- for (i = 0; i < (sizeof(hostdata->sync[cmd->device->id].script) / 4); ++i) +- printk ("0x%x ", hostdata->sync[cmd->device->id].script[i]); +- printk ("\n"); +- print_progress (cmd); +- } +-} +-/* +- * Function : void print_queues (Scsi_Host *host) +- * +- * Purpose : print the contents of the NCR issue and reconnect queues +- * +- * Inputs : host - SCSI host we are interested in +- * +- */ +- +-static void +-print_queues (struct Scsi_Host *host) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- u32 *dsa, *next_dsa; +- volatile u32 *ncrcurrent; +- int left; +- Scsi_Cmnd *cmd, *next_cmd; +- unsigned long flags; +- +- printk ("scsi%d : issue queue\n", host->host_no); +- +- for (left = host->can_queue, cmd = (Scsi_Cmnd *) hostdata->issue_queue; +- left >= 0 && cmd; +- cmd = next_cmd) { +- next_cmd = (Scsi_Cmnd *) cmd->SCp.ptr; +- local_irq_save(flags); +- if (cmd->host_scribble) { +- if (check_address ((unsigned long) (cmd->host_scribble), +- sizeof (cmd->host_scribble)) == -1) +- printk ("scsi%d: scsi pid %ld bad pointer to NCR53c7x0_cmd\n", +- host->host_no, cmd->pid); +- /* print_dsa does sanity check on address, no need to check */ +- else +- print_dsa (host, ((struct NCR53c7x0_cmd *) cmd->host_scribble) +- -> dsa, ""); +- } else +- printk ("scsi%d : scsi pid %ld for target %d lun %d has no NCR53c7x0_cmd\n", +- host->host_no, cmd->pid, cmd->device->id, cmd->device->lun); +- local_irq_restore(flags); +- } +- +- if (left <= 0) { +- printk ("scsi%d : loop detected in issue queue\n", +- host->host_no); +- } +- +- /* +- * Traverse the NCR reconnect and start DSA structures, printing out +- * each element until we hit the end or detect a loop. Currently, +- * the reconnect structure is a linked list; and the start structure +- * is an array. 
Eventually, the reconnect structure will become a +- * list as well, since this simplifies the code. +- */ +- +- printk ("scsi%d : schedule dsa array :\n", host->host_no); +- for (left = host->can_queue, ncrcurrent = hostdata->schedule; +- left > 0; ncrcurrent += 2, --left) +- if (ncrcurrent[0] != hostdata->NOP_insn) +-/* FIXME : convert pointer to dsa_begin to pointer to dsa. */ +- print_dsa (host, bus_to_virt (ncrcurrent[1] - +- (hostdata->E_dsa_code_begin - +- hostdata->E_dsa_code_template)), ""); +- printk ("scsi%d : end schedule dsa array\n", host->host_no); +- +- printk ("scsi%d : reconnect_dsa_head :\n", host->host_no); +- +- for (left = host->can_queue, +- dsa = bus_to_virt (hostdata->reconnect_dsa_head); +- left >= 0 && dsa; +- dsa = next_dsa) { +- local_irq_save(flags); +- if (check_address ((unsigned long) dsa, sizeof(dsa)) == -1) { +- printk ("scsi%d: bad DSA pointer 0x%p", host->host_no, +- dsa); +- next_dsa = NULL; +- } +- else +- { +- next_dsa = bus_to_virt(dsa[hostdata->dsa_next / sizeof(u32)]); +- print_dsa (host, dsa, ""); +- } +- local_irq_restore(flags); +- } +- printk ("scsi%d : end reconnect_dsa_head\n", host->host_no); +- if (left < 0) +- printk("scsi%d: possible loop in ncr reconnect list\n", +- host->host_no); +-} +- +-static void +-print_lots (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- u32 *dsp_next, *dsp, *dsa, dbc_dcmd; +- unsigned char dcmd, sbcl; +- int i, size; +- NCR53c7x0_local_setup(host); +- +- if ((dsp_next = bus_to_virt(NCR53c7x0_read32 (DSP_REG)))) { +- dbc_dcmd = NCR53c7x0_read32(DBC_REG); +- dcmd = (dbc_dcmd & 0xff000000) >> 24; +- dsp = dsp_next - NCR53c7x0_insn_size(dcmd); +- dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG)); +- sbcl = NCR53c7x0_read8 (SBCL_REG); +- +- /* +- * For the 53c710, the following will report value 0 for SCNTL3 +- * and STEST0 - we don't have these registers. +- */ +- printk ("scsi%d : DCMD|DBC=0x%x, DNAD=0x%x (virt 0x%p)\n" +- " DSA=0x%lx (virt 0x%p)\n" +- " DSPS=0x%x, TEMP=0x%x (virt 0x%p), DMODE=0x%x\n" +- " SXFER=0x%x, SCNTL3=0x%x\n" +- " %s%s%sphase=%s, %d bytes in SCSI FIFO\n" +- " SCRATCH=0x%x, saved2_dsa=0x%0lx\n", +- host->host_no, dbc_dcmd, NCR53c7x0_read32(DNAD_REG), +- bus_to_virt(NCR53c7x0_read32(DNAD_REG)), +- virt_to_bus(dsa), dsa, +- NCR53c7x0_read32(DSPS_REG), NCR53c7x0_read32(TEMP_REG), +- bus_to_virt (NCR53c7x0_read32(TEMP_REG)), +- (int) NCR53c7x0_read8(hostdata->dmode), +- (int) NCR53c7x0_read8(SXFER_REG), +- ((hostdata->chip / 100) == 8) ? +- (int) NCR53c7x0_read8(SCNTL3_REG_800) : 0, +- (sbcl & SBCL_BSY) ? "BSY " : "", +- (sbcl & SBCL_SEL) ? "SEL " : "", +- (sbcl & SBCL_REQ) ? "REQ " : "", +- sstat2_to_phase(NCR53c7x0_read8 (((hostdata->chip / 100) == 8) ? +- SSTAT1_REG : SSTAT2_REG)), +- (NCR53c7x0_read8 ((hostdata->chip / 100) == 8 ? +- SSTAT1_REG : SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT, +- ((hostdata->chip / 100) == 8) ? 
NCR53c7x0_read8 (STEST0_REG_800) : +- NCR53c7x0_read32(SCRATCHA_REG_800), +- hostdata->saved2_dsa); +- printk ("scsi%d : DSP 0x%lx (virt 0x%p) ->\n", host->host_no, +- virt_to_bus(dsp), dsp); +- for (i = 6; i > 0; --i, dsp += size) +- size = print_insn (host, dsp, "", 1); +- if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) { +- if ((hostdata->chip / 100) == 8) +- printk ("scsi%d : connected (SDID=0x%x, SSID=0x%x)\n", +- host->host_no, NCR53c7x0_read8 (SDID_REG_800), +- NCR53c7x0_read8 (SSID_REG_800)); +- else +- printk ("scsi%d : connected (SDID=0x%x)\n", +- host->host_no, NCR53c7x0_read8 (SDID_REG_700)); +- print_dsa (host, dsa, ""); +- } +- +-#if 1 +- print_queues (host); +-#endif +- } +-} +- +-/* +- * Function : static int shutdown (struct Scsi_Host *host) +- * +- * Purpose : does a clean (we hope) shutdown of the NCR SCSI +- * chip. Use prior to dumping core, unloading the NCR driver, +- * +- * Returns : 0 on success +- */ +-static int +-shutdown (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- unsigned long flags; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- NCR53c7x0_local_setup(host); +- local_irq_save(flags); +-/* Get in a state where we can reset the SCSI bus */ +- ncr_halt (host); +- ncr_scsi_reset (host); +- hostdata->soft_reset(host); +- +- disable (host); +- local_irq_restore(flags); +- return 0; +-} +- +-/* +- * Function : void ncr_scsi_reset (struct Scsi_Host *host) +- * +- * Purpose : reset the SCSI bus. +- */ +- +-static void +-ncr_scsi_reset (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- unsigned long flags; +- NCR53c7x0_local_setup(host); +- local_irq_save(flags); +- NCR53c7x0_write8(SCNTL1_REG, SCNTL1_RST); +- udelay(25); /* Minimum amount of time to assert RST */ +- NCR53c7x0_write8(SCNTL1_REG, 0); +- local_irq_restore(flags); +-} +- +-/* +- * Function : void hard_reset (struct Scsi_Host *host) +- * +- */ +- +-static void +-hard_reset (struct Scsi_Host *host) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned long flags; +- local_irq_save(flags); +- ncr_scsi_reset(host); +- NCR53c7x0_driver_init (host); +- if (hostdata->soft_reset) +- hostdata->soft_reset (host); +- local_irq_restore(flags); +-} +- +- +-/* +- * Function : Scsi_Cmnd *return_outstanding_commands (struct Scsi_Host *host, +- * int free, int issue) +- * +- * Purpose : return a linked list (using the SCp.buffer field as next, +- * so we don't perturb hostdata. We don't use a field of the +- * NCR53c7x0_cmd structure since we may not have allocated one +- * for the command causing the reset.) of Scsi_Cmnd structures that +- * had propagated below the Linux issue queue level. If free is set, +- * free the NCR53c7x0_cmd structures which are associated with +- * the Scsi_Cmnd structures, and clean up any internal +- * NCR lists that the commands were on. If issue is set, +- * also return commands in the issue queue. +- * +- * Returns : linked list of commands +- * +- * NOTE : the caller should insure that the NCR chip is halted +- * if the free flag is set. 
+- */ +- +-static Scsi_Cmnd * +-return_outstanding_commands (struct Scsi_Host *host, int free, int issue) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- struct NCR53c7x0_cmd *c; +- int i; +- u32 *ncrcurrent; +- Scsi_Cmnd *list = NULL, *tmp; +- for (c = (struct NCR53c7x0_cmd *) hostdata->running_list; c; +- c = (struct NCR53c7x0_cmd *) c->next) { +- if (c->cmd->SCp.buffer) { +- printk ("scsi%d : loop detected in running list!\n", host->host_no); +- break; +- } else { +- printk ("Duh? Bad things happening in the NCR driver\n"); +- break; +- } +- +- c->cmd->SCp.buffer = (struct scatterlist *) list; +- list = c->cmd; +- if (free) { +- c->next = hostdata->free; +- hostdata->free = c; +- } +- } +- +- if (free) { +- for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; +- i < host->can_queue; ++i, ncrcurrent += 2) { +- ncrcurrent[0] = hostdata->NOP_insn; +- ncrcurrent[1] = 0xdeadbeef; +- } +- hostdata->ncrcurrent = NULL; +- } +- +- if (issue) { +- for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp; tmp = tmp->next) { +- if (tmp->SCp.buffer) { +- printk ("scsi%d : loop detected in issue queue!\n", +- host->host_no); +- break; +- } +- tmp->SCp.buffer = (struct scatterlist *) list; +- list = tmp; +- } +- if (free) +- hostdata->issue_queue = NULL; +- +- } +- return list; +-} +- +-/* +- * Function : static int disable (struct Scsi_Host *host) +- * +- * Purpose : disables the given NCR host, causing all commands +- * to return a driver error. Call this so we can unload the +- * module during development and try again. Eventually, +- * we should be able to find clean workarounds for these +- * problems. +- * +- * Inputs : host - hostadapter to twiddle +- * +- * Returns : 0 on success. +- */ +- +-static int +-disable (struct Scsi_Host *host) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- unsigned long flags; +- Scsi_Cmnd *nuke_list, *tmp; +- local_irq_save(flags); +- if (hostdata->state != STATE_HALTED) +- ncr_halt (host); +- nuke_list = return_outstanding_commands (host, 1 /* free */, 1 /* issue */); +- hard_reset (host); +- hostdata->state = STATE_DISABLED; +- local_irq_restore(flags); +- printk ("scsi%d : nuking commands\n", host->host_no); +- for (; nuke_list; nuke_list = tmp) { +- tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer; +- nuke_list->result = DID_ERROR << 16; +- nuke_list->scsi_done(nuke_list); +- } +- printk ("scsi%d : done. \n", host->host_no); +- printk (KERN_ALERT "scsi%d : disabled. 
Unload and reload\n", +- host->host_no); +- return 0; +-} +- +-/* +- * Function : static int ncr_halt (struct Scsi_Host *host) +- * +- * Purpose : halts the SCSI SCRIPTS(tm) processor on the NCR chip +- * +- * Inputs : host - SCSI chip to halt +- * +- * Returns : 0 on success +- */ +- +-static int +-ncr_halt (struct Scsi_Host *host) { +- NCR53c7x0_local_declare(); +- unsigned long flags; +- unsigned char istat, tmp; +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- int stage; +- NCR53c7x0_local_setup(host); +- +- local_irq_save(flags); +- /* Stage 0 : eat all interrupts +- Stage 1 : set ABORT +- Stage 2 : eat all but abort interrupts +- Stage 3 : eat all interrupts +- */ +- for (stage = 0;;) { +- if (stage == 1) { +- NCR53c7x0_write8(hostdata->istat, ISTAT_ABRT); +- ++stage; +- } +- istat = NCR53c7x0_read8 (hostdata->istat); +- if (istat & ISTAT_SIP) { +- tmp = NCR53c7x0_read8(SSTAT0_REG); +- } else if (istat & ISTAT_DIP) { +- tmp = NCR53c7x0_read8(DSTAT_REG); +- if (stage == 2) { +- if (tmp & DSTAT_ABRT) { +- NCR53c7x0_write8(hostdata->istat, 0); +- ++stage; +- } else { +- printk(KERN_ALERT "scsi%d : could not halt NCR chip\n", +- host->host_no); +- disable (host); +- } +- } +- } +- if (!(istat & (ISTAT_SIP|ISTAT_DIP))) { +- if (stage == 0) +- ++stage; +- else if (stage == 3) +- break; +- } +- } +- hostdata->state = STATE_HALTED; +- local_irq_restore(flags); +-#if 0 +- print_lots (host); +-#endif +- return 0; +-} +- +-/* +- * Function: event_name (int event) +- * +- * Purpose: map event enum into user-readable strings. +- */ +- +-static const char * +-event_name (int event) { +- switch (event) { +- case EVENT_NONE: return "none"; +- case EVENT_ISSUE_QUEUE: return "to issue queue"; +- case EVENT_START_QUEUE: return "to start queue"; +- case EVENT_SELECT: return "selected"; +- case EVENT_DISCONNECT: return "disconnected"; +- case EVENT_RESELECT: return "reselected"; +- case EVENT_COMPLETE: return "completed"; +- case EVENT_IDLE: return "idle"; +- case EVENT_SELECT_FAILED: return "select failed"; +- case EVENT_BEFORE_SELECT: return "before select"; +- case EVENT_RESELECT_FAILED: return "reselect failed"; +- default: return "unknown"; +- } +-} +- +-/* +- * Function : void dump_events (struct Scsi_Host *host, count) +- * +- * Purpose : print last count events which have occurred. +- */ +-static void +-dump_events (struct Scsi_Host *host, int count) { +- struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) +- host->hostdata[0]; +- struct NCR53c7x0_event event; +- int i; +- unsigned long flags; +- if (hostdata->events) { +- if (count > hostdata->event_size) +- count = hostdata->event_size; +- for (i = hostdata->event_index; count > 0; +- i = (i ? i - 1 : hostdata->event_size -1), --count) { +-/* +- * By copying the event we're currently examining with interrupts +- * disabled, we can do multiple printk(), etc. operations and +- * still be guaranteed that they're happening on the same +- * event structure. 
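The snapshot idiom described in this comment, in isolation: copy the shared record inside the interrupts-off window, then do the slow printing from the private copy outside it. In the sketch below irq_block()/irq_unblock() stand in for local_irq_save()/local_irq_restore(), and the struct fields are illustrative only:

#include <string.h>

struct ev { long sec, usec; int target, lun; };  /* illustrative fields */

extern void irq_block(void);     /* stand-in for local_irq_save()    */
extern void irq_unblock(void);   /* stand-in for local_irq_restore() */

/* Take a consistent snapshot, then format it at leisure. */
static void snapshot(struct ev *dst, const volatile struct ev *src)
{
    irq_block();
    memcpy(dst, (const void *)src, sizeof *dst);
    irq_unblock();
}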
+- */ +- local_irq_save(flags); +-#if 0 +- event = hostdata->events[i]; +-#else +- memcpy ((void *) &event, (void *) &(hostdata->events[i]), +- sizeof(event)); +-#endif +- +- local_irq_restore(flags); +- printk ("scsi%d : %s event %d at %ld secs %ld usecs target %d lun %d\n", +- host->host_no, event_name (event.event), count, +- (long) event.time.tv_sec, (long) event.time.tv_usec, +- event.target, event.lun); +- if (event.dsa) +- printk (" event for dsa 0x%lx (virt 0x%p)\n", +- virt_to_bus(event.dsa), event.dsa); +- if (event.pid != -1) { +- printk (" event for pid %ld ", event.pid); +- __scsi_print_command (event.cmnd); +- } +- } +- } +-} +- +-/* +- * Function: check_address +- * +- * Purpose: Check to see if a possibly corrupt pointer will fault the +- * kernel. +- * +- * Inputs: addr - address; size - size of area +- * +- * Returns: 0 if area is OK, -1 on error. +- * +- * NOTES: should be implemented in terms of vverify on kernels +- * that have it. +- */ +- +-static int +-check_address (unsigned long addr, int size) { +- return (virt_to_phys((void *)addr) < PAGE_SIZE || virt_to_phys((void *)(addr + size)) > virt_to_phys(high_memory) ? -1 : 0); +-} +- +-#ifdef MODULE +-int +-NCR53c7x0_release(struct Scsi_Host *host) { +- struct NCR53c7x0_hostdata *hostdata = +- (struct NCR53c7x0_hostdata *) host->hostdata[0]; +- struct NCR53c7x0_cmd *cmd, *tmp; +- shutdown (host); +- if (host->irq != SCSI_IRQ_NONE) +- { +- int irq_count; +- struct Scsi_Host *tmp; +- for (irq_count = 0, tmp = first_host; tmp; tmp = tmp->next) +- if (tmp->hostt == the_template && tmp->irq == host->irq) +- ++irq_count; +- if (irq_count == 1) +- free_irq(host->irq, NULL); +- } +- if (host->dma_channel != DMA_NONE) +- free_dma(host->dma_channel); +- if (host->io_port) +- release_region(host->io_port, host->n_io_port); +- +- for (cmd = (struct NCR53c7x0_cmd *) hostdata->free; cmd; cmd = tmp, +- --hostdata->num_cmds) { +- tmp = (struct NCR53c7x0_cmd *) cmd->next; +- /* +- * If we're going to loop, try to stop it to get a more accurate +- * count of the leaked commands. +- */ +- cmd->next = NULL; +- if (cmd->free) +- cmd->free ((void *) cmd->real, cmd->size); +- } +- if (hostdata->num_cmds) +- printk ("scsi%d : leaked %d NCR53c7x0_cmd structures\n", +- host->host_no, hostdata->num_cmds); +- +- vfree(hostdata->events); +- +- /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which +- * XXX may be invalid (CONFIG_060_WRITETHROUGH) +- */ +- kernel_set_cachemode((void *)hostdata, 8192, IOMAP_FULL_CACHING); +- free_pages ((u32)hostdata, 1); +- return 1; +-} +-#endif /* def MODULE */ +diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.h linux-2.6.22-591/drivers/scsi/53c7xx.h +--- linux-2.6.22-570/drivers/scsi/53c7xx.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c7xx.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1608 +0,0 @@ +-/* +- * 53c710 driver. Modified from Drew Eckhardts driver +- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] +- * +- * I have left the code for the 53c8xx family in here, because it didn't +- * seem worth removing it. The possibility of IO_MAPPED chips rather +- * than MEMORY_MAPPED remains, in case someone wants to add support for +- * 53c710 chips on Intel PCs (some older machines have them on the +- * motherboard). +- * +- * NOTE THERE MAY BE PROBLEMS WITH CASTS IN read8 AND Co. 
+- */ +- +-/* +- * NCR 53c{7,8}0x0 driver, header file +- * +- * Sponsored by +- * iX Multiuser Multitasking Magazine +- * Hannover, Germany +- * hm@ix.de +- * +- * Copyright 1993, 1994, 1995 Drew Eckhardt +- * Visionary Computing +- * (Unix and Linux consulting and custom programming) +- * drew@PoohSticks.ORG +- * +1 (303) 786-7975 +- * +- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. +- * +- * PRE-ALPHA +- * +- * For more information, please consult +- * +- * NCR 53C700/53C700-66 +- * SCSI I/O Processor +- * Data Manual +- * +- * NCR 53C810 +- * PCI-SCSI I/O Processor +- * Data Manual +- * +- * NCR Microelectronics +- * 1635 Aeroplaza Drive +- * Colorado Springs, CO 80916 +- * +1 (719) 578-3400 +- * +- * Toll free literature number +- * +1 (800) 334-5454 +- * +- */ +- +-#ifndef NCR53c710_H +-#define NCR53c710_H +- +-#ifndef HOSTS_C +- +-/* SCSI control 0 rw, default = 0xc0 */ +-#define SCNTL0_REG 0x00 +-#define SCNTL0_ARB1 0x80 /* 0 0 = simple arbitration */ +-#define SCNTL0_ARB2 0x40 /* 1 1 = full arbitration */ +-#define SCNTL0_STRT 0x20 /* Start Sequence */ +-#define SCNTL0_WATN 0x10 /* Select with ATN */ +-#define SCNTL0_EPC 0x08 /* Enable parity checking */ +-/* Bit 2 is reserved on 800 series chips */ +-#define SCNTL0_EPG_700 0x04 /* Enable parity generation */ +-#define SCNTL0_AAP 0x02 /* ATN/ on parity error */ +-#define SCNTL0_TRG 0x01 /* Target mode */ +- +-/* SCSI control 1 rw, default = 0x00 */ +- +-#define SCNTL1_REG 0x01 +-#define SCNTL1_EXC 0x80 /* Extra Clock Cycle of Data setup */ +-#define SCNTL1_ADB 0x40 /* contents of SODL on bus */ +-#define SCNTL1_ESR_700 0x20 /* Enable SIOP response to selection +- and reselection */ +-#define SCNTL1_DHP_800 0x20 /* Disable halt on parity error or ATN +- target mode only */ +-#define SCNTL1_CON 0x10 /* Connected */ +-#define SCNTL1_RST 0x08 /* SCSI RST/ */ +-#define SCNTL1_AESP 0x04 /* Force bad parity */ +-#define SCNTL1_SND_700 0x02 /* Start SCSI send */ +-#define SCNTL1_IARB_800 0x02 /* Immediate Arbitration, start +- arbitration immediately after +- busfree is detected */ +-#define SCNTL1_RCV_700 0x01 /* Start SCSI receive */ +-#define SCNTL1_SST_800 0x01 /* Start SCSI transfer */ +- +-/* SCSI control 2 rw, */ +- +-#define SCNTL2_REG_800 0x02 +-#define SCNTL2_800_SDU 0x80 /* SCSI disconnect unexpected */ +- +-/* SCSI control 3 rw */ +- +-#define SCNTL3_REG_800 0x03 +-#define SCNTL3_800_SCF_SHIFT 4 +-#define SCNTL3_800_SCF_MASK 0x70 +-#define SCNTL3_800_SCF2 0x40 /* Synchronous divisor */ +-#define SCNTL3_800_SCF1 0x20 /* 0x00 = SCLK/3 */ +-#define SCNTL3_800_SCF0 0x10 /* 0x10 = SCLK/1 */ +- /* 0x20 = SCLK/1.5 +- 0x30 = SCLK/2 +- 0x40 = SCLK/3 */ +- +-#define SCNTL3_800_CCF_SHIFT 0 +-#define SCNTL3_800_CCF_MASK 0x07 +-#define SCNTL3_800_CCF2 0x04 /* 0x00 50.01 to 66 */ +-#define SCNTL3_800_CCF1 0x02 /* 0x01 16.67 to 25 */ +-#define SCNTL3_800_CCF0 0x01 /* 0x02 25.01 - 37.5 +- 0x03 37.51 - 50 +- 0x04 50.01 - 66 */ +- +-/* +- * SCSI destination ID rw - the appropriate bit is set for the selected +- * target ID. This is written by the SCSI SCRIPTS processor. 
+- * default = 0x00 +- */ +-#define SDID_REG_700 0x02 +-#define SDID_REG_800 0x06 +- +-#define GP_REG_800 0x07 /* General purpose IO */ +-#define GP_800_IO1 0x02 +-#define GP_800_IO2 0x01 +- +-/* SCSI interrupt enable rw, default = 0x00 */ +-#define SIEN_REG_700 0x03 +-#define SIEN0_REG_800 0x40 +-#define SIEN_MA 0x80 /* Phase mismatch (ini) or ATN (tgt) */ +-#define SIEN_FC 0x40 /* Function complete */ +-#define SIEN_700_STO 0x20 /* Selection or reselection timeout */ +-#define SIEN_800_SEL 0x20 /* Selected */ +-#define SIEN_700_SEL 0x10 /* Selected or reselected */ +-#define SIEN_800_RESEL 0x10 /* Reselected */ +-#define SIEN_SGE 0x08 /* SCSI gross error */ +-#define SIEN_UDC 0x04 /* Unexpected disconnect */ +-#define SIEN_RST 0x02 /* SCSI RST/ received */ +-#define SIEN_PAR 0x01 /* Parity error */ +- +-/* +- * SCSI chip ID rw +- * NCR53c700 : +- * When arbitrating, the highest bit is used, when reselection or selection +- * occurs, the chip responds to all IDs for which a bit is set. +- * default = 0x00 +- * NCR53c810 : +- * Uses bit mapping +- */ +-#define SCID_REG 0x04 +-/* Bit 7 is reserved on 800 series chips */ +-#define SCID_800_RRE 0x40 /* Enable response to reselection */ +-#define SCID_800_SRE 0x20 /* Enable response to selection */ +-/* Bits four and three are reserved on 800 series chips */ +-#define SCID_800_ENC_MASK 0x07 /* Encoded SCSI ID */ +- +-/* SCSI transfer rw, default = 0x00 */ +-#define SXFER_REG 0x05 +-#define SXFER_DHP 0x80 /* Disable halt on parity */ +- +-#define SXFER_TP2 0x40 /* Transfer period msb */ +-#define SXFER_TP1 0x20 +-#define SXFER_TP0 0x10 /* lsb */ +-#define SXFER_TP_MASK 0x70 +-/* FIXME : SXFER_TP_SHIFT == 5 is right for '8xx chips */ +-#define SXFER_TP_SHIFT 5 +-#define SXFER_TP_4 0x00 /* Divisors */ +-#define SXFER_TP_5 0x10<<1 +-#define SXFER_TP_6 0x20<<1 +-#define SXFER_TP_7 0x30<<1 +-#define SXFER_TP_8 0x40<<1 +-#define SXFER_TP_9 0x50<<1 +-#define SXFER_TP_10 0x60<<1 +-#define SXFER_TP_11 0x70<<1 +- +-#define SXFER_MO3 0x08 /* Max offset msb */ +-#define SXFER_MO2 0x04 +-#define SXFER_MO1 0x02 +-#define SXFER_MO0 0x01 /* lsb */ +-#define SXFER_MO_MASK 0x0f +-#define SXFER_MO_SHIFT 0 +- +-/* +- * SCSI output data latch rw +- * The contents of this register are driven onto the SCSI bus when +- * the Assert Data Bus bit of the SCNTL1 register is set and +- * the CD, IO, and MSG bits of the SOCL register match the SCSI phase +- */ +-#define SODL_REG_700 0x06 +-#define SODL_REG_800 0x54 +- +- +-/* +- * SCSI output control latch rw, default = 0 +- * Note that when the chip is being manually programmed as an initiator, +- * the MSG, CD, and IO bits must be set correctly for the phase the target +- * is driving the bus in. Otherwise no data transfer will occur due to +- * phase mismatch. +- */ +- +-#define SOCL_REG 0x07 +-#define SOCL_REQ 0x80 /* REQ */ +-#define SOCL_ACK 0x40 /* ACK */ +-#define SOCL_BSY 0x20 /* BSY */ +-#define SOCL_SEL 0x10 /* SEL */ +-#define SOCL_ATN 0x08 /* ATN */ +-#define SOCL_MSG 0x04 /* MSG */ +-#define SOCL_CD 0x02 /* C/D */ +-#define SOCL_IO 0x01 /* I/O */ +- +-/* +- * SCSI first byte received latch ro +- * This register contains the first byte received during a block MOVE +- * SCSI SCRIPTS instruction, including +- * +- * Initiator mode Target mode +- * Message in Command +- * Status Message out +- * Data in Data out +- * +- * It also contains the selecting or reselecting device's ID and our +- * ID. +- * +- * Note that this is the register the various IF conditionals can +- * operate on. 
+- */ +-#define SFBR_REG 0x08 +- +-/* +- * SCSI input data latch ro +- * In initiator mode, data is latched into this register on the rising +- * edge of REQ/. In target mode, data is latched on the rising edge of +- * ACK/ +- */ +-#define SIDL_REG_700 0x09 +-#define SIDL_REG_800 0x50 +- +-/* +- * SCSI bus data lines ro +- * This register reflects the instantaneous status of the SCSI data +- * lines. Note that SCNTL0 must be set to disable parity checking, +- * otherwise reading this register will latch new parity. +- */ +-#define SBDL_REG_700 0x0a +-#define SBDL_REG_800 0x58 +- +-#define SSID_REG_800 0x0a +-#define SSID_800_VAL 0x80 /* Exactly two bits asserted at sel */ +-#define SSID_800_ENCID_MASK 0x07 /* Device which performed operation */ +- +- +-/* +- * SCSI bus control lines rw, +- * instantaneous readout of control lines +- */ +-#define SBCL_REG 0x0b +-#define SBCL_REQ 0x80 /* REQ ro */ +-#define SBCL_ACK 0x40 /* ACK ro */ +-#define SBCL_BSY 0x20 /* BSY ro */ +-#define SBCL_SEL 0x10 /* SEL ro */ +-#define SBCL_ATN 0x08 /* ATN ro */ +-#define SBCL_MSG 0x04 /* MSG ro */ +-#define SBCL_CD 0x02 /* C/D ro */ +-#define SBCL_IO 0x01 /* I/O ro */ +-#define SBCL_PHASE_CMDOUT SBCL_CD +-#define SBCL_PHASE_DATAIN SBCL_IO +-#define SBCL_PHASE_DATAOUT 0 +-#define SBCL_PHASE_MSGIN (SBCL_CD|SBCL_IO|SBCL_MSG) +-#define SBCL_PHASE_MSGOUT (SBCL_CD|SBCL_MSG) +-#define SBCL_PHASE_STATIN (SBCL_CD|SBCL_IO) +-#define SBCL_PHASE_MASK (SBCL_CD|SBCL_IO|SBCL_MSG) +-/* +- * Synchronous SCSI Clock Control bits +- * 0 - set by DCNTL +- * 1 - SCLK / 1.0 +- * 2 - SCLK / 1.5 +- * 3 - SCLK / 2.0 +- */ +-#define SBCL_SSCF1 0x02 /* wo, -66 only */ +-#define SBCL_SSCF0 0x01 /* wo, -66 only */ +-#define SBCL_SSCF_MASK 0x03 +- +-/* +- * XXX note : when reading the DSTAT and STAT registers to clear interrupts, +- * insure that 10 clocks elapse between the two +- */ +-/* DMA status ro */ +-#define DSTAT_REG 0x0c +-#define DSTAT_DFE 0x80 /* DMA FIFO empty */ +-#define DSTAT_800_MDPE 0x40 /* Master Data Parity Error */ +-#define DSTAT_800_BF 0x20 /* Bus Fault */ +-#define DSTAT_ABRT 0x10 /* Aborted - set on error */ +-#define DSTAT_SSI 0x08 /* SCRIPTS single step interrupt */ +-#define DSTAT_SIR 0x04 /* SCRIPTS interrupt received - +- set when INT instruction is +- executed */ +-#define DSTAT_WTD 0x02 /* Watchdog timeout detected */ +-#define DSTAT_OPC 0x01 /* Illegal instruction */ +-#define DSTAT_800_IID 0x01 /* Same thing, different name */ +- +- +-/* NCR53c800 moves this stuff into SIST0 */ +-#define SSTAT0_REG 0x0d /* SCSI status 0 ro */ +-#define SIST0_REG_800 0x42 +-#define SSTAT0_MA 0x80 /* ini : phase mismatch, +- * tgt : ATN/ asserted +- */ +-#define SSTAT0_CMP 0x40 /* function complete */ +-#define SSTAT0_700_STO 0x20 /* Selection or reselection timeout */ +-#define SIST0_800_SEL 0x20 /* Selected */ +-#define SSTAT0_700_SEL 0x10 /* Selected or reselected */ +-#define SIST0_800_RSL 0x10 /* Reselected */ +-#define SSTAT0_SGE 0x08 /* SCSI gross error */ +-#define SSTAT0_UDC 0x04 /* Unexpected disconnect */ +-#define SSTAT0_RST 0x02 /* SCSI RST/ received */ +-#define SSTAT0_PAR 0x01 /* Parity error */ +- +-/* And uses SSTAT0 for what was SSTAT1 */ +- +-#define SSTAT1_REG 0x0e /* SCSI status 1 ro */ +-#define SSTAT1_ILF 0x80 /* SIDL full */ +-#define SSTAT1_ORF 0x40 /* SODR full */ +-#define SSTAT1_OLF 0x20 /* SODL full */ +-#define SSTAT1_AIP 0x10 /* Arbitration in progress */ +-#define SSTAT1_LOA 0x08 /* Lost arbitration */ +-#define SSTAT1_WOA 0x04 /* Won arbitration */ +-#define SSTAT1_RST 0x02 /* Instant readout 
of RST/ */ +-#define SSTAT1_SDP 0x01 /* Instant readout of SDP/ */ +- +-#define SSTAT2_REG 0x0f /* SCSI status 2 ro */ +-#define SSTAT2_FF3 0x80 /* number of bytes in synchronous */ +-#define SSTAT2_FF2 0x40 /* data FIFO */ +-#define SSTAT2_FF1 0x20 +-#define SSTAT2_FF0 0x10 +-#define SSTAT2_FF_MASK 0xf0 +-#define SSTAT2_FF_SHIFT 4 +- +-/* +- * Latched signals, latched on the leading edge of REQ/ for initiators, +- * ACK/ for targets. +- */ +-#define SSTAT2_SDP 0x08 /* SDP */ +-#define SSTAT2_MSG 0x04 /* MSG */ +-#define SSTAT2_CD 0x02 /* C/D */ +-#define SSTAT2_IO 0x01 /* I/O */ +-#define SSTAT2_PHASE_CMDOUT SSTAT2_CD +-#define SSTAT2_PHASE_DATAIN SSTAT2_IO +-#define SSTAT2_PHASE_DATAOUT 0 +-#define SSTAT2_PHASE_MSGIN (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG) +-#define SSTAT2_PHASE_MSGOUT (SSTAT2_CD|SSTAT2_MSG) +-#define SSTAT2_PHASE_STATIN (SSTAT2_CD|SSTAT2_IO) +-#define SSTAT2_PHASE_MASK (SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG) +- +- +-/* NCR53c700-66 only */ +-#define SCRATCHA_REG_00 0x10 /* through 0x13 Scratch A rw */ +-/* NCR53c710 and higher */ +-#define DSA_REG 0x10 /* DATA structure address */ +- +-#define CTEST0_REG_700 0x14 /* Chip test 0 ro */ +-#define CTEST0_REG_800 0x18 /* Chip test 0 rw, general purpose */ +-/* 0x80 - 0x04 are reserved */ +-#define CTEST0_700_RTRG 0x02 /* Real target mode */ +-#define CTEST0_700_DDIR 0x01 /* Data direction, 1 = +- * SCSI bus to host, 0 = +- * host to SCSI. +- */ +- +-#define CTEST1_REG_700 0x15 /* Chip test 1 ro */ +-#define CTEST1_REG_800 0x19 /* Chip test 1 ro */ +-#define CTEST1_FMT3 0x80 /* Identify which byte lanes are empty */ +-#define CTEST1_FMT2 0x40 /* in the DMA FIFO */ +-#define CTEST1_FMT1 0x20 +-#define CTEST1_FMT0 0x10 +- +-#define CTEST1_FFL3 0x08 /* Identify which bytes lanes are full */ +-#define CTEST1_FFL2 0x04 /* in the DMA FIFO */ +-#define CTEST1_FFL1 0x02 +-#define CTEST1_FFL0 0x01 +- +-#define CTEST2_REG_700 0x16 /* Chip test 2 ro */ +-#define CTEST2_REG_800 0x1a /* Chip test 2 ro */ +- +-#define CTEST2_800_DDIR 0x80 /* 1 = SCSI->host */ +-#define CTEST2_800_SIGP 0x40 /* A copy of SIGP in ISTAT. +- Reading this register clears */ +-#define CTEST2_800_CIO 0x20 /* Configured as IO */. +-#define CTEST2_800_CM 0x10 /* Configured as memory */ +- +-/* 0x80 - 0x40 are reserved on 700 series chips */ +-#define CTEST2_700_SOFF 0x20 /* SCSI Offset Compare, +- * As an initiator, this bit is +- * one when the synchronous offset +- * is zero, as a target this bit +- * is one when the synchronous +- * offset is at the maximum +- * defined in SXFER +- */ +-#define CTEST2_700_SFP 0x10 /* SCSI FIFO parity bit, +- * reading CTEST3 unloads a byte +- * from the FIFO and sets this +- */ +-#define CTEST2_700_DFP 0x08 /* DMA FIFO parity bit, +- * reading CTEST6 unloads a byte +- * from the FIFO and sets this +- */ +-#define CTEST2_TEOP 0x04 /* SCSI true end of process, +- * indicates a totally finished +- * transfer +- */ +-#define CTEST2_DREQ 0x02 /* Data request signal */ +-/* 0x01 is reserved on 700 series chips */ +-#define CTEST2_800_DACK 0x01 +- +-/* +- * Chip test 3 ro +- * Unloads the bottom byte of the eight deep SCSI synchronous FIFO, +- * check SSTAT2 FIFO full bits to determine size. Note that a GROSS +- * error results if a read is attempted on this register. Also note +- * that 16 and 32 bit reads of this register will cause corruption. 
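A practical note on the SBCL/SSTAT2 phase encodings defined above: the MSG, C/D and I/O bits together name the SCSI information transfer phase, which is why the header provides ready-made SSTAT2_PHASE_* composites. A hypothetical helper (not part of the driver) shows how they decode:

    /* Hypothetical: map latched MSG/CD/IO bits to a phase name. */
    static const char *sstat2_phase_name(unsigned char sstat2)
    {
        switch (sstat2 & SSTAT2_PHASE_MASK) {
        case SSTAT2_PHASE_DATAOUT: return "DATA OUT";
        case SSTAT2_PHASE_DATAIN:  return "DATA IN";
        case SSTAT2_PHASE_CMDOUT:  return "COMMAND";
        case SSTAT2_PHASE_STATIN:  return "STATUS";
        case SSTAT2_PHASE_MSGOUT:  return "MESSAGE OUT";
        case SSTAT2_PHASE_MSGIN:   return "MESSAGE IN";
        default:                   return "reserved";
        }
    }

The two remaining mask values (MSG asserted without C/D, with or without I/O) are reserved phases on the SCSI bus, hence the default arm.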
+- */ +-#define CTEST3_REG_700 0x17 +-/* Chip test 3 rw */ +-#define CTEST3_REG_800 0x1b +-#define CTEST3_800_V3 0x80 /* Chip revision */ +-#define CTEST3_800_V2 0x40 +-#define CTEST3_800_V1 0x20 +-#define CTEST3_800_V0 0x10 +-#define CTEST3_800_FLF 0x08 /* Flush DMA FIFO */ +-#define CTEST3_800_CLF 0x04 /* Clear DMA FIFO */ +-#define CTEST3_800_FM 0x02 /* Fetch mode pin */ +-/* bit 0 is reserved on 800 series chips */ +- +-#define CTEST4_REG_700 0x18 /* Chip test 4 rw */ +-#define CTEST4_REG_800 0x21 /* Chip test 4 rw */ +-/* 0x80 is reserved on 700 series chips */ +-#define CTEST4_800_BDIS 0x80 /* Burst mode disable */ +-#define CTEST4_ZMOD 0x40 /* High impedance mode */ +-#define CTEST4_SZM 0x20 /* SCSI bus high impedance */ +-#define CTEST4_700_SLBE 0x10 /* SCSI loopback enabled */ +-#define CTEST4_800_SRTM 0x10 /* Shadow Register Test Mode */ +-#define CTEST4_700_SFWR 0x08 /* SCSI FIFO write enable, +- * redirects writes from SODL +- * to the SCSI FIFO. +- */ +-#define CTEST4_800_MPEE 0x08 /* Enable parity checking +- during master cycles on PCI +- bus */ +- +-/* +- * These bits send the contents of the CTEST6 register to the appropriate +- * byte lane of the 32 bit DMA FIFO. Normal operation is zero, otherwise +- * the high bit means the low two bits select the byte lane. +- */ +-#define CTEST4_FBL2 0x04 +-#define CTEST4_FBL1 0x02 +-#define CTEST4_FBL0 0x01 +-#define CTEST4_FBL_MASK 0x07 +-#define CTEST4_FBL_0 0x04 /* Select DMA FIFO byte lane 0 */ +-#define CTEST4_FBL_1 0x05 /* Select DMA FIFO byte lane 1 */ +-#define CTEST4_FBL_2 0x06 /* Select DMA FIFO byte lane 2 */ +-#define CTEST4_FBL_3 0x07 /* Select DMA FIFO byte lane 3 */ +-#define CTEST4_800_SAVE (CTEST4_800_BDIS) +- +- +-#define CTEST5_REG_700 0x19 /* Chip test 5 rw */ +-#define CTEST5_REG_800 0x22 /* Chip test 5 rw */ +-/* +- * Clock Address Incrementor. When set, it increments the +- * DNAD register to the next bus size boundary. It automatically +- * resets itself when the operation is complete. +- */ +-#define CTEST5_ADCK 0x80 +-/* +- * Clock Byte Counter. When set, it decrements the DBC register to +- * the next bus size boundary. +- */ +-#define CTEST5_BBCK 0x40 +-/* +- * Reset SCSI Offset. Setting this bit to 1 clears the current offset +- * pointer in the SCSI synchronous offset counter (SSTAT). This bit +- * is set to 1 if a SCSI Gross Error Condition occurs. The offset should +- * be cleared when a synchronous transfer fails. When written, it is +- * automatically cleared after the SCSI synchronous offset counter is +- * reset. +- */ +-/* Bit 5 is reserved on 800 series chips */ +-#define CTEST5_700_ROFF 0x20 +-/* +- * Master Control for Set or Reset pulses. When 1, causes the low +- * four bits of register to set when set, 0 causes the low bits to +- * clear when set. +- */ +-#define CTEST5_MASR 0x10 +-#define CTEST5_DDIR 0x08 /* DMA direction */ +-/* +- * Bits 2-0 are reserved on 800 series chips +- */ +-#define CTEST5_700_EOP 0x04 /* End of process */ +-#define CTEST5_700_DREQ 0x02 /* Data request */ +-#define CTEST5_700_DACK 0x01 /* Data acknowledge */ +- +-/* +- * Chip test 6 rw - writing to this register writes to the byte +- * lane in the DMA FIFO as determined by the FBL bits in the CTEST4 +- * register. 
+- */ +-#define CTEST6_REG_700 0x1a +-#define CTEST6_REG_800 0x23 +- +-#define CTEST7_REG 0x1b /* Chip test 7 rw */ +-/* 0x80 - 0x40 are reserved on NCR53c700 and NCR53c700-66 chips */ +-#define CTEST7_10_CDIS 0x80 /* Cache burst disable */ +-#define CTEST7_10_SC1 0x40 /* Snoop control bits */ +-#define CTEST7_10_SC0 0x20 +-#define CTEST7_10_SC_MASK 0x60 +-/* 0x20 is reserved on the NCR53c700 */ +-#define CTEST7_0060_FM 0x20 /* Fetch mode */ +-#define CTEST7_STD 0x10 /* Selection timeout disable */ +-#define CTEST7_DFP 0x08 /* DMA FIFO parity bit for CTEST6 */ +-#define CTEST7_EVP 0x04 /* 1 = host bus even parity, 0 = odd */ +-#define CTEST7_10_TT1 0x02 /* Transfer type */ +-#define CTEST7_00_DC 0x02 /* Set to drive DC low during instruction +- fetch */ +-#define CTEST7_DIFF 0x01 /* Differential mode */ +- +-#define CTEST7_SAVE ( CTEST7_EVP | CTEST7_DIFF ) +- +- +-#define TEMP_REG 0x1c /* through 0x1f Temporary stack rw */ +- +-#define DFIFO_REG 0x20 /* DMA FIFO rw */ +-/* +- * 0x80 is reserved on the NCR53c710, the CLF and FLF bits have been +- * moved into the CTEST8 register. +- */ +-#define DFIFO_00_FLF 0x80 /* Flush DMA FIFO to memory */ +-#define DFIFO_00_CLF 0x40 /* Clear DMA and SCSI FIFOs */ +-#define DFIFO_BO6 0x40 +-#define DFIFO_BO5 0x20 +-#define DFIFO_BO4 0x10 +-#define DFIFO_BO3 0x08 +-#define DFIFO_BO2 0x04 +-#define DFIFO_BO1 0x02 +-#define DFIFO_BO0 0x01 +-#define DFIFO_10_BO_MASK 0x7f /* 7 bit counter */ +-#define DFIFO_00_BO_MASK 0x3f /* 6 bit counter */ +- +-/* +- * Interrupt status rw +- * Note that this is the only register which can be read while SCSI +- * SCRIPTS are being executed. +- */ +-#define ISTAT_REG_700 0x21 +-#define ISTAT_REG_800 0x14 +-#define ISTAT_ABRT 0x80 /* Software abort, write +- *1 to abort, wait for interrupt. */ +-/* 0x40 and 0x20 are reserved on NCR53c700 and NCR53c700-66 chips */ +-#define ISTAT_10_SRST 0x40 /* software reset */ +-#define ISTAT_10_SIGP 0x20 /* signal script */ +-/* 0x10 is reserved on NCR53c700 series chips */ +-#define ISTAT_800_SEM 0x10 /* semaphore */ +-#define ISTAT_CON 0x08 /* 1 when connected */ +-#define ISTAT_800_INTF 0x04 /* Interrupt on the fly */ +-#define ISTAT_700_PRE 0x04 /* Pointer register empty. +- * Set to 1 when DSPS and DSP +- * registers are empty in pipeline +- * mode, always set otherwise. +- */ +-#define ISTAT_SIP 0x02 /* SCSI interrupt pending from +- * SCSI portion of SIOP see +- * SSTAT0 +- */ +-#define ISTAT_DIP 0x01 /* DMA interrupt pending +- * see DSTAT +- */ +- +-/* NCR53c700-66 and NCR53c710 only */ +-#define CTEST8_REG 0x22 /* Chip test 8 rw */ +-#define CTEST8_0066_EAS 0x80 /* Enable alternate SCSI clock, +- * ie read from SCLK/ rather than CLK/ +- */ +-#define CTEST8_0066_EFM 0x40 /* Enable fetch and master outputs */ +-#define CTEST8_0066_GRP 0x20 /* Generate Receive Parity for +- * pass through. This insures that +- * bad parity won't reach the host +- * bus. +- */ +-#define CTEST8_0066_TE 0x10 /* TolerANT enable. Enable +- * active negation, should only +- * be used for slow SCSI +- * non-differential. +- */ +-#define CTEST8_0066_HSC 0x08 /* Halt SCSI clock */ +-#define CTEST8_0066_SRA 0x04 /* Shorten REQ/ACK filtering, +- * must be set for fast SCSI-II +- * speeds. +- */ +-#define CTEST8_0066_DAS 0x02 /* Disable automatic target/initiator +- * switching. +- */ +-#define CTEST8_0066_LDE 0x01 /* Last disconnect enable. 
+- * The status of pending +- * disconnect is maintained by +- * the core, eliminating +- * the possibility of missing a +- * selection or reselection +- * while waiting to fetch a +- * WAIT DISCONNECT opcode. +- */ +- +-#define CTEST8_10_V3 0x80 /* Chip revision */ +-#define CTEST8_10_V2 0x40 +-#define CTEST8_10_V1 0x20 +-#define CTEST8_10_V0 0x10 +-#define CTEST8_10_V_MASK 0xf0 +-#define CTEST8_10_FLF 0x08 /* Flush FIFOs */ +-#define CTEST8_10_CLF 0x04 /* Clear FIFOs */ +-#define CTEST8_10_FM 0x02 /* Fetch pin mode */ +-#define CTEST8_10_SM 0x01 /* Snoop pin mode */ +- +- +-/* +- * The CTEST9 register may be used to differentiate between a +- * NCR53c700 and a NCR53c710. +- * +- * Write 0xff to this register. +- * Read it. +- * If the contents are 0xff, it is a NCR53c700 +- * If the contents are 0x00, it is a NCR53c700-66 first revision +- * If the contents are some other value, it is some other NCR53c700-66 +- */ +-#define CTEST9_REG_00 0x23 /* Chip test 9 ro */ +-#define LCRC_REG_10 0x23 +- +-/* +- * 0x24 through 0x27 are the DMA byte counter register. Instructions +- * write their high 8 bits into the DCMD register, the low 24 bits into +- * the DBC register. +- * +- * Function is dependent on the command type being executed. +- */ +- +- +-#define DBC_REG 0x24 +-/* +- * For Block Move Instructions, DBC is a 24 bit quantity representing +- * the number of bytes to transfer. +- * For Transfer Control Instructions, DBC is bit fielded as follows : +- */ +-/* Bits 20 - 23 should be clear */ +-#define DBC_TCI_TRUE (1 << 19) /* Jump when true */ +-#define DBC_TCI_COMPARE_DATA (1 << 18) /* Compare data */ +-#define DBC_TCI_COMPARE_PHASE (1 << 17) /* Compare phase with DCMD field */ +-#define DBC_TCI_WAIT_FOR_VALID (1 << 16) /* Wait for REQ */ +-/* Bits 8 - 15 are reserved on some implementations ? 
*/ +-#define DBC_TCI_MASK_MASK 0xff00 /* Mask for data compare */ +-#define DBC_TCI_MASK_SHIFT 8 +-#define DBC_TCI_DATA_MASK 0xff /* Data to be compared */ +-#define DBC_TCI_DATA_SHIFT 0 +- +-#define DBC_RWRI_IMMEDIATE_MASK 0xff00 /* Immediate data */ +-#define DBC_RWRI_IMMEDIATE_SHIFT 8 /* Amount to shift */ +-#define DBC_RWRI_ADDRESS_MASK 0x3f0000 /* Register address */ +-#define DBC_RWRI_ADDRESS_SHIFT 16 +- +- +-/* +- * DMA command r/w +- */ +-#define DCMD_REG 0x27 +-#define DCMD_TYPE_MASK 0xc0 /* Masks off type */ +-#define DCMD_TYPE_BMI 0x00 /* Indicates a Block Move instruction */ +-#define DCMD_BMI_IO 0x01 /* I/O, CD, and MSG bits selecting */ +-#define DCMD_BMI_CD 0x02 /* the phase for the block MOVE */ +-#define DCMD_BMI_MSG 0x04 /* instruction */ +- +-#define DCMD_BMI_OP_MASK 0x18 /* mask for opcode */ +-#define DCMD_BMI_OP_MOVE_T 0x00 /* MOVE */ +-#define DCMD_BMI_OP_MOVE_I 0x08 /* MOVE Initiator */ +- +-#define DCMD_BMI_INDIRECT 0x20 /* Indirect addressing */ +- +-#define DCMD_TYPE_TCI 0x80 /* Indicates a Transfer Control +- instruction */ +-#define DCMD_TCI_IO 0x01 /* I/O, CD, and MSG bits selecting */ +-#define DCMD_TCI_CD 0x02 /* the phase for the block MOVE */ +-#define DCMD_TCI_MSG 0x04 /* instruction */ +-#define DCMD_TCI_OP_MASK 0x38 /* mask for opcode */ +-#define DCMD_TCI_OP_JUMP 0x00 /* JUMP */ +-#define DCMD_TCI_OP_CALL 0x08 /* CALL */ +-#define DCMD_TCI_OP_RETURN 0x10 /* RETURN */ +-#define DCMD_TCI_OP_INT 0x18 /* INT */ +- +-#define DCMD_TYPE_RWRI 0x40 /* Indicates I/O or register Read/Write +- instruction */ +-#define DCMD_RWRI_OPC_MASK 0x38 /* Opcode mask */ +-#define DCMD_RWRI_OPC_WRITE 0x28 /* Write SFBR to register */ +-#define DCMD_RWRI_OPC_READ 0x30 /* Read register to SFBR */ +-#define DCMD_RWRI_OPC_MODIFY 0x38 /* Modify in place */ +- +-#define DCMD_RWRI_OP_MASK 0x07 +-#define DCMD_RWRI_OP_MOVE 0x00 +-#define DCMD_RWRI_OP_SHL 0x01 +-#define DCMD_RWRI_OP_OR 0x02 +-#define DCMD_RWRI_OP_XOR 0x03 +-#define DCMD_RWRI_OP_AND 0x04 +-#define DCMD_RWRI_OP_SHR 0x05 +-#define DCMD_RWRI_OP_ADD 0x06 +-#define DCMD_RWRI_OP_ADDC 0x07 +- +-#define DCMD_TYPE_MMI 0xc0 /* Indicates a Memory Move instruction +- (three words) */ +- +- +-#define DNAD_REG 0x28 /* through 0x2b DMA next address for +- data */ +-#define DSP_REG 0x2c /* through 0x2f DMA SCRIPTS pointer rw */ +-#define DSPS_REG 0x30 /* through 0x33 DMA SCRIPTS pointer +- save rw */ +-#define DMODE_REG_00 0x34 /* DMA mode rw */ +-#define DMODE_00_BL1 0x80 /* Burst length bits */ +-#define DMODE_00_BL0 0x40 +-#define DMODE_BL_MASK 0xc0 +-/* Burst lengths (800) */ +-#define DMODE_BL_2 0x00 /* 2 transfer */ +-#define DMODE_BL_4 0x40 /* 4 transfers */ +-#define DMODE_BL_8 0x80 /* 8 transfers */ +-#define DMODE_BL_16 0xc0 /* 16 transfers */ +- +-#define DMODE_10_BL_1 0x00 /* 1 transfer */ +-#define DMODE_10_BL_2 0x40 /* 2 transfers */ +-#define DMODE_10_BL_4 0x80 /* 4 transfers */ +-#define DMODE_10_BL_8 0xc0 /* 8 transfers */ +-#define DMODE_10_FC2 0x20 /* Driven to FC2 pin */ +-#define DMODE_10_FC1 0x10 /* Driven to FC1 pin */ +-#define DMODE_710_PD 0x08 /* Program/data on FC0 pin */ +-#define DMODE_710_UO 0x02 /* User prog. 
output */ +- +-#define DMODE_700_BW16 0x20 /* Host buswidth = 16 */ +-#define DMODE_700_286 0x10 /* 286 mode */ +-#define DMODE_700_IOM 0x08 /* Transfer to IO port */ +-#define DMODE_700_FAM 0x04 /* Fixed address mode */ +-#define DMODE_700_PIPE 0x02 /* Pipeline mode disables +- * automatic fetch / exec +- */ +-#define DMODE_MAN 0x01 /* Manual start mode, +- * requires a 1 to be written +- * to the start DMA bit in the DCNTL +- * register to run scripts +- */ +- +-#define DMODE_700_SAVE ( DMODE_00_BL_MASK | DMODE_00_BW16 | DMODE_00_286 ) +- +-/* NCR53c800 series only */ +-#define SCRATCHA_REG_800 0x34 /* through 0x37 Scratch A rw */ +-/* NCR53c710 only */ +-#define SCRATCHB_REG_10 0x34 /* through 0x37 scratch B rw */ +- +-#define DMODE_REG_10 0x38 /* DMA mode rw, NCR53c710 and newer */ +-#define DMODE_800_SIOM 0x20 /* Source IO = 1 */ +-#define DMODE_800_DIOM 0x10 /* Destination IO = 1 */ +-#define DMODE_800_ERL 0x08 /* Enable Read Line */ +- +-/* 35-38 are reserved on 700 and 700-66 series chips */ +-#define DIEN_REG 0x39 /* DMA interrupt enable rw */ +-/* 0x80, 0x40, and 0x20 are reserved on 700-series chips */ +-#define DIEN_800_MDPE 0x40 /* Master data parity error */ +-#define DIEN_800_BF 0x20 /* BUS fault */ +-#define DIEN_700_BF 0x20 /* BUS fault */ +-#define DIEN_ABRT 0x10 /* Enable aborted interrupt */ +-#define DIEN_SSI 0x08 /* Enable single step interrupt */ +-#define DIEN_SIR 0x04 /* Enable SCRIPTS INT command +- * interrupt +- */ +-/* 0x02 is reserved on 800 series chips */ +-#define DIEN_700_WTD 0x02 /* Enable watchdog timeout interrupt */ +-#define DIEN_700_OPC 0x01 /* Enable illegal instruction +- * interrupt +- */ +-#define DIEN_800_IID 0x01 /* Same meaning, different name */ +- +-/* +- * DMA watchdog timer rw +- * set in 16 CLK input periods. +- */ +-#define DWT_REG 0x3a +- +-/* DMA control rw */ +-#define DCNTL_REG 0x3b +-#define DCNTL_700_CF1 0x80 /* Clock divisor bits */ +-#define DCNTL_700_CF0 0x40 +-#define DCNTL_700_CF_MASK 0xc0 +-/* Clock divisors Divisor SCLK range (MHZ) */ +-#define DCNTL_700_CF_2 0x00 /* 2.0 37.51-50.00 */ +-#define DCNTL_700_CF_1_5 0x40 /* 1.5 25.01-37.50 */ +-#define DCNTL_700_CF_1 0x80 /* 1.0 16.67-25.00 */ +-#define DCNTL_700_CF_3 0xc0 /* 3.0 50.01-66.67 (53c700-66) */ +- +-#define DCNTL_700_S16 0x20 /* Load scripts 16 bits at a time */ +-#define DCNTL_SSM 0x10 /* Single step mode */ +-#define DCNTL_700_LLM 0x08 /* Low level mode, can only be set +- * after selection */ +-#define DCNTL_800_IRQM 0x08 /* Totem pole IRQ pin */ +-#define DCNTL_STD 0x04 /* Start DMA / SCRIPTS */ +-/* 0x02 is reserved */ +-#define DCNTL_00_RST 0x01 /* Software reset, resets everything +- * but 286 mode bit in DMODE. 
On the +- * NCR53c710, this bit moved to CTEST8 +- */ +-#define DCNTL_10_COM 0x01 /* 700 software compatibility mode */ +-#define DCNTL_10_EA 0x20 /* Enable Ack - needed for MVME16x */ +- +-#define DCNTL_700_SAVE ( DCNTL_CF_MASK | DCNTL_S16) +- +- +-/* NCR53c700-66 only */ +-#define SCRATCHB_REG_00 0x3c /* through 0x3f scratch b rw */ +-#define SCRATCHB_REG_800 0x5c /* through 0x5f scratch b rw */ +-/* NCR53c710 only */ +-#define ADDER_REG_10 0x3c /* Adder, NCR53c710 only */ +- +-#define SIEN1_REG_800 0x41 +-#define SIEN1_800_STO 0x04 /* selection/reselection timeout */ +-#define SIEN1_800_GEN 0x02 /* general purpose timer */ +-#define SIEN1_800_HTH 0x01 /* handshake to handshake */ +- +-#define SIST1_REG_800 0x43 +-#define SIST1_800_STO 0x04 /* selection/reselection timeout */ +-#define SIST1_800_GEN 0x02 /* general purpose timer */ +-#define SIST1_800_HTH 0x01 /* handshake to handshake */ +- +-#define SLPAR_REG_800 0x44 /* Parity */ +- +-#define MACNTL_REG_800 0x46 /* Memory access control */ +-#define MACNTL_800_TYP3 0x80 +-#define MACNTL_800_TYP2 0x40 +-#define MACNTL_800_TYP1 0x20 +-#define MACNTL_800_TYP0 0x10 +-#define MACNTL_800_DWR 0x08 +-#define MACNTL_800_DRD 0x04 +-#define MACNTL_800_PSCPT 0x02 +-#define MACNTL_800_SCPTS 0x01 +- +-#define GPCNTL_REG_800 0x47 /* General Purpose Pin Control */ +- +-/* Timeouts are expressed such that 0=off, 1=100us, doubling after that */ +-#define STIME0_REG_800 0x48 /* SCSI Timer Register 0 */ +-#define STIME0_800_HTH_MASK 0xf0 /* Handshake to Handshake timeout */ +-#define STIME0_800_HTH_SHIFT 4 +-#define STIME0_800_SEL_MASK 0x0f /* Selection timeout */ +-#define STIME0_800_SEL_SHIFT 0 +- +-#define STIME1_REG_800 0x49 +-#define STIME1_800_GEN_MASK 0x0f /* General purpose timer */ +- +-#define RESPID_REG_800 0x4a /* Response ID, bit fielded. 8 +- bits on narrow chips, 16 on WIDE */ +- +-#define STEST0_REG_800 0x4c +-#define STEST0_800_SLT 0x08 /* Selection response logic test */ +-#define STEST0_800_ART 0x04 /* Arbitration priority encoder test */ +-#define STEST0_800_SOZ 0x02 /* Synchronous offset zero */ +-#define STEST0_800_SOM 0x01 /* Synchronous offset maximum */ +- +-#define STEST1_REG_800 0x4d +-#define STEST1_800_SCLK 0x80 /* Disable SCSI clock */ +- +-#define STEST2_REG_800 0x4e +-#define STEST2_800_SCE 0x80 /* Enable SOCL/SODL */ +-#define STEST2_800_ROF 0x40 /* Reset SCSI sync offset */ +-#define STEST2_800_SLB 0x10 /* Enable SCSI loopback mode */ +-#define STEST2_800_SZM 0x08 /* SCSI high impedance mode */ +-#define STEST2_800_EXT 0x02 /* Extend REQ/ACK filter 30 to 60ns */ +-#define STEST2_800_LOW 0x01 /* SCSI low level mode */ +- +-#define STEST3_REG_800 0x4f +-#define STEST3_800_TE 0x80 /* Enable active negation */ +-#define STEST3_800_STR 0x40 /* SCSI FIFO test read */ +-#define STEST3_800_HSC 0x20 /* Halt SCSI clock */ +-#define STEST3_800_DSI 0x10 /* Disable single initiator response */ +-#define STEST3_800_TTM 0x04 /* Time test mode */ +-#define STEST3_800_CSF 0x02 /* Clear SCSI FIFO */ +-#define STEST3_800_STW 0x01 /* SCSI FIFO test write */ +- +-#define OPTION_PARITY 0x1 /* Enable parity checking */ +-#define OPTION_TAGGED_QUEUE 0x2 /* Enable SCSI-II tagged queuing */ +-#define OPTION_700 0x8 /* Always run NCR53c700 scripts */ +-#define OPTION_INTFLY 0x10 /* Use INTFLY interrupts */ +-#define OPTION_DEBUG_INTR 0x20 /* Debug interrupts */ +-#define OPTION_DEBUG_INIT_ONLY 0x40 /* Run initialization code and +- simple test code, return +- DID_NO_CONNECT if any SCSI +- commands are attempted. 
*/ +-#define OPTION_DEBUG_READ_ONLY 0x80 /* Return DID_ERROR if any +- SCSI write is attempted */ +-#define OPTION_DEBUG_TRACE 0x100 /* Animated trace mode, print +- each address and instruction +- executed to debug buffer. */ +-#define OPTION_DEBUG_SINGLE 0x200 /* stop after executing one +- instruction */ +-#define OPTION_SYNCHRONOUS 0x400 /* Enable sync SCSI. */ +-#define OPTION_MEMORY_MAPPED 0x800 /* NCR registers have valid +- memory mapping */ +-#define OPTION_IO_MAPPED 0x1000 /* NCR registers have valid +- I/O mapping */ +-#define OPTION_DEBUG_PROBE_ONLY 0x2000 /* Probe only, don't even init */ +-#define OPTION_DEBUG_TESTS_ONLY 0x4000 /* Probe, init, run selected tests */ +-#define OPTION_DEBUG_TEST0 0x08000 /* Run test 0 */ +-#define OPTION_DEBUG_TEST1 0x10000 /* Run test 1 */ +-#define OPTION_DEBUG_TEST2 0x20000 /* Run test 2 */ +-#define OPTION_DEBUG_DUMP 0x40000 /* Dump commands */ +-#define OPTION_DEBUG_TARGET_LIMIT 0x80000 /* Only talk to target+luns specified */ +-#define OPTION_DEBUG_NCOMMANDS_LIMIT 0x100000 /* Limit the number of commands */ +-#define OPTION_DEBUG_SCRIPT 0x200000 /* Print when checkpoints are passed */ +-#define OPTION_DEBUG_FIXUP 0x400000 /* print fixup values */ +-#define OPTION_DEBUG_DSA 0x800000 +-#define OPTION_DEBUG_CORRUPTION 0x1000000 /* Detect script corruption */ +-#define OPTION_DEBUG_SDTR 0x2000000 /* Debug SDTR problem */ +-#define OPTION_DEBUG_MISMATCH 0x4000000 /* Debug phase mismatches */ +-#define OPTION_DISCONNECT 0x8000000 /* Allow disconnect */ +-#define OPTION_DEBUG_DISCONNECT 0x10000000 +-#define OPTION_ALWAYS_SYNCHRONOUS 0x20000000 /* Negotiate sync. transfers +- on power up */ +-#define OPTION_DEBUG_QUEUES 0x80000000 +-#define OPTION_DEBUG_ALLOCATION 0x100000000LL +-#define OPTION_DEBUG_SYNCHRONOUS 0x200000000LL /* Sanity check SXFER and +- SCNTL3 registers */ +-#define OPTION_NO_ASYNC 0x400000000LL /* Don't automagically send +- SDTR for async transfers when +- we haven't been told to do +- a synchronous transfer. */ +-#define OPTION_NO_PRINT_RACE 0x800000000LL /* Don't print message when +- the reselect/WAIT DISCONNECT +- race condition hits */ +-#if !defined(PERM_OPTIONS) +-#define PERM_OPTIONS 0 +-#endif +- +-/* +- * Some data which is accessed by the NCR chip must be 4-byte aligned. +- * For some hosts the default is less than that (eg. 68K uses 2-byte). +- * Alignment has only been forced where it is important; also if one +- * 32 bit structure field is aligned then it is assumed that following +- * 32 bit fields are also aligned. Take care when adding fields +- * which are other than 32 bit. +- */ +- +-struct NCR53c7x0_synchronous { +- u32 select_indirect /* Value used for indirect selection */ +- __attribute__ ((aligned (4))); +- u32 sscf_710; /* Used to set SSCF bits for 710 */ +- u32 script[8]; /* Size ?? Script used when target is +- reselected */ +- unsigned char synchronous_want[5]; /* Per target desired SDTR */ +-/* +- * Set_synchronous programs these, select_indirect and current settings after +- * int_debug_should show a match. 
+- */ +- unsigned char sxfer_sanity, scntl3_sanity; +-}; +- +-#define CMD_FLAG_SDTR 1 /* Initiating synchronous +- transfer negotiation */ +-#define CMD_FLAG_WDTR 2 /* Initiating wide transfer +- negotiation */ +-#define CMD_FLAG_DID_SDTR 4 /* did SDTR */ +-#define CMD_FLAG_DID_WDTR 8 /* did WDTR */ +- +-struct NCR53c7x0_table_indirect { +- u32 count; +- void *address; +-}; +- +-enum ncr_event { +- EVENT_NONE = 0, +-/* +- * Order is IMPORTANT, since these must correspond to the event interrupts +- * in 53c7,8xx.scr +- */ +- +- EVENT_ISSUE_QUEUE = 0x5000000, /* 0 Command was added to issue queue */ +- EVENT_START_QUEUE, /* 1 Command moved to start queue */ +- EVENT_SELECT, /* 2 Command completed selection */ +- EVENT_DISCONNECT, /* 3 Command disconnected */ +- EVENT_RESELECT, /* 4 Command reselected */ +- EVENT_COMPLETE, /* 5 Command completed */ +- EVENT_IDLE, /* 6 */ +- EVENT_SELECT_FAILED, /* 7 */ +- EVENT_BEFORE_SELECT, /* 8 */ +- EVENT_RESELECT_FAILED /* 9 */ +-}; +- +-struct NCR53c7x0_event { +- enum ncr_event event; /* What type of event */ +- unsigned char target; +- unsigned char lun; +- struct timeval time; +- u32 *dsa; /* What's in the DSA register now (virt) */ +-/* +- * A few things from that SCSI pid so we know what happened after +- * the Scsi_Cmnd structure in question may have disappeared. +- */ +- unsigned long pid; /* The SCSI PID which caused this +- event */ +- unsigned char cmnd[12]; +-}; +- +-/* +- * Things in the NCR53c7x0_cmd structure are split into two parts : +- * +- * 1. A fixed portion, for things which are not accessed directly by static NCR +- * code (ie, are referenced only by the Linux side of the driver, +- * or only by dynamically generated code). +- * +- * 2. The DSA portion, for things which are accessed directly by static NCR +- * code. +- * +- * This is a little ugly, but it +- * 1. Avoids conflicts between the NCR code's picture of the structure, and +- * Linux code's idea of what it looks like. +- * +- * 2. Minimizes the pain in the Linux side of the code needed +- * to calculate real dsa locations for things, etc. +- * +- */ +- +-struct NCR53c7x0_cmd { +- void *real; /* Real, unaligned address for +- free function */ +- void (* free)(void *, int); /* Command to deallocate; NULL +- for structures allocated with +- scsi_register, etc. */ +- Scsi_Cmnd *cmd; /* Associated Scsi_Cmnd +- structure, Scsi_Cmnd points +- at NCR53c7x0_cmd using +- host_scribble structure */ +- +- int size; /* scsi_malloc'd size of this +- structure */ +- +- int flags; /* CMD_* flags */ +- +- unsigned char cmnd[12]; /* CDB, copied from Scsi_Cmnd */ +- int result; /* Copy to Scsi_Cmnd when done */ +- +- struct { /* Private non-cached bounce buffer */ +- unsigned char buf[256]; +- u32 addr; +- u32 len; +- } bounce; +- +-/* +- * SDTR and WIDE messages are an either/or affair +- * in this message, since we will go into message out and send +- * _the whole mess_ without dropping out of message out to +- * let the target go into message in after sending the first +- * message. 
+- */ +- +- unsigned char select[11]; /* Select message, includes +- IDENTIFY +- (optional) QUEUE TAG +- (optional) SDTR or WDTR +- */ +- +- +- volatile struct NCR53c7x0_cmd *next; /* Linux maintained lists (free, +- running, eventually finished */ +- +- +- u32 *data_transfer_start; /* Start of data transfer routines */ +- u32 *data_transfer_end; /* Address after end of data transfer o +- routines */ +-/* +- * The following three fields were moved from the DSA proper to here +- * since only dynamically generated NCR code refers to them, meaning +- * we don't need dsa_* absolutes, and it is simpler to let the +- * host code refer to them directly. +- */ +- +-/* +- * HARD CODED : residual and saved_residual need to agree with the sizes +- * used in NCR53c7,8xx.scr. +- * +- * FIXME: we want to consider the case where we have odd-length +- * scatter/gather buffers and a WIDE transfer, in which case +- * we'll need to use the CHAIN MOVE instruction. Ick. +- */ +- u32 residual[6] __attribute__ ((aligned (4))); +- /* Residual data transfer which +- allows pointer code to work +- right. +- +- [0-1] : Conditional call to +- appropriate other transfer +- routine. +- [2-3] : Residual block transfer +- instruction. +- [4-5] : Jump to instruction +- after splice. +- */ +- u32 saved_residual[6]; /* Copy of old residual, so we +- can get another partial +- transfer and still recover +- */ +- +- u32 saved_data_pointer; /* Saved data pointer */ +- +- u32 dsa_next_addr; /* _Address_ of dsa_next field +- in this dsa for RISCy +- style constant. */ +- +- u32 dsa_addr; /* Address of dsa; RISCy style +- constant */ +- +- u32 dsa[0]; /* Variable length (depending +- on host type, number of scatter / +- gather buffers, etc). */ +-}; +- +-struct NCR53c7x0_break { +- u32 *address, old_instruction[2]; +- struct NCR53c7x0_break *next; +- unsigned char old_size; /* Size of old instruction */ +-}; +- +-/* Indicates that the NCR is not executing code */ +-#define STATE_HALTED 0 +-/* +- * Indicates that the NCR is executing the wait for select / reselect +- * script. Only used when running NCR53c700 compatible scripts, only +- * state during which an ABORT is _not_ considered an error condition. +- */ +-#define STATE_WAITING 1 +-/* Indicates that the NCR is executing other code. */ +-#define STATE_RUNNING 2 +-/* +- * Indicates that the NCR was being aborted. +- */ +-#define STATE_ABORTING 3 +-/* Indicates that the NCR was successfully aborted. */ +-#define STATE_ABORTED 4 +-/* Indicates that the NCR has been disabled due to a fatal error */ +-#define STATE_DISABLED 5 +- +-/* +- * Where knowledge of SCSI SCRIPT(tm) specified values are needed +- * in an interrupt handler, an interrupt handler exists for each +- * different SCSI script so we don't have name space problems. +- * +- * Return values of these handlers are as follows : +- */ +-#define SPECIFIC_INT_NOTHING 0 /* don't even restart */ +-#define SPECIFIC_INT_RESTART 1 /* restart at the next instruction */ +-#define SPECIFIC_INT_ABORT 2 /* recoverable error, abort cmd */ +-#define SPECIFIC_INT_PANIC 3 /* unrecoverable error, panic */ +-#define SPECIFIC_INT_DONE 4 /* normal command completion */ +-#define SPECIFIC_INT_BREAK 5 /* break point encountered */ +- +-struct NCR53c7x0_hostdata { +- int size; /* Size of entire Scsi_Host +- structure */ +- int board; /* set to board type, useful if +- we have host specific things, +- ie, a general purpose I/O +- bit is being used to enable +- termination, etc. 
*/ +- +- int chip; /* set to chip type; 700-66 is +- 700-66, rest are last three +- digits of part number */ +- +- char valid_ids[8]; /* Valid SCSI ID's for adapter */ +- +- u32 *dsp; /* dsp to restart with after +- all stacked interrupts are +- handled. */ +- +- unsigned dsp_changed:1; /* Has dsp changed within this +- set of stacked interrupts ? */ +- +- unsigned char dstat; /* Most recent value of dstat */ +- unsigned dstat_valid:1; +- +- unsigned expecting_iid:1; /* Expect IID interrupt */ +- unsigned expecting_sto:1; /* Expect STO interrupt */ +- +- /* +- * The code stays cleaner if we use variables with function +- * pointers and offsets that are unique for the different +- * scripts rather than having a slew of switch(hostdata->chip) +- * statements. +- * +- * It also means that the #defines from the SCSI SCRIPTS(tm) +- * don't have to be visible outside of the script-specific +- * instructions, preventing name space pollution. +- */ +- +- void (* init_fixup)(struct Scsi_Host *host); +- void (* init_save_regs)(struct Scsi_Host *host); +- void (* dsa_fixup)(struct NCR53c7x0_cmd *cmd); +- void (* soft_reset)(struct Scsi_Host *host); +- int (* run_tests)(struct Scsi_Host *host); +- +- /* +- * Called when DSTAT_SIR is set, indicating an interrupt generated +- * by the INT instruction, where values are unique for each SCSI +- * script. Should return one of the SPEC_* values. +- */ +- +- int (* dstat_sir_intr)(struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd); +- +- int dsa_len; /* Size of DSA structure */ +- +- /* +- * Location of DSA fields for the SCSI SCRIPT corresponding to this +- * chip. +- */ +- +- s32 dsa_start; +- s32 dsa_end; +- s32 dsa_next; +- s32 dsa_prev; +- s32 dsa_cmnd; +- s32 dsa_select; +- s32 dsa_msgout; +- s32 dsa_cmdout; +- s32 dsa_dataout; +- s32 dsa_datain; +- s32 dsa_msgin; +- s32 dsa_msgout_other; +- s32 dsa_write_sync; +- s32 dsa_write_resume; +- s32 dsa_check_reselect; +- s32 dsa_status; +- s32 dsa_saved_pointer; +- s32 dsa_jump_dest; +- +- /* +- * Important entry points that generic fixup code needs +- * to know about, fixed up. +- */ +- +- s32 E_accept_message; +- s32 E_command_complete; +- s32 E_data_transfer; +- s32 E_dsa_code_template; +- s32 E_dsa_code_template_end; +- s32 E_end_data_transfer; +- s32 E_msg_in; +- s32 E_initiator_abort; +- s32 E_other_transfer; +- s32 E_other_in; +- s32 E_other_out; +- s32 E_target_abort; +- s32 E_debug_break; +- s32 E_reject_message; +- s32 E_respond_message; +- s32 E_select; +- s32 E_select_msgout; +- s32 E_test_0; +- s32 E_test_1; +- s32 E_test_2; +- s32 E_test_3; +- s32 E_dsa_zero; +- s32 E_cmdout_cmdout; +- s32 E_wait_reselect; +- s32 E_dsa_code_begin; +- +- long long options; /* Bitfielded set of options enabled */ +- volatile u32 test_completed; /* Test completed */ +- int test_running; /* Test currently running */ +- s32 test_source +- __attribute__ ((aligned (4))); +- volatile s32 test_dest; +- +- volatile int state; /* state of driver, only used for +- OPTION_700 */ +- +- unsigned char dmode; /* +- * set to the address of the DMODE +- * register for this chip. +- */ +- unsigned char istat; /* +- * set to the address of the ISTAT +- * register for this chip. +- */ +- +- int scsi_clock; /* +- * SCSI clock in HZ. 0 may be used +- * for unknown, although this will +- * disable synchronous negotiation. 
+- */ +- +- volatile int intrs; /* Number of interrupts */ +- volatile int resets; /* Number of SCSI resets */ +- unsigned char saved_dmode; +- unsigned char saved_ctest4; +- unsigned char saved_ctest7; +- unsigned char saved_dcntl; +- unsigned char saved_scntl3; +- +- unsigned char this_id_mask; +- +- /* Debugger information */ +- struct NCR53c7x0_break *breakpoints, /* Linked list of all break points */ +- *breakpoint_current; /* Current breakpoint being stepped +- through, NULL if we are running +- normally. */ +-#ifdef NCR_DEBUG +- int debug_size; /* Size of debug buffer */ +- volatile int debug_count; /* Current data count */ +- volatile char *debug_buf; /* Output ring buffer */ +- volatile char *debug_write; /* Current write pointer */ +- volatile char *debug_read; /* Current read pointer */ +-#endif /* def NCR_DEBUG */ +- +- /* XXX - primitive debugging junk, remove when working ? */ +- int debug_print_limit; /* Number of commands to print +- out exhaustive debugging +- information for if +- OPTION_DEBUG_DUMP is set */ +- +- unsigned char debug_lun_limit[16]; /* If OPTION_DEBUG_TARGET_LIMIT +- set, puke if commands are sent +- to other target/lun combinations */ +- +- int debug_count_limit; /* Number of commands to execute +- before puking to limit debugging +- output */ +- +- +- volatile unsigned idle:1; /* set to 1 if idle */ +- +- /* +- * Table of synchronous+wide transfer parameters set on a per-target +- * basis. +- */ +- +- volatile struct NCR53c7x0_synchronous sync[16] +- __attribute__ ((aligned (4))); +- +- volatile Scsi_Cmnd *issue_queue +- __attribute__ ((aligned (4))); +- /* waiting to be issued by +- Linux driver */ +- volatile struct NCR53c7x0_cmd *running_list; +- /* commands running, maintained +- by Linux driver */ +- +- volatile struct NCR53c7x0_cmd *ncrcurrent; /* currently connected +- nexus, ONLY valid for +- NCR53c700/NCR53c700-66 +- */ +- +- volatile struct NCR53c7x0_cmd *spare; /* pointer to spare, +- allocated at probe time, +- which we can use for +- initialization */ +- volatile struct NCR53c7x0_cmd *free; +- int max_cmd_size; /* Maximum size of NCR53c7x0_cmd +- based on number of +- scatter/gather segments, etc. +- */ +- volatile int num_cmds; /* Number of commands +- allocated */ +- volatile int extra_allocate; +- volatile unsigned char cmd_allocated[16]; /* Have we allocated commands +- for this target yet? If not, +- do so ASAP */ +- volatile unsigned char busy[16][8]; /* number of commands +- executing on each target +- */ +- /* +- * Eventually, I'll switch to a coroutine for calling +- * cmd->done(cmd), etc. so that we can overlap interrupt +- * processing with this code for maximum performance. +- */ +- +- volatile struct NCR53c7x0_cmd *finished_queue; +- +- /* Shared variables between SCRIPT and host driver */ +- volatile u32 *schedule +- __attribute__ ((aligned (4))); /* Array of JUMPs to dsa_begin +- routines of various DSAs. +- When not in use, replace +- with jump to next slot */ +- +- +- volatile unsigned char msg_buf[16]; /* buffer for messages +- other than the command +- complete message */ +- +- /* Per-target default synchronous and WIDE messages */ +- volatile unsigned char synchronous_want[16][5]; +- volatile unsigned char wide_want[16][4]; +- +- /* Bit fielded set of targets we want to speak synchronously with */ +- volatile u16 initiate_sdtr; +- /* Bit fielded set of targets we want to speak wide with */ +- volatile u16 initiate_wdtr; +- /* Bit fielded list of targets we've talked to. 
*/ +- volatile u16 talked_to; +- +- /* Array of bit-fielded lun lists that we need to request_sense */ +- volatile unsigned char request_sense[16]; +- +- u32 addr_reconnect_dsa_head +- __attribute__ ((aligned (4))); /* RISCy style constant, +- address of following */ +- volatile u32 reconnect_dsa_head; +- /* Data identifying nexus we are trying to match during reselection */ +- volatile unsigned char reselected_identify; /* IDENTIFY message */ +- volatile unsigned char reselected_tag; /* second byte of queue tag +- message or 0 */ +- +- /* These were static variables before we moved them */ +- +- s32 NCR53c7xx_zero +- __attribute__ ((aligned (4))); +- s32 NCR53c7xx_sink; +- u32 NOP_insn; +- char NCR53c7xx_msg_reject; +- char NCR53c7xx_msg_abort; +- char NCR53c7xx_msg_nop; +- +- /* +- * Following item introduced by RGH to support NCRc710, which is +- * VERY brain-dead when it come to memory moves +- */ +- +- /* DSA save area used only by the NCR chip */ +- volatile unsigned long saved2_dsa +- __attribute__ ((aligned (4))); +- +- volatile unsigned long emulated_intfly +- __attribute__ ((aligned (4))); +- +- volatile int event_size, event_index; +- volatile struct NCR53c7x0_event *events; +- +- /* If we need to generate code to kill off the currently connected +- command, this is where we do it. Should have a BMI instruction +- to source or sink the current data, followed by a JUMP +- to abort_connected */ +- +- u32 *abort_script; +- +- int script_count; /* Size of script in words */ +- u32 script[0]; /* Relocated SCSI script */ +- +-}; +- +-#define SCSI_IRQ_NONE 255 +-#define DMA_NONE 255 +-#define IRQ_AUTO 254 +-#define DMA_AUTO 254 +- +-#define BOARD_GENERIC 0 +- +-#define NCR53c7x0_insn_size(insn) \ +- (((insn) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI ? 3 : 2) +- +- +-#define NCR53c7x0_local_declare() \ +- volatile unsigned char *NCR53c7x0_address_memory; \ +- unsigned int NCR53c7x0_address_io; \ +- int NCR53c7x0_memory_mapped +- +-#define NCR53c7x0_local_setup(host) \ +- NCR53c7x0_address_memory = (void *) (host)->base; \ +- NCR53c7x0_address_io = (unsigned int) (host)->io_port; \ +- NCR53c7x0_memory_mapped = ((struct NCR53c7x0_hostdata *) \ +- host->hostdata[0])-> options & OPTION_MEMORY_MAPPED +- +-#ifdef BIG_ENDIAN +-/* These could be more efficient, given that we are always memory mapped, +- * but they don't give the same problems as the write macros, so leave +- * them. */ +-#ifdef __mc68000__ +-#define NCR53c7x0_read8(address) \ +- ((unsigned int)raw_inb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) ) +- +-#define NCR53c7x0_read16(address) \ +- ((unsigned int)raw_inw((u32)NCR53c7x0_address_memory + ((u32)(address)^2))) +-#else +-#define NCR53c7x0_read8(address) \ +- (NCR53c7x0_memory_mapped ? \ +- (unsigned int)readb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) : \ +- inb(NCR53c7x0_address_io + (address))) +- +-#define NCR53c7x0_read16(address) \ +- (NCR53c7x0_memory_mapped ? \ +- (unsigned int)readw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) : \ +- inw(NCR53c7x0_address_io + (address))) +-#endif /* mc68000 */ +-#else +-#define NCR53c7x0_read8(address) \ +- (NCR53c7x0_memory_mapped ? \ +- (unsigned int)readb((u32)NCR53c7x0_address_memory + (u32)(address)) : \ +- inb(NCR53c7x0_address_io + (address))) +- +-#define NCR53c7x0_read16(address) \ +- (NCR53c7x0_memory_mapped ? 
\ +- (unsigned int)readw((u32)NCR53c7x0_address_memory + (u32)(address)) : \ +- inw(NCR53c7x0_address_io + (address))) +-#endif +- +-#ifdef __mc68000__ +-#define NCR53c7x0_read32(address) \ +- ((unsigned int) raw_inl((u32)NCR53c7x0_address_memory + (u32)(address))) +-#else +-#define NCR53c7x0_read32(address) \ +- (NCR53c7x0_memory_mapped ? \ +- (unsigned int) readl((u32)NCR53c7x0_address_memory + (u32)(address)) : \ +- inl(NCR53c7x0_address_io + (address))) +-#endif /* mc68000*/ +- +-#ifdef BIG_ENDIAN +-/* If we are big-endian, then we are not Intel, so probably don't have +- * an i/o map as well as a memory map. So, let's assume memory mapped. +- * Also, I am having terrible problems trying to persuade the compiler +- * not to lay down code which does a read after write for these macros. +- * If you remove 'volatile' from writeb() and friends it is ok.... +- */ +- +-#define NCR53c7x0_write8(address,value) \ +- *(volatile unsigned char *) \ +- ((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) = (value) +- +-#define NCR53c7x0_write16(address,value) \ +- *(volatile unsigned short *) \ +- ((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) = (value) +- +-#define NCR53c7x0_write32(address,value) \ +- *(volatile unsigned long *) \ +- ((u32)NCR53c7x0_address_memory + ((u32)(address))) = (value) +- +-#else +- +-#define NCR53c7x0_write8(address,value) \ +- (NCR53c7x0_memory_mapped ? \ +- ({writeb((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ +- outb((value), NCR53c7x0_address_io + (address))) +- +-#define NCR53c7x0_write16(address,value) \ +- (NCR53c7x0_memory_mapped ? \ +- ({writew((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ +- outw((value), NCR53c7x0_address_io + (address))) +- +-#define NCR53c7x0_write32(address,value) \ +- (NCR53c7x0_memory_mapped ? \ +- ({writel((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) : \ +- outl((value), NCR53c7x0_address_io + (address))) +- +-#endif +- +-/* Patch arbitrary 32 bit words in the script */ +-#define patch_abs_32(script, offset, symbol, value) \ +- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ +- (u32)); ++i) { \ +- (script)[A_##symbol##_used[i] - (offset)] += (value); \ +- if (hostdata->options & OPTION_DEBUG_FIXUP) \ +- printk("scsi%d : %s reference %d at 0x%x in %s is now 0x%x\n",\ +- host->host_no, #symbol, i, A_##symbol##_used[i] - \ +- (int)(offset), #script, (script)[A_##symbol##_used[i] - \ +- (offset)]); \ +- } +- +-/* Patch read/write instruction immediate field */ +-#define patch_abs_rwri_data(script, offset, symbol, value) \ +- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ +- (u32)); ++i) \ +- (script)[A_##symbol##_used[i] - (offset)] = \ +- ((script)[A_##symbol##_used[i] - (offset)] & \ +- ~DBC_RWRI_IMMEDIATE_MASK) | \ +- (((value) << DBC_RWRI_IMMEDIATE_SHIFT) & \ +- DBC_RWRI_IMMEDIATE_MASK) +- +-/* Patch transfer control instruction data field */ +-#define patch_abs_tci_data(script, offset, symbol, value) \ +- for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof \ +- (u32)); ++i) \ +- (script)[A_##symbol##_used[i] - (offset)] = \ +- ((script)[A_##symbol##_used[i] - (offset)] & \ +- ~DBC_TCI_DATA_MASK) | \ +- (((value) << DBC_TCI_DATA_SHIFT) & \ +- DBC_TCI_DATA_MASK) +- +-/* Patch field in dsa structure (assignment should be +=?) 
*/ +-#define patch_dsa_32(dsa, symbol, word, value) \ +- { \ +- (dsa)[(hostdata->##symbol - hostdata->dsa_start) / sizeof(u32) \ +- + (word)] = (value); \ +- if (hostdata->options & OPTION_DEBUG_DSA) \ +- printk("scsi : dsa %s symbol %s(%d) word %d now 0x%x\n", \ +- #dsa, #symbol, hostdata->##symbol, \ +- (word), (u32) (value)); \ +- } +- +-/* Paranoid people could use panic() here. */ +-#define FATAL(host) shutdown((host)); +- +-extern int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip, +- unsigned long base, int io_port, int irq, int dma, +- long long options, int clock); +- +-#endif /* NCR53c710_C */ +-#endif /* NCR53c710_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.scr linux-2.6.22-591/drivers/scsi/53c7xx.scr +--- linux-2.6.22-570/drivers/scsi/53c7xx.scr 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c7xx.scr 1969-12-31 19:00:00.000000000 -0500 +@@ -1,1591 +0,0 @@ +-#undef DEBUG +-#undef EVENTS +-#undef NO_SELECTION_TIMEOUT +-#define BIG_ENDIAN +- +-; 53c710 driver. Modified from Drew Eckhardts driver +-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] +-; +-; I have left the script for the 53c8xx family in here, as it is likely +-; to be useful to see what I changed when bug hunting. +- +-; NCR 53c810 driver, main script +-; Sponsored by +-; iX Multiuser Multitasking Magazine +-; hm@ix.de +-; +-; Copyright 1993, 1994, 1995 Drew Eckhardt +-; Visionary Computing +-; (Unix and Linux consulting and custom programming) +-; drew@PoohSticks.ORG +-; +1 (303) 786-7975 +-; +-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. +-; +-; PRE-ALPHA +-; +-; For more information, please consult +-; +-; NCR 53C810 +-; PCI-SCSI I/O Processor +-; Data Manual +-; +-; NCR 53C710 +-; SCSI I/O Processor +-; Programmers Guide +-; +-; NCR Microelectronics +-; 1635 Aeroplaza Drive +-; Colorado Springs, CO 80916 +-; 1+ (719) 578-3400 +-; +-; Toll free literature number +-; +1 (800) 334-5454 +-; +-; IMPORTANT : This code is self modifying due to the limitations of +-; the NCR53c7,8xx series chips. Persons debugging this code with +-; the remote debugger should take this into account, and NOT set +-; breakpoints in modified instructions. +-; +-; Design: +-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard +-; microcontroller using a simple instruction set. +-; +-; So, to minimize the effects of interrupt latency, and to maximize +-; throughput, this driver offloads the practical maximum amount +-; of processing to the SCSI chip while still maintaining a common +-; structure. +-; +-; Where tradeoffs were needed between efficiency on the older +-; chips and the newer NCR53c800 series, the NCR53c800 series +-; was chosen. +-; +-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully +-; automate SCSI transfers without host processor intervention, this +-; isn't the case with the NCR53c710 and newer chips which allow +-; +-; - reads and writes to the internal registers from within the SCSI +-; scripts, allowing the SCSI SCRIPTS(tm) code to save processor +-; state so that multiple threads of execution are possible, and also +-; provide an ALU for loop control, etc. +-; +-; - table indirect addressing for some instructions. This allows +-; pointers to be located relative to the DSA ((Data Structure +-; Address) register. 
+-;
+-; These features make it possible to implement a mailbox style interface,
+-; where the same piece of code is run to handle I/O for multiple threads
+-; at once minimizing our need to relocate code. Since the NCR53c700/
+-; NCR53c800 series have a unique combination of features, making a
+-; standard ingoing/outgoing mailbox system costly, I've modified it.
+-;
+-; - Mailboxes are a mixture of code and data. This lets us greatly
+-; simplify the NCR53c810 code and do things that would otherwise
+-; not be possible.
+-;
+-; The saved data pointer is now implemented as follows :
+-;
+-; Control flow has been architected such that if control reaches
+-; munge_save_data_pointer, on a restore pointers message or
+-; reconnection, a jump to the address formerly in the TEMP register
+-; will allow the SCSI command to resume execution.
+-;
+-
+-;
+-; Note : the DSA structures must be aligned on 32 bit boundaries,
+-; since the source and destination of MOVE MEMORY instructions
+-; must share the same alignment and this is the alignment of the
+-; NCR registers.
+-;
+-
+-; For some systems (MVME166, for example) dmode is always the same, so don't
+-; waste time writing it
+-
+-#if 1
+-#define DMODE_MEMORY_TO_NCR
+-#define DMODE_MEMORY_TO_MEMORY
+-#define DMODE_NCR_TO_MEMORY
+-#else
+-#define DMODE_MEMORY_TO_NCR MOVE dmode_memory_to_ncr TO DMODE
+-#define DMODE_MEMORY_TO_MEMORY MOVE dmode_memory_to_memory TO DMODE
+-#define DMODE_NCR_TO_MEMORY MOVE dmode_ncr_to_memory TO DMODE
+-#endif
+-
+-ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa
+-ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa
+-ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address
+- ; for current dsa
+-ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target
+- ; sync routine
+-ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target
+- ; sscf value (53c710)
+-ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa
+-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command
+- ; saved data pointer
+-ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command
+- ; current residual code
+-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command
+- ; saved residual code
+-ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand
+-ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to
+-ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value
+-
+-;
+-; Once a device has initiated reselection, we need to compare it
+-; against the singly linked list of commands which have disconnected
+-; and are pending reselection. These commands are maintained in
+-; an unordered singly linked list of DSA structures, through the
+-; DSA pointers at their 'centers' headed by the reconnect_dsa_head
+-; pointer.
+-;
+-; To avoid complications in removing commands from the list,
+-; I minimize the amount of expensive (at eight operations per
+-; addition @ 500-600ns each) pointer operations which must
+-; be done in the NCR driver by precomputing them on the
+-; host processor during dsa structure generation.
+-;
+-; The fixed-up per DSA code knows how to recognize the nexus
+-; associated with the corresponding SCSI command, and modifies
+-; the source and destination pointers for the MOVE MEMORY
+-; instruction which is executed when reselected_ok is called
+-; to remove the command from the list. Similarly, DSA is
+-; loaded with the address of the next DSA structure and
+-; reselected_check_next is called if a failure occurs.
+-; +-; Perhaps more concisely, the net effect of the mess is +-; +-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, +-; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) { +-; src = &dsa->next; +-; if (target_id == dsa->id && target_lun == dsa->lun) { +-; *dest = *src; +-; break; +-; } +-; } +-; +-; if (!dsa) +-; error (int_err_unexpected_reselect); +-; else +-; longjmp (dsa->jump_resume, 0); +-; +-; +- +-#if (CHIP != 700) && (CHIP != 70066) +-; Define DSA structure used for mailboxes +-ENTRY dsa_code_template +-dsa_code_template: +-ENTRY dsa_code_begin +-dsa_code_begin: +-; RGH: Don't care about TEMP and DSA here +- DMODE_MEMORY_TO_NCR +- MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch +- DMODE_MEMORY_TO_MEMORY +-#if (CHIP == 710) +- MOVE MEMORY 4, addr_scratch, saved_dsa +- ; We are about to go and select the device, so must set SSCF bits +- MOVE MEMORY 4, dsa_sscf_710, addr_scratch +-#ifdef BIG_ENDIAN +- MOVE SCRATCH3 TO SFBR +-#else +- MOVE SCRATCH0 TO SFBR +-#endif +- MOVE SFBR TO SBCL +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#else +- CALL scratch_to_dsa +-#endif +- CALL select +-; Handle the phase mismatch which may have resulted from the +-; MOVE FROM dsa_msgout if we returned here. The CLEAR ATN +-; may or may not be necessary, and we should update script_asm.pl +-; to handle multiple pieces. +- CLEAR ATN +- CLEAR ACK +- +-; Replace second operand with address of JUMP instruction dest operand +-; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c. +-ENTRY dsa_code_fix_jump +-dsa_code_fix_jump: +- MOVE MEMORY 4, NOP_insn, 0 +- JUMP select_done +- +-; wrong_dsa loads the DSA register with the value of the dsa_next +-; field. +-; +-wrong_dsa: +-#if (CHIP == 710) +-; NOTE DSA is corrupt when we arrive here! +-#endif +-; Patch the MOVE MEMORY INSTRUCTION such that +-; the destination address is the address of the OLD +-; next pointer. +-; +- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8 +- DMODE_MEMORY_TO_NCR +-; +-; Move the _contents_ of the next pointer into the DSA register as +-; the next I_T_L or I_T_L_Q tupple to check against the established +-; nexus. +-; +- MOVE MEMORY 4, dsa_temp_next, addr_scratch +- DMODE_MEMORY_TO_MEMORY +-#if (CHIP == 710) +- MOVE MEMORY 4, addr_scratch, saved_dsa +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#else +- CALL scratch_to_dsa +-#endif +- JUMP reselected_check_next +- +-ABSOLUTE dsa_save_data_pointer = 0 +-ENTRY dsa_code_save_data_pointer +-dsa_code_save_data_pointer: +-#if (CHIP == 710) +- ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt +- ; We MUST return with DSA correct +- MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual +- CLEAR ACK +-#ifdef DEBUG +- INT int_debug_saved +-#endif +- MOVE MEMORY 4, saved_dsa, addr_dsa +- JUMP jump_temp +-#else +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_temp, dsa_temp_addr_saved_pointer +- DMODE_MEMORY_TO_MEMORY +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual +- CLEAR ACK +-#ifdef DEBUG +- INT int_debug_saved +-#endif +- RETURN +-#endif +-ABSOLUTE dsa_restore_pointers = 0 +-ENTRY dsa_code_restore_pointers +-dsa_code_restore_pointers: +-#if (CHIP == 710) +- ; TEMP and DSA are corrupt when we get here, but who cares! 
+- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4 +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual +- CLEAR ACK +- ; Restore DSA, note we don't care about TEMP +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#ifdef DEBUG +- INT int_debug_restored +-#endif +- JUMP jump_temp +-#else +- DMODE_MEMORY_TO_NCR +- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, addr_temp +- DMODE_MEMORY_TO_MEMORY +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual +- CLEAR ACK +-#ifdef DEBUG +- INT int_debug_restored +-#endif +- RETURN +-#endif +- +-ABSOLUTE dsa_check_reselect = 0 +-; dsa_check_reselect determines whether or not the current target and +-; lun match the current DSA +-ENTRY dsa_code_check_reselect +-dsa_code_check_reselect: +-#if (CHIP == 710) +- /* Arrives here with DSA correct */ +- /* Assumes we are always ID 7 */ +- MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set +- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80 +-#else +- MOVE SSID TO SFBR ; SSID contains 3 bit target ID +-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips +- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0xf8 +-#endif +-; +-; Hack - move to scratch first, since SFBR is not writeable +-; via the CPU and hence a MOVE MEMORY instruction. +-; +- DMODE_MEMORY_TO_NCR +- MOVE MEMORY 1, reselected_identify, addr_scratch +- DMODE_MEMORY_TO_MEMORY +-#ifdef BIG_ENDIAN +- ; BIG ENDIAN ON MVME16x +- MOVE SCRATCH3 TO SFBR +-#else +- MOVE SCRATCH0 TO SFBR +-#endif +-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips +-; Are you sure about that? richard@sleepie.demon.co.uk +- JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8 +-; Patch the MOVE MEMORY INSTRUCTION such that +-; the source address is the address of this dsa's +-; next pointer. +- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4 +- CALL reselected_ok +-#if (CHIP == 710) +-; Restore DSA following memory moves in reselected_ok +-; dsa_temp_sync doesn't really care about DSA, but it has an +-; optional debug INT so a valid DSA is a good idea. +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +- CALL dsa_temp_sync +-; Release ACK on the IDENTIFY message _after_ we've set the synchronous +-; transfer parameters! +- CLEAR ACK +-; Implicitly restore pointers on reselection, so a RETURN +-; will transfer control back to the right spot. +- CALL REL (dsa_code_restore_pointers) +- RETURN +-ENTRY dsa_zero +-dsa_zero: +-ENTRY dsa_code_template_end +-dsa_code_template_end: +- +-; Perform sanity check for dsa_fields_start == dsa_code_template_end - +-; dsa_zero, puke. +- +-ABSOLUTE dsa_fields_start = 0 ; Sanity marker +- ; pad 48 bytes (fix this RSN) +-ABSOLUTE dsa_next = 48 ; len 4 Next DSA +- ; del 4 Previous DSA address +-ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread. 
+-ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for +- ; table indirect select +-ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for +- ; select message +-ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for +- ; command +-ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout +-ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain +-ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin +-ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte +-ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out +- ; (Synchronous transfer negotiation, etc). +-ABSOLUTE dsa_end = 112 +- +-ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next), +- ; terminated by a call to JUMP wait_reselect +- +-; Linked lists of DSA structures +-ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect +-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing +- ; address of reconnect_dsa_head +- +-; These select the source and destination of a MOVE MEMORY instruction +-ABSOLUTE dmode_memory_to_memory = 0x0 +-ABSOLUTE dmode_memory_to_ncr = 0x0 +-ABSOLUTE dmode_ncr_to_memory = 0x0 +- +-ABSOLUTE addr_scratch = 0x0 +-ABSOLUTE addr_temp = 0x0 +-#if (CHIP == 710) +-ABSOLUTE saved_dsa = 0x0 +-ABSOLUTE emulfly = 0x0 +-ABSOLUTE addr_dsa = 0x0 +-#endif +-#endif /* CHIP != 700 && CHIP != 70066 */ +- +-; Interrupts - +-; MSB indicates type +-; 0 handle error condition +-; 1 handle message +-; 2 handle normal condition +-; 3 debugging interrupt +-; 4 testing interrupt +-; Next byte indicates specific error +- +-; XXX not yet implemented, I'm not sure if I want to - +-; Next byte indicates the routine the error occurred in +-; The LSB indicates the specific place the error occurred +- +-ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered +-ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED) +-ABSOLUTE int_err_unexpected_reselect = 0x00020000 +-ABSOLUTE int_err_check_condition = 0x00030000 +-ABSOLUTE int_err_no_phase = 0x00040000 +-ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received +-ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received +-ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message +- ; received +- +-ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram +- ; registers. +-ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established +-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete +-ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected +-ABSOLUTE int_norm_aborted =0x02040000 ; Aborted *dsa +-ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset. 
+-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly +-ABSOLUTE int_debug_break = 0x03000000 ; Break point +-#ifdef DEBUG +-ABSOLUTE int_debug_scheduled = 0x03010000 ; new I/O scheduled +-ABSOLUTE int_debug_idle = 0x03020000 ; scheduler is idle +-ABSOLUTE int_debug_dsa_loaded = 0x03030000 ; dsa reloaded +-ABSOLUTE int_debug_reselected = 0x03040000 ; NCR reselected +-ABSOLUTE int_debug_head = 0x03050000 ; issue head overwritten +-ABSOLUTE int_debug_disconnected = 0x03060000 ; disconnected +-ABSOLUTE int_debug_disconnect_msg = 0x03070000 ; got message to disconnect +-ABSOLUTE int_debug_dsa_schedule = 0x03080000 ; in dsa_schedule +-ABSOLUTE int_debug_reselect_check = 0x03090000 ; Check for reselection of DSA +-ABSOLUTE int_debug_reselected_ok = 0x030a0000 ; Reselection accepted +-#endif +-ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver +-#ifdef DEBUG +-ABSOLUTE int_debug_saved = 0x030c0000 ; save/restore pointers +-ABSOLUTE int_debug_restored = 0x030d0000 +-ABSOLUTE int_debug_sync = 0x030e0000 ; Sanity check synchronous +- ; parameters. +-ABSOLUTE int_debug_datain = 0x030f0000 ; going into data in phase +- ; now. +-ABSOLUTE int_debug_check_dsa = 0x03100000 ; Sanity check DSA against +- ; SDID. +-#endif +- +-ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete +-ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete +-ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete +- +- +-; These should start with 0x05000000, with low bits incrementing for +-; each one. +- +-#ifdef EVENTS +-ABSOLUTE int_EVENT_SELECT = 0 +-ABSOLUTE int_EVENT_DISCONNECT = 0 +-ABSOLUTE int_EVENT_RESELECT = 0 +-ABSOLUTE int_EVENT_COMPLETE = 0 +-ABSOLUTE int_EVENT_IDLE = 0 +-ABSOLUTE int_EVENT_SELECT_FAILED = 0 +-ABSOLUTE int_EVENT_BEFORE_SELECT = 0 +-ABSOLUTE int_EVENT_RESELECT_FAILED = 0 +-#endif +- +-ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message +-ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message +-ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source +-ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in +-ABSOLUTE NOP_insn = 0 ; NOP instruction +- +-; Pointer to message, potentially multi-byte +-ABSOLUTE msg_buf = 0 +- +-; Pointer to holding area for reselection information +-ABSOLUTE reselected_identify = 0 +-ABSOLUTE reselected_tag = 0 +- +-; Request sense command pointer, it's a 6 byte command, should +-; be constant for all commands since we always want 16 bytes of +-; sense and we don't need to change any fields as we did under +-; SCSI-I when we actually cared about the LUN field. +-;EXTERNAL NCR53c7xx_sense ; Request sense command +- +-#if (CHIP != 700) && (CHIP != 70066) +-; dsa_schedule +-; PURPOSE : after a DISCONNECT message has been received, and pointers +-; saved, insert the current DSA structure at the head of the +-; disconnected queue and fall through to the scheduler. 
+-; +-; CALLS : OK +-; +-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list +-; of disconnected commands +-; +-; MODIFIES : SCRATCH, reconnect_dsa_head +-; +-; EXITS : always passes control to schedule +- +-ENTRY dsa_schedule +-dsa_schedule: +-#ifdef DEBUG +- INT int_debug_dsa_schedule +-#endif +- +-; +-; Calculate the address of the next pointer within the DSA +-; structure of the command that is currently disconnecting +-; +-#if (CHIP == 710) +- ; Read what should be the current DSA from memory - actual DSA +- ; register is probably corrupt +- MOVE MEMORY 4, saved_dsa, addr_scratch +-#else +- CALL dsa_to_scratch +-#endif +- MOVE SCRATCH0 + dsa_next TO SCRATCH0 +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- +-; Point the next field of this DSA structure at the current disconnected +-; list +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8 +- DMODE_MEMORY_TO_MEMORY +-dsa_schedule_insert: +- MOVE MEMORY 4, reconnect_dsa_head, 0 +- +-; And update the head pointer. +-#if (CHIP == 710) +- ; Read what should be the current DSA from memory - actual DSA +- ; register is probably corrupt +- MOVE MEMORY 4, saved_dsa, addr_scratch +-#else +- CALL dsa_to_scratch +-#endif +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, reconnect_dsa_head +- DMODE_MEMORY_TO_MEMORY +-/* Temporarily, see what happens. */ +-#ifndef ORIGINAL +-#if (CHIP != 710) +- MOVE SCNTL2 & 0x7f TO SCNTL2 +-#endif +- CLEAR ACK +-#endif +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +- WAIT DISCONNECT +-#ifdef EVENTS +- INT int_EVENT_DISCONNECT; +-#endif +-#ifdef DEBUG +- INT int_debug_disconnected +-#endif +- JUMP schedule +-#endif +- +-; +-; select +-; +-; PURPOSE : establish a nexus for the SCSI command referenced by DSA. +-; On success, the current DSA structure is removed from the issue +-; queue. Usually, this is entered as a fall-through from schedule, +-; although the contingent allegiance handling code will write +-; the select entry address to the DSP to restart a command as a +-; REQUEST SENSE. A message is sent (usually IDENTIFY, although +-; additional SDTR or WDTR messages may be sent). COMMAND OUT +-; is handled. +-; +-; INPUTS : DSA - SCSI command, issue_dsa_head +-; +-; CALLS : NOT OK +-; +-; MODIFIES : SCRATCH, issue_dsa_head +-; +-; EXITS : on reselection or selection, go to select_failed +-; otherwise, RETURN so control is passed back to +-; dsa_begin. +-; +- +-ENTRY select +-select: +- +-#ifdef EVENTS +- INT int_EVENT_BEFORE_SELECT +-#endif +- +-#ifdef DEBUG +- INT int_debug_scheduled +-#endif +- CLEAR TARGET +- +-; XXX +-; +-; In effect, SELECTION operations are backgrounded, with execution +-; continuing until code which waits for REQ or a fatal interrupt is +-; encountered. +-; +-; So, for more performance, we could overlap the code which removes +-; the command from the NCRs issue queue with the selection, but +-; at this point I don't want to deal with the error recovery. 
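
Stepping back to dsa_schedule above: the SCRATCH arithmetic with explicit carries and the patched MOVE MEMORY at dsa_schedule_insert are only needed because the SCRIPTS engine has no indirect stores. The net effect is an ordinary head insertion into the disconnected queue; a C sketch with an assumed struct layout (not the driver's own types):

/* Assumed mirror of the DSA mailbox; only the link matters here. */
struct dsa {
	struct dsa *next;	/* corresponds to the dsa_next field */
	/* ... fixed-up code and table-indirect data ... */
};

static struct dsa *reconnect_dsa_head;	/* disconnected-command list */

/* What dsa_schedule effectively does before WAIT DISCONNECT. */
static void dsa_schedule_sketch(struct dsa *dsa)
{
	dsa->next = reconnect_dsa_head;	/* point this DSA at the old list */
	reconnect_dsa_head = dsa;	/* and update the head pointer */
}
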
+-; +- +-#if (CHIP != 700) && (CHIP != 70066) +-#if (CHIP == 710) +- ; Enable selection timer +-#ifdef NO_SELECTION_TIMEOUT +- MOVE CTEST7 & 0xff TO CTEST7 +-#else +- MOVE CTEST7 & 0xef TO CTEST7 +-#endif +-#endif +- SELECT ATN FROM dsa_select, select_failed +- JUMP select_msgout, WHEN MSG_OUT +-ENTRY select_msgout +-select_msgout: +-#if (CHIP == 710) +- ; Disable selection timer +- MOVE CTEST7 | 0x10 TO CTEST7 +-#endif +- MOVE FROM dsa_msgout, WHEN MSG_OUT +-#else +-ENTRY select_msgout +- SELECT ATN 0, select_failed +-select_msgout: +- MOVE 0, 0, WHEN MSGOUT +-#endif +- +-#ifdef EVENTS +- INT int_EVENT_SELECT +-#endif +- RETURN +- +-; +-; select_done +-; +-; PURPOSE: continue on to normal data transfer; called as the exit +-; point from dsa_begin. +-; +-; INPUTS: dsa +-; +-; CALLS: OK +-; +-; +- +-select_done: +-#if (CHIP == 710) +-; NOTE DSA is corrupt when we arrive here! +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +- +-#ifdef DEBUG +-ENTRY select_check_dsa +-select_check_dsa: +- INT int_debug_check_dsa +-#endif +- +-; After a successful selection, we should get either a CMD phase or +-; some transfer request negotiation message. +- +- JUMP cmdout, WHEN CMD +- INT int_err_unexpected_phase, WHEN NOT MSG_IN +- +-select_msg_in: +- CALL msg_in, WHEN MSG_IN +- JUMP select_msg_in, WHEN MSG_IN +- +-cmdout: +- INT int_err_unexpected_phase, WHEN NOT CMD +-#if (CHIP == 700) +- INT int_norm_selected +-#endif +-ENTRY cmdout_cmdout +-cmdout_cmdout: +-#if (CHIP != 700) && (CHIP != 70066) +- MOVE FROM dsa_cmdout, WHEN CMD +-#else +- MOVE 0, 0, WHEN CMD +-#endif /* (CHIP != 700) && (CHIP != 70066) */ +- +-; +-; data_transfer +-; other_out +-; other_in +-; other_transfer +-; +-; PURPOSE : handle the main data transfer for a SCSI command in +-; several parts. In the first part, data_transfer, DATA_IN +-; and DATA_OUT phases are allowed, with the user provided +-; code (usually dynamically generated based on the scatter/gather +-; list associated with a SCSI command) called to handle these +-; phases. +-; +-; After control has passed to one of the user provided +-; DATA_IN or DATA_OUT routines, back calls are made to +-; other_transfer_in or other_transfer_out to handle non-DATA IN +-; and DATA OUT phases respectively, with the state of the active +-; data pointer being preserved in TEMP. +-; +-; On completion, the user code passes control to other_transfer +-; which causes DATA_IN and DATA_OUT to result in unexpected_phase +-; interrupts so that data overruns may be trapped. +-; +-; INPUTS : DSA - SCSI command +-; +-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in +-; other_transfer +-; +-; MODIFIES : SCRATCH +-; +-; EXITS : if STATUS IN is detected, signifying command completion, +-; the NCR jumps to command_complete. If MSG IN occurs, a +-; CALL is made to msg_in. Otherwise, other_transfer runs in +-; an infinite loop. +-; +- +-ENTRY data_transfer +-data_transfer: +- JUMP cmdout_cmdout, WHEN CMD +- CALL msg_in, WHEN MSG_IN +- INT int_err_unexpected_phase, WHEN MSG_OUT +- JUMP do_dataout, WHEN DATA_OUT +- JUMP do_datain, WHEN DATA_IN +- JUMP command_complete, WHEN STATUS +- JUMP data_transfer +-ENTRY end_data_transfer +-end_data_transfer: +- +-; +-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain +-; should be fixed up whenever the nexus changes so it can point to the +-; correct routine for that command. 
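
data_transfer above is a phase dispatcher: each conditional JUMP or CALL fires when the target drives the corresponding bus phase, and the final unconditional JUMP re-polls. Rendered as host-style C purely for illustration (bus_phase() and the handlers are stand-ins; on the chip this loop runs inside the SCRIPTS engine):

enum scsi_phase {
	PHASE_CMD, PHASE_MSG_IN, PHASE_MSG_OUT,
	PHASE_DATA_OUT, PHASE_DATA_IN, PHASE_STATUS
};

/* Stand-ins for the script entry points reached from data_transfer. */
extern enum scsi_phase bus_phase(void);
extern void msg_in_handler(void);
extern void cmdout_cmdout(void);
extern void do_dataout(void);
extern void do_datain(void);
extern void command_complete(void);
extern void unexpected_phase(void);

static void data_transfer_sketch(void)
{
	for (;;) {
		switch (bus_phase()) {
		case PHASE_CMD:      cmdout_cmdout(); return;
		case PHASE_MSG_IN:   msg_in_handler(); break;	/* CALL, then re-poll */
		case PHASE_MSG_OUT:  unexpected_phase(); return;
		case PHASE_DATA_OUT: do_dataout(); return;	/* user S/G code */
		case PHASE_DATA_IN:  do_datain(); return;
		case PHASE_STATUS:   command_complete(); return;
		}
	}
}
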
+-; +- +-#if (CHIP != 700) && (CHIP != 70066) +-; Nasty jump to dsa->dataout +-do_dataout: +-#if (CHIP == 710) +- MOVE MEMORY 4, saved_dsa, addr_scratch +-#else +- CALL dsa_to_scratch +-#endif +- MOVE SCRATCH0 + dsa_dataout TO SCRATCH0 +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4 +- DMODE_MEMORY_TO_MEMORY +-dataout_to_jump: +- MOVE MEMORY 4, 0, dataout_jump + 4 +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +-dataout_jump: +- JUMP 0 +- +-; Nasty jump to dsa->dsain +-do_datain: +-#if (CHIP == 710) +- MOVE MEMORY 4, saved_dsa, addr_scratch +-#else +- CALL dsa_to_scratch +-#endif +- MOVE SCRATCH0 + dsa_datain TO SCRATCH0 +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, datain_to_jump + 4 +- DMODE_MEMORY_TO_MEMORY +-ENTRY datain_to_jump +-datain_to_jump: +- MOVE MEMORY 4, 0, datain_jump + 4 +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +-#ifdef DEBUG +- INT int_debug_datain +-#endif +-datain_jump: +- JUMP 0 +-#endif /* (CHIP != 700) && (CHIP != 70066) */ +- +- +-; Note that other_out and other_in loop until a non-data phase +-; is discovered, so we only execute return statements when we +-; can go on to the next data phase block move statement. +- +-ENTRY other_out +-other_out: +-#if 0 +- INT 0x03ffdead +-#endif +- INT int_err_unexpected_phase, WHEN CMD +- JUMP msg_in_restart, WHEN MSG_IN +- INT int_err_unexpected_phase, WHEN MSG_OUT +- INT int_err_unexpected_phase, WHEN DATA_IN +- JUMP command_complete, WHEN STATUS +- JUMP other_out, WHEN NOT DATA_OUT +-#if (CHIP == 710) +-; TEMP should be OK, as we got here from a call in the user dataout code. +-#endif +- RETURN +- +-ENTRY other_in +-other_in: +-#if 0 +- INT 0x03ffdead +-#endif +- INT int_err_unexpected_phase, WHEN CMD +- JUMP msg_in_restart, WHEN MSG_IN +- INT int_err_unexpected_phase, WHEN MSG_OUT +- INT int_err_unexpected_phase, WHEN DATA_OUT +- JUMP command_complete, WHEN STATUS +- JUMP other_in, WHEN NOT DATA_IN +-#if (CHIP == 710) +-; TEMP should be OK, as we got here from a call in the user datain code. +-#endif +- RETURN +- +- +-ENTRY other_transfer +-other_transfer: +- INT int_err_unexpected_phase, WHEN CMD +- CALL msg_in, WHEN MSG_IN +- INT int_err_unexpected_phase, WHEN MSG_OUT +- INT int_err_unexpected_phase, WHEN DATA_OUT +- INT int_err_unexpected_phase, WHEN DATA_IN +- JUMP command_complete, WHEN STATUS +- JUMP other_transfer +- +-; +-; msg_in_restart +-; msg_in +-; munge_msg +-; +-; PURPOSE : process messages from a target. msg_in is called when the +-; caller hasn't read the first byte of the message. munge_message +-; is called when the caller has read the first byte of the message, +-; and left it in SFBR. msg_in_restart is called when the caller +-; hasn't read the first byte of the message, and wishes RETURN +-; to transfer control back to the address of the conditional +-; CALL instruction rather than to the instruction after it. +-; +-; Various int_* interrupts are generated when the host system +-; needs to intervene, as is the case with SDTR, WDTR, and +-; INITIATE RECOVERY messages. 
+-; +-; When the host system handles one of these interrupts, +-; it can respond by reentering at reject_message, +-; which rejects the message and returns control to +-; the caller of msg_in or munge_msg, accept_message +-; which clears ACK and returns control, or reply_message +-; which sends the message pointed to by the DSA +-; msgout_other table indirect field. +-; +-; DISCONNECT messages are handled by moving the command +-; to the reconnect_dsa_queue. +-#if (CHIP == 710) +-; NOTE: DSA should be valid when we get here - we cannot save both it +-; and TEMP in this routine. +-#endif +-; +-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg +-; only) +-; +-; CALLS : NO. The TEMP register isn't backed up to allow nested calls. +-; +-; MODIFIES : SCRATCH, DSA on DISCONNECT +-; +-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS, +-; and normal return from message handlers running under +-; Linux, control is returned to the caller. Receipt +-; of DISCONNECT messages pass control to dsa_schedule. +-; +-ENTRY msg_in_restart +-msg_in_restart: +-; XXX - hackish +-; +-; Since it's easier to debug changes to the statically +-; compiled code, rather than the dynamically generated +-; stuff, such as +-; +-; MOVE x, y, WHEN data_phase +-; CALL other_z, WHEN NOT data_phase +-; MOVE x, y, WHEN data_phase +-; +-; I'd like to have certain routines (notably the message handler) +-; restart on the conditional call rather than the next instruction. +-; +-; So, subtract 8 from the return address +- +- MOVE TEMP0 + 0xf8 TO TEMP0 +- MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY +- MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY +- MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY +- +-ENTRY msg_in +-msg_in: +- MOVE 1, msg_buf, WHEN MSG_IN +- +-munge_msg: +- JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE +- JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message +-; +-; XXX - I've seen a handful of broken SCSI devices which fail to issue +-; a SAVE POINTERS message before disconnecting in the middle of +-; a transfer, assuming that the DATA POINTER will be implicitly +-; restored. +-; +-; Historically, I've often done an implicit save when the DISCONNECT +-; message is processed. We may want to consider having the option of +-; doing that here. +-; +- JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER +- JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS +- JUMP munge_disconnect, IF 0x04 ; DISCONNECT +- INT int_msg_1, IF 0x07 ; MESSAGE REJECT +- INT int_msg_1, IF 0x0f ; INITIATE RECOVERY +-#ifdef EVENTS +- INT int_EVENT_SELECT_FAILED +-#endif +- JUMP reject_message +- +-munge_2: +- JUMP reject_message +-; +-; The SCSI standard allows targets to recover from transient +-; error conditions by backing up the data pointer with a +-; RESTORE POINTERS message. +-; +-; So, we must save and restore the _residual_ code as well as +-; the current instruction pointer. Because of this messiness, +-; it is simpler to put dynamic code in the dsa for this and to +-; just do a simple jump down there. 
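
Concretely, the dynamic code reached through munge_save_data_pointer below copies the script address that was in TEMP, plus the 24 bytes of residual transfer code the driver keeps per command, into per-command save slots; the restore path copies them back so the target can rewind the transfer. A host-side C sketch of that contract (field names assumed; the real layout lives in 53c7,8xx.h):

typedef unsigned int u32;

/* Assumed per-command slots mirroring the save/restore contract. */
struct data_pointers {
	u32 current_insn;	/* script address, held in TEMP on the chip */
	u32 saved_insn;		/* per-command saved data pointer */
	u32 residual[6];	/* 24 bytes of current residual code */
	u32 saved_residual[6];	/* 24 bytes of saved residual code */
};

static void save_data_pointer_sketch(struct data_pointers *p)
{
	unsigned int i;

	p->saved_insn = p->current_insn;
	for (i = 0; i < 6; i++)		/* the hard-coded 24 bytes */
		p->saved_residual[i] = p->residual[i];
}

static void restore_pointers_sketch(struct data_pointers *p)
{
	unsigned int i;

	p->current_insn = p->saved_insn;
	for (i = 0; i < 6; i++)
		p->residual[i] = p->saved_residual[i];
}
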
+-; +- +-munge_save_data_pointer: +-#if (CHIP == 710) +- ; We have something in TEMP here, so first we must save that +- MOVE TEMP0 TO SFBR +- MOVE SFBR TO SCRATCH0 +- MOVE TEMP1 TO SFBR +- MOVE SFBR TO SCRATCH1 +- MOVE TEMP2 TO SFBR +- MOVE SFBR TO SCRATCH2 +- MOVE TEMP3 TO SFBR +- MOVE SFBR TO SCRATCH3 +- MOVE MEMORY 4, addr_scratch, jump_temp + 4 +- ; Now restore DSA +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +- MOVE DSA0 + dsa_save_data_pointer TO SFBR +- MOVE SFBR TO SCRATCH0 +- MOVE DSA1 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH1 +- MOVE DSA2 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH2 +- MOVE DSA3 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH3 +- +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4 +- DMODE_MEMORY_TO_MEMORY +-jump_dsa_save: +- JUMP 0 +- +-munge_restore_pointers: +-#if (CHIP == 710) +- ; The code at dsa_restore_pointers will RETURN, but we don't care +- ; about TEMP here, as it will overwrite it anyway. +-#endif +- MOVE DSA0 + dsa_restore_pointers TO SFBR +- MOVE SFBR TO SCRATCH0 +- MOVE DSA1 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH1 +- MOVE DSA2 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH2 +- MOVE DSA3 + 0xff TO SFBR WITH CARRY +- MOVE SFBR TO SCRATCH3 +- +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4 +- DMODE_MEMORY_TO_MEMORY +-jump_dsa_restore: +- JUMP 0 +- +- +-munge_disconnect: +-#ifdef DEBUG +- INT int_debug_disconnect_msg +-#endif +- +-/* +- * Before, we overlapped processing with waiting for disconnect, but +- * debugging was beginning to appear messy. Temporarily move things +- * to just before the WAIT DISCONNECT. +- */ +- +-#ifdef ORIGINAL +-#if (CHIP == 710) +-; Following clears Unexpected Disconnect bit. What do we do? +-#else +- MOVE SCNTL2 & 0x7f TO SCNTL2 +-#endif +- CLEAR ACK +-#endif +- +-#if (CHIP != 700) && (CHIP != 70066) +- JUMP dsa_schedule +-#else +- WAIT DISCONNECT +- INT int_norm_disconnected +-#endif +- +-munge_extended: +- CLEAR ACK +- INT int_err_unexpected_phase, WHEN NOT MSG_IN +- MOVE 1, msg_buf + 1, WHEN MSG_IN +- JUMP munge_extended_2, IF 0x02 +- JUMP munge_extended_3, IF 0x03 +- JUMP reject_message +- +-munge_extended_2: +- CLEAR ACK +- MOVE 1, msg_buf + 2, WHEN MSG_IN +- JUMP reject_message, IF NOT 0x02 ; Must be WDTR +- CLEAR ACK +- MOVE 1, msg_buf + 3, WHEN MSG_IN +- INT int_msg_wdtr +- +-munge_extended_3: +- CLEAR ACK +- MOVE 1, msg_buf + 2, WHEN MSG_IN +- JUMP reject_message, IF NOT 0x01 ; Must be SDTR +- CLEAR ACK +- MOVE 2, msg_buf + 3, WHEN MSG_IN +- INT int_msg_sdtr +- +-ENTRY reject_message +-reject_message: +- SET ATN +- CLEAR ACK +- MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT +- RETURN +- +-ENTRY accept_message +-accept_message: +- CLEAR ATN +- CLEAR ACK +- RETURN +- +-ENTRY respond_message +-respond_message: +- SET ATN +- CLEAR ACK +- MOVE FROM dsa_msgout_other, WHEN MSG_OUT +- RETURN +- +-; +-; command_complete +-; +-; PURPOSE : handle command termination when STATUS IN is detected by reading +-; a status byte followed by a command termination message. +-; +-; Normal termination results in an INTFLY instruction, and +-; the host system can pick out which command terminated by +-; examining the MESSAGE and STATUS buffers of all currently +-; executing commands; +-; +-; Abnormal (CHECK_CONDITION) termination results in an +-; int_err_check_condition interrupt so that a REQUEST SENSE +-; command can be issued out-of-order so that no other command +-; clears the contingent allegiance condition. 
+-; +-; +-; INPUTS : DSA - command +-; +-; CALLS : OK +-; +-; EXITS : On successful termination, control is passed to schedule. +-; On abnormal termination, the user will usually modify the +-; DSA fields and corresponding buffers and return control +-; to select. +-; +- +-ENTRY command_complete +-command_complete: +- MOVE FROM dsa_status, WHEN STATUS +-#if (CHIP != 700) && (CHIP != 70066) +- MOVE SFBR TO SCRATCH0 ; Save status +-#endif /* (CHIP != 700) && (CHIP != 70066) */ +-ENTRY command_complete_msgin +-command_complete_msgin: +- MOVE FROM dsa_msgin, WHEN MSG_IN +-; Indicate that we should be expecting a disconnect +-#if (CHIP != 710) +- MOVE SCNTL2 & 0x7f TO SCNTL2 +-#else +- ; Above code cleared the Unexpected Disconnect bit, what do we do? +-#endif +- CLEAR ACK +-#if (CHIP != 700) && (CHIP != 70066) +- WAIT DISCONNECT +- +-; +-; The SCSI specification states that when a UNIT ATTENTION condition +-; is pending, as indicated by a CHECK CONDITION status message, +-; the target shall revert to asynchronous transfers. Since +-; synchronous transfers parameters are maintained on a per INITIATOR/TARGET +-; basis, and returning control to our scheduler could work on a command +-; running on another lun on that target using the old parameters, we must +-; interrupt the host processor to get them changed, or change them ourselves. +-; +-; Once SCSI-II tagged queueing is implemented, things will be even more +-; hairy, since contingent allegiance conditions exist on a per-target/lun +-; basis, and issuing a new command with a different tag would clear it. +-; In these cases, we must interrupt the host processor to get a request +-; added to the HEAD of the queue with the request sense command, or we +-; must automatically issue the request sense command. +- +-#if 0 +- MOVE SCRATCH0 TO SFBR +- JUMP command_failed, IF 0x02 +-#endif +-#if (CHIP == 710) +-#if defined(MVME16x_INTFLY) +-; For MVME16x (ie CHIP=710) we will force an INTFLY by triggering a software +-; interrupt (SW7). We can use SCRATCH, as we are about to jump to +-; schedule, which corrupts it anyway. Will probably remove this later, +-; but want to check performance effects first. +- +-#define INTFLY_ADDR 0xfff40070 +- +- MOVE 0 TO SCRATCH0 +- MOVE 0x80 TO SCRATCH1 +- MOVE 0 TO SCRATCH2 +- MOVE 0 TO SCRATCH3 +- MOVE MEMORY 4, addr_scratch, INTFLY_ADDR +-#else +- INT int_norm_emulateintfly +-#endif +-#else +- INTFLY +-#endif +-#endif /* (CHIP != 700) && (CHIP != 70066) */ +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +-#ifdef EVENTS +- INT int_EVENT_COMPLETE +-#endif +-#if (CHIP != 700) && (CHIP != 70066) +- JUMP schedule +-command_failed: +- INT int_err_check_condition +-#else +- INT int_norm_command_complete +-#endif +- +-; +-; wait_reselect +-; +-; PURPOSE : This is essentially the idle routine, where control lands +-; when there are no new processes to schedule. wait_reselect +-; waits for reselection, selection, and new commands. +-; +-; When a successful reselection occurs, with the aid +-; of fixed up code in each DSA, wait_reselect walks the +-; reconnect_dsa_queue, asking each dsa if the target ID +-; and LUN match its. +-; +-; If a match is found, a call is made back to reselected_ok, +-; which through the miracles of self modifying code, extracts +-; the found DSA from the reconnect_dsa_queue and then +-; returns control to the DSAs thread of execution. 
+-; +-; INPUTS : NONE +-; +-; CALLS : OK +-; +-; MODIFIES : DSA, +-; +-; EXITS : On successful reselection, control is returned to the +-; DSA which called reselected_ok. If the WAIT RESELECT +-; was interrupted by a new commands arrival signaled by +-; SIG_P, control is passed to schedule. If the NCR is +-; selected, the host system is interrupted with an +-; int_err_selected which is usually responded to by +-; setting DSP to the target_abort address. +- +-ENTRY wait_reselect +-wait_reselect: +-#ifdef EVENTS +- int int_EVENT_IDLE +-#endif +-#ifdef DEBUG +- int int_debug_idle +-#endif +- WAIT RESELECT wait_reselect_failed +- +-reselected: +-#ifdef EVENTS +- int int_EVENT_RESELECT +-#endif +- CLEAR TARGET +- DMODE_MEMORY_TO_MEMORY +- ; Read all data needed to reestablish the nexus - +- MOVE 1, reselected_identify, WHEN MSG_IN +- ; We used to CLEAR ACK here. +-#if (CHIP != 700) && (CHIP != 70066) +-#ifdef DEBUG +- int int_debug_reselected +-#endif +- +- ; Point DSA at the current head of the disconnected queue. +- DMODE_MEMORY_TO_NCR +- MOVE MEMORY 4, reconnect_dsa_head, addr_scratch +- DMODE_MEMORY_TO_MEMORY +-#if (CHIP == 710) +- MOVE MEMORY 4, addr_scratch, saved_dsa +-#else +- CALL scratch_to_dsa +-#endif +- +- ; Fix the update-next pointer so that the reconnect_dsa_head +- ; pointer is the one that will be updated if this DSA is a hit +- ; and we remove it from the queue. +- +- MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8 +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +- +-ENTRY reselected_check_next +-reselected_check_next: +-#ifdef DEBUG +- INT int_debug_reselect_check +-#endif +- ; Check for a NULL pointer. +- MOVE DSA0 TO SFBR +- JUMP reselected_not_end, IF NOT 0 +- MOVE DSA1 TO SFBR +- JUMP reselected_not_end, IF NOT 0 +- MOVE DSA2 TO SFBR +- JUMP reselected_not_end, IF NOT 0 +- MOVE DSA3 TO SFBR +- JUMP reselected_not_end, IF NOT 0 +- INT int_err_unexpected_reselect +- +-reselected_not_end: +- ; +- ; XXX the ALU is only eight bits wide, and the assembler +- ; wont do the dirt work for us. As long as dsa_check_reselect +- ; is negative, we need to sign extend with 1 bits to the full +- ; 32 bit width of the address. +- ; +- ; A potential work around would be to have a known alignment +- ; of the DSA structure such that the base address plus +- ; dsa_check_reselect doesn't require carrying from bytes +- ; higher than the LSB. +- ; +- +- MOVE DSA0 TO SFBR +- MOVE SFBR + dsa_check_reselect TO SCRATCH0 +- MOVE DSA1 TO SFBR +- MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY +- MOVE DSA2 TO SFBR +- MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY +- MOVE DSA3 TO SFBR +- MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY +- +- DMODE_NCR_TO_MEMORY +- MOVE MEMORY 4, addr_scratch, reselected_check + 4 +- DMODE_MEMORY_TO_MEMORY +-#if (CHIP == 710) +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +-#endif +-reselected_check: +- JUMP 0 +- +- +-; +-; +-#if (CHIP == 710) +-; We have problems here - the memory move corrupts TEMP and DSA. This +-; routine is called from DSA code, and patched from many places. Scratch +-; is probably free when it is called. 
+-; We have to:
+-; copy temp to scratch, one byte at a time
+-; write scratch to patch a jump in place of the return
+-; do the move memory
+-; jump to the patched in return address
+-; DSA is corrupt when we get here, and can be left corrupt
+-
+-ENTRY reselected_ok
+-reselected_ok:
+- MOVE TEMP0 TO SFBR
+- MOVE SFBR TO SCRATCH0
+- MOVE TEMP1 TO SFBR
+- MOVE SFBR TO SCRATCH1
+- MOVE TEMP2 TO SFBR
+- MOVE SFBR TO SCRATCH2
+- MOVE TEMP3 TO SFBR
+- MOVE SFBR TO SCRATCH3
+- MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4
+-reselected_ok_patch:
+- MOVE MEMORY 4, 0, 0
+-reselected_ok_jump:
+- JUMP 0
+-#else
+-ENTRY reselected_ok
+-reselected_ok:
+-reselected_ok_patch:
+- MOVE MEMORY 4, 0, 0 ; Patched : first word
+- ; is address of
+- ; successful dsa_next
+- ; Second word is last
+- ; unsuccessful dsa_next,
+- ; starting with
+- ; dsa_reconnect_head
+- ; We used to CLEAR ACK here.
+-#ifdef DEBUG
+- INT int_debug_reselected_ok
+-#endif
+-#ifdef DEBUG
+- INT int_debug_check_dsa
+-#endif
+- RETURN ; Return control to where
+-#endif
+-#else
+- INT int_norm_reselected
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-selected:
+- INT int_err_selected;
+-
+-;
+-; A select or reselect failure can be caused by one of three conditions :
+-; 1. SIG_P was set. This will be the case if the user has written
+-; a new value to a previously NULL head of the issue queue.
+-;
+-; 2. The NCR53c810 was selected or reselected by another device.
+-;
+-; 3. The bus was already busy since we were selected or reselected
+-; before starting the command.
+-
+-wait_reselect_failed:
+-#ifdef EVENTS
+- INT int_EVENT_RESELECT_FAILED
+-#endif
+-; Check selected bit.
+-#if (CHIP == 710)
+- ; Must work out how to tell if we are selected....
+-#else
+- MOVE SIST0 & 0x20 TO SFBR
+- JUMP selected, IF 0x20
+-#endif
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+- JUMP schedule, IF 0x40
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+- MOVE ISTAT & 0x08 TO SFBR
+- JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+- JUMP schedule
+-#else
+- INT int_debug_panic
+-#endif
+-
+-
+-select_failed:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-#ifdef EVENTS
+- int int_EVENT_SELECT_FAILED
+-#endif
+-; Otherwise, mask the selected and reselected bits off SIST0
+-#if (CHIP ==710)
+- ; Let's assume we don't get selected for now
+- MOVE SSTAT0 & 0x10 TO SFBR
+-#else
+- MOVE SIST0 & 0x30 TO SFBR
+- JUMP selected, IF 0x20
+-#endif
+- JUMP reselected, IF 0x10
+-; If SIGP is set, the user just gave us another command, and
+-; we should restart or return to the scheduler.
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+- MOVE CTEST2 & 0x40 TO SFBR
+- JUMP select, IF 0x40
+-; Check connected bit.
+-; FIXME: this needs to change if we support target mode
+-; FIXME: is this really necessary?
+- MOVE ISTAT & 0x08 TO SFBR
+- JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+- JUMP schedule
+-#else
+- INT int_debug_panic
+-#endif
+-
+-;
+-; test_1
+-; test_2
+-;
+-; PURPOSE : run some verification tests on the NCR. test_1
+-; copies test_src to test_dest and interrupts the host
+-; processor, testing for cache coherency and interrupt
+-; problems in the process.
+-;
+-; test_2 runs a command with offsets relative to the
+-; DSA on entry, and is useful for miscellaneous experimentation.
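
test_1, which follows, is the smoke test for DMA visibility and interrupt delivery: the script bus-masters a MOVE MEMORY from test_src to test_dest and raises int_test_1. A hypothetical host-side check (start_script_at() and wait_for_test_interrupt() are assumed helpers, not driver functions) might read:

typedef unsigned int u32;

extern volatile u32 test_src, test_dest;	/* script-visible longs */
extern void *test_1_entry;
extern void start_script_at(void *entry);	/* assumed: loads DSP */
extern void wait_for_test_interrupt(void);	/* assumed: int_test_1 */

static int run_test_1_sketch(void)
{
	test_src = 0x53c70001;	/* arbitrary pattern */
	test_dest = 0;
	start_script_at(test_1_entry);
	wait_for_test_interrupt();
	/* If the pattern never shows up, the DMA write landed behind a
	 * stale cache line or the interrupt path is broken. */
	return (test_dest == test_src) ? 0 : -1;
}
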
+-;
+-
+-; Verify that interrupts are working correctly and that we don't
+-; have a cache invalidation problem.
+-
+-ABSOLUTE test_src = 0, test_dest = 0
+-ENTRY test_1
+-test_1:
+- MOVE MEMORY 4, test_src, test_dest
+- INT int_test_1
+-
+-;
+-; Run arbitrary commands, with test code establishing a DSA
+-;
+-
+-ENTRY test_2
+-test_2:
+- CLEAR TARGET
+-#if (CHIP == 710)
+- ; Enable selection timer
+-#ifdef NO_SELECTION_TIMEOUT
+- MOVE CTEST7 & 0xff TO CTEST7
+-#else
+- MOVE CTEST7 & 0xef TO CTEST7
+-#endif
+-#endif
+- SELECT ATN FROM 0, test_2_fail
+- JUMP test_2_msgout, WHEN MSG_OUT
+-ENTRY test_2_msgout
+-test_2_msgout:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+- MOVE FROM 8, WHEN MSG_OUT
+- MOVE FROM 16, WHEN CMD
+- MOVE FROM 24, WHEN DATA_IN
+- MOVE FROM 32, WHEN STATUS
+- MOVE FROM 40, WHEN MSG_IN
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- CLEAR ACK
+- WAIT DISCONNECT
+-test_2_fail:
+-#if (CHIP == 710)
+- ; Disable selection timer
+- MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+- INT int_test_2
+-
+-ENTRY debug_break
+-debug_break:
+- INT int_debug_break
+-
+-;
+-; initiator_abort
+-; target_abort
+-;
+-; PURPOSE : Abort the currently established nexus from within initiator
+-; or target mode.
+-;
+-;
+-
+-ENTRY target_abort
+-target_abort:
+- SET TARGET
+- DISCONNECT
+- CLEAR TARGET
+- JUMP schedule
+-
+-ENTRY initiator_abort
+-initiator_abort:
+- SET ATN
+-;
+-; The SCSI-I specification says that targets may go into MSG out at
+-; their leisure upon receipt of the ATN signal. On all versions of the
+-; specification, we can't change phases until REQ transitions true->false,
+-; so we need to sink/source one byte of data to allow the transition.
+-;
+-; For the sake of safety, we'll only source one byte of data in all
+-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an
+-; arbitrary number of bytes.
+- JUMP spew_cmd, WHEN CMD
+- JUMP eat_msgin, WHEN MSG_IN
+- JUMP eat_datain, WHEN DATA_IN
+- JUMP eat_status, WHEN STATUS
+- JUMP spew_dataout, WHEN DATA_OUT
+- JUMP sated
+-spew_cmd:
+- MOVE 1, NCR53c7xx_zero, WHEN CMD
+- JUMP sated
+-eat_msgin:
+- MOVE 1, NCR53c7xx_sink, WHEN MSG_IN
+- JUMP eat_msgin, WHEN MSG_IN
+- JUMP sated
+-eat_status:
+- MOVE 1, NCR53c7xx_sink, WHEN STATUS
+- JUMP eat_status, WHEN STATUS
+- JUMP sated
+-eat_datain:
+- MOVE 1, NCR53c7xx_sink, WHEN DATA_IN
+- JUMP eat_datain, WHEN DATA_IN
+- JUMP sated
+-spew_dataout:
+- MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT
+-sated:
+-#if (CHIP != 710)
+- MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+- MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT
+- WAIT DISCONNECT
+- INT int_norm_aborted
+-
+-#if (CHIP != 710)
+-;
+-; dsa_to_scratch
+-; scratch_to_dsa
+-;
+-; PURPOSE :
+-; The NCR chips cannot do a move memory instruction with the DSA register
+-; as the source or destination. So, we provide a couple of subroutines
+-; that let us switch between the DSA register and scratch register.
+-;
+-; Memory moves to/from the DSPS register also don't work, but we
+-; don't use them.
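
The two helper routines that follow exist because, as the comment above says, MOVE MEMORY cannot use DSA as a source or destination; the only data path between register files is the one-byte SFBR, so a 32-bit register copy is four byte-moves. A conceptual model, with the registers represented as plain byte arrays for illustration only:

typedef unsigned char u8;

static u8 dsa_reg[4], scratch_reg[4], sfbr;

/* Equivalent of dsa_to_scratch: DSA0..DSA3 through SFBR to SCRATCH0..3. */
static void dsa_to_scratch_sketch(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		sfbr = dsa_reg[i];	/* MOVE DSAn TO SFBR */
		scratch_reg[i] = sfbr;	/* MOVE SFBR TO SCRATCHn */
	}
}
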
+-; +-; +- +- +-dsa_to_scratch: +- MOVE DSA0 TO SFBR +- MOVE SFBR TO SCRATCH0 +- MOVE DSA1 TO SFBR +- MOVE SFBR TO SCRATCH1 +- MOVE DSA2 TO SFBR +- MOVE SFBR TO SCRATCH2 +- MOVE DSA3 TO SFBR +- MOVE SFBR TO SCRATCH3 +- RETURN +- +-scratch_to_dsa: +- MOVE SCRATCH0 TO SFBR +- MOVE SFBR TO DSA0 +- MOVE SCRATCH1 TO SFBR +- MOVE SFBR TO DSA1 +- MOVE SCRATCH2 TO SFBR +- MOVE SFBR TO DSA2 +- MOVE SCRATCH3 TO SFBR +- MOVE SFBR TO DSA3 +- RETURN +-#endif +- +-#if (CHIP == 710) +-; Little patched jump, used to overcome problems with TEMP getting +-; corrupted on memory moves. +- +-jump_temp: +- JUMP 0 +-#endif +diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped +--- linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c7xx_d.h_shipped 1969-12-31 19:00:00.000000000 -0500 +@@ -1,2874 +0,0 @@ +-/* DO NOT EDIT - Generated automatically by script_asm.pl */ +-static u32 SCRIPT[] = { +-/* +- +- +- +- +- +-; 53c710 driver. Modified from Drew Eckhardts driver +-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk] +-; +-; I have left the script for the 53c8xx family in here, as it is likely +-; to be useful to see what I changed when bug hunting. +- +-; NCR 53c810 driver, main script +-; Sponsored by +-; iX Multiuser Multitasking Magazine +-; hm@ix.de +-; +-; Copyright 1993, 1994, 1995 Drew Eckhardt +-; Visionary Computing +-; (Unix and Linux consulting and custom programming) +-; drew@PoohSticks.ORG +-; +1 (303) 786-7975 +-; +-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation. +-; +-; PRE-ALPHA +-; +-; For more information, please consult +-; +-; NCR 53C810 +-; PCI-SCSI I/O Processor +-; Data Manual +-; +-; NCR 53C710 +-; SCSI I/O Processor +-; Programmers Guide +-; +-; NCR Microelectronics +-; 1635 Aeroplaza Drive +-; Colorado Springs, CO 80916 +-; 1+ (719) 578-3400 +-; +-; Toll free literature number +-; +1 (800) 334-5454 +-; +-; IMPORTANT : This code is self modifying due to the limitations of +-; the NCR53c7,8xx series chips. Persons debugging this code with +-; the remote debugger should take this into account, and NOT set +-; breakpoints in modified instructions. +-; +-; Design: +-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard +-; microcontroller using a simple instruction set. +-; +-; So, to minimize the effects of interrupt latency, and to maximize +-; throughput, this driver offloads the practical maximum amount +-; of processing to the SCSI chip while still maintaining a common +-; structure. +-; +-; Where tradeoffs were needed between efficiency on the older +-; chips and the newer NCR53c800 series, the NCR53c800 series +-; was chosen. +-; +-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully +-; automate SCSI transfers without host processor intervention, this +-; isn't the case with the NCR53c710 and newer chips which allow +-; +-; - reads and writes to the internal registers from within the SCSI +-; scripts, allowing the SCSI SCRIPTS(tm) code to save processor +-; state so that multiple threads of execution are possible, and also +-; provide an ALU for loop control, etc. +-; +-; - table indirect addressing for some instructions. This allows +-; pointers to be located relative to the DSA ((Data Structure +-; Address) register. 
+-; +-; These features make it possible to implement a mailbox style interface, +-; where the same piece of code is run to handle I/O for multiple threads +-; at once minimizing our need to relocate code. Since the NCR53c700/ +-; NCR53c800 series have a unique combination of features, making a +-; a standard ingoing/outgoing mailbox system, costly, I've modified it. +-; +-; - Mailboxes are a mixture of code and data. This lets us greatly +-; simplify the NCR53c810 code and do things that would otherwise +-; not be possible. +-; +-; The saved data pointer is now implemented as follows : +-; +-; Control flow has been architected such that if control reaches +-; munge_save_data_pointer, on a restore pointers message or +-; reconnection, a jump to the address formerly in the TEMP register +-; will allow the SCSI command to resume execution. +-; +- +-; +-; Note : the DSA structures must be aligned on 32 bit boundaries, +-; since the source and destination of MOVE MEMORY instructions +-; must share the same alignment and this is the alignment of the +-; NCR registers. +-; +- +-; For some systems (MVME166, for example) dmode is always the same, so don't +-; waste time writing it +- +- +- +- +- +- +- +- +- +- +- +-ABSOLUTE dsa_temp_lun = 0 ; Patch to lun for current dsa +-ABSOLUTE dsa_temp_next = 0 ; Patch to dsa next for current dsa +-ABSOLUTE dsa_temp_addr_next = 0 ; Patch to address of dsa next address +- ; for current dsa +-ABSOLUTE dsa_temp_sync = 0 ; Patch to address of per-target +- ; sync routine +-ABSOLUTE dsa_sscf_710 = 0 ; Patch to address of per-target +- ; sscf value (53c710) +-ABSOLUTE dsa_temp_target = 0 ; Patch to id for current dsa +-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command +- ; saved data pointer +-ABSOLUTE dsa_temp_addr_residual = 0 ; Patch to address of per-command +- ; current residual code +-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command +- ; saved residual code +-ABSOLUTE dsa_temp_addr_new_value = 0 ; Address of value for JUMP operand +-ABSOLUTE dsa_temp_addr_array_value = 0 ; Address to copy to +-ABSOLUTE dsa_temp_addr_dsa_value = 0 ; Address of this DSA value +- +-; +-; Once a device has initiated reselection, we need to compare it +-; against the singly linked list of commands which have disconnected +-; and are pending reselection. These commands are maintained in +-; an unordered singly linked list of DSA structures, through the +-; DSA pointers at their 'centers' headed by the reconnect_dsa_head +-; pointer. +-; +-; To avoid complications in removing commands from the list, +-; I minimize the amount of expensive (at eight operations per +-; addition @ 500-600ns each) pointer operations which must +-; be done in the NCR driver by precomputing them on the +-; host processor during dsa structure generation. +-; +-; The fixed-up per DSA code knows how to recognize the nexus +-; associated with the corresponding SCSI command, and modifies +-; the source and destination pointers for the MOVE MEMORY +-; instruction which is executed when reselected_ok is called +-; to remove the command from the list. Similarly, DSA is +-; loaded with the address of the next DSA structure and +-; reselected_check_next is called if a failure occurs. 
+-; +-; Perhaps more concisely, the net effect of the mess is +-; +-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, +-; src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) { +-; src = &dsa->next; +-; if (target_id == dsa->id && target_lun == dsa->lun) { +-; *dest = *src; +-; break; +-; } +-; } +-; +-; if (!dsa) +-; error (int_err_unexpected_reselect); +-; else +-; longjmp (dsa->jump_resume, 0); +-; +-; +- +- +-; Define DSA structure used for mailboxes +-ENTRY dsa_code_template +-dsa_code_template: +-ENTRY dsa_code_begin +-dsa_code_begin: +-; RGH: Don't care about TEMP and DSA here +- +- MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch +- +-at 0x00000000 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, saved_dsa +- +-at 0x00000003 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- ; We are about to go and select the device, so must set SSCF bits +- MOVE MEMORY 4, dsa_sscf_710, addr_scratch +- +-at 0x00000006 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- MOVE SCRATCH3 TO SFBR +- +-at 0x00000009 : */ 0x72370000,0x00000000, +-/* +- +- +- +- MOVE SFBR TO SBCL +- +-at 0x0000000b : */ 0x6a0b0000,0x00000000, +-/* +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x0000000d : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- CALL select +- +-at 0x00000010 : */ 0x88080000,0x000001f8, +-/* +-; Handle the phase mismatch which may have resulted from the +-; MOVE FROM dsa_msgout if we returned here. The CLEAR ATN +-; may or may not be necessary, and we should update script_asm.pl +-; to handle multiple pieces. +- CLEAR ATN +- +-at 0x00000012 : */ 0x60000008,0x00000000, +-/* +- CLEAR ACK +- +-at 0x00000014 : */ 0x60000040,0x00000000, +-/* +- +-; Replace second operand with address of JUMP instruction dest operand +-; in schedule table for this DSA. Becomes dsa_jump_dest in 53c7,8xx.c. +-ENTRY dsa_code_fix_jump +-dsa_code_fix_jump: +- MOVE MEMORY 4, NOP_insn, 0 +- +-at 0x00000016 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- JUMP select_done +- +-at 0x00000019 : */ 0x80080000,0x00000230, +-/* +- +-; wrong_dsa loads the DSA register with the value of the dsa_next +-; field. +-; +-wrong_dsa: +- +-; NOTE DSA is corrupt when we arrive here! +- +-; Patch the MOVE MEMORY INSTRUCTION such that +-; the destination address is the address of the OLD +-; next pointer. +-; +- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8 +- +-at 0x0000001b : */ 0xc0000004,0x00000000,0x000007ec, +-/* +- +-; +-; Move the _contents_ of the next pointer into the DSA register as +-; the next I_T_L or I_T_L_Q tupple to check against the established +-; nexus. 
+-; +- MOVE MEMORY 4, dsa_temp_next, addr_scratch +- +-at 0x0000001e : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, saved_dsa +- +-at 0x00000021 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x00000024 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- JUMP reselected_check_next +- +-at 0x00000027 : */ 0x80080000,0x000006f0, +-/* +- +-ABSOLUTE dsa_save_data_pointer = 0 +-ENTRY dsa_code_save_data_pointer +-dsa_code_save_data_pointer: +- +- ; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt +- ; We MUST return with DSA correct +- MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer +- +-at 0x00000029 : */ 0xc0000004,0x000009c8,0x00000000, +-/* +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual +- +-at 0x0000002c : */ 0xc0000018,0x00000000,0x00000000, +-/* +- CLEAR ACK +- +-at 0x0000002f : */ 0x60000040,0x00000000, +-/* +- +- +- +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x00000031 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- JUMP jump_temp +- +-at 0x00000034 : */ 0x80080000,0x000009c4, +-/* +- +-ABSOLUTE dsa_restore_pointers = 0 +-ENTRY dsa_code_restore_pointers +-dsa_code_restore_pointers: +- +- ; TEMP and DSA are corrupt when we get here, but who cares! +- MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4 +- +-at 0x00000036 : */ 0xc0000004,0x00000000,0x000009c8, +-/* +-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h +- MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual +- +-at 0x00000039 : */ 0xc0000018,0x00000000,0x00000000, +-/* +- CLEAR ACK +- +-at 0x0000003c : */ 0x60000040,0x00000000, +-/* +- ; Restore DSA, note we don't care about TEMP +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x0000003e : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- JUMP jump_temp +- +-at 0x00000041 : */ 0x80080000,0x000009c4, +-/* +- +- +-ABSOLUTE dsa_check_reselect = 0 +-; dsa_check_reselect determines whether or not the current target and +-; lun match the current DSA +-ENTRY dsa_code_check_reselect +-dsa_code_check_reselect: +- +- +- +- MOVE LCRC TO SFBR ; LCRC has our ID and his ID bits set +- +-at 0x00000043 : */ 0x72230000,0x00000000, +-/* +- JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80 +- +-at 0x00000045 : */ 0x80848000,0x00ffff50, +-/* +- +- +- +- +- +-; +-; Hack - move to scratch first, since SFBR is not writeable +-; via the CPU and hence a MOVE MEMORY instruction. +-; +- +- MOVE MEMORY 1, reselected_identify, addr_scratch +- +-at 0x00000047 : */ 0xc0000001,0x00000000,0x00000000, +-/* +- +- +- ; BIG ENDIAN ON MVME16x +- MOVE SCRATCH3 TO SFBR +- +-at 0x0000004a : */ 0x72370000,0x00000000, +-/* +- +- +- +-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips +-; Are you sure about that? richard@sleepie.demon.co.uk +- JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8 +- +-at 0x0000004c : */ 0x8084f800,0x00ffff34, +-/* +-; Patch the MOVE MEMORY INSTRUCTION such that +-; the source address is the address of this dsa's +-; next pointer. +- MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4 +- +-at 0x0000004e : */ 0xc0000004,0x00000000,0x000007e8, +-/* +- CALL reselected_ok +- +-at 0x00000051 : */ 0x88080000,0x00000798, +-/* +- +-; Restore DSA following memory moves in reselected_ok +-; dsa_temp_sync doesn't really care about DSA, but it has an +-; optional debug INT so a valid DSA is a good idea. 
+- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x00000053 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- CALL dsa_temp_sync +- +-at 0x00000056 : */ 0x88080000,0x00000000, +-/* +-; Release ACK on the IDENTIFY message _after_ we've set the synchronous +-; transfer parameters! +- CLEAR ACK +- +-at 0x00000058 : */ 0x60000040,0x00000000, +-/* +-; Implicitly restore pointers on reselection, so a RETURN +-; will transfer control back to the right spot. +- CALL REL (dsa_code_restore_pointers) +- +-at 0x0000005a : */ 0x88880000,0x00ffff68, +-/* +- RETURN +- +-at 0x0000005c : */ 0x90080000,0x00000000, +-/* +-ENTRY dsa_zero +-dsa_zero: +-ENTRY dsa_code_template_end +-dsa_code_template_end: +- +-; Perform sanity check for dsa_fields_start == dsa_code_template_end - +-; dsa_zero, puke. +- +-ABSOLUTE dsa_fields_start = 0 ; Sanity marker +- ; pad 48 bytes (fix this RSN) +-ABSOLUTE dsa_next = 48 ; len 4 Next DSA +- ; del 4 Previous DSA address +-ABSOLUTE dsa_cmnd = 56 ; len 4 Scsi_Cmnd * for this thread. +-ABSOLUTE dsa_select = 60 ; len 4 Device ID, Period, Offset for +- ; table indirect select +-ABSOLUTE dsa_msgout = 64 ; len 8 table indirect move parameter for +- ; select message +-ABSOLUTE dsa_cmdout = 72 ; len 8 table indirect move parameter for +- ; command +-ABSOLUTE dsa_dataout = 80 ; len 4 code pointer for dataout +-ABSOLUTE dsa_datain = 84 ; len 4 code pointer for datain +-ABSOLUTE dsa_msgin = 88 ; len 8 table indirect move for msgin +-ABSOLUTE dsa_status = 96 ; len 8 table indirect move for status byte +-ABSOLUTE dsa_msgout_other = 104 ; len 8 table indirect for normal message out +- ; (Synchronous transfer negotiation, etc). +-ABSOLUTE dsa_end = 112 +- +-ABSOLUTE schedule = 0 ; Array of JUMP dsa_begin or JUMP (next), +- ; terminated by a call to JUMP wait_reselect +- +-; Linked lists of DSA structures +-ABSOLUTE reconnect_dsa_head = 0 ; Link list of DSAs which can reconnect +-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing +- ; address of reconnect_dsa_head +- +-; These select the source and destination of a MOVE MEMORY instruction +-ABSOLUTE dmode_memory_to_memory = 0x0 +-ABSOLUTE dmode_memory_to_ncr = 0x0 +-ABSOLUTE dmode_ncr_to_memory = 0x0 +- +-ABSOLUTE addr_scratch = 0x0 +-ABSOLUTE addr_temp = 0x0 +- +-ABSOLUTE saved_dsa = 0x0 +-ABSOLUTE emulfly = 0x0 +-ABSOLUTE addr_dsa = 0x0 +- +- +- +-; Interrupts - +-; MSB indicates type +-; 0 handle error condition +-; 1 handle message +-; 2 handle normal condition +-; 3 debugging interrupt +-; 4 testing interrupt +-; Next byte indicates specific error +- +-; XXX not yet implemented, I'm not sure if I want to - +-; Next byte indicates the routine the error occurred in +-; The LSB indicates the specific place the error occurred +- +-ABSOLUTE int_err_unexpected_phase = 0x00000000 ; Unexpected phase encountered +-ABSOLUTE int_err_selected = 0x00010000 ; SELECTED (nee RESELECTED) +-ABSOLUTE int_err_unexpected_reselect = 0x00020000 +-ABSOLUTE int_err_check_condition = 0x00030000 +-ABSOLUTE int_err_no_phase = 0x00040000 +-ABSOLUTE int_msg_wdtr = 0x01000000 ; WDTR message received +-ABSOLUTE int_msg_sdtr = 0x01010000 ; SDTR received +-ABSOLUTE int_msg_1 = 0x01020000 ; single byte special message +- ; received +- +-ABSOLUTE int_norm_select_complete = 0x02000000 ; Select complete, reprogram +- ; registers. 
+-ABSOLUTE int_norm_reselect_complete = 0x02010000 ; Nexus established +-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete +-ABSOLUTE int_norm_disconnected = 0x02030000 ; Disconnected +-ABSOLUTE int_norm_aborted =0x02040000 ; Aborted *dsa +-ABSOLUTE int_norm_reset = 0x02050000 ; Generated BUS reset. +-ABSOLUTE int_norm_emulateintfly = 0x02060000 ; 53C710 Emulated intfly +-ABSOLUTE int_debug_break = 0x03000000 ; Break point +- +-ABSOLUTE int_debug_panic = 0x030b0000 ; Panic driver +- +- +-ABSOLUTE int_test_1 = 0x04000000 ; Test 1 complete +-ABSOLUTE int_test_2 = 0x04010000 ; Test 2 complete +-ABSOLUTE int_test_3 = 0x04020000 ; Test 3 complete +- +- +-; These should start with 0x05000000, with low bits incrementing for +-; each one. +- +- +- +-ABSOLUTE NCR53c7xx_msg_abort = 0 ; Pointer to abort message +-ABSOLUTE NCR53c7xx_msg_reject = 0 ; Pointer to reject message +-ABSOLUTE NCR53c7xx_zero = 0 ; long with zero in it, use for source +-ABSOLUTE NCR53c7xx_sink = 0 ; long to dump worthless data in +-ABSOLUTE NOP_insn = 0 ; NOP instruction +- +-; Pointer to message, potentially multi-byte +-ABSOLUTE msg_buf = 0 +- +-; Pointer to holding area for reselection information +-ABSOLUTE reselected_identify = 0 +-ABSOLUTE reselected_tag = 0 +- +-; Request sense command pointer, it's a 6 byte command, should +-; be constant for all commands since we always want 16 bytes of +-; sense and we don't need to change any fields as we did under +-; SCSI-I when we actually cared about the LUN field. +-;EXTERNAL NCR53c7xx_sense ; Request sense command +- +- +-; dsa_schedule +-; PURPOSE : after a DISCONNECT message has been received, and pointers +-; saved, insert the current DSA structure at the head of the +-; disconnected queue and fall through to the scheduler. +-; +-; CALLS : OK +-; +-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list +-; of disconnected commands +-; +-; MODIFIES : SCRATCH, reconnect_dsa_head +-; +-; EXITS : always passes control to schedule +- +-ENTRY dsa_schedule +-dsa_schedule: +- +- +- +- +-; +-; Calculate the address of the next pointer within the DSA +-; structure of the command that is currently disconnecting +-; +- +- ; Read what should be the current DSA from memory - actual DSA +- ; register is probably corrupt +- MOVE MEMORY 4, saved_dsa, addr_scratch +- +-at 0x0000005e : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- MOVE SCRATCH0 + dsa_next TO SCRATCH0 +- +-at 0x00000061 : */ 0x7e343000,0x00000000, +-/* +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- +-at 0x00000063 : */ 0x7f350000,0x00000000, +-/* +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- +-at 0x00000065 : */ 0x7f360000,0x00000000, +-/* +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- +-at 0x00000067 : */ 0x7f370000,0x00000000, +-/* +- +-; Point the next field of this DSA structure at the current disconnected +-; list +- +- MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8 +- +-at 0x00000069 : */ 0xc0000004,0x00000000,0x000001b8, +-/* +- +-dsa_schedule_insert: +- MOVE MEMORY 4, reconnect_dsa_head, 0 +- +-at 0x0000006c : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +-; And update the head pointer. 
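[Editor's note: stripped of the self-patching MOVE MEMORY mechanics, the insertion just above and the head update just below are the ordinary two-step push onto a singly linked list. The script needs two patched memory moves only because it cannot dereference pointers directly; in C, dsa_schedule's queue manipulation is simply:]

    /* dsa_schedule in plain C: push the disconnecting command onto the
       head of the reconnect queue. */
    dsa->next = reconnect_dsa_head;   /* dsa_schedule_insert, patched above */
    reconnect_dsa_head = dsa;         /* "And update the head pointer." */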
+- +- ; Read what should be the current DSA from memory - actual DSA +- ; register is probably corrupt +- MOVE MEMORY 4, saved_dsa, addr_scratch +- +-at 0x0000006f : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +- MOVE MEMORY 4, addr_scratch, reconnect_dsa_head +- +-at 0x00000072 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +- +- +- CLEAR ACK +- +-at 0x00000075 : */ 0x60000040,0x00000000, +-/* +- +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x00000077 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- WAIT DISCONNECT +- +-at 0x0000007a : */ 0x48000000,0x00000000, +-/* +- +- +- +- +- +- +- JUMP schedule +- +-at 0x0000007c : */ 0x80080000,0x00000000, +-/* +- +- +-; +-; select +-; +-; PURPOSE : establish a nexus for the SCSI command referenced by DSA. +-; On success, the current DSA structure is removed from the issue +-; queue. Usually, this is entered as a fall-through from schedule, +-; although the contingent allegiance handling code will write +-; the select entry address to the DSP to restart a command as a +-; REQUEST SENSE. A message is sent (usually IDENTIFY, although +-; additional SDTR or WDTR messages may be sent). COMMAND OUT +-; is handled. +-; +-; INPUTS : DSA - SCSI command, issue_dsa_head +-; +-; CALLS : NOT OK +-; +-; MODIFIES : SCRATCH, issue_dsa_head +-; +-; EXITS : on reselection or selection, go to select_failed +-; otherwise, RETURN so control is passed back to +-; dsa_begin. +-; +- +-ENTRY select +-select: +- +- +- +- +- +- +- +- +- CLEAR TARGET +- +-at 0x0000007e : */ 0x60000200,0x00000000, +-/* +- +-; XXX +-; +-; In effect, SELECTION operations are backgrounded, with execution +-; continuing until code which waits for REQ or a fatal interrupt is +-; encountered. +-; +-; So, for more performance, we could overlap the code which removes +-; the command from the NCRs issue queue with the selection, but +-; at this point I don't want to deal with the error recovery. +-; +- +- +- +- ; Enable selection timer +- +- +- +- MOVE CTEST7 & 0xef TO CTEST7 +- +-at 0x00000080 : */ 0x7c1bef00,0x00000000, +-/* +- +- +- SELECT ATN FROM dsa_select, select_failed +- +-at 0x00000082 : */ 0x4300003c,0x00000828, +-/* +- JUMP select_msgout, WHEN MSG_OUT +- +-at 0x00000084 : */ 0x860b0000,0x00000218, +-/* +-ENTRY select_msgout +-select_msgout: +- +- ; Disable selection timer +- MOVE CTEST7 | 0x10 TO CTEST7 +- +-at 0x00000086 : */ 0x7a1b1000,0x00000000, +-/* +- +- MOVE FROM dsa_msgout, WHEN MSG_OUT +- +-at 0x00000088 : */ 0x1e000000,0x00000040, +-/* +- +- +- +- +- +- +- +- +- +- +- RETURN +- +-at 0x0000008a : */ 0x90080000,0x00000000, +-/* +- +-; +-; select_done +-; +-; PURPOSE: continue on to normal data transfer; called as the exit +-; point from dsa_begin. +-; +-; INPUTS: dsa +-; +-; CALLS: OK +-; +-; +- +-select_done: +- +-; NOTE DSA is corrupt when we arrive here! +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x0000008c : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +- +- +- +- +-; After a successful selection, we should get either a CMD phase or +-; some transfer request negotiation message. 
+- +- JUMP cmdout, WHEN CMD +- +-at 0x0000008f : */ 0x820b0000,0x0000025c, +-/* +- INT int_err_unexpected_phase, WHEN NOT MSG_IN +- +-at 0x00000091 : */ 0x9f030000,0x00000000, +-/* +- +-select_msg_in: +- CALL msg_in, WHEN MSG_IN +- +-at 0x00000093 : */ 0x8f0b0000,0x0000041c, +-/* +- JUMP select_msg_in, WHEN MSG_IN +- +-at 0x00000095 : */ 0x870b0000,0x0000024c, +-/* +- +-cmdout: +- INT int_err_unexpected_phase, WHEN NOT CMD +- +-at 0x00000097 : */ 0x9a030000,0x00000000, +-/* +- +- +- +-ENTRY cmdout_cmdout +-cmdout_cmdout: +- +- MOVE FROM dsa_cmdout, WHEN CMD +- +-at 0x00000099 : */ 0x1a000000,0x00000048, +-/* +- +- +- +- +-; +-; data_transfer +-; other_out +-; other_in +-; other_transfer +-; +-; PURPOSE : handle the main data transfer for a SCSI command in +-; several parts. In the first part, data_transfer, DATA_IN +-; and DATA_OUT phases are allowed, with the user provided +-; code (usually dynamically generated based on the scatter/gather +-; list associated with a SCSI command) called to handle these +-; phases. +-; +-; After control has passed to one of the user provided +-; DATA_IN or DATA_OUT routines, back calls are made to +-; other_transfer_in or other_transfer_out to handle non-DATA IN +-; and DATA OUT phases respectively, with the state of the active +-; data pointer being preserved in TEMP. +-; +-; On completion, the user code passes control to other_transfer +-; which causes DATA_IN and DATA_OUT to result in unexpected_phase +-; interrupts so that data overruns may be trapped. +-; +-; INPUTS : DSA - SCSI command +-; +-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in +-; other_transfer +-; +-; MODIFIES : SCRATCH +-; +-; EXITS : if STATUS IN is detected, signifying command completion, +-; the NCR jumps to command_complete. If MSG IN occurs, a +-; CALL is made to msg_in. Otherwise, other_transfer runs in +-; an infinite loop. +-; +- +-ENTRY data_transfer +-data_transfer: +- JUMP cmdout_cmdout, WHEN CMD +- +-at 0x0000009b : */ 0x820b0000,0x00000264, +-/* +- CALL msg_in, WHEN MSG_IN +- +-at 0x0000009d : */ 0x8f0b0000,0x0000041c, +-/* +- INT int_err_unexpected_phase, WHEN MSG_OUT +- +-at 0x0000009f : */ 0x9e0b0000,0x00000000, +-/* +- JUMP do_dataout, WHEN DATA_OUT +- +-at 0x000000a1 : */ 0x800b0000,0x000002a4, +-/* +- JUMP do_datain, WHEN DATA_IN +- +-at 0x000000a3 : */ 0x810b0000,0x000002fc, +-/* +- JUMP command_complete, WHEN STATUS +- +-at 0x000000a5 : */ 0x830b0000,0x0000065c, +-/* +- JUMP data_transfer +- +-at 0x000000a7 : */ 0x80080000,0x0000026c, +-/* +-ENTRY end_data_transfer +-end_data_transfer: +- +-; +-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain +-; should be fixed up whenever the nexus changes so it can point to the +-; correct routine for that command. 
+-; +- +- +-; Nasty jump to dsa->dataout +-do_dataout: +- +- MOVE MEMORY 4, saved_dsa, addr_scratch +- +-at 0x000000a9 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- MOVE SCRATCH0 + dsa_dataout TO SCRATCH0 +- +-at 0x000000ac : */ 0x7e345000,0x00000000, +-/* +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- +-at 0x000000ae : */ 0x7f350000,0x00000000, +-/* +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- +-at 0x000000b0 : */ 0x7f360000,0x00000000, +-/* +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- +-at 0x000000b2 : */ 0x7f370000,0x00000000, +-/* +- +- MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4 +- +-at 0x000000b4 : */ 0xc0000004,0x00000000,0x000002e0, +-/* +- +-dataout_to_jump: +- MOVE MEMORY 4, 0, dataout_jump + 4 +- +-at 0x000000b7 : */ 0xc0000004,0x00000000,0x000002f8, +-/* +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x000000ba : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +-dataout_jump: +- JUMP 0 +- +-at 0x000000bd : */ 0x80080000,0x00000000, +-/* +- +-; Nasty jump to dsa->dsain +-do_datain: +- +- MOVE MEMORY 4, saved_dsa, addr_scratch +- +-at 0x000000bf : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- MOVE SCRATCH0 + dsa_datain TO SCRATCH0 +- +-at 0x000000c2 : */ 0x7e345400,0x00000000, +-/* +- MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY +- +-at 0x000000c4 : */ 0x7f350000,0x00000000, +-/* +- MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY +- +-at 0x000000c6 : */ 0x7f360000,0x00000000, +-/* +- MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY +- +-at 0x000000c8 : */ 0x7f370000,0x00000000, +-/* +- +- MOVE MEMORY 4, addr_scratch, datain_to_jump + 4 +- +-at 0x000000ca : */ 0xc0000004,0x00000000,0x00000338, +-/* +- +-ENTRY datain_to_jump +-datain_to_jump: +- MOVE MEMORY 4, 0, datain_jump + 4 +- +-at 0x000000cd : */ 0xc0000004,0x00000000,0x00000350, +-/* +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x000000d0 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +-datain_jump: +- JUMP 0 +- +-at 0x000000d3 : */ 0x80080000,0x00000000, +-/* +- +- +- +-; Note that other_out and other_in loop until a non-data phase +-; is discovered, so we only execute return statements when we +-; can go on to the next data phase block move statement. +- +-ENTRY other_out +-other_out: +- +- +- +- INT int_err_unexpected_phase, WHEN CMD +- +-at 0x000000d5 : */ 0x9a0b0000,0x00000000, +-/* +- JUMP msg_in_restart, WHEN MSG_IN +- +-at 0x000000d7 : */ 0x870b0000,0x000003fc, +-/* +- INT int_err_unexpected_phase, WHEN MSG_OUT +- +-at 0x000000d9 : */ 0x9e0b0000,0x00000000, +-/* +- INT int_err_unexpected_phase, WHEN DATA_IN +- +-at 0x000000db : */ 0x990b0000,0x00000000, +-/* +- JUMP command_complete, WHEN STATUS +- +-at 0x000000dd : */ 0x830b0000,0x0000065c, +-/* +- JUMP other_out, WHEN NOT DATA_OUT +- +-at 0x000000df : */ 0x80030000,0x00000354, +-/* +- +-; TEMP should be OK, as we got here from a call in the user dataout code. 
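[Editor's note: do_dataout and do_datain above emulate an indirect jump the SCRIPTS instruction set lacks — they fetch dsa_dataout/dsa_datain byte-by-byte through SCRATCH, then write the result into the operand of the JUMP that follows. In C the whole self-patching dance collapses to an indirect transfer through a per-command code pointer; a sketch with field names following the dsa offsets defined earlier:]

    /* What the self-patching do_dataout/do_datain amount to. */
    typedef void (*script_fn)(void);

    struct dsa_code_ptrs {
        script_fn dataout;   /* dsa_dataout, offset 80 */
        script_fn datain;    /* dsa_datain,  offset 84 */
    };

    static void do_dataout(struct dsa_code_ptrs *dsa)
    {
        dsa->dataout();      /* script: patch dataout_jump + 4, then JUMP 0 */
    }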
+- +- RETURN +- +-at 0x000000e1 : */ 0x90080000,0x00000000, +-/* +- +-ENTRY other_in +-other_in: +- +- +- +- INT int_err_unexpected_phase, WHEN CMD +- +-at 0x000000e3 : */ 0x9a0b0000,0x00000000, +-/* +- JUMP msg_in_restart, WHEN MSG_IN +- +-at 0x000000e5 : */ 0x870b0000,0x000003fc, +-/* +- INT int_err_unexpected_phase, WHEN MSG_OUT +- +-at 0x000000e7 : */ 0x9e0b0000,0x00000000, +-/* +- INT int_err_unexpected_phase, WHEN DATA_OUT +- +-at 0x000000e9 : */ 0x980b0000,0x00000000, +-/* +- JUMP command_complete, WHEN STATUS +- +-at 0x000000eb : */ 0x830b0000,0x0000065c, +-/* +- JUMP other_in, WHEN NOT DATA_IN +- +-at 0x000000ed : */ 0x81030000,0x0000038c, +-/* +- +-; TEMP should be OK, as we got here from a call in the user datain code. +- +- RETURN +- +-at 0x000000ef : */ 0x90080000,0x00000000, +-/* +- +- +-ENTRY other_transfer +-other_transfer: +- INT int_err_unexpected_phase, WHEN CMD +- +-at 0x000000f1 : */ 0x9a0b0000,0x00000000, +-/* +- CALL msg_in, WHEN MSG_IN +- +-at 0x000000f3 : */ 0x8f0b0000,0x0000041c, +-/* +- INT int_err_unexpected_phase, WHEN MSG_OUT +- +-at 0x000000f5 : */ 0x9e0b0000,0x00000000, +-/* +- INT int_err_unexpected_phase, WHEN DATA_OUT +- +-at 0x000000f7 : */ 0x980b0000,0x00000000, +-/* +- INT int_err_unexpected_phase, WHEN DATA_IN +- +-at 0x000000f9 : */ 0x990b0000,0x00000000, +-/* +- JUMP command_complete, WHEN STATUS +- +-at 0x000000fb : */ 0x830b0000,0x0000065c, +-/* +- JUMP other_transfer +- +-at 0x000000fd : */ 0x80080000,0x000003c4, +-/* +- +-; +-; msg_in_restart +-; msg_in +-; munge_msg +-; +-; PURPOSE : process messages from a target. msg_in is called when the +-; caller hasn't read the first byte of the message. munge_message +-; is called when the caller has read the first byte of the message, +-; and left it in SFBR. msg_in_restart is called when the caller +-; hasn't read the first byte of the message, and wishes RETURN +-; to transfer control back to the address of the conditional +-; CALL instruction rather than to the instruction after it. +-; +-; Various int_* interrupts are generated when the host system +-; needs to intervene, as is the case with SDTR, WDTR, and +-; INITIATE RECOVERY messages. +-; +-; When the host system handles one of these interrupts, +-; it can respond by reentering at reject_message, +-; which rejects the message and returns control to +-; the caller of msg_in or munge_msg, accept_message +-; which clears ACK and returns control, or reply_message +-; which sends the message pointed to by the DSA +-; msgout_other table indirect field. +-; +-; DISCONNECT messages are handled by moving the command +-; to the reconnect_dsa_queue. +- +-; NOTE: DSA should be valid when we get here - we cannot save both it +-; and TEMP in this routine. +- +-; +-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg +-; only) +-; +-; CALLS : NO. The TEMP register isn't backed up to allow nested calls. +-; +-; MODIFIES : SCRATCH, DSA on DISCONNECT +-; +-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS, +-; and normal return from message handlers running under +-; Linux, control is returned to the caller. Receipt +-; of DISCONNECT messages pass control to dsa_schedule. 
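[Editor's note: the munge_msg dispatch below is, in effect, a switch on the first message byte. Message codes are from the SCSI message set; the handler names are the script labels:]

    /* munge_msg as a C switch; msg_buf[0] holds the first message byte. */
    switch (msg_buf[0]) {
    case 0x01: munge_extended();          break;  /* EXTENDED MESSAGE */
    case 0x02: munge_save_data_pointer(); break;  /* SAVE DATA POINTER */
    case 0x03: munge_restore_pointers();  break;  /* RESTORE POINTERS */
    case 0x04: munge_disconnect();        break;  /* DISCONNECT */
    case 0x07:                                    /* MESSAGE REJECT */
    case 0x0f: int_msg_1();               break;  /* INITIATE RECOVERY */
    default:
        if ((msg_buf[0] & 0xdf) == 0x20)          /* two-byte messages */
            munge_2();                            /* currently just rejects */
        else
            reject_message();
    }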
+-; +-ENTRY msg_in_restart +-msg_in_restart: +-; XXX - hackish +-; +-; Since it's easier to debug changes to the statically +-; compiled code, rather than the dynamically generated +-; stuff, such as +-; +-; MOVE x, y, WHEN data_phase +-; CALL other_z, WHEN NOT data_phase +-; MOVE x, y, WHEN data_phase +-; +-; I'd like to have certain routines (notably the message handler) +-; restart on the conditional call rather than the next instruction. +-; +-; So, subtract 8 from the return address +- +- MOVE TEMP0 + 0xf8 TO TEMP0 +- +-at 0x000000ff : */ 0x7e1cf800,0x00000000, +-/* +- MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY +- +-at 0x00000101 : */ 0x7f1dff00,0x00000000, +-/* +- MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY +- +-at 0x00000103 : */ 0x7f1eff00,0x00000000, +-/* +- MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY +- +-at 0x00000105 : */ 0x7f1fff00,0x00000000, +-/* +- +-ENTRY msg_in +-msg_in: +- MOVE 1, msg_buf, WHEN MSG_IN +- +-at 0x00000107 : */ 0x0f000001,0x00000000, +-/* +- +-munge_msg: +- JUMP munge_extended, IF 0x01 ; EXTENDED MESSAGE +- +-at 0x00000109 : */ 0x800c0001,0x00000574, +-/* +- JUMP munge_2, IF 0x20, AND MASK 0xdf ; two byte message +- +-at 0x0000010b : */ 0x800cdf20,0x00000464, +-/* +-; +-; XXX - I've seen a handful of broken SCSI devices which fail to issue +-; a SAVE POINTERS message before disconnecting in the middle of +-; a transfer, assuming that the DATA POINTER will be implicitly +-; restored. +-; +-; Historically, I've often done an implicit save when the DISCONNECT +-; message is processed. We may want to consider having the option of +-; doing that here. +-; +- JUMP munge_save_data_pointer, IF 0x02 ; SAVE DATA POINTER +- +-at 0x0000010d : */ 0x800c0002,0x0000046c, +-/* +- JUMP munge_restore_pointers, IF 0x03 ; RESTORE POINTERS +- +-at 0x0000010f : */ 0x800c0003,0x00000518, +-/* +- JUMP munge_disconnect, IF 0x04 ; DISCONNECT +- +-at 0x00000111 : */ 0x800c0004,0x0000056c, +-/* +- INT int_msg_1, IF 0x07 ; MESSAGE REJECT +- +-at 0x00000113 : */ 0x980c0007,0x01020000, +-/* +- INT int_msg_1, IF 0x0f ; INITIATE RECOVERY +- +-at 0x00000115 : */ 0x980c000f,0x01020000, +-/* +- +- +- +- JUMP reject_message +- +-at 0x00000117 : */ 0x80080000,0x00000604, +-/* +- +-munge_2: +- JUMP reject_message +- +-at 0x00000119 : */ 0x80080000,0x00000604, +-/* +-; +-; The SCSI standard allows targets to recover from transient +-; error conditions by backing up the data pointer with a +-; RESTORE POINTERS message. +-; +-; So, we must save and restore the _residual_ code as well as +-; the current instruction pointer. Because of this messiness, +-; it is simpler to put dynamic code in the dsa for this and to +-; just do a simple jump down there. 
+-; +- +-munge_save_data_pointer: +- +- ; We have something in TEMP here, so first we must save that +- MOVE TEMP0 TO SFBR +- +-at 0x0000011b : */ 0x721c0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH0 +- +-at 0x0000011d : */ 0x6a340000,0x00000000, +-/* +- MOVE TEMP1 TO SFBR +- +-at 0x0000011f : */ 0x721d0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH1 +- +-at 0x00000121 : */ 0x6a350000,0x00000000, +-/* +- MOVE TEMP2 TO SFBR +- +-at 0x00000123 : */ 0x721e0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH2 +- +-at 0x00000125 : */ 0x6a360000,0x00000000, +-/* +- MOVE TEMP3 TO SFBR +- +-at 0x00000127 : */ 0x721f0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH3 +- +-at 0x00000129 : */ 0x6a370000,0x00000000, +-/* +- MOVE MEMORY 4, addr_scratch, jump_temp + 4 +- +-at 0x0000012b : */ 0xc0000004,0x00000000,0x000009c8, +-/* +- ; Now restore DSA +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x0000012e : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- MOVE DSA0 + dsa_save_data_pointer TO SFBR +- +-at 0x00000131 : */ 0x76100000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH0 +- +-at 0x00000133 : */ 0x6a340000,0x00000000, +-/* +- MOVE DSA1 + 0xff TO SFBR WITH CARRY +- +-at 0x00000135 : */ 0x7711ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH1 +- +-at 0x00000137 : */ 0x6a350000,0x00000000, +-/* +- MOVE DSA2 + 0xff TO SFBR WITH CARRY +- +-at 0x00000139 : */ 0x7712ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH2 +- +-at 0x0000013b : */ 0x6a360000,0x00000000, +-/* +- MOVE DSA3 + 0xff TO SFBR WITH CARRY +- +-at 0x0000013d : */ 0x7713ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH3 +- +-at 0x0000013f : */ 0x6a370000,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4 +- +-at 0x00000141 : */ 0xc0000004,0x00000000,0x00000514, +-/* +- +-jump_dsa_save: +- JUMP 0 +- +-at 0x00000144 : */ 0x80080000,0x00000000, +-/* +- +-munge_restore_pointers: +- +- ; The code at dsa_restore_pointers will RETURN, but we don't care +- ; about TEMP here, as it will overwrite it anyway. 
+- +- MOVE DSA0 + dsa_restore_pointers TO SFBR +- +-at 0x00000146 : */ 0x76100000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH0 +- +-at 0x00000148 : */ 0x6a340000,0x00000000, +-/* +- MOVE DSA1 + 0xff TO SFBR WITH CARRY +- +-at 0x0000014a : */ 0x7711ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH1 +- +-at 0x0000014c : */ 0x6a350000,0x00000000, +-/* +- MOVE DSA2 + 0xff TO SFBR WITH CARRY +- +-at 0x0000014e : */ 0x7712ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH2 +- +-at 0x00000150 : */ 0x6a360000,0x00000000, +-/* +- MOVE DSA3 + 0xff TO SFBR WITH CARRY +- +-at 0x00000152 : */ 0x7713ff00,0x00000000, +-/* +- MOVE SFBR TO SCRATCH3 +- +-at 0x00000154 : */ 0x6a370000,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4 +- +-at 0x00000156 : */ 0xc0000004,0x00000000,0x00000568, +-/* +- +-jump_dsa_restore: +- JUMP 0 +- +-at 0x00000159 : */ 0x80080000,0x00000000, +-/* +- +- +-munge_disconnect: +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- JUMP dsa_schedule +- +-at 0x0000015b : */ 0x80080000,0x00000178, +-/* +- +- +- +- +- +-munge_extended: +- CLEAR ACK +- +-at 0x0000015d : */ 0x60000040,0x00000000, +-/* +- INT int_err_unexpected_phase, WHEN NOT MSG_IN +- +-at 0x0000015f : */ 0x9f030000,0x00000000, +-/* +- MOVE 1, msg_buf + 1, WHEN MSG_IN +- +-at 0x00000161 : */ 0x0f000001,0x00000001, +-/* +- JUMP munge_extended_2, IF 0x02 +- +-at 0x00000163 : */ 0x800c0002,0x000005a4, +-/* +- JUMP munge_extended_3, IF 0x03 +- +-at 0x00000165 : */ 0x800c0003,0x000005d4, +-/* +- JUMP reject_message +- +-at 0x00000167 : */ 0x80080000,0x00000604, +-/* +- +-munge_extended_2: +- CLEAR ACK +- +-at 0x00000169 : */ 0x60000040,0x00000000, +-/* +- MOVE 1, msg_buf + 2, WHEN MSG_IN +- +-at 0x0000016b : */ 0x0f000001,0x00000002, +-/* +- JUMP reject_message, IF NOT 0x02 ; Must be WDTR +- +-at 0x0000016d : */ 0x80040002,0x00000604, +-/* +- CLEAR ACK +- +-at 0x0000016f : */ 0x60000040,0x00000000, +-/* +- MOVE 1, msg_buf + 3, WHEN MSG_IN +- +-at 0x00000171 : */ 0x0f000001,0x00000003, +-/* +- INT int_msg_wdtr +- +-at 0x00000173 : */ 0x98080000,0x01000000, +-/* +- +-munge_extended_3: +- CLEAR ACK +- +-at 0x00000175 : */ 0x60000040,0x00000000, +-/* +- MOVE 1, msg_buf + 2, WHEN MSG_IN +- +-at 0x00000177 : */ 0x0f000001,0x00000002, +-/* +- JUMP reject_message, IF NOT 0x01 ; Must be SDTR +- +-at 0x00000179 : */ 0x80040001,0x00000604, +-/* +- CLEAR ACK +- +-at 0x0000017b : */ 0x60000040,0x00000000, +-/* +- MOVE 2, msg_buf + 3, WHEN MSG_IN +- +-at 0x0000017d : */ 0x0f000002,0x00000003, +-/* +- INT int_msg_sdtr +- +-at 0x0000017f : */ 0x98080000,0x01010000, +-/* +- +-ENTRY reject_message +-reject_message: +- SET ATN +- +-at 0x00000181 : */ 0x58000008,0x00000000, +-/* +- CLEAR ACK +- +-at 0x00000183 : */ 0x60000040,0x00000000, +-/* +- MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT +- +-at 0x00000185 : */ 0x0e000001,0x00000000, +-/* +- RETURN +- +-at 0x00000187 : */ 0x90080000,0x00000000, +-/* +- +-ENTRY accept_message +-accept_message: +- CLEAR ATN +- +-at 0x00000189 : */ 0x60000008,0x00000000, +-/* +- CLEAR ACK +- +-at 0x0000018b : */ 0x60000040,0x00000000, +-/* +- RETURN +- +-at 0x0000018d : */ 0x90080000,0x00000000, +-/* +- +-ENTRY respond_message +-respond_message: +- SET ATN +- +-at 0x0000018f : */ 0x58000008,0x00000000, +-/* +- CLEAR ACK +- +-at 0x00000191 : */ 0x60000040,0x00000000, +-/* +- MOVE FROM dsa_msgout_other, WHEN MSG_OUT +- +-at 0x00000193 : */ 0x1e000000,0x00000068, +-/* +- RETURN +- +-at 0x00000195 : */ 0x90080000,0x00000000, +-/* +- +-; +-; command_complete +-; +-; PURPOSE : handle command 
termination when STATUS IN is detected by reading +-; a status byte followed by a command termination message. +-; +-; Normal termination results in an INTFLY instruction, and +-; the host system can pick out which command terminated by +-; examining the MESSAGE and STATUS buffers of all currently +-; executing commands; +-; +-; Abnormal (CHECK_CONDITION) termination results in an +-; int_err_check_condition interrupt so that a REQUEST SENSE +-; command can be issued out-of-order so that no other command +-; clears the contingent allegiance condition. +-; +-; +-; INPUTS : DSA - command +-; +-; CALLS : OK +-; +-; EXITS : On successful termination, control is passed to schedule. +-; On abnormal termination, the user will usually modify the +-; DSA fields and corresponding buffers and return control +-; to select. +-; +- +-ENTRY command_complete +-command_complete: +- MOVE FROM dsa_status, WHEN STATUS +- +-at 0x00000197 : */ 0x1b000000,0x00000060, +-/* +- +- MOVE SFBR TO SCRATCH0 ; Save status +- +-at 0x00000199 : */ 0x6a340000,0x00000000, +-/* +- +-ENTRY command_complete_msgin +-command_complete_msgin: +- MOVE FROM dsa_msgin, WHEN MSG_IN +- +-at 0x0000019b : */ 0x1f000000,0x00000058, +-/* +-; Indicate that we should be expecting a disconnect +- +- +- +- ; Above code cleared the Unexpected Disconnect bit, what do we do? +- +- CLEAR ACK +- +-at 0x0000019d : */ 0x60000040,0x00000000, +-/* +- +- WAIT DISCONNECT +- +-at 0x0000019f : */ 0x48000000,0x00000000, +-/* +- +-; +-; The SCSI specification states that when a UNIT ATTENTION condition +-; is pending, as indicated by a CHECK CONDITION status message, +-; the target shall revert to asynchronous transfers. Since +-; synchronous transfers parameters are maintained on a per INITIATOR/TARGET +-; basis, and returning control to our scheduler could work on a command +-; running on another lun on that target using the old parameters, we must +-; interrupt the host processor to get them changed, or change them ourselves. +-; +-; Once SCSI-II tagged queueing is implemented, things will be even more +-; hairy, since contingent allegiance conditions exist on a per-target/lun +-; basis, and issuing a new command with a different tag would clear it. +-; In these cases, we must interrupt the host processor to get a request +-; added to the HEAD of the queue with the request sense command, or we +-; must automatically issue the request sense command. +- +- +- +- +- +- +- +- INT int_norm_emulateintfly +- +-at 0x000001a1 : */ 0x98080000,0x02060000, +-/* +- +- +- +- +- +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x000001a3 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +- +- JUMP schedule +- +-at 0x000001a6 : */ 0x80080000,0x00000000, +-/* +-command_failed: +- INT int_err_check_condition +- +-at 0x000001a8 : */ 0x98080000,0x00030000, +-/* +- +- +- +- +-; +-; wait_reselect +-; +-; PURPOSE : This is essentially the idle routine, where control lands +-; when there are no new processes to schedule. wait_reselect +-; waits for reselection, selection, and new commands. +-; +-; When a successful reselection occurs, with the aid +-; of fixed up code in each DSA, wait_reselect walks the +-; reconnect_dsa_queue, asking each dsa if the target ID +-; and LUN match its. +-; +-; If a match is found, a call is made back to reselected_ok, +-; which through the miracles of self modifying code, extracts +-; the found DSA from the reconnect_dsa_queue and then +-; returns control to the DSAs thread of execution. 
+-; +-; INPUTS : NONE +-; +-; CALLS : OK +-; +-; MODIFIES : DSA, +-; +-; EXITS : On successful reselection, control is returned to the +-; DSA which called reselected_ok. If the WAIT RESELECT +-; was interrupted by a new commands arrival signaled by +-; SIG_P, control is passed to schedule. If the NCR is +-; selected, the host system is interrupted with an +-; int_err_selected which is usually responded to by +-; setting DSP to the target_abort address. +- +-ENTRY wait_reselect +-wait_reselect: +- +- +- +- +- +- +- WAIT RESELECT wait_reselect_failed +- +-at 0x000001aa : */ 0x50000000,0x00000800, +-/* +- +-reselected: +- +- +- +- CLEAR TARGET +- +-at 0x000001ac : */ 0x60000200,0x00000000, +-/* +- +- ; Read all data needed to reestablish the nexus - +- MOVE 1, reselected_identify, WHEN MSG_IN +- +-at 0x000001ae : */ 0x0f000001,0x00000000, +-/* +- ; We used to CLEAR ACK here. +- +- +- +- +- +- ; Point DSA at the current head of the disconnected queue. +- +- MOVE MEMORY 4, reconnect_dsa_head, addr_scratch +- +-at 0x000001b0 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, saved_dsa +- +-at 0x000001b3 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +- +- +- ; Fix the update-next pointer so that the reconnect_dsa_head +- ; pointer is the one that will be updated if this DSA is a hit +- ; and we remove it from the queue. +- +- MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8 +- +-at 0x000001b6 : */ 0xc0000004,0x00000000,0x000007ec, +-/* +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x000001b9 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +- +-ENTRY reselected_check_next +-reselected_check_next: +- +- +- +- ; Check for a NULL pointer. +- MOVE DSA0 TO SFBR +- +-at 0x000001bc : */ 0x72100000,0x00000000, +-/* +- JUMP reselected_not_end, IF NOT 0 +- +-at 0x000001be : */ 0x80040000,0x00000738, +-/* +- MOVE DSA1 TO SFBR +- +-at 0x000001c0 : */ 0x72110000,0x00000000, +-/* +- JUMP reselected_not_end, IF NOT 0 +- +-at 0x000001c2 : */ 0x80040000,0x00000738, +-/* +- MOVE DSA2 TO SFBR +- +-at 0x000001c4 : */ 0x72120000,0x00000000, +-/* +- JUMP reselected_not_end, IF NOT 0 +- +-at 0x000001c6 : */ 0x80040000,0x00000738, +-/* +- MOVE DSA3 TO SFBR +- +-at 0x000001c8 : */ 0x72130000,0x00000000, +-/* +- JUMP reselected_not_end, IF NOT 0 +- +-at 0x000001ca : */ 0x80040000,0x00000738, +-/* +- INT int_err_unexpected_reselect +- +-at 0x000001cc : */ 0x98080000,0x00020000, +-/* +- +-reselected_not_end: +- ; +- ; XXX the ALU is only eight bits wide, and the assembler +- ; wont do the dirt work for us. As long as dsa_check_reselect +- ; is negative, we need to sign extend with 1 bits to the full +- ; 32 bit width of the address. +- ; +- ; A potential work around would be to have a known alignment +- ; of the DSA structure such that the base address plus +- ; dsa_check_reselect doesn't require carrying from bytes +- ; higher than the LSB. 
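[Editor's note: the byte-at-a-time arithmetic the comment above apologizes for is worth seeing once in full. Adding the negative 8-bit dsa_check_reselect offset to a 32-bit DSA on an 8-bit ALU means adding 0xff with carry to each higher byte — which is exactly sign extension. A C model, assuming a negative offset as the comment requires:]

    /* 32-bit add of a sign-extended (negative) 8-bit offset using only
       8-bit adds with carry -- what reselected_not_end does via SFBR. */
    static u32 add_signed_byte(u32 base, u8 offset)
    {
        u32 b0, b1, b2, b3, carry;

        b0 = (base & 0xff) + offset;
        carry = b0 >> 8;
        b1 = ((base >> 8)  & 0xff) + 0xff + carry;  /* 0xff = sign bits */
        carry = b1 >> 8;
        b2 = ((base >> 16) & 0xff) + 0xff + carry;
        carry = b2 >> 8;
        b3 = ((base >> 24) & 0xff) + 0xff + carry;

        return (b0 & 0xff) | ((b1 & 0xff) << 8) |
               ((b2 & 0xff) << 16) | ((b3 & 0xff) << 24);
    }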
+- ; +- +- MOVE DSA0 TO SFBR +- +-at 0x000001ce : */ 0x72100000,0x00000000, +-/* +- MOVE SFBR + dsa_check_reselect TO SCRATCH0 +- +-at 0x000001d0 : */ 0x6e340000,0x00000000, +-/* +- MOVE DSA1 TO SFBR +- +-at 0x000001d2 : */ 0x72110000,0x00000000, +-/* +- MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY +- +-at 0x000001d4 : */ 0x6f35ff00,0x00000000, +-/* +- MOVE DSA2 TO SFBR +- +-at 0x000001d6 : */ 0x72120000,0x00000000, +-/* +- MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY +- +-at 0x000001d8 : */ 0x6f36ff00,0x00000000, +-/* +- MOVE DSA3 TO SFBR +- +-at 0x000001da : */ 0x72130000,0x00000000, +-/* +- MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY +- +-at 0x000001dc : */ 0x6f37ff00,0x00000000, +-/* +- +- +- MOVE MEMORY 4, addr_scratch, reselected_check + 4 +- +-at 0x000001de : */ 0xc0000004,0x00000000,0x00000794, +-/* +- +- +- ; Time to correct DSA following memory move +- MOVE MEMORY 4, saved_dsa, addr_dsa +- +-at 0x000001e1 : */ 0xc0000004,0x00000000,0x00000000, +-/* +- +-reselected_check: +- JUMP 0 +- +-at 0x000001e4 : */ 0x80080000,0x00000000, +-/* +- +- +-; +-; +- +-; We have problems here - the memory move corrupts TEMP and DSA. This +-; routine is called from DSA code, and patched from many places. Scratch +-; is probably free when it is called. +-; We have to: +-; copy temp to scratch, one byte at a time +-; write scratch to patch a jump in place of the return +-; do the move memory +-; jump to the patched in return address +-; DSA is corrupt when we get here, and can be left corrupt +- +-ENTRY reselected_ok +-reselected_ok: +- MOVE TEMP0 TO SFBR +- +-at 0x000001e6 : */ 0x721c0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH0 +- +-at 0x000001e8 : */ 0x6a340000,0x00000000, +-/* +- MOVE TEMP1 TO SFBR +- +-at 0x000001ea : */ 0x721d0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH1 +- +-at 0x000001ec : */ 0x6a350000,0x00000000, +-/* +- MOVE TEMP2 TO SFBR +- +-at 0x000001ee : */ 0x721e0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH2 +- +-at 0x000001f0 : */ 0x6a360000,0x00000000, +-/* +- MOVE TEMP3 TO SFBR +- +-at 0x000001f2 : */ 0x721f0000,0x00000000, +-/* +- MOVE SFBR TO SCRATCH3 +- +-at 0x000001f4 : */ 0x6a370000,0x00000000, +-/* +- MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4 +- +-at 0x000001f6 : */ 0xc0000004,0x00000000,0x000007f4, +-/* +-reselected_ok_patch: +- MOVE MEMORY 4, 0, 0 +- +-at 0x000001f9 : */ 0xc0000004,0x00000000,0x00000000, +-/* +-reselected_ok_jump: +- JUMP 0 +- +-at 0x000001fc : */ 0x80080000,0x00000000, +-/* +- +- +- +- +- +-selected: +- INT int_err_selected; +- +-at 0x000001fe : */ 0x98080000,0x00010000, +-/* +- +-; +-; A select or reselect failure can be caused by one of two conditions : +-; 1. SIG_P was set. This will be the case if the user has written +-; a new value to a previously NULL head of the issue queue. +-; +-; 2. The NCR53c810 was selected or reselected by another device. +-; +-; 3. The bus was already busy since we were selected or reselected +-; before starting the command. +- +-wait_reselect_failed: +- +- +- +-; Check selected bit. +- +- ; Must work out how to tell if we are selected.... +- +- +- +- +-; Reading CTEST2 clears the SIG_P bit in the ISTAT register. +- MOVE CTEST2 & 0x40 TO SFBR +- +-at 0x00000200 : */ 0x74164000,0x00000000, +-/* +- JUMP schedule, IF 0x40 +- +-at 0x00000202 : */ 0x800c0040,0x00000000, +-/* +-; Check connected bit. 
+-; FIXME: this needs to change if we support target mode +- MOVE ISTAT & 0x08 TO SFBR +- +-at 0x00000204 : */ 0x74210800,0x00000000, +-/* +- JUMP reselected, IF 0x08 +- +-at 0x00000206 : */ 0x800c0008,0x000006b0, +-/* +-; FIXME : Something bogus happened, and we shouldn't fail silently. +- +- +- +- INT int_debug_panic +- +-at 0x00000208 : */ 0x98080000,0x030b0000, +-/* +- +- +- +-select_failed: +- +- ; Disable selection timer +- MOVE CTEST7 | 0x10 TO CTEST7 +- +-at 0x0000020a : */ 0x7a1b1000,0x00000000, +-/* +- +- +- +- +-; Otherwise, mask the selected and reselected bits off SIST0 +- +- ; Let's assume we don't get selected for now +- MOVE SSTAT0 & 0x10 TO SFBR +- +-at 0x0000020c : */ 0x740d1000,0x00000000, +-/* +- +- +- +- +- JUMP reselected, IF 0x10 +- +-at 0x0000020e : */ 0x800c0010,0x000006b0, +-/* +-; If SIGP is set, the user just gave us another command, and +-; we should restart or return to the scheduler. +-; Reading CTEST2 clears the SIG_P bit in the ISTAT register. +- MOVE CTEST2 & 0x40 TO SFBR +- +-at 0x00000210 : */ 0x74164000,0x00000000, +-/* +- JUMP select, IF 0x40 +- +-at 0x00000212 : */ 0x800c0040,0x000001f8, +-/* +-; Check connected bit. +-; FIXME: this needs to change if we support target mode +-; FIXME: is this really necessary? +- MOVE ISTAT & 0x08 TO SFBR +- +-at 0x00000214 : */ 0x74210800,0x00000000, +-/* +- JUMP reselected, IF 0x08 +- +-at 0x00000216 : */ 0x800c0008,0x000006b0, +-/* +-; FIXME : Something bogus happened, and we shouldn't fail silently. +- +- +- +- INT int_debug_panic +- +-at 0x00000218 : */ 0x98080000,0x030b0000, +-/* +- +- +-; +-; test_1 +-; test_2 +-; +-; PURPOSE : run some verification tests on the NCR. test_1 +-; copies test_src to test_dest and interrupts the host +-; processor, testing for cache coherency and interrupt +-; problems in the processes. +-; +-; test_2 runs a command with offsets relative to the +-; DSA on entry, and is useful for miscellaneous experimentation. +-; +- +-; Verify that interrupts are working correctly and that we don't +-; have a cache invalidation problem. 
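[Editor's note: host-side, the test_1 handshake below reduces to: seed test_src, start the script at test_1, and when the interrupt arrives verify both the copy and the encoded interrupt value — type in the MSB, per the interrupt encoding defined earlier. A sketch with hypothetical helper names:]

    /* Host-side check for test_1: the script copies test_src to test_dest
       and raises int_test_1 (0x04000000: type 4 = testing interrupt). */
    #define NCR_INT_TYPE(dsps)  (((dsps) >> 24) & 0xff)

    static int check_test_1(u32 dsps, u32 test_src, u32 test_dest)
    {
        if (NCR_INT_TYPE(dsps) != 0x04)   /* not a testing interrupt */
            return -1;
        if (test_dest != test_src)        /* cache-coherency problem */
            return -1;
        return 0;                         /* interrupts and DMA both work */
    }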
+- +-ABSOLUTE test_src = 0, test_dest = 0 +-ENTRY test_1 +-test_1: +- MOVE MEMORY 4, test_src, test_dest +- +-at 0x0000021a : */ 0xc0000004,0x00000000,0x00000000, +-/* +- INT int_test_1 +- +-at 0x0000021d : */ 0x98080000,0x04000000, +-/* +- +-; +-; Run arbitrary commands, with test code establishing a DSA +-; +- +-ENTRY test_2 +-test_2: +- CLEAR TARGET +- +-at 0x0000021f : */ 0x60000200,0x00000000, +-/* +- +- ; Enable selection timer +- +- +- +- MOVE CTEST7 & 0xef TO CTEST7 +- +-at 0x00000221 : */ 0x7c1bef00,0x00000000, +-/* +- +- +- SELECT ATN FROM 0, test_2_fail +- +-at 0x00000223 : */ 0x43000000,0x000008dc, +-/* +- JUMP test_2_msgout, WHEN MSG_OUT +- +-at 0x00000225 : */ 0x860b0000,0x0000089c, +-/* +-ENTRY test_2_msgout +-test_2_msgout: +- +- ; Disable selection timer +- MOVE CTEST7 | 0x10 TO CTEST7 +- +-at 0x00000227 : */ 0x7a1b1000,0x00000000, +-/* +- +- MOVE FROM 8, WHEN MSG_OUT +- +-at 0x00000229 : */ 0x1e000000,0x00000008, +-/* +- MOVE FROM 16, WHEN CMD +- +-at 0x0000022b : */ 0x1a000000,0x00000010, +-/* +- MOVE FROM 24, WHEN DATA_IN +- +-at 0x0000022d : */ 0x19000000,0x00000018, +-/* +- MOVE FROM 32, WHEN STATUS +- +-at 0x0000022f : */ 0x1b000000,0x00000020, +-/* +- MOVE FROM 40, WHEN MSG_IN +- +-at 0x00000231 : */ 0x1f000000,0x00000028, +-/* +- +- +- +- CLEAR ACK +- +-at 0x00000233 : */ 0x60000040,0x00000000, +-/* +- WAIT DISCONNECT +- +-at 0x00000235 : */ 0x48000000,0x00000000, +-/* +-test_2_fail: +- +- ; Disable selection timer +- MOVE CTEST7 | 0x10 TO CTEST7 +- +-at 0x00000237 : */ 0x7a1b1000,0x00000000, +-/* +- +- INT int_test_2 +- +-at 0x00000239 : */ 0x98080000,0x04010000, +-/* +- +-ENTRY debug_break +-debug_break: +- INT int_debug_break +- +-at 0x0000023b : */ 0x98080000,0x03000000, +-/* +- +-; +-; initiator_abort +-; target_abort +-; +-; PURPOSE : Abort the currently established nexus from with initiator +-; or target mode. +-; +-; +- +-ENTRY target_abort +-target_abort: +- SET TARGET +- +-at 0x0000023d : */ 0x58000200,0x00000000, +-/* +- DISCONNECT +- +-at 0x0000023f : */ 0x48000000,0x00000000, +-/* +- CLEAR TARGET +- +-at 0x00000241 : */ 0x60000200,0x00000000, +-/* +- JUMP schedule +- +-at 0x00000243 : */ 0x80080000,0x00000000, +-/* +- +-ENTRY initiator_abort +-initiator_abort: +- SET ATN +- +-at 0x00000245 : */ 0x58000008,0x00000000, +-/* +-; +-; The SCSI-I specification says that targets may go into MSG out at +-; their leisure upon receipt of the ATN single. On all versions of the +-; specification, we can't change phases until REQ transitions true->false, +-; so we need to sink/source one byte of data to allow the transition. +-; +-; For the sake of safety, we'll only source one byte of data in all +-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an +-; arbitrary number of bytes. 
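[Editor's note: the eat_*/spew_* blocks below implement this strategy phase by phase. A C model with hypothetical phase constants and byte helpers: sink arbitrarily many inbound bytes, source exactly one outbound zero, then send the abort message once MSG OUT is reachable:]

    /* initiator_abort's sink/source loop in C. */
    set_atn();                            /* SET ATN */
    for (;;) {
        int phase = bus_phase();          /* hypothetical accessor */

        if (phase == PHASE_MSGIN || phase == PHASE_STATUS ||
            phase == PHASE_DATAIN) {
            sink_byte();                  /* eat_msgin / eat_status / eat_datain */
            continue;                     /* loop while the phase persists */
        }
        if (phase == PHASE_CMD || phase == PHASE_DATAOUT)
            source_byte(0);               /* spew_cmd / spew_dataout: one byte */
        break;                            /* fall through to sated */
    }
    send_message(ABORT);                  /* MOVE 1, NCR53c7xx_msg_abort */
    wait_disconnect();                    /* WAIT DISCONNECT */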
+- JUMP spew_cmd, WHEN CMD +- +-at 0x00000247 : */ 0x820b0000,0x0000094c, +-/* +- JUMP eat_msgin, WHEN MSG_IN +- +-at 0x00000249 : */ 0x870b0000,0x0000095c, +-/* +- JUMP eat_datain, WHEN DATA_IN +- +-at 0x0000024b : */ 0x810b0000,0x0000098c, +-/* +- JUMP eat_status, WHEN STATUS +- +-at 0x0000024d : */ 0x830b0000,0x00000974, +-/* +- JUMP spew_dataout, WHEN DATA_OUT +- +-at 0x0000024f : */ 0x800b0000,0x000009a4, +-/* +- JUMP sated +- +-at 0x00000251 : */ 0x80080000,0x000009ac, +-/* +-spew_cmd: +- MOVE 1, NCR53c7xx_zero, WHEN CMD +- +-at 0x00000253 : */ 0x0a000001,0x00000000, +-/* +- JUMP sated +- +-at 0x00000255 : */ 0x80080000,0x000009ac, +-/* +-eat_msgin: +- MOVE 1, NCR53c7xx_sink, WHEN MSG_IN +- +-at 0x00000257 : */ 0x0f000001,0x00000000, +-/* +- JUMP eat_msgin, WHEN MSG_IN +- +-at 0x00000259 : */ 0x870b0000,0x0000095c, +-/* +- JUMP sated +- +-at 0x0000025b : */ 0x80080000,0x000009ac, +-/* +-eat_status: +- MOVE 1, NCR53c7xx_sink, WHEN STATUS +- +-at 0x0000025d : */ 0x0b000001,0x00000000, +-/* +- JUMP eat_status, WHEN STATUS +- +-at 0x0000025f : */ 0x830b0000,0x00000974, +-/* +- JUMP sated +- +-at 0x00000261 : */ 0x80080000,0x000009ac, +-/* +-eat_datain: +- MOVE 1, NCR53c7xx_sink, WHEN DATA_IN +- +-at 0x00000263 : */ 0x09000001,0x00000000, +-/* +- JUMP eat_datain, WHEN DATA_IN +- +-at 0x00000265 : */ 0x810b0000,0x0000098c, +-/* +- JUMP sated +- +-at 0x00000267 : */ 0x80080000,0x000009ac, +-/* +-spew_dataout: +- MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT +- +-at 0x00000269 : */ 0x08000001,0x00000000, +-/* +-sated: +- +- +- +- MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT +- +-at 0x0000026b : */ 0x0e000001,0x00000000, +-/* +- WAIT DISCONNECT +- +-at 0x0000026d : */ 0x48000000,0x00000000, +-/* +- INT int_norm_aborted +- +-at 0x0000026f : */ 0x98080000,0x02040000, +-/* +- +- +- +- +-; Little patched jump, used to overcome problems with TEMP getting +-; corrupted on memory moves. 
+- +-jump_temp: +- JUMP 0 +- +-at 0x00000271 : */ 0x80080000,0x00000000, +-}; +- +-#define A_NCR53c7xx_msg_abort 0x00000000 +-static u32 A_NCR53c7xx_msg_abort_used[] __attribute((unused)) = { +- 0x0000026c, +-}; +- +-#define A_NCR53c7xx_msg_reject 0x00000000 +-static u32 A_NCR53c7xx_msg_reject_used[] __attribute((unused)) = { +- 0x00000186, +-}; +- +-#define A_NCR53c7xx_sink 0x00000000 +-static u32 A_NCR53c7xx_sink_used[] __attribute((unused)) = { +- 0x00000258, +- 0x0000025e, +- 0x00000264, +-}; +- +-#define A_NCR53c7xx_zero 0x00000000 +-static u32 A_NCR53c7xx_zero_used[] __attribute((unused)) = { +- 0x00000254, +- 0x0000026a, +-}; +- +-#define A_NOP_insn 0x00000000 +-static u32 A_NOP_insn_used[] __attribute((unused)) = { +- 0x00000017, +-}; +- +-#define A_addr_dsa 0x00000000 +-static u32 A_addr_dsa_used[] __attribute((unused)) = { +- 0x0000000f, +- 0x00000026, +- 0x00000033, +- 0x00000040, +- 0x00000055, +- 0x00000079, +- 0x0000008e, +- 0x000000bc, +- 0x000000d2, +- 0x00000130, +- 0x000001a5, +- 0x000001bb, +- 0x000001e3, +-}; +- +-#define A_addr_reconnect_dsa_head 0x00000000 +-static u32 A_addr_reconnect_dsa_head_used[] __attribute((unused)) = { +- 0x000001b7, +-}; +- +-#define A_addr_scratch 0x00000000 +-static u32 A_addr_scratch_used[] __attribute((unused)) = { +- 0x00000002, +- 0x00000004, +- 0x00000008, +- 0x00000020, +- 0x00000022, +- 0x00000049, +- 0x00000060, +- 0x0000006a, +- 0x00000071, +- 0x00000073, +- 0x000000ab, +- 0x000000b5, +- 0x000000c1, +- 0x000000cb, +- 0x0000012c, +- 0x00000142, +- 0x00000157, +- 0x000001b2, +- 0x000001b4, +- 0x000001df, +- 0x000001f7, +-}; +- +-#define A_addr_temp 0x00000000 +-static u32 A_addr_temp_used[] __attribute((unused)) = { +-}; +- +-#define A_dmode_memory_to_memory 0x00000000 +-static u32 A_dmode_memory_to_memory_used[] __attribute((unused)) = { +-}; +- +-#define A_dmode_memory_to_ncr 0x00000000 +-static u32 A_dmode_memory_to_ncr_used[] __attribute((unused)) = { +-}; +- +-#define A_dmode_ncr_to_memory 0x00000000 +-static u32 A_dmode_ncr_to_memory_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_check_reselect 0x00000000 +-static u32 A_dsa_check_reselect_used[] __attribute((unused)) = { +- 0x000001d0, +-}; +- +-#define A_dsa_cmdout 0x00000048 +-static u32 A_dsa_cmdout_used[] __attribute((unused)) = { +- 0x0000009a, +-}; +- +-#define A_dsa_cmnd 0x00000038 +-static u32 A_dsa_cmnd_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_datain 0x00000054 +-static u32 A_dsa_datain_used[] __attribute((unused)) = { +- 0x000000c2, +-}; +- +-#define A_dsa_dataout 0x00000050 +-static u32 A_dsa_dataout_used[] __attribute((unused)) = { +- 0x000000ac, +-}; +- +-#define A_dsa_end 0x00000070 +-static u32 A_dsa_end_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_fields_start 0x00000000 +-static u32 A_dsa_fields_start_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_msgin 0x00000058 +-static u32 A_dsa_msgin_used[] __attribute((unused)) = { +- 0x0000019c, +-}; +- +-#define A_dsa_msgout 0x00000040 +-static u32 A_dsa_msgout_used[] __attribute((unused)) = { +- 0x00000089, +-}; +- +-#define A_dsa_msgout_other 0x00000068 +-static u32 A_dsa_msgout_other_used[] __attribute((unused)) = { +- 0x00000194, +-}; +- +-#define A_dsa_next 0x00000030 +-static u32 A_dsa_next_used[] __attribute((unused)) = { +- 0x00000061, +-}; +- +-#define A_dsa_restore_pointers 0x00000000 +-static u32 A_dsa_restore_pointers_used[] __attribute((unused)) = { +- 0x00000146, +-}; +- +-#define A_dsa_save_data_pointer 0x00000000 +-static u32 A_dsa_save_data_pointer_used[] 
__attribute((unused)) = { +- 0x00000131, +-}; +- +-#define A_dsa_select 0x0000003c +-static u32 A_dsa_select_used[] __attribute((unused)) = { +- 0x00000082, +-}; +- +-#define A_dsa_sscf_710 0x00000000 +-static u32 A_dsa_sscf_710_used[] __attribute((unused)) = { +- 0x00000007, +-}; +- +-#define A_dsa_status 0x00000060 +-static u32 A_dsa_status_used[] __attribute((unused)) = { +- 0x00000198, +-}; +- +-#define A_dsa_temp_addr_array_value 0x00000000 +-static u32 A_dsa_temp_addr_array_value_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_temp_addr_dsa_value 0x00000000 +-static u32 A_dsa_temp_addr_dsa_value_used[] __attribute((unused)) = { +- 0x00000001, +-}; +- +-#define A_dsa_temp_addr_new_value 0x00000000 +-static u32 A_dsa_temp_addr_new_value_used[] __attribute((unused)) = { +-}; +- +-#define A_dsa_temp_addr_next 0x00000000 +-static u32 A_dsa_temp_addr_next_used[] __attribute((unused)) = { +- 0x0000001c, +- 0x0000004f, +-}; +- +-#define A_dsa_temp_addr_residual 0x00000000 +-static u32 A_dsa_temp_addr_residual_used[] __attribute((unused)) = { +- 0x0000002d, +- 0x0000003b, +-}; +- +-#define A_dsa_temp_addr_saved_pointer 0x00000000 +-static u32 A_dsa_temp_addr_saved_pointer_used[] __attribute((unused)) = { +- 0x0000002b, +- 0x00000037, +-}; +- +-#define A_dsa_temp_addr_saved_residual 0x00000000 +-static u32 A_dsa_temp_addr_saved_residual_used[] __attribute((unused)) = { +- 0x0000002e, +- 0x0000003a, +-}; +- +-#define A_dsa_temp_lun 0x00000000 +-static u32 A_dsa_temp_lun_used[] __attribute((unused)) = { +- 0x0000004c, +-}; +- +-#define A_dsa_temp_next 0x00000000 +-static u32 A_dsa_temp_next_used[] __attribute((unused)) = { +- 0x0000001f, +-}; +- +-#define A_dsa_temp_sync 0x00000000 +-static u32 A_dsa_temp_sync_used[] __attribute((unused)) = { +- 0x00000057, +-}; +- +-#define A_dsa_temp_target 0x00000000 +-static u32 A_dsa_temp_target_used[] __attribute((unused)) = { +- 0x00000045, +-}; +- +-#define A_emulfly 0x00000000 +-static u32 A_emulfly_used[] __attribute((unused)) = { +-}; +- +-#define A_int_debug_break 0x03000000 +-static u32 A_int_debug_break_used[] __attribute((unused)) = { +- 0x0000023c, +-}; +- +-#define A_int_debug_panic 0x030b0000 +-static u32 A_int_debug_panic_used[] __attribute((unused)) = { +- 0x00000209, +- 0x00000219, +-}; +- +-#define A_int_err_check_condition 0x00030000 +-static u32 A_int_err_check_condition_used[] __attribute((unused)) = { +- 0x000001a9, +-}; +- +-#define A_int_err_no_phase 0x00040000 +-static u32 A_int_err_no_phase_used[] __attribute((unused)) = { +-}; +- +-#define A_int_err_selected 0x00010000 +-static u32 A_int_err_selected_used[] __attribute((unused)) = { +- 0x000001ff, +-}; +- +-#define A_int_err_unexpected_phase 0x00000000 +-static u32 A_int_err_unexpected_phase_used[] __attribute((unused)) = { +- 0x00000092, +- 0x00000098, +- 0x000000a0, +- 0x000000d6, +- 0x000000da, +- 0x000000dc, +- 0x000000e4, +- 0x000000e8, +- 0x000000ea, +- 0x000000f2, +- 0x000000f6, +- 0x000000f8, +- 0x000000fa, +- 0x00000160, +-}; +- +-#define A_int_err_unexpected_reselect 0x00020000 +-static u32 A_int_err_unexpected_reselect_used[] __attribute((unused)) = { +- 0x000001cd, +-}; +- +-#define A_int_msg_1 0x01020000 +-static u32 A_int_msg_1_used[] __attribute((unused)) = { +- 0x00000114, +- 0x00000116, +-}; +- +-#define A_int_msg_sdtr 0x01010000 +-static u32 A_int_msg_sdtr_used[] __attribute((unused)) = { +- 0x00000180, +-}; +- +-#define A_int_msg_wdtr 0x01000000 +-static u32 A_int_msg_wdtr_used[] __attribute((unused)) = { +- 0x00000174, +-}; +- +-#define 
A_int_norm_aborted 0x02040000 +-static u32 A_int_norm_aborted_used[] __attribute((unused)) = { +- 0x00000270, +-}; +- +-#define A_int_norm_command_complete 0x02020000 +-static u32 A_int_norm_command_complete_used[] __attribute((unused)) = { +-}; +- +-#define A_int_norm_disconnected 0x02030000 +-static u32 A_int_norm_disconnected_used[] __attribute((unused)) = { +-}; +- +-#define A_int_norm_emulateintfly 0x02060000 +-static u32 A_int_norm_emulateintfly_used[] __attribute((unused)) = { +- 0x000001a2, +-}; +- +-#define A_int_norm_reselect_complete 0x02010000 +-static u32 A_int_norm_reselect_complete_used[] __attribute((unused)) = { +-}; +- +-#define A_int_norm_reset 0x02050000 +-static u32 A_int_norm_reset_used[] __attribute((unused)) = { +-}; +- +-#define A_int_norm_select_complete 0x02000000 +-static u32 A_int_norm_select_complete_used[] __attribute((unused)) = { +-}; +- +-#define A_int_test_1 0x04000000 +-static u32 A_int_test_1_used[] __attribute((unused)) = { +- 0x0000021e, +-}; +- +-#define A_int_test_2 0x04010000 +-static u32 A_int_test_2_used[] __attribute((unused)) = { +- 0x0000023a, +-}; +- +-#define A_int_test_3 0x04020000 +-static u32 A_int_test_3_used[] __attribute((unused)) = { +-}; +- +-#define A_msg_buf 0x00000000 +-static u32 A_msg_buf_used[] __attribute((unused)) = { +- 0x00000108, +- 0x00000162, +- 0x0000016c, +- 0x00000172, +- 0x00000178, +- 0x0000017e, +-}; +- +-#define A_reconnect_dsa_head 0x00000000 +-static u32 A_reconnect_dsa_head_used[] __attribute((unused)) = { +- 0x0000006d, +- 0x00000074, +- 0x000001b1, +-}; +- +-#define A_reselected_identify 0x00000000 +-static u32 A_reselected_identify_used[] __attribute((unused)) = { +- 0x00000048, +- 0x000001af, +-}; +- +-#define A_reselected_tag 0x00000000 +-static u32 A_reselected_tag_used[] __attribute((unused)) = { +-}; +- +-#define A_saved_dsa 0x00000000 +-static u32 A_saved_dsa_used[] __attribute((unused)) = { +- 0x00000005, +- 0x0000000e, +- 0x00000023, +- 0x00000025, +- 0x00000032, +- 0x0000003f, +- 0x00000054, +- 0x0000005f, +- 0x00000070, +- 0x00000078, +- 0x0000008d, +- 0x000000aa, +- 0x000000bb, +- 0x000000c0, +- 0x000000d1, +- 0x0000012f, +- 0x000001a4, +- 0x000001b5, +- 0x000001ba, +- 0x000001e2, +-}; +- +-#define A_schedule 0x00000000 +-static u32 A_schedule_used[] __attribute((unused)) = { +- 0x0000007d, +- 0x000001a7, +- 0x00000203, +- 0x00000244, +-}; +- +-#define A_test_dest 0x00000000 +-static u32 A_test_dest_used[] __attribute((unused)) = { +- 0x0000021c, +-}; +- +-#define A_test_src 0x00000000 +-static u32 A_test_src_used[] __attribute((unused)) = { +- 0x0000021b, +-}; +- +-#define Ent_accept_message 0x00000624 +-#define Ent_cmdout_cmdout 0x00000264 +-#define Ent_command_complete 0x0000065c +-#define Ent_command_complete_msgin 0x0000066c +-#define Ent_data_transfer 0x0000026c +-#define Ent_datain_to_jump 0x00000334 +-#define Ent_debug_break 0x000008ec +-#define Ent_dsa_code_begin 0x00000000 +-#define Ent_dsa_code_check_reselect 0x0000010c +-#define Ent_dsa_code_fix_jump 0x00000058 +-#define Ent_dsa_code_restore_pointers 0x000000d8 +-#define Ent_dsa_code_save_data_pointer 0x000000a4 +-#define Ent_dsa_code_template 0x00000000 +-#define Ent_dsa_code_template_end 0x00000178 +-#define Ent_dsa_schedule 0x00000178 +-#define Ent_dsa_zero 0x00000178 +-#define Ent_end_data_transfer 0x000002a4 +-#define Ent_initiator_abort 0x00000914 +-#define Ent_msg_in 0x0000041c +-#define Ent_msg_in_restart 0x000003fc +-#define Ent_other_in 0x0000038c +-#define Ent_other_out 0x00000354 +-#define Ent_other_transfer 0x000003c4 
+-#define Ent_reject_message 0x00000604 +-#define Ent_reselected_check_next 0x000006f0 +-#define Ent_reselected_ok 0x00000798 +-#define Ent_respond_message 0x0000063c +-#define Ent_select 0x000001f8 +-#define Ent_select_msgout 0x00000218 +-#define Ent_target_abort 0x000008f4 +-#define Ent_test_1 0x00000868 +-#define Ent_test_2 0x0000087c +-#define Ent_test_2_msgout 0x0000089c +-#define Ent_wait_reselect 0x000006a8 +-static u32 LABELPATCHES[] __attribute((unused)) = { +- 0x00000011, +- 0x0000001a, +- 0x0000001d, +- 0x00000028, +- 0x0000002a, +- 0x00000035, +- 0x00000038, +- 0x00000042, +- 0x00000050, +- 0x00000052, +- 0x0000006b, +- 0x00000083, +- 0x00000085, +- 0x00000090, +- 0x00000094, +- 0x00000096, +- 0x0000009c, +- 0x0000009e, +- 0x000000a2, +- 0x000000a4, +- 0x000000a6, +- 0x000000a8, +- 0x000000b6, +- 0x000000b9, +- 0x000000cc, +- 0x000000cf, +- 0x000000d8, +- 0x000000de, +- 0x000000e0, +- 0x000000e6, +- 0x000000ec, +- 0x000000ee, +- 0x000000f4, +- 0x000000fc, +- 0x000000fe, +- 0x0000010a, +- 0x0000010c, +- 0x0000010e, +- 0x00000110, +- 0x00000112, +- 0x00000118, +- 0x0000011a, +- 0x0000012d, +- 0x00000143, +- 0x00000158, +- 0x0000015c, +- 0x00000164, +- 0x00000166, +- 0x00000168, +- 0x0000016e, +- 0x0000017a, +- 0x000001ab, +- 0x000001b8, +- 0x000001bf, +- 0x000001c3, +- 0x000001c7, +- 0x000001cb, +- 0x000001e0, +- 0x000001f8, +- 0x00000207, +- 0x0000020f, +- 0x00000213, +- 0x00000217, +- 0x00000224, +- 0x00000226, +- 0x00000248, +- 0x0000024a, +- 0x0000024c, +- 0x0000024e, +- 0x00000250, +- 0x00000252, +- 0x00000256, +- 0x0000025a, +- 0x0000025c, +- 0x00000260, +- 0x00000262, +- 0x00000266, +- 0x00000268, +-}; +- +-static struct { +- u32 offset; +- void *address; +-} EXTERNAL_PATCHES[] __attribute((unused)) = { +-}; +- +-static u32 INSTRUCTIONS __attribute((unused)) = 290; +-static u32 PATCHES __attribute((unused)) = 78; +-static u32 EXTERNAL_PATCHES_LEN __attribute((unused)) = 0; +diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped +--- linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/53c7xx_u.h_shipped 1969-12-31 19:00:00.000000000 -0500 +@@ -1,102 +0,0 @@ +-#undef A_NCR53c7xx_msg_abort +-#undef A_NCR53c7xx_msg_reject +-#undef A_NCR53c7xx_sink +-#undef A_NCR53c7xx_zero +-#undef A_NOP_insn +-#undef A_addr_dsa +-#undef A_addr_reconnect_dsa_head +-#undef A_addr_scratch +-#undef A_addr_temp +-#undef A_dmode_memory_to_memory +-#undef A_dmode_memory_to_ncr +-#undef A_dmode_ncr_to_memory +-#undef A_dsa_check_reselect +-#undef A_dsa_cmdout +-#undef A_dsa_cmnd +-#undef A_dsa_datain +-#undef A_dsa_dataout +-#undef A_dsa_end +-#undef A_dsa_fields_start +-#undef A_dsa_msgin +-#undef A_dsa_msgout +-#undef A_dsa_msgout_other +-#undef A_dsa_next +-#undef A_dsa_restore_pointers +-#undef A_dsa_save_data_pointer +-#undef A_dsa_select +-#undef A_dsa_sscf_710 +-#undef A_dsa_status +-#undef A_dsa_temp_addr_array_value +-#undef A_dsa_temp_addr_dsa_value +-#undef A_dsa_temp_addr_new_value +-#undef A_dsa_temp_addr_next +-#undef A_dsa_temp_addr_residual +-#undef A_dsa_temp_addr_saved_pointer +-#undef A_dsa_temp_addr_saved_residual +-#undef A_dsa_temp_lun +-#undef A_dsa_temp_next +-#undef A_dsa_temp_sync +-#undef A_dsa_temp_target +-#undef A_emulfly +-#undef A_int_debug_break +-#undef A_int_debug_panic +-#undef A_int_err_check_condition +-#undef A_int_err_no_phase +-#undef A_int_err_selected +-#undef A_int_err_unexpected_phase +-#undef A_int_err_unexpected_reselect +-#undef 
A_int_msg_1 +-#undef A_int_msg_sdtr +-#undef A_int_msg_wdtr +-#undef A_int_norm_aborted +-#undef A_int_norm_command_complete +-#undef A_int_norm_disconnected +-#undef A_int_norm_emulateintfly +-#undef A_int_norm_reselect_complete +-#undef A_int_norm_reset +-#undef A_int_norm_select_complete +-#undef A_int_test_1 +-#undef A_int_test_2 +-#undef A_int_test_3 +-#undef A_msg_buf +-#undef A_reconnect_dsa_head +-#undef A_reselected_identify +-#undef A_reselected_tag +-#undef A_saved_dsa +-#undef A_schedule +-#undef A_test_dest +-#undef A_test_src +-#undef Ent_accept_message +-#undef Ent_cmdout_cmdout +-#undef Ent_command_complete +-#undef Ent_command_complete_msgin +-#undef Ent_data_transfer +-#undef Ent_datain_to_jump +-#undef Ent_debug_break +-#undef Ent_dsa_code_begin +-#undef Ent_dsa_code_check_reselect +-#undef Ent_dsa_code_fix_jump +-#undef Ent_dsa_code_restore_pointers +-#undef Ent_dsa_code_save_data_pointer +-#undef Ent_dsa_code_template +-#undef Ent_dsa_code_template_end +-#undef Ent_dsa_schedule +-#undef Ent_dsa_zero +-#undef Ent_end_data_transfer +-#undef Ent_initiator_abort +-#undef Ent_msg_in +-#undef Ent_msg_in_restart +-#undef Ent_other_in +-#undef Ent_other_out +-#undef Ent_other_transfer +-#undef Ent_reject_message +-#undef Ent_reselected_check_next +-#undef Ent_reselected_ok +-#undef Ent_respond_message +-#undef Ent_select +-#undef Ent_select_msgout +-#undef Ent_target_abort +-#undef Ent_test_1 +-#undef Ent_test_2 +-#undef Ent_test_2_msgout +-#undef Ent_wait_reselect +diff -Nurb linux-2.6.22-570/drivers/scsi/BusLogic.c linux-2.6.22-591/drivers/scsi/BusLogic.c +--- linux-2.6.22-570/drivers/scsi/BusLogic.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/BusLogic.c 2007-12-21 15:36:12.000000000 -0500 +@@ -304,16 +304,8 @@ + static void BusLogic_DeallocateCCB(struct BusLogic_CCB *CCB) + { + struct BusLogic_HostAdapter *HostAdapter = CCB->HostAdapter; +- struct scsi_cmnd *cmd = CCB->Command; + +- if (cmd->use_sg != 0) { +- pci_unmap_sg(HostAdapter->PCI_Device, +- (struct scatterlist *)cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- } else if (cmd->request_bufflen != 0) { +- pci_unmap_single(HostAdapter->PCI_Device, CCB->DataPointer, +- CCB->DataLength, cmd->sc_data_direction); +- } ++ scsi_dma_unmap(CCB->Command); + pci_unmap_single(HostAdapter->PCI_Device, CCB->SenseDataPointer, + CCB->SenseDataLength, PCI_DMA_FROMDEVICE); + +@@ -2648,7 +2640,8 @@ + */ + if (CCB->CDB[0] == INQUIRY && CCB->CDB[1] == 0 && CCB->HostAdapterStatus == BusLogic_CommandCompletedNormally) { + struct BusLogic_TargetFlags *TargetFlags = &HostAdapter->TargetFlags[CCB->TargetID]; +- struct SCSI_Inquiry *InquiryResult = (struct SCSI_Inquiry *) Command->request_buffer; ++ struct SCSI_Inquiry *InquiryResult = ++ (struct SCSI_Inquiry *) scsi_sglist(Command); + TargetFlags->TargetExists = true; + TargetFlags->TaggedQueuingSupported = InquiryResult->CmdQue; + TargetFlags->WideTransfersSupported = InquiryResult->WBus16; +@@ -2819,9 +2812,8 @@ + int CDB_Length = Command->cmd_len; + int TargetID = Command->device->id; + int LogicalUnit = Command->device->lun; +- void *BufferPointer = Command->request_buffer; +- int BufferLength = Command->request_bufflen; +- int SegmentCount = Command->use_sg; ++ int BufferLength = scsi_bufflen(Command); ++ int Count; + struct BusLogic_CCB *CCB; + /* + SCSI REQUEST_SENSE commands will be executed automatically by the Host +@@ -2851,36 +2843,35 @@ + return 0; + } + } ++ + /* + Initialize the fields in the BusLogic Command Control Block (CCB). 
+ */ +- if (SegmentCount == 0 && BufferLength != 0) { +- CCB->Opcode = BusLogic_InitiatorCCB; +- CCB->DataLength = BufferLength; +- CCB->DataPointer = pci_map_single(HostAdapter->PCI_Device, +- BufferPointer, BufferLength, +- Command->sc_data_direction); +- } else if (SegmentCount != 0) { +- struct scatterlist *ScatterList = (struct scatterlist *) BufferPointer; +- int Segment, Count; ++ Count = scsi_dma_map(Command); ++ BUG_ON(Count < 0); ++ if (Count) { ++ struct scatterlist *sg; ++ int i; + +- Count = pci_map_sg(HostAdapter->PCI_Device, ScatterList, SegmentCount, +- Command->sc_data_direction); + CCB->Opcode = BusLogic_InitiatorCCB_ScatterGather; + CCB->DataLength = Count * sizeof(struct BusLogic_ScatterGatherSegment); + if (BusLogic_MultiMasterHostAdapterP(HostAdapter)) + CCB->DataPointer = (unsigned int) CCB->DMA_Handle + ((unsigned long) &CCB->ScatterGatherList - (unsigned long) CCB); + else + CCB->DataPointer = Virtual_to_32Bit_Virtual(CCB->ScatterGatherList); +- for (Segment = 0; Segment < Count; Segment++) { +- CCB->ScatterGatherList[Segment].SegmentByteCount = sg_dma_len(ScatterList + Segment); +- CCB->ScatterGatherList[Segment].SegmentDataPointer = sg_dma_address(ScatterList + Segment); ++ ++ scsi_for_each_sg(Command, sg, Count, i) { ++ CCB->ScatterGatherList[i].SegmentByteCount = ++ sg_dma_len(sg); ++ CCB->ScatterGatherList[i].SegmentDataPointer = ++ sg_dma_address(sg); + } +- } else { ++ } else if (!Count) { + CCB->Opcode = BusLogic_InitiatorCCB; + CCB->DataLength = BufferLength; + CCB->DataPointer = 0; + } ++ + switch (CDB[0]) { + case READ_6: + case READ_10: +diff -Nurb linux-2.6.22-570/drivers/scsi/Kconfig linux-2.6.22-591/drivers/scsi/Kconfig +--- linux-2.6.22-570/drivers/scsi/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -739,7 +739,7 @@ + + config SCSI_IBMMCA + tristate "IBMMCA SCSI support" +- depends on MCA_LEGACY && SCSI ++ depends on MCA && SCSI + ---help--- + This is support for the IBM SCSI adapter found in many of the PS/2 + series computers. These machines have an MCA bus, so you need to +@@ -1007,6 +1007,11 @@ + To compile this driver as a module, choose M here: the + module will be called stex. + ++config 53C700_BE_BUS ++ bool ++ depends on SCSI_A4000T || SCSI_ZORRO7XX || MVME16x_SCSI || BVME6000_SCSI ++ default y ++ + config SCSI_SYM53C8XX_2 + tristate "SYM53C8XX Version 2 SCSI support" + depends on PCI && SCSI +@@ -1611,13 +1616,25 @@ + If you have the Phase5 Fastlane Z3 SCSI controller, or plan to use + one in the near future, say Y to this question. Otherwise, say N. + +-config SCSI_AMIGA7XX +- bool "Amiga NCR53c710 SCSI support (EXPERIMENTAL)" +- depends on AMIGA && SCSI && EXPERIMENTAL && BROKEN ++config SCSI_A4000T ++ tristate "A4000T NCR53c710 SCSI support (EXPERIMENTAL)" ++ depends on AMIGA && SCSI && EXPERIMENTAL ++ select SCSI_SPI_ATTRS + help +- Support for various NCR53c710-based SCSI controllers on the Amiga. ++ If you have an Amiga 4000T and have SCSI devices connected to the ++ built-in SCSI controller, say Y. Otherwise, say N. ++ ++ To compile this driver as a module, choose M here: the ++ module will be called a4000t. ++ ++config SCSI_ZORRO7XX ++ tristate "Zorro NCR53c710 SCSI support (EXPERIMENTAL)" ++ depends on ZORRO && SCSI && EXPERIMENTAL ++ select SCSI_SPI_ATTRS ++ help ++ Support for various NCR53c710-based SCSI controllers on Zorro ++ expansion boards for the Amiga. 
+ This includes: +- - the builtin SCSI controller on the Amiga 4000T, + - the Amiga 4091 Zorro III SCSI-2 controller, + - the MacroSystem Development's WarpEngine Amiga SCSI-2 controller + (info at +@@ -1625,10 +1642,6 @@ + - the SCSI controller on the Phase5 Blizzard PowerUP 603e+ + accelerator card for the Amiga 1200, + - the SCSI controller on the GVP Turbo 040/060 accelerator. +- Note that all of the above SCSI controllers, except for the builtin +- SCSI controller on the Amiga 4000T, reside on the Zorro expansion +- bus, so you also have to enable Zorro bus support if you want to use +- them. + + config OKTAGON_SCSI + tristate "BSC Oktagon SCSI support (EXPERIMENTAL)" +@@ -1712,8 +1725,8 @@ + single-board computer. + + config MVME16x_SCSI +- bool "NCR53C710 SCSI driver for MVME16x" +- depends on MVME16x && SCSI && BROKEN ++ tristate "NCR53C710 SCSI driver for MVME16x" ++ depends on MVME16x && SCSI + select SCSI_SPI_ATTRS + help + The Motorola MVME162, 166, 167, 172 and 177 boards use the NCR53C710 +@@ -1721,22 +1734,14 @@ + will want to say Y to this question. + + config BVME6000_SCSI +- bool "NCR53C710 SCSI driver for BVME6000" +- depends on BVME6000 && SCSI && BROKEN ++ tristate "NCR53C710 SCSI driver for BVME6000" ++ depends on BVME6000 && SCSI + select SCSI_SPI_ATTRS + help + The BVME4000 and BVME6000 boards from BVM Ltd use the NCR53C710 + SCSI controller chip. Almost everyone using one of these boards + will want to say Y to this question. + +-config SCSI_NCR53C7xx_FAST +- bool "allow FAST-SCSI [10MHz]" +- depends on SCSI_AMIGA7XX || MVME16x_SCSI || BVME6000_SCSI +- help +- This will enable 10MHz FAST-SCSI transfers with your host +- adapter. Some systems have problems with that speed, so it's safest +- to say N here. +- + config SUN3_SCSI + tristate "Sun3 NCR5380 SCSI" + depends on SUN3 && SCSI +diff -Nurb linux-2.6.22-570/drivers/scsi/Makefile linux-2.6.22-591/drivers/scsi/Makefile +--- linux-2.6.22-570/drivers/scsi/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -37,7 +37,8 @@ + + obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o + obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o +-obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o ++obj-$(CONFIG_SCSI_A4000T) += 53c700.o a4000t.o ++obj-$(CONFIG_SCSI_ZORRO7XX) += 53c700.o zorro7xx.o + obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o + obj-$(CONFIG_A2091_SCSI) += a2091.o wd33c93.o + obj-$(CONFIG_GVP11_SCSI) += gvp11.o wd33c93.o +@@ -53,8 +54,8 @@ + obj-$(CONFIG_MAC_SCSI) += mac_scsi.o + obj-$(CONFIG_SCSI_MAC_ESP) += mac_esp.o NCR53C9x.o + obj-$(CONFIG_SUN3_SCSI) += sun3_scsi.o sun3_scsi_vme.o +-obj-$(CONFIG_MVME16x_SCSI) += mvme16x.o 53c7xx.o +-obj-$(CONFIG_BVME6000_SCSI) += bvme6000.o 53c7xx.o ++obj-$(CONFIG_MVME16x_SCSI) += 53c700.o mvme16x_scsi.o ++obj-$(CONFIG_BVME6000_SCSI) += 53c700.o bvme6000_scsi.o + obj-$(CONFIG_SCSI_SIM710) += 53c700.o sim710.o + obj-$(CONFIG_SCSI_ADVANSYS) += advansys.o + obj-$(CONFIG_SCSI_PSI240I) += psi240i.o +@@ -168,10 +169,8 @@ + oktagon_esp_mod-objs := oktagon_esp.o oktagon_io.o + + # Files generated that shall be removed upon make clean +-clean-files := 53c7xx_d.h 53c700_d.h \ +- 53c7xx_u.h 53c700_u.h ++clean-files := 53c700_d.h 53c700_u.h + +-$(obj)/53c7xx.o: $(obj)/53c7xx_d.h $(obj)/53c7xx_u.h + $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h + + # If you want to play with the firmware, uncomment +@@ -179,11 +178,6 @@ + + ifdef GENERATE_FIRMWARE + +-$(obj)/53c7xx_d.h: $(src)/53c7xx.scr 
$(src)/script_asm.pl +- $(CPP) -traditional -DCHIP=710 - < $< | grep -v '^#' | $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h) +- +-$(obj)/53c7xx_u.h: $(obj)/53c7xx_d.h +- + $(obj)/53c700_d.h: $(src)/53c700.scr $(src)/script_asm.pl + $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h) < $< + +diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.c linux-2.6.22-591/drivers/scsi/NCR5380.c +--- linux-2.6.22-570/drivers/scsi/NCR5380.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/NCR5380.c 2007-12-21 15:36:12.000000000 -0500 +@@ -347,7 +347,7 @@ + if((r & bit) == val) + return 0; + if(!in_interrupt()) +- yield(); ++ cond_resched(); + else + cpu_relax(); + } +@@ -357,7 +357,7 @@ + static struct { + unsigned char value; + const char *name; +-} phases[] = { ++} phases[] __maybe_unused = { + {PHASE_DATAOUT, "DATAOUT"}, + {PHASE_DATAIN, "DATAIN"}, + {PHASE_CMDOUT, "CMDOUT"}, +@@ -575,7 +575,8 @@ + * Locks: none, irqs must be enabled on entry + */ + +-static int __init NCR5380_probe_irq(struct Scsi_Host *instance, int possible) ++static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance, ++ int possible) + { + NCR5380_local_declare(); + struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata; +@@ -629,7 +630,8 @@ + * Locks: none + */ + +-static void __init NCR5380_print_options(struct Scsi_Host *instance) ++static void __init __maybe_unused ++NCR5380_print_options(struct Scsi_Host *instance) + { + printk(" generic options" + #ifdef AUTOPROBE_IRQ +@@ -703,8 +705,8 @@ + static + char *lprint_opcode(int opcode, char *pos, char *buffer, int length); + +-static +-int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, off_t offset, int length, int inout) ++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance, ++ char *buffer, char **start, off_t offset, int length, int inout) + { + char *pos = buffer; + struct NCR5380_hostdata *hostdata; +diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.h linux-2.6.22-591/drivers/scsi/NCR5380.h +--- linux-2.6.22-570/drivers/scsi/NCR5380.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/NCR5380.h 2007-12-21 15:36:12.000000000 -0500 +@@ -299,7 +299,7 @@ + static irqreturn_t NCR5380_intr(int irq, void *dev_id); + #endif + static void NCR5380_main(struct work_struct *work); +-static void NCR5380_print_options(struct Scsi_Host *instance); ++static void __maybe_unused NCR5380_print_options(struct Scsi_Host *instance); + #ifdef NDEBUG + static void NCR5380_print_phase(struct Scsi_Host *instance); + static void NCR5380_print(struct Scsi_Host *instance); +@@ -307,8 +307,8 @@ + static int NCR5380_abort(Scsi_Cmnd * cmd); + static int NCR5380_bus_reset(Scsi_Cmnd * cmd); + static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *)); +-static int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, +-off_t offset, int length, int inout); ++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance, ++ char *buffer, char **start, off_t offset, int length, int inout); + + static void NCR5380_reselect(struct Scsi_Host *instance); + static int NCR5380_select(struct Scsi_Host *instance, Scsi_Cmnd * cmd, int tag); +diff -Nurb linux-2.6.22-570/drivers/scsi/NCR53c406a.c linux-2.6.22-591/drivers/scsi/NCR53c406a.c +--- linux-2.6.22-570/drivers/scsi/NCR53c406a.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/NCR53c406a.c 2007-12-21 15:36:12.000000000 -0500 +@@ 
-698,7 +698,7 @@
+ 	int i;
+ 
+ 	VDEB(printk("NCR53c406a_queue called\n"));
+-	DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, SCpnt->request_bufflen));
++	DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, scsi_bufflen(SCpnt)));
+ 
+ #if 0
+ 	VDEB(for (i = 0; i < SCpnt->cmd_len; i++)
+@@ -785,8 +785,8 @@
+ 	unsigned char status, int_reg;
+ #if USE_PIO
+ 	unsigned char pio_status;
+-	struct scatterlist *sglist;
+-	unsigned int sgcount;
++	struct scatterlist *sg;
++	int i;
+ #endif
+ 
+ 	VDEB(printk("NCR53c406a_intr called\n"));
+@@ -866,21 +866,17 @@
+ 		current_SC->SCp.phase = data_out;
+ 		VDEB(printk("NCR53c406a: Data-Out phase\n"));
+ 		outb(FLUSH_FIFO, CMD_REG);
+-		LOAD_DMA_COUNT(current_SC->request_bufflen);	/* Max transfer size */
++		LOAD_DMA_COUNT(scsi_bufflen(current_SC));	/* Max transfer size */
+ #if USE_DMA		/* No s/g support for DMA */
+-		NCR53c406a_dma_write(current_SC->request_buffer, current_SC->request_bufflen);
++		NCR53c406a_dma_write(scsi_sglist(current_SC),
++				     scsi_bufflen(current_SC));
++
+ #endif				/* USE_DMA */
+ 		outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+-		if (!current_SC->use_sg)	/* Don't use scatter-gather */
+-			NCR53c406a_pio_write(current_SC->request_buffer, current_SC->request_bufflen);
+-		else {	/* use scatter-gather */
+-			sgcount = current_SC->use_sg;
+-			sglist = current_SC->request_buffer;
+-			while (sgcount--) {
+-				NCR53c406a_pio_write(page_address(sglist->page) + sglist->offset, sglist->length);
+-				sglist++;
+-			}
++		scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++			NCR53c406a_pio_write(page_address(sg->page) + sg->offset,
++					     sg->length);
+ 		}
+ 		REG0;
+ #endif				/* USE_PIO */
+@@ -893,21 +889,16 @@
+ 		current_SC->SCp.phase = data_in;
+ 		VDEB(printk("NCR53c406a: Data-In phase\n"));
+ 		outb(FLUSH_FIFO, CMD_REG);
+-		LOAD_DMA_COUNT(current_SC->request_bufflen);	/* Max transfer size */
++		LOAD_DMA_COUNT(scsi_bufflen(current_SC));	/* Max transfer size */
+ #if USE_DMA		/* No s/g support for DMA */
+-		NCR53c406a_dma_read(current_SC->request_buffer, current_SC->request_bufflen);
++		NCR53c406a_dma_read(scsi_sglist(current_SC),
++				    scsi_bufflen(current_SC));
+ #endif				/* USE_DMA */
+ 		outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+-		if (!current_SC->use_sg)	/* Don't use scatter-gather */
+-			NCR53c406a_pio_read(current_SC->request_buffer, current_SC->request_bufflen);
+-		else {	/* Use scatter-gather */
+-			sgcount = current_SC->use_sg;
+-			sglist = current_SC->request_buffer;
+-			while (sgcount--) {
+-				NCR53c406a_pio_read(page_address(sglist->page) + sglist->offset, sglist->length);
+-				sglist++;
+-			}
++		scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++			NCR53c406a_pio_read(page_address(sg->page) + sg->offset,
++					    sg->length);
+ 		}
+ 		REG0;
+ #endif				/* USE_PIO */
+diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.c linux-2.6.22-591/drivers/scsi/a100u2w.c
+--- linux-2.6.22-570/drivers/scsi/a100u2w.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/a100u2w.c	2007-12-21 15:36:12.000000000 -0500
+@@ -19,27 +19,6 @@
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ * +- * -------------------------------------------------------------------------- +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions, and the following disclaimer, +- * without modification, immediately at the beginning of the file. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. The name of the author may not be used to endorse or promote products +- * derived from this software without specific prior written permission. +- * +- * Where this Software is combined with software released under the terms of +- * the GNU General Public License ("GPL") and the terms of the GPL would require the +- * combined work to also be released under the terms of the GPL, the terms +- * and conditions of this License will apply in addition to those of the +- * GPL with the exception of any terms or conditions of this License that +- * conflict with, or are expressly prohibited by, the GPL. +- * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +@@ -75,6 +54,8 @@ + * 9/28/04 Christoph Hellwig + * - merge the two source files + * - remove internal queueing code ++ * 14/06/07 Alan Cox ++ * - Grand cleanup and Linuxisation + */ + + #include +@@ -102,14 +83,12 @@ + #include "a100u2w.h" + + +-#define JIFFIES_TO_MS(t) ((t) * 1000 / HZ) +-#define MS_TO_JIFFIES(j) ((j * HZ) / 1000) ++static struct orc_scb *__orc_alloc_scb(struct orc_host * host); ++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb); + +-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp); +-static void inia100SCBPost(BYTE * pHcb, BYTE * pScb); ++static struct orc_nvram nvram, *nvramp = &nvram; + +-static NVRAM nvram, *nvramp = &nvram; +-static UCHAR dftNvRam[64] = ++static u8 default_nvram[64] = + { + /*----------header -------------*/ + 0x01, /* 0x00: Sub System Vendor ID 0 */ +@@ -158,823 +137,882 @@ + }; + + +-/***************************************************************************/ +-static void waitForPause(unsigned amount) +-{ +- ULONG the_time = jiffies + MS_TO_JIFFIES(amount); +- while (time_before_eq(jiffies, the_time)) +- cpu_relax(); +-} +- +-/***************************************************************************/ +-static UCHAR waitChipReady(ORC_HCS * hcsp) ++static u8 wait_chip_ready(struct orc_host * host) + { + int i; + + for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ +- if (ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */ ++ if (inb(host->base + ORC_HCTRL) & HOSTSTOP) /* Wait HOSTSTOP set */ + return 1; +- waitForPause(100); /* wait 100ms before try again */ ++ mdelay(100); + } + return 0; + } + +-/***************************************************************************/ +-static UCHAR waitFWReady(ORC_HCS * hcsp) ++static u8 wait_firmware_ready(struct orc_host * host) + { + int i; + + for (i = 0; i < 10; i++) { /* Wait 1 second for report timeout */ +- if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) /* Wait READY set */ ++ if (inb(host->base + ORC_HSTUS) & RREADY) /* Wait READY set */ + return 
1;
+-		waitForPause(100);	/* wait 100ms before try again */
++		mdelay(100);	/* wait 100ms before trying again */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitSCSIRSTdone(ORC_HCS * hcsp)
++static u8 wait_scsi_reset_done(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout */
+-		if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & SCSIRST))	/* Wait SCSIRST done */
++		if (!(inb(host->base + ORC_HCTRL) & SCSIRST))	/* Wait SCSIRST done */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again */
++		mdelay(100);	/* wait 100ms before trying again */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitHDOoff(ORC_HCS * hcsp)
++static u8 wait_HDO_off(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout */
+-		if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HDO))	/* Wait HDO off */
++		if (!(inb(host->base + ORC_HCTRL) & HDO))	/* Wait HDO off */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again */
++		mdelay(100);	/* wait 100ms before trying again */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitHDIset(ORC_HCS * hcsp, UCHAR * pData)
++static u8 wait_hdi_set(struct orc_host * host, u8 * data)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout */
+-		if ((*pData = ORC_RD(hcsp->HCS_Base, ORC_HSTUS)) & HDI)
++		if ((*data = inb(host->base + ORC_HSTUS)) & HDI)
+ 			return 1;	/* Wait HDI set */
+-		waitForPause(100);	/* wait 100ms before try again */
++		mdelay(100);	/* wait 100ms before trying again */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static unsigned short get_FW_version(ORC_HCS * hcsp)
++static unsigned short orc_read_fwrev(struct orc_host * host)
+ {
+-	UCHAR bData;
+-	union {
+-		unsigned short sVersion;
+-		unsigned char cVersion[2];
+-	} Version;
++	u16 version;
++	u8 data;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_VERSION);
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off */
++	outb(ORC_CMD_VERSION, host->base + ORC_HDATA);
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off */
+ 		return 0;
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set */
+ 		return 0;
+-	Version.cVersion[0] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI */
++	version = inb(host->base + ORC_HDATA);
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI */
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set */
+ 		return 0;
+-	Version.cVersion[1] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI */
++	version |= inb(host->base + ORC_HDATA) << 8;
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI */
+ 
+-	return (Version.sVersion);
++	return version;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR set_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char value)
++static u8 orc_nv_write(struct orc_host * host, unsigned char address, unsigned char value)
+ {
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_SET_NVM);	/* Write command */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if
(waitHDOoff(hcsp) == 0) /* Wait HDO off */ ++ outb(ORC_CMD_SET_NVM, host->base + ORC_HDATA); /* Write command */ ++ outb(HDO, host->base + ORC_HCTRL); ++ if (wait_HDO_off(host) == 0) /* Wait HDO off */ + return 0; + +- ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */ +- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); +- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ ++ outb(address, host->base + ORC_HDATA); /* Write address */ ++ outb(HDO, host->base + ORC_HCTRL); ++ if (wait_HDO_off(host) == 0) /* Wait HDO off */ + return 0; + +- ORC_WR(hcsp->HCS_Base + ORC_HDATA, value); /* Write value */ +- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); +- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ ++ outb(value, host->base + ORC_HDATA); /* Write value */ ++ outb(HDO, host->base + ORC_HCTRL); ++ if (wait_HDO_off(host) == 0) /* Wait HDO off */ + return 0; + + return 1; + } + + /***************************************************************************/ +-static UCHAR get_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char *pDataIn) ++static u8 orc_nv_read(struct orc_host * host, u8 address, u8 *ptr) + { +- unsigned char bData; ++ unsigned char data; + +- ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_GET_NVM); /* Write command */ +- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); +- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ ++ outb(ORC_CMD_GET_NVM, host->base + ORC_HDATA); /* Write command */ ++ outb(HDO, host->base + ORC_HCTRL); ++ if (wait_HDO_off(host) == 0) /* Wait HDO off */ + return 0; + +- ORC_WR(hcsp->HCS_Base + ORC_HDATA, address); /* Write address */ +- ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO); +- if (waitHDOoff(hcsp) == 0) /* Wait HDO off */ ++ outb(address, host->base + ORC_HDATA); /* Write address */ ++ outb(HDO, host->base + ORC_HCTRL); ++ if (wait_HDO_off(host) == 0) /* Wait HDO off */ + return 0; + +- if (waitHDIset(hcsp, &bData) == 0) /* Wait HDI set */ ++ if (wait_hdi_set(host, &data) == 0) /* Wait HDI set */ + return 0; +- *pDataIn = ORC_RD(hcsp->HCS_Base, ORC_HDATA); +- ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData); /* Clear HDI */ ++ *ptr = inb(host->base + ORC_HDATA); ++ outb(data, host->base + ORC_HSTUS); /* Clear HDI */ + + return 1; ++ + } + +-/***************************************************************************/ +-static void orc_exec_scb(ORC_HCS * hcsp, ORC_SCB * scbp) ++/** ++ * orc_exec_sb - Queue an SCB with the HA ++ * @host: host adapter the SCB belongs to ++ * @scb: SCB to queue for execution ++ */ ++ ++static void orc_exec_scb(struct orc_host * host, struct orc_scb * scb) + { +- scbp->SCB_Status = ORCSCB_POST; +- ORC_WR(hcsp->HCS_Base + ORC_PQUEUE, scbp->SCB_ScbIdx); +- return; ++ scb->status = ORCSCB_POST; ++ outb(scb->scbidx, host->base + ORC_PQUEUE); + } + + +-/*********************************************************************** +- Read SCSI H/A configuration parameters from serial EEPROM +-************************************************************************/ +-static int se2_rd_all(ORC_HCS * hcsp) ++/** ++ * se2_rd_all - read SCSI parameters from EEPROM ++ * @host: Host whose EEPROM is being loaded ++ * ++ * Read SCSI H/A configuration parameters from serial EEPROM ++ */ ++ ++static int se2_rd_all(struct orc_host * host) + { + int i; +- UCHAR *np, chksum = 0; ++ u8 *np, chksum = 0; + +- np = (UCHAR *) nvramp; ++ np = (u8 *) nvramp; + for (i = 0; i < 64; i++, np++) { /* <01> */ +- if (get_NVRAM(hcsp, (unsigned char) i, np) == 0) ++ if (orc_nv_read(host, (u8) i, np) == 0) + return -1; +-// *np++ = get_NVRAM(hcsp, (unsigned char ) i); + } + +-/*------ Is 
ckecksum ok ? ------*/
+-	np = (UCHAR *) nvramp;
++	/*------ Is checksum ok ? ------*/
++	np = (u8 *) nvramp;
+ 	for (i = 0; i < 63; i++)
+ 		chksum += *np++;
+ 
+-	if (nvramp->CheckSum != (UCHAR) chksum)
++	if (nvramp->CheckSum != (u8) chksum)
+ 		return -1;
+ 	return 1;
+ }
+ 
+-/************************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-*************************************************************************/
+-static void se2_update_all(ORC_HCS * hcsp)
++/**
++ * se2_update_all - update the EEPROM
++ * @host: Host whose EEPROM is being updated
++ *
++ * Update changed bytes in the EEPROM image.
++ */
++
++static void se2_update_all(struct orc_host * host)
+ {				/* setup default pattern */
+ 	int i;
+-	UCHAR *np, *np1, chksum = 0;
++	u8 *np, *np1, chksum = 0;
+ 
+ 	/* Calculate checksum first */
+-	np = (UCHAR *) dftNvRam;
++	np = (u8 *) default_nvram;
+ 	for (i = 0; i < 63; i++)
+ 		chksum += *np++;
+ 	*np = chksum;
+ 
+-	np = (UCHAR *) dftNvRam;
+-	np1 = (UCHAR *) nvramp;
++	np = (u8 *) default_nvram;
++	np1 = (u8 *) nvramp;
+ 	for (i = 0; i < 64; i++, np++, np1++) {
+-		if (*np != *np1) {
+-			set_NVRAM(hcsp, (unsigned char) i, *np);
+-		}
++		if (*np != *np1)
++			orc_nv_write(host, (u8) i, *np);
+ 	}
+-	return;
+ }
+ 
+-/*************************************************************************
+- Function name : read_eeprom
+-**************************************************************************/
+-static void read_eeprom(ORC_HCS * hcsp)
+-{
+-	if (se2_rd_all(hcsp) != 1) {
+-		se2_update_all(hcsp);	/* setup default pattern */
+-		se2_rd_all(hcsp);	/* load again */
++/**
++ * read_eeprom - load EEPROM
++ * @host: Host EEPROM to read
++ *
++ * Read the EEPROM for a given host. If it is invalid or fails
++ * then restore the defaults and use them.
++ */
++
++static void read_eeprom(struct orc_host * host)
++{
++	if (se2_rd_all(host) != 1) {
++		se2_update_all(host);	/* setup default pattern */
++		se2_rd_all(host);	/* load again */
+ 	}
+ }
+ 
+ 
+-/***************************************************************************/
+-static UCHAR load_FW(ORC_HCS * hcsp)
++/**
++ * orc_load_firmware - initialise firmware
++ * @host: Host to set up
++ *
++ * Load the firmware from the EEPROM into controller SRAM. This
++ * is basically a 4K block copy and then a 4K block read to check
++ * correctness. The rest is convoluted by the indirect interfaces
++ * in the hardware
++ */
++
++static u8 orc_load_firmware(struct orc_host * host)
+ {
+-	U32 dData;
+-	USHORT wBIOSAddress;
+-	USHORT i;
+-	UCHAR *pData, bData;
+-
+-
+-	bData = ORC_RD(hcsp->HCS_Base, ORC_GCFG);
+-	ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData | EEPRG);	/* Enable EEPROM programming */
+-	ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, 0x00);
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x00);
+-	if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0x55) {
+-		ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
+-		return 0;
+-	}
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x01);
+-	if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0xAA) {
+-		ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
+-		return 0;
+-	}
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD);	/* Enable SRAM programming */
+-	pData = (UCHAR *) & dData;
+-	dData = 0;		/* Initial FW address to 0 */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x10);
+-	*pData = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x11);
+-	*(pData + 1) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x12);
+-	*(pData + 2) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
+-	ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, *(pData + 2));
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_FWBASEADR, dData);	/* Write FW address */
++	u32 data32;
++	u16 bios_addr;
++	u16 i;
++	u8 *data32_ptr, data;
++
++
++	/* Set up the EEPROM for access */
+ 
+-	wBIOSAddress = (USHORT) dData;	/* FW code locate at BIOS address + ? */
+-	for (i = 0, pData = (UCHAR *) & dData;	/* Download the code */
++	data = inb(host->base + ORC_GCFG);
++	outb(data | EEPRG, host->base + ORC_GCFG);	/* Enable EEPROM programming */
++	outb(0x00, host->base + ORC_EBIOSADR2);
++	outw(0x0000, host->base + ORC_EBIOSADR0);
++	if (inb(host->base + ORC_EBIOSDATA) != 0x55) {
++		outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
++		return 0;
++	}
++	outw(0x0001, host->base + ORC_EBIOSADR0);
++	if (inb(host->base + ORC_EBIOSDATA) != 0xAA) {
++		outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
++		return 0;
++	}
++
++	outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL);	/* Enable SRAM programming */
++	data32_ptr = (u8 *) & data32;
++	data32 = 0;		/* Initial FW address to 0 */
++	outw(0x0010, host->base + ORC_EBIOSADR0);
++	*data32_ptr = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
++	outw(0x0011, host->base + ORC_EBIOSADR0);
++	*(data32_ptr + 1) = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
++	outw(0x0012, host->base + ORC_EBIOSADR0);
++	*(data32_ptr + 2) = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
++	outw(*(data32_ptr + 2), host->base + ORC_EBIOSADR2);
++	outl(data32, host->base + ORC_FWBASEADR);	/* Write FW address */
++
++	/* Copy the code from the BIOS to the SRAM */
++
++	bios_addr = (u16) data32;	/* FW code locate at BIOS address + ? */
++	for (i = 0, data32_ptr = (u8 *) & data32;	/* Download the code */
+ 	     i < 0x1000;	/* Firmware code size = 4K */
+-	     i++, wBIOSAddress++) {
+-		ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+-		*pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
++	     i++, bios_addr++) {
++		outw(bios_addr, host->base + ORC_EBIOSADR0);
++		*data32_ptr++ = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
+ 		if ((i % 4) == 3) {
+-			ORC_WRLONG(hcsp->HCS_Base + ORC_RISCRAM, dData);	/* Write every 4 bytes */
+-			pData = (UCHAR *) & dData;
++			outl(data32, host->base + ORC_RISCRAM);	/* Write every 4 bytes */
++			data32_ptr = (u8 *) & data32;
+ 		}
+ 	}
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD);	/* Reset program count 0 */
+-	wBIOSAddress -= 0x1000;	/* Reset the BIOS adddress */
+-	for (i = 0, pData = (UCHAR *) & dData;	/* Check the code */
++	/* Go back and check they match */
++
++	outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL);	/* Reset program count 0 */
++	bios_addr -= 0x1000;	/* Reset the BIOS address */
++	for (i = 0, data32_ptr = (u8 *) & data32;	/* Check the code */
+ 	     i < 0x1000;	/* Firmware code size = 4K */
+-	     i++, wBIOSAddress++) {
+-		ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+-		*pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
++	     i++, bios_addr++) {
++		outw(bios_addr, host->base + ORC_EBIOSADR0);
++		*data32_ptr++ = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
+ 		if ((i % 4) == 3) {
+-			if (ORC_RDLONG(hcsp->HCS_Base, ORC_RISCRAM) != dData) {
+-				ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST);	/* Reset program to 0 */
+-				ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/*Disable EEPROM programming */
++			if (inl(host->base + ORC_RISCRAM) != data32) {
++				outb(PRGMRST, host->base + ORC_RISCCTL);	/* Reset program to 0 */
++				outb(data, host->base + ORC_GCFG);	/*Disable EEPROM programming */
+ 				return 0;
+ 			}
+-			pData = (UCHAR *) & dData;
++			data32_ptr = (u8 *) & data32;
+ 		}
+ 	}
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST);	/* Reset program to 0 */
+-	ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
++
++	/* Success */
++	outb(PRGMRST, host->base + ORC_RISCCTL);	/* Reset program to 0 */
++	outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
+ 	return 1;
+ }
+ 
+ /***************************************************************************/
+-static void setup_SCBs(ORC_HCS * hcsp)
++static void setup_SCBs(struct orc_host * host)
+ {
+-	ORC_SCB *pVirScb;
++	struct orc_scb *scb;
+ 	int i;
+-	ESCB *pVirEscb;
+-	dma_addr_t pPhysEscb;
++	struct orc_extended_scb *escb;
++	dma_addr_t escb_phys;
+ 
+-	/* Setup SCB HCS_Base and SCB Size registers */
+-	ORC_WR(hcsp->HCS_Base + ORC_SCBSIZE, ORC_MAXQUEUE);	/* Total number of SCBs */
+-	/* SCB HCS_Base address 0 */
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE0, hcsp->HCS_physScbArray);
+-	/* SCB HCS_Base address 1 */
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE1, hcsp->HCS_physScbArray);
++	/* Setup SCB base and SCB Size registers */
++	outb(ORC_MAXQUEUE, host->base + ORC_SCBSIZE);	/* Total number of SCBs */
++	/* SCB base address 0 */
++	outl(host->scb_phys, host->base + ORC_SCBBASE0);
++	/* SCB base address 1 */
++	outl(host->scb_phys, host->base + ORC_SCBBASE1);
+ 
+ 	/* setup scatter list address with one buffer */
+-	pVirScb = hcsp->HCS_virScbArray;
+-	pVirEscb = hcsp->HCS_virEscbArray;
++	scb = host->scb_virt;
++	escb = host->escb_virt;
+ 
+ 	for (i = 0; i < ORC_MAXQUEUE; i++) {
+-		pPhysEscb = (hcsp->HCS_physEscbArray + (sizeof(ESCB) * i));
+-		pVirScb->SCB_SGPAddr = (U32) pPhysEscb;
+-		pVirScb->SCB_SensePAddr = (U32) pPhysEscb;
+-		pVirScb->SCB_EScb = pVirEscb;
+-		pVirScb->SCB_ScbIdx = i;
+-		pVirScb++;
+-		pVirEscb++;
++		escb_phys = (host->escb_phys + (sizeof(struct orc_extended_scb) * i));
++		scb->sg_addr = (u32) escb_phys;
++		scb->sense_addr = (u32) escb_phys;
++		scb->escb = escb;
++		scb->scbidx = i;
++		scb++;
++		escb++;
+ 	}
+-
+-	return;
+ }
+ 
+-/***************************************************************************/
+-static void initAFlag(ORC_HCS * hcsp)
++/**
++ * init_alloc_map - initialise allocation map
++ * @host: host map to configure
++ *
++ * Initialise the allocation maps for this device. If the device
++ * is not quiescent the caller must hold the allocation lock
++ */
++
++static void init_alloc_map(struct orc_host * host)
+ {
+-	UCHAR i, j;
++	u8 i, j;
+ 
+ 	for (i = 0; i < MAX_CHANNELS; i++) {
+ 		for (j = 0; j < 8; j++) {
+-			hcsp->BitAllocFlag[i][j] = 0xffffffff;
++			host->allocation_map[i][j] = 0xffffffff;
+ 		}
+ 	}
+ }
+ 
+-/***************************************************************************/
+-static int init_orchid(ORC_HCS * hcsp)
++/**
++ * init_orchid - initialise the host adapter
++ * @host: host adapter to initialise
++ *
++ * Initialise the controller and if necessary load the firmware.
++ *
++ * Returns -1 if the initialisation fails.
++ */
++
++static int init_orchid(struct orc_host * host)
+ {
+-	UBYTE *readBytep;
+-	USHORT revision;
+-	UCHAR i;
+-
+-	initAFlag(hcsp);
+-	ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFF);	/* Disable all interrupt */
+-	if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) {	/* Orchid is ready */
+-		revision = get_FW_version(hcsp);
++	u8 *ptr;
++	u16 revision;
++	u8 i;
++
++	init_alloc_map(host);
++	outb(0xFF, host->base + ORC_GIMSK);	/* Disable all interrupts */
++
++	if (inb(host->base + ORC_HSTUS) & RREADY) {	/* Orchid is ready */
++		revision = orc_read_fwrev(host);
+ 		if (revision == 0xFFFF) {
+-			ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST);	/* Reset Host Adapter */
+-			if (waitChipReady(hcsp) == 0)
+-				return (-1);
+-			load_FW(hcsp);	/* Download FW */
+-			setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
+-			ORC_WR(hcsp->HCS_Base + ORC_HCTRL, 0);	/* clear HOSTSTOP */
+-			if (waitFWReady(hcsp) == 0)
+-				return (-1);
++			outb(DEVRST, host->base + ORC_HCTRL);	/* Reset Host Adapter */
++			if (wait_chip_ready(host) == 0)
++				return -1;
++			orc_load_firmware(host);	/* Download FW */
++			setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
++			outb(0x00, host->base + ORC_HCTRL);	/* clear HOSTSTOP */
++			if (wait_firmware_ready(host) == 0)
++				return -1;
+ 			/* Wait for firmware ready */
+ 		} else {
+-			setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
++			setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
+ 		}
+ 	} else {		/* Orchid is not Ready */
+-		ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST);	/* Reset Host Adapter */
+-		if (waitChipReady(hcsp) == 0)
+-			return (-1);
+-		load_FW(hcsp);	/* Download FW */
+-		setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
+-		ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);	/* Do Hardware Reset & */
++		outb(DEVRST, host->base + ORC_HCTRL);	/* Reset Host Adapter */
++		if (wait_chip_ready(host) == 0)
++			return -1;
++		orc_load_firmware(host);	/* Download FW */
++		setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
++		outb(HDO, host->base + ORC_HCTRL);	/* Do Hardware Reset & */
+ 
+ 						/* clear HOSTSTOP */
+-		if (waitFWReady(hcsp) == 0)	/* Wait for firmware ready */
+-			return (-1);
++		if (wait_firmware_ready(host)
== 0) /* Wait for firmware ready */ ++ return -1; + } + +-/*------------- get serial EEProm settting -------*/ ++ /* Load an EEProm copy into RAM */ ++ /* Assumes single threaded at this point */ ++ read_eeprom(host); + +- read_eeprom(hcsp); +- +- if (nvramp->Revision != 1) +- return (-1); +- +- hcsp->HCS_SCSI_ID = nvramp->SCSI0Id; +- hcsp->HCS_BIOS = nvramp->BIOSConfig1; +- hcsp->HCS_MaxTar = MAX_TARGETS; +- readBytep = (UCHAR *) & (nvramp->Target00Config); +- for (i = 0; i < 16; readBytep++, i++) { +- hcsp->TargetFlag[i] = *readBytep; +- hcsp->MaximumTags[i] = ORC_MAXTAGS; +- } /* for */ ++ if (nvramp->revision != 1) ++ return -1; + +- if (nvramp->SCSI0Config & NCC_BUSRESET) { /* Reset SCSI bus */ +- hcsp->HCS_Flags |= HCF_SCSI_RESET; ++ host->scsi_id = nvramp->scsi_id; ++ host->BIOScfg = nvramp->BIOSConfig1; ++ host->max_targets = MAX_TARGETS; ++ ptr = (u8 *) & (nvramp->Target00Config); ++ for (i = 0; i < 16; ptr++, i++) { ++ host->target_flag[i] = *ptr; ++ host->max_tags[i] = ORC_MAXTAGS; + } +- ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFB); /* enable RP FIFO interrupt */ +- return (0); ++ ++ if (nvramp->SCSI0Config & NCC_BUSRESET) ++ host->flags |= HCF_SCSI_RESET; ++ outb(0xFB, host->base + ORC_GIMSK); /* enable RP FIFO interrupt */ ++ return 0; + } + +-/***************************************************************************** +- Function name : orc_reset_scsi_bus +- Description : Reset registers, reset a hanging bus and +- kill active and disconnected commands for target w/o soft reset +- Input : pHCB - Pointer to host adapter structure +- Output : None. +- Return : pSRB - Pointer to SCSI request block. +-*****************************************************************************/ +-static int orc_reset_scsi_bus(ORC_HCS * pHCB) ++/** ++ * orc_reset_scsi_bus - perform bus reset ++ * @host: host being reset ++ * ++ * Perform a full bus reset on the adapter. ++ */ ++ ++static int orc_reset_scsi_bus(struct orc_host * host) + { /* I need Host Control Block Information */ +- ULONG flags; ++ unsigned long flags; + +- spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags); ++ spin_lock_irqsave(&host->allocation_lock, flags); + +- initAFlag(pHCB); ++ init_alloc_map(host); + /* reset scsi bus */ +- ORC_WR(pHCB->HCS_Base + ORC_HCTRL, SCSIRST); +- if (waitSCSIRSTdone(pHCB) == 0) { +- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); ++ outb(SCSIRST, host->base + ORC_HCTRL); ++ /* FIXME: We can spend up to a second with the lock held and ++ interrupts off here */ ++ if (wait_scsi_reset_done(host) == 0) { ++ spin_unlock_irqrestore(&host->allocation_lock, flags); + return FAILED; + } else { +- spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags); ++ spin_unlock_irqrestore(&host->allocation_lock, flags); + return SUCCESS; + } + } + +-/***************************************************************************** +- Function name : orc_device_reset +- Description : Reset registers, reset a hanging bus and +- kill active and disconnected commands for target w/o soft reset +- Input : pHCB - Pointer to host adapter structure +- Output : None. +- Return : pSRB - Pointer to SCSI request block. 
+-*****************************************************************************/
+-static int orc_device_reset(ORC_HCS * pHCB, struct scsi_cmnd *SCpnt, unsigned int target)
++/**
++ * orc_device_reset - device reset handler
++ * @host: host to reset
++ * @cmd: command causing the reset
++ * @target: target device
++ *
++ * Reset registers, reset a hanging bus and kill active and disconnected
++ * commands for target w/o soft reset
++ */
++
++static int orc_device_reset(struct orc_host * host, struct scsi_cmnd *cmd, unsigned int target)
+ {				/* I need Host Control Block Information */
+-	ORC_SCB *pScb;
+-	ESCB *pVirEscb;
+-	ORC_SCB *pVirScb;
+-	UCHAR i;
+-	ULONG flags;
+-
+-	spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags);
+-	pScb = (ORC_SCB *) NULL;
+-	pVirEscb = (ESCB *) NULL;
++	struct orc_scb *scb;
++	struct orc_extended_scb *escb;
++	struct orc_scb *host_scb;
++	u8 i;
++	unsigned long flags;
++
++	spin_lock_irqsave(&(host->allocation_lock), flags);
++	scb = (struct orc_scb *) NULL;
++	escb = (struct orc_extended_scb *) NULL;
+ 
+ 	/* setup scatter list address with one buffer */
+-	pVirScb = pHCB->HCS_virScbArray;
++	host_scb = host->scb_virt;
+ 
+-	initAFlag(pHCB);
+-	/* device reset */
++	/* FIXME: is this safe if we then fail to issue the reset or race
++	   a completion ? */
++	init_alloc_map(host);
++
++	/* Find the scb corresponding to the command */
+ 	for (i = 0; i < ORC_MAXQUEUE; i++) {
+-		pVirEscb = pVirScb->SCB_EScb;
+-		if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt))
++		escb = host_scb->escb;
++		if (host_scb->status && escb->srb == cmd)
+ 			break;
+-		pVirScb++;
++		host_scb++;
+ 	}
+ 
+ 	if (i == ORC_MAXQUEUE) {
+-		printk("Unable to Reset - No SCB Found\n");
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++		printk(KERN_ERR "Unable to Reset - No SCB Found\n");
++		spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ 		return FAILED;
+ 	}
+-	if ((pScb = orc_alloc_scb(pHCB)) == NULL) {
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++	/* Allocate a new SCB for the reset command to the firmware */
++	if ((scb = __orc_alloc_scb(host)) == NULL) {
++		/* Can't happen.. */
++		spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ 		return FAILED;
+ 	}
+-	pScb->SCB_Opcode = ORC_BUSDEVRST;
+-	pScb->SCB_Target = target;
+-	pScb->SCB_HaStat = 0;
+-	pScb->SCB_TaStat = 0;
+-	pScb->SCB_Status = 0x0;
+-	pScb->SCB_Link = 0xFF;
+-	pScb->SCB_Reserved0 = 0;
+-	pScb->SCB_Reserved1 = 0;
+-	pScb->SCB_XferLen = 0;
+-	pScb->SCB_SGLen = 0;
+-
+-	pVirEscb->SCB_Srb = NULL;
+-	pVirEscb->SCB_Srb = SCpnt;
+-	orc_exec_scb(pHCB, pScb);	/* Start execute SCB */
+-	spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++	/* Reset device is handled by the firmware, we fill in an SCB and
++	   fire it at the controller, it does the rest */
++	scb->opcode = ORC_BUSDEVRST;
++	scb->target = target;
++	scb->hastat = 0;
++	scb->tastat = 0;
++	scb->status = 0x0;
++	scb->link = 0xFF;
++	scb->reserved0 = 0;
++	scb->reserved1 = 0;
++	scb->xferlen = 0;
++	scb->sg_len = 0;
++
++	escb->srb = NULL;
++	escb->srb = cmd;
++	orc_exec_scb(host, scb);	/* Start execute SCB */
++	spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 	return SUCCESS;
+ }
+ 
++/**
++ * __orc_alloc_scb - allocate an SCB
++ * @host: host to allocate from
++ *
++ * Allocate an SCB and return a pointer to the SCB object. NULL
++ * is returned if no SCB is free. The caller must already hold
++ * the allocator lock at this point.
++ */ + +-/***************************************************************************/ +-static ORC_SCB *__orc_alloc_scb(ORC_HCS * hcsp) ++ ++static struct orc_scb *__orc_alloc_scb(struct orc_host * host) + { +- ORC_SCB *pTmpScb; +- UCHAR Ch; +- ULONG idx; +- UCHAR index; +- UCHAR i; ++ u8 channel; ++ unsigned long idx; ++ u8 index; ++ u8 i; + +- Ch = hcsp->HCS_Index; ++ channel = host->index; + for (i = 0; i < 8; i++) { + for (index = 0; index < 32; index++) { +- if ((hcsp->BitAllocFlag[Ch][i] >> index) & 0x01) { +- hcsp->BitAllocFlag[Ch][i] &= ~(1 << index); ++ if ((host->allocation_map[channel][i] >> index) & 0x01) { ++ host->allocation_map[channel][i] &= ~(1 << index); + break; + } + } + idx = index + 32 * i; +- pTmpScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (idx * sizeof(ORC_SCB))); +- return (pTmpScb); ++ /* Translate the index to a structure instance */ ++ return (struct orc_scb *) ((unsigned long) host->scb_virt + (idx * sizeof(struct orc_scb))); + } +- return (NULL); ++ return NULL; + } + +-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp) ++/** ++ * orc_alloc_scb - allocate an SCB ++ * @host: host to allocate from ++ * ++ * Allocate an SCB and return a pointer to the SCB object. NULL ++ * is returned if no SCB is free. ++ */ ++ ++static struct orc_scb *orc_alloc_scb(struct orc_host * host) + { +- ORC_SCB *pTmpScb; +- ULONG flags; ++ struct orc_scb *scb; ++ unsigned long flags; + +- spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags); +- pTmpScb = __orc_alloc_scb(hcsp); +- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); +- return (pTmpScb); ++ spin_lock_irqsave(&host->allocation_lock, flags); ++ scb = __orc_alloc_scb(host); ++ spin_unlock_irqrestore(&host->allocation_lock, flags); ++ return scb; + } + ++/** ++ * orc_release_scb - release an SCB ++ * @host: host owning the SCB ++ * @scb: SCB that is now free ++ * ++ * Called to return a completed SCB to the allocation pool. Before ++ * calling the SCB must be out of use on both the host and the HA. ++ */ + +-/***************************************************************************/ +-static void orc_release_scb(ORC_HCS * hcsp, ORC_SCB * scbp) ++static void orc_release_scb(struct orc_host *host, struct orc_scb *scb) + { +- ULONG flags; +- UCHAR Index; +- UCHAR i; +- UCHAR Ch; +- +- spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags); +- Ch = hcsp->HCS_Index; +- Index = scbp->SCB_ScbIdx; +- i = Index / 32; +- Index %= 32; +- hcsp->BitAllocFlag[Ch][i] |= (1 << Index); +- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); ++ unsigned long flags; ++ u8 index, i, channel; ++ ++ spin_lock_irqsave(&(host->allocation_lock), flags); ++ channel = host->index; /* Channel */ ++ index = scb->scbidx; ++ i = index / 32; ++ index %= 32; ++ host->allocation_map[channel][i] |= (1 << index); ++ spin_unlock_irqrestore(&(host->allocation_lock), flags); + } + +-/***************************************************************************** +- Function name : abort_SCB +- Description : Abort a queued command. +- (commands that are on the bus can't be aborted easily) +- Input : pHCB - Pointer to host adapter structure +- Output : None. +- Return : pSRB - Pointer to SCSI request block. +-*****************************************************************************/ +-static int abort_SCB(ORC_HCS * hcsp, ORC_SCB * pScb) ++/** ++ * orchid_abort_scb - abort a command ++ * ++ * Abort a queued command that has been passed to the firmware layer ++ * if possible. This is all handled by the firmware. 
We ask the firmware
++ * and it either aborts the command or fails
++ */
++
++static int orchid_abort_scb(struct orc_host * host, struct orc_scb * scb)
+ {
+-	unsigned char bData, bStatus;
++	unsigned char data, status;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_ABORT_SCB);	/* Write command */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off */
++	outb(ORC_CMD_ABORT_SCB, host->base + ORC_HDATA);	/* Write command */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off */
+ 		return 0;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, pScb->SCB_ScbIdx);	/* Write address */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off */
++	outb(scb->scbidx, host->base + ORC_HDATA);	/* Write address */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off */
+ 		return 0;
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set */
+ 		return 0;
+-	bStatus = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI */
++	status = inb(host->base + ORC_HDATA);
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI */
+ 
+-	if (bStatus == 1)	/* 0 - Successfully */
++	if (status == 1)	/* 0 - Successfully */
+ 		return 0;	/* 1 - Fail */
+ 	return 1;
+ }
+ 
+-/*****************************************************************************
+- Function name : inia100_abort
+- Description : Abort a queued command.
+-	 (commands that are on the bus can't be aborted easily)
+- Input : pHCB - Pointer to host adapter structure
+- Output : None.
+- Return : pSRB - Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_abort_srb(ORC_HCS * hcsp, struct scsi_cmnd *SCpnt)
++static int inia100_abort_cmd(struct orc_host * host, struct scsi_cmnd *cmd)
+ {
+-	ESCB *pVirEscb;
+-	ORC_SCB *pVirScb;
+-	UCHAR i;
+-	ULONG flags;
+-
+-	spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+-
+-	pVirScb = hcsp->HCS_virScbArray;
+-
+-	for (i = 0; i < ORC_MAXQUEUE; i++, pVirScb++) {
+-		pVirEscb = pVirScb->SCB_EScb;
+-		if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt)) {
+-			if (pVirScb->SCB_TagMsg == 0) {
+-				spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+-				return FAILED;
++	struct orc_extended_scb *escb;
++	struct orc_scb *scb;
++	u8 i;
++	unsigned long flags;
++
++	spin_lock_irqsave(&(host->allocation_lock), flags);
++
++	scb = host->scb_virt;
++
++	/* Walk the queue until we find the SCB that belongs to the command
++	   block.
This isn't a performance critical path so a walk in the park ++ here does no harm */ ++ ++ for (i = 0; i < ORC_MAXQUEUE; i++, scb++) { ++ escb = scb->escb; ++ if (scb->status && escb->srb == cmd) { ++ if (scb->tag_msg == 0) { ++ goto out; + } else { +- if (abort_SCB(hcsp, pVirScb)) { +- pVirEscb->SCB_Srb = NULL; +- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); ++ /* Issue an ABORT to the firmware */ ++ if (orchid_abort_scb(host, scb)) { ++ escb->srb = NULL; ++ spin_unlock_irqrestore(&host->allocation_lock, flags); + return SUCCESS; +- } else { +- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); +- return FAILED; +- } ++ } else ++ goto out; + } + } + } +- spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags); ++out: ++ spin_unlock_irqrestore(&host->allocation_lock, flags); + return FAILED; + } + +-/*********************************************************************** +- Routine Description: +- This is the interrupt service routine for the Orchid SCSI adapter. +- It reads the interrupt register to determine if the adapter is indeed +- the source of the interrupt and clears the interrupt at the device. +- Arguments: +- HwDeviceExtension - HBA miniport driver's adapter data storage +- Return Value: +-***********************************************************************/ +-static void orc_interrupt( +- ORC_HCS * hcsp +-) ++/** ++ * orc_interrupt - IRQ processing ++ * @host: Host causing the interrupt ++ * ++ * This function is called from the IRQ handler and protected ++ * by the host lock. While the controller reports that there are ++ * scb's for processing we pull them off the controller, turn the ++ * index into a host address pointer to the scb and call the scb ++ * handler. ++ * ++ * Returns IRQ_HANDLED if any SCBs were processed, IRQ_NONE otherwise ++ */ ++ ++static irqreturn_t orc_interrupt(struct orc_host * host) + { +- BYTE bScbIdx; +- ORC_SCB *pScb; ++ u8 scb_index; ++ struct orc_scb *scb; + +- if (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT) == 0) { +- return; // 0; ++ /* Check if we have an SCB queued for servicing */ ++ if (inb(host->base + ORC_RQUEUECNT) == 0) ++ return IRQ_NONE; + +- } + do { +- bScbIdx = ORC_RD(hcsp->HCS_Base, ORC_RQUEUE); +- +- pScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (ULONG) (sizeof(ORC_SCB) * bScbIdx)); +- pScb->SCB_Status = 0x0; +- +- inia100SCBPost((BYTE *) hcsp, (BYTE *) pScb); +- } while (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT)); +- return; //1; ++ /* Get the SCB index of the SCB to service */ ++ scb_index = inb(host->base + ORC_RQUEUE); + ++ /* Translate it back to a host pointer */ ++ scb = (struct orc_scb *) ((unsigned long) host->scb_virt + (unsigned long) (sizeof(struct orc_scb) * scb_index)); ++ scb->status = 0x0; ++ /* Process the SCB */ ++ inia100_scb_handler(host, scb); ++ } while (inb(host->base + ORC_RQUEUECNT)); ++ return IRQ_HANDLED; + } /* End of I1060Interrupt() */ + +-/***************************************************************************** +- Function name : inia100BuildSCB +- Description : +- Input : pHCB - Pointer to host adapter structure +- Output : None. +- Return : pSRB - Pointer to SCSI request block. 
+-*****************************************************************************/
+-static void inia100BuildSCB(ORC_HCS * pHCB, ORC_SCB * pSCB, struct scsi_cmnd * SCpnt)
++/**
++ * inia100_build_scb - build SCB
++ * @host: host owning the control block
++ * @scb: control block to use
++ * @cmd: Mid layer command
++ *
++ * Build a host adapter control block from the SCSI mid layer command
++ */
++
++static void inia100_build_scb(struct orc_host * host, struct orc_scb * scb, struct scsi_cmnd * cmd)
+ {				/* Create corresponding SCB */
+-	struct scatterlist *pSrbSG;
+-	ORC_SG *pSG;		/* Pointer to SG list */
++	struct scatterlist *sg;
++	struct orc_sgent *sgent;	/* Pointer to SG list */
+ 	int i, count_sg;
+-	ESCB *pEScb;
++	struct orc_extended_scb *escb;
+ 
+-	pEScb = pSCB->SCB_EScb;
+-	pEScb->SCB_Srb = SCpnt;
+-	pSG = NULL;
+-
+-	pSCB->SCB_Opcode = ORC_EXECSCSI;
+-	pSCB->SCB_Flags = SCF_NO_DCHK;	/* Clear done bit */
+-	pSCB->SCB_Target = SCpnt->device->id;
+-	pSCB->SCB_Lun = SCpnt->device->lun;
+-	pSCB->SCB_Reserved0 = 0;
+-	pSCB->SCB_Reserved1 = 0;
+-	pSCB->SCB_SGLen = 0;
+-
+-	if ((pSCB->SCB_XferLen = (U32) SCpnt->request_bufflen)) {
+-		pSG = (ORC_SG *) & pEScb->ESCB_SGList[0];
+-		if (SCpnt->use_sg) {
+-			pSrbSG = (struct scatterlist *) SCpnt->request_buffer;
+-			count_sg = pci_map_sg(pHCB->pdev, pSrbSG, SCpnt->use_sg,
+-					      SCpnt->sc_data_direction);
+-			pSCB->SCB_SGLen = (U32) (count_sg * 8);
+-			for (i = 0; i < count_sg; i++, pSG++, pSrbSG++) {
+-				pSG->SG_Ptr = (U32) sg_dma_address(pSrbSG);
+-				pSG->SG_Len = (U32) sg_dma_len(pSrbSG);
+-			}
+-		} else if (SCpnt->request_bufflen != 0) {/* Non SG */
+-			pSCB->SCB_SGLen = 0x8;
+-			SCpnt->SCp.dma_handle = pci_map_single(pHCB->pdev,
+-					SCpnt->request_buffer,
+-					SCpnt->request_bufflen,
+-					SCpnt->sc_data_direction);
+-			pSG->SG_Ptr = (U32) SCpnt->SCp.dma_handle;
+-			pSG->SG_Len = (U32) SCpnt->request_bufflen;
++	/* Links between the escb, scb and Linux scsi midlayer cmd */
++	escb = scb->escb;
++	escb->srb = cmd;
++	sgent = NULL;
++
++	/* Set up the SCB to do a SCSI command block */
++	scb->opcode = ORC_EXECSCSI;
++	scb->flags = SCF_NO_DCHK;	/* Clear done bit */
++	scb->target = cmd->device->id;
++	scb->lun = cmd->device->lun;
++	scb->reserved0 = 0;
++	scb->reserved1 = 0;
++	scb->sg_len = 0;
++
++	scb->xferlen = (u32) scsi_bufflen(cmd);
++	sgent = (struct orc_sgent *) & escb->sglist[0];
++
++	count_sg = scsi_dma_map(cmd);
++	BUG_ON(count_sg < 0);
++
++	/* Build the scatter gather lists */
++	if (count_sg) {
++		scb->sg_len = (u32) (count_sg * 8);
++		scsi_for_each_sg(cmd, sg, count_sg, i) {
++			sgent->base = (u32) sg_dma_address(sg);
++			sgent->length = (u32) sg_dma_len(sg);
++			sgent++;
++		}
+ 	} else {
+-		pSCB->SCB_SGLen = 0;
+-		pSG->SG_Ptr = 0;
+-		pSG->SG_Len = 0;
+-	}
+-	}
+-	pSCB->SCB_SGPAddr = (U32) pSCB->SCB_SensePAddr;
+-	pSCB->SCB_HaStat = 0;
+-	pSCB->SCB_TaStat = 0;
+-	pSCB->SCB_Link = 0xFF;
+-	pSCB->SCB_SenseLen = SENSE_SIZE;
+-	pSCB->SCB_CDBLen = SCpnt->cmd_len;
+-	if (pSCB->SCB_CDBLen >= IMAX_CDB) {
+-		printk("max cdb length= %x\b", SCpnt->cmd_len);
+-		pSCB->SCB_CDBLen = IMAX_CDB;
+-	}
+-	pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW;
+-	if (SCpnt->device->tagged_supported) {	/* Tag Support */
+-		pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG;	/* Do simple tag only */
++		scb->sg_len = 0;
++		sgent->base = 0;
++		sgent->length = 0;
++	}
++	scb->sg_addr = (u32) scb->sense_addr;
++	scb->hastat = 0;
++	scb->tastat = 0;
++	scb->link = 0xFF;
++	scb->sense_len = SENSE_SIZE;
++	scb->cdb_len = cmd->cmd_len;
++	if (scb->cdb_len >= IMAX_CDB) {
++		printk("max cdb length= %x\n",
cmd->cmd_len); ++ scb->cdb_len = IMAX_CDB; ++ } ++ scb->ident = cmd->device->lun | DISC_ALLOW; ++ if (cmd->device->tagged_supported) { /* Tag Support */ ++ scb->tag_msg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ + } else { +- pSCB->SCB_TagMsg = 0; /* No tag support */ ++ scb->tag_msg = 0; /* No tag support */ + } +- memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, pSCB->SCB_CDBLen); +- return; ++ memcpy(&scb->cdb[0], &cmd->cmnd, scb->cdb_len); + } + +-/***************************************************************************** +- Function name : inia100_queue +- Description : Queue a command and setup interrupts for a free bus. +- Input : pHCB - Pointer to host adapter structure +- Output : None. +- Return : pSRB - Pointer to SCSI request block. +-*****************************************************************************/ +-static int inia100_queue(struct scsi_cmnd * SCpnt, void (*done) (struct scsi_cmnd *)) ++/** ++ * inia100_queue - queue command with host ++ * @cmd: Command block ++ * @done: Completion function ++ * ++ * Called by the mid layer to queue a command. Process the command ++ * block, build the host specific scb structures and if there is room ++ * queue the command down to the controller ++ */ ++ ++static int inia100_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *)) + { +- register ORC_SCB *pSCB; +- ORC_HCS *pHCB; /* Point to Host adapter control block */ ++ struct orc_scb *scb; ++ struct orc_host *host; /* Point to Host adapter control block */ + +- pHCB = (ORC_HCS *) SCpnt->device->host->hostdata; +- SCpnt->scsi_done = done; ++ host = (struct orc_host *) cmd->device->host->hostdata; ++ cmd->scsi_done = done; + /* Get free SCSI control block */ +- if ((pSCB = orc_alloc_scb(pHCB)) == NULL) ++ if ((scb = orc_alloc_scb(host)) == NULL) + return SCSI_MLQUEUE_HOST_BUSY; + +- inia100BuildSCB(pHCB, pSCB, SCpnt); +- orc_exec_scb(pHCB, pSCB); /* Start execute SCB */ +- +- return (0); ++ inia100_build_scb(host, scb, cmd); ++ orc_exec_scb(host, scb); /* Start execute SCB */ ++ return 0; + } + + /***************************************************************************** + Function name : inia100_abort + Description : Abort a queued command. + (commands that are on the bus can't be aborted easily) +- Input : pHCB - Pointer to host adapter structure ++ Input : host - Pointer to host adapter structure + Output : None. + Return : pSRB - Pointer to SCSI request block. + *****************************************************************************/ +-static int inia100_abort(struct scsi_cmnd * SCpnt) ++static int inia100_abort(struct scsi_cmnd * cmd) + { +- ORC_HCS *hcsp; ++ struct orc_host *host; + +- hcsp = (ORC_HCS *) SCpnt->device->host->hostdata; +- return orc_abort_srb(hcsp, SCpnt); ++ host = (struct orc_host *) cmd->device->host->hostdata; ++ return inia100_abort_cmd(host, cmd); + } + + /***************************************************************************** + Function name : inia100_reset + Description : Reset registers, reset a hanging bus and + kill active and disconnected commands for target w/o soft reset +- Input : pHCB - Pointer to host adapter structure ++ Input : host - Pointer to host adapter structure + Output : None. + Return : pSRB - Pointer to SCSI request block. 
+ *****************************************************************************/
+-static int inia100_bus_reset(struct scsi_cmnd * SCpnt)
++static int inia100_bus_reset(struct scsi_cmnd * cmd)
+ {				/* I need Host Control Block Information */
+-	ORC_HCS *pHCB;
+-	pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	return orc_reset_scsi_bus(pHCB);
++	struct orc_host *host;
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	return orc_reset_scsi_bus(host);
+ }
+ 
+ /*****************************************************************************
+  Function name : inia100_device_reset
+  Description : Reset the device
+- Input : pHCB - Pointer to host adapter structure
++ Input : host - Pointer to host adapter structure
+  Output : None.
+  Return : pSRB - Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_device_reset(struct scsi_cmnd * SCpnt)
++static int inia100_device_reset(struct scsi_cmnd * cmd)
+ {				/* I need Host Control Block Information */
+-	ORC_HCS *pHCB;
+-	pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	return orc_device_reset(pHCB, SCpnt, scmd_id(SCpnt));
++	struct orc_host *host;
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	return orc_device_reset(host, cmd, scmd_id(cmd));
+ 
+ }
+ 
+-/*****************************************************************************
+- Function name : inia100SCBPost
+- Description : This is callback routine be called when orc finish one
+-			SCSI command.
+- Input : pHCB - Pointer to host adapter control block.
+-	  pSCB - Pointer to SCSI control block.
+- Output : None.
+- Return : None.
+-*****************************************************************************/
++/**
++ * inia100_scb_handler - interrupt callback
++ * @host: Host causing the interrupt
++ * @scb: SCB the controller returned as needing processing
++ *
++ * Perform completion processing on a control block. Do the conversions
++ * from host to SCSI midlayer error coding, save any sense data and
++ * then complete with the midlayer and recycle the scb.
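++ *
++ * In outline (an illustrative recap of the code below, not new
++ * behaviour): the midlayer result is assembled as
++ *
++ *	cmd->result = scb->tastat | (scb->hastat << 16);
++ *
++ * after hastat has been folded onto DID_* codes (0x11 -> DID_TIME_OUT,
++ * 0x14 -> DID_RESET, 0x1a -> DID_ABORT, anything unrecognised ->
++ * DID_ERROR) and, on a check condition (tastat == 2), SENSE_SIZE bytes
++ * of sense data have been copied back into cmd->sense_buffer.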
++ */ ++ ++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb) + { +- struct scsi_cmnd *pSRB; /* Pointer to SCSI request block */ +- ORC_HCS *pHCB; +- ORC_SCB *pSCB; +- ESCB *pEScb; +- +- pHCB = (ORC_HCS *) pHcb; +- pSCB = (ORC_SCB *) pScb; +- pEScb = pSCB->SCB_EScb; +- if ((pSRB = (struct scsi_cmnd *) pEScb->SCB_Srb) == 0) { +- printk("inia100SCBPost: SRB pointer is empty\n"); +- orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */ ++ struct scsi_cmnd *cmd; /* Pointer to SCSI request block */ ++ struct orc_extended_scb *escb; ++ ++ escb = scb->escb; ++ if ((cmd = (struct scsi_cmnd *) escb->srb) == NULL) { ++ printk(KERN_ERR "inia100_scb_handler: SRB pointer is empty\n"); ++ orc_release_scb(host, scb); /* Release SCB for current channel */ + return; + } +- pEScb->SCB_Srb = NULL; ++ escb->srb = NULL; + +- switch (pSCB->SCB_HaStat) { ++ switch (scb->hastat) { + case 0x0: + case 0xa: /* Linked command complete without error and linked normally */ + case 0xb: /* Linked command complete without error interrupt generated */ +- pSCB->SCB_HaStat = 0; ++ scb->hastat = 0; + break; + + case 0x11: /* Selection time out-The initiator selection or target + reselection was not complete within the SCSI Time out period */ +- pSCB->SCB_HaStat = DID_TIME_OUT; ++ scb->hastat = DID_TIME_OUT; + break; + + case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus + phase sequence was requested by the target. The host adapter + will generate a SCSI Reset Condition, notifying the host with + a SCRD interrupt */ +- pSCB->SCB_HaStat = DID_RESET; ++ scb->hastat = DID_RESET; + break; + + case 0x1a: /* SCB Aborted. 07/21/98 */ +- pSCB->SCB_HaStat = DID_ABORT; ++ scb->hastat = DID_ABORT; + break; + + case 0x12: /* Data overrun/underrun-The target attempted to transfer more data +@@ -984,46 +1022,41 @@ + case 0x16: /* Invalid CCB Operation Code-The first byte of the CCB was invalid. */ + + default: +- printk("inia100: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat); +- pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */ ++ printk(KERN_DEBUG "inia100: %x %x\n", scb->hastat, scb->tastat); ++ scb->hastat = DID_ERROR; /* Couldn't find any better */ + break; + } + +- if (pSCB->SCB_TaStat == 2) { /* Check condition */ +- memcpy((unsigned char *) &pSRB->sense_buffer[0], +- (unsigned char *) &pEScb->ESCB_SGList[0], SENSE_SIZE); +- } +- pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16); +- +- if (pSRB->use_sg) { +- pci_unmap_sg(pHCB->pdev, +- (struct scatterlist *)pSRB->request_buffer, +- pSRB->use_sg, pSRB->sc_data_direction); +- } else if (pSRB->request_bufflen != 0) { +- pci_unmap_single(pHCB->pdev, pSRB->SCp.dma_handle, +- pSRB->request_bufflen, +- pSRB->sc_data_direction); +- } +- +- pSRB->scsi_done(pSRB); /* Notify system DONE */ +- +- orc_release_scb(pHCB, pSCB); /* Release SCB for current channel */ ++ if (scb->tastat == 2) { /* Check condition */ ++ memcpy((unsigned char *) &cmd->sense_buffer[0], ++ (unsigned char *) &escb->sglist[0], SENSE_SIZE); ++ } ++ cmd->result = scb->tastat | (scb->hastat << 16); ++ scsi_dma_unmap(cmd); ++ cmd->scsi_done(cmd); /* Notify system DONE */ ++ orc_release_scb(host, scb); /* Release SCB for current channel */ + } + +-/* +- * Interrupt handler (main routine of the driver) ++/** ++ * inia100_intr - interrupt handler ++ * @irqno: Interrupt value ++ * @devid: Host adapter ++ * ++ * Entry point for IRQ handling. All the real work is performed ++ * by orc_interrupt. 
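++ *
++ * A minimal sketch of the pattern used below: the host lock is held
++ * around the core handler and its verdict is passed straight back to
++ * the IRQ layer:
++ *
++ *	spin_lock_irqsave(shost->host_lock, flags);
++ *	res = orc_interrupt(host);
++ *	spin_unlock_irqrestore(shost->host_lock, flags);
++ *	return res;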
+ */ + static irqreturn_t inia100_intr(int irqno, void *devid) + { +- struct Scsi_Host *host = (struct Scsi_Host *)devid; +- ORC_HCS *pHcb = (ORC_HCS *)host->hostdata; ++ struct Scsi_Host *shost = (struct Scsi_Host *)devid; ++ struct orc_host *host = (struct orc_host *)shost->hostdata; + unsigned long flags; ++ irqreturn_t res; + +- spin_lock_irqsave(host->host_lock, flags); +- orc_interrupt(pHcb); +- spin_unlock_irqrestore(host->host_lock, flags); ++ spin_lock_irqsave(shost->host_lock, flags); ++ res = orc_interrupt(host); ++ spin_unlock_irqrestore(shost->host_lock, flags); + +- return IRQ_HANDLED; ++ return res; + } + + static struct scsi_host_template inia100_template = { +@@ -1044,12 +1077,12 @@ + const struct pci_device_id *id) + { + struct Scsi_Host *shost; +- ORC_HCS *pHCB; ++ struct orc_host *host; + unsigned long port, bios; + int error = -ENODEV; + u32 sz; +- unsigned long dBiosAdr; +- char *pbBiosAdr; ++ unsigned long biosaddr; ++ char *bios_phys; + + if (pci_enable_device(pdev)) + goto out; +@@ -1068,55 +1101,55 @@ + } + + /* <02> read from base address + 0x50 offset to get the bios value. */ +- bios = ORC_RDWORD(port, 0x50); ++ bios = inw(port + 0x50); + + +- shost = scsi_host_alloc(&inia100_template, sizeof(ORC_HCS)); ++ shost = scsi_host_alloc(&inia100_template, sizeof(struct orc_host)); + if (!shost) + goto out_release_region; + +- pHCB = (ORC_HCS *)shost->hostdata; +- pHCB->pdev = pdev; +- pHCB->HCS_Base = port; +- pHCB->HCS_BIOS = bios; +- spin_lock_init(&pHCB->BitAllocFlagLock); ++ host = (struct orc_host *)shost->hostdata; ++ host->pdev = pdev; ++ host->base = port; ++ host->BIOScfg = bios; ++ spin_lock_init(&host->allocation_lock); + + /* Get total memory needed for SCB */ +- sz = ORC_MAXQUEUE * sizeof(ORC_SCB); +- pHCB->HCS_virScbArray = pci_alloc_consistent(pdev, sz, +- &pHCB->HCS_physScbArray); +- if (!pHCB->HCS_virScbArray) { ++ sz = ORC_MAXQUEUE * sizeof(struct orc_scb); ++ host->scb_virt = pci_alloc_consistent(pdev, sz, ++ &host->scb_phys); ++ if (!host->scb_virt) { + printk("inia100: SCB memory allocation error\n"); + goto out_host_put; + } +- memset(pHCB->HCS_virScbArray, 0, sz); ++ memset(host->scb_virt, 0, sz); + + /* Get total memory needed for ESCB */ +- sz = ORC_MAXQUEUE * sizeof(ESCB); +- pHCB->HCS_virEscbArray = pci_alloc_consistent(pdev, sz, +- &pHCB->HCS_physEscbArray); +- if (!pHCB->HCS_virEscbArray) { ++ sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb); ++ host->escb_virt = pci_alloc_consistent(pdev, sz, ++ &host->escb_phys); ++ if (!host->escb_virt) { + printk("inia100: ESCB memory allocation error\n"); + goto out_free_scb_array; + } +- memset(pHCB->HCS_virEscbArray, 0, sz); ++ memset(host->escb_virt, 0, sz); + +- dBiosAdr = pHCB->HCS_BIOS; +- dBiosAdr = (dBiosAdr << 4); +- pbBiosAdr = phys_to_virt(dBiosAdr); +- if (init_orchid(pHCB)) { /* Initialize orchid chip */ ++ biosaddr = host->BIOScfg; ++ biosaddr = (biosaddr << 4); ++ bios_phys = phys_to_virt(biosaddr); ++ if (init_orchid(host)) { /* Initialize orchid chip */ + printk("inia100: initial orchid fail!!\n"); + goto out_free_escb_array; + } + +- shost->io_port = pHCB->HCS_Base; ++ shost->io_port = host->base; + shost->n_io_port = 0xff; + shost->can_queue = ORC_MAXQUEUE; + shost->unique_id = shost->io_port; +- shost->max_id = pHCB->HCS_MaxTar; ++ shost->max_id = host->max_targets; + shost->max_lun = 16; +- shost->irq = pHCB->HCS_Intr = pdev->irq; +- shost->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */ ++ shost->irq = pdev->irq; ++ shost->this_id = host->scsi_id; /* Assign HCS index */ + 
shost->sg_tablesize = TOTAL_SG_ENTRY; + + /* Initial orc chip */ +@@ -1137,36 +1170,36 @@ + scsi_scan_host(shost); + return 0; + +- out_free_irq: ++out_free_irq: + free_irq(shost->irq, shost); +- out_free_escb_array: +- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB), +- pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray); +- out_free_scb_array: +- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB), +- pHCB->HCS_virScbArray, pHCB->HCS_physScbArray); +- out_host_put: ++out_free_escb_array: ++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb), ++ host->escb_virt, host->escb_phys); ++out_free_scb_array: ++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb), ++ host->scb_virt, host->scb_phys); ++out_host_put: + scsi_host_put(shost); +- out_release_region: ++out_release_region: + release_region(port, 256); +- out_disable_device: ++out_disable_device: + pci_disable_device(pdev); +- out: ++out: + return error; + } + + static void __devexit inia100_remove_one(struct pci_dev *pdev) + { + struct Scsi_Host *shost = pci_get_drvdata(pdev); +- ORC_HCS *pHCB = (ORC_HCS *)shost->hostdata; ++ struct orc_host *host = (struct orc_host *)shost->hostdata; + + scsi_remove_host(shost); + + free_irq(shost->irq, shost); +- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB), +- pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray); +- pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB), +- pHCB->HCS_virScbArray, pHCB->HCS_physScbArray); ++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb), ++ host->escb_virt, host->escb_phys); ++ pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb), ++ host->scb_virt, host->scb_phys); + release_region(shost->io_port, 256); + + scsi_host_put(shost); +diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.h linux-2.6.22-591/drivers/scsi/a100u2w.h +--- linux-2.6.22-570/drivers/scsi/a100u2w.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/a100u2w.h 2007-12-21 15:36:12.000000000 -0500 +@@ -18,27 +18,6 @@ + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * +- * -------------------------------------------------------------------------- +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions, and the following disclaimer, +- * without modification, immediately at the beginning of the file. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. The name of the author may not be used to endorse or promote products +- * derived from this software without specific prior written permission. +- * +- * Where this Software is combined with software released under the terms of +- * the GNU General Public License ("GPL") and the terms of the GPL would require the +- * combined work to also be released under the terms of the GPL, the terms +- * and conditions of this License will apply in addition to those of the +- * GPL with the exception of any terms or conditions of this License that +- * conflict with, or are expressly prohibited by, the GPL. 
+- * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +@@ -50,30 +29,19 @@ + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. +- */ +- +-/* ++ * + * Revision History: + * 06/18/98 HL, Initial production Version 1.02 + * 12/19/98 bv, Use spinlocks for 2.1.95 and up + * 06/25/02 Doug Ledford + * - This and the i60uscsi.h file are almost identical, + * merged them into a single header used by both .c files. ++ * 14/06/07 Alan Cox ++ * - Grand cleanup and Linuxisation + */ + + #define inia100_REVID "Initio INI-A100U2W SCSI device driver; Revision: 1.02d" + +-#define ULONG unsigned long +-#define USHORT unsigned short +-#define UCHAR unsigned char +-#define BYTE unsigned char +-#define WORD unsigned short +-#define DWORD unsigned long +-#define UBYTE unsigned char +-#define UWORD unsigned short +-#define UDWORD unsigned long +-#define U32 u32 +- + #if 1 + #define ORC_MAXQUEUE 245 + #define ORC_MAXTAGS 64 +@@ -90,10 +58,10 @@ + /************************************************************************/ + /* Scatter-Gather Element Structure */ + /************************************************************************/ +-typedef struct ORC_SG_Struc { +- U32 SG_Ptr; /* Data Pointer */ +- U32 SG_Len; /* Data Length */ +-} ORC_SG; ++struct orc_sgent { ++ u32 base; /* Data Pointer */ ++ u32 length; /* Data Length */ ++}; + + /* SCSI related definition */ + #define DISC_NOT_ALLOW 0x80 /* Disconnect is not allowed */ +@@ -165,42 +133,45 @@ + #define ORC_PRGMCTR1 0xE3 /* RISC program counter */ + #define ORC_RISCRAM 0xEC /* RISC RAM data port 4 bytes */ + +-typedef struct orc_extended_scb { /* Extended SCB */ +- ORC_SG ESCB_SGList[TOTAL_SG_ENTRY]; /*0 Start of SG list */ +- struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */ +-} ESCB; ++struct orc_extended_scb { /* Extended SCB */ ++ struct orc_sgent sglist[TOTAL_SG_ENTRY]; /*0 Start of SG list */ ++ struct scsi_cmnd *srb; /*50 SRB Pointer */ ++}; + + /*********************************************************************** + SCSI Control Block ++ ++ 0x40 bytes long, the last 8 are user bytes + ************************************************************************/ +-typedef struct orc_scb { /* Scsi_Ctrl_Blk */ +- UBYTE SCB_Opcode; /*00 SCB command code&residual */ +- UBYTE SCB_Flags; /*01 SCB Flags */ +- UBYTE SCB_Target; /*02 Target Id */ +- UBYTE SCB_Lun; /*03 Lun */ +- U32 SCB_Reserved0; /*04 Reserved for ORCHID must 0 */ +- U32 SCB_XferLen; /*08 Data Transfer Length */ +- U32 SCB_Reserved1; /*0C Reserved for ORCHID must 0 */ +- U32 SCB_SGLen; /*10 SG list # * 8 */ +- U32 SCB_SGPAddr; /*14 SG List Buf physical Addr */ +- U32 SCB_SGPAddrHigh; /*18 SG Buffer high physical Addr */ +- UBYTE SCB_HaStat; /*1C Host Status */ +- UBYTE SCB_TaStat; /*1D Target Status */ +- UBYTE SCB_Status; /*1E SCB status */ +- UBYTE SCB_Link; /*1F Link pointer, default 0xFF */ +- UBYTE SCB_SenseLen; /*20 Sense Allocation Length */ +- UBYTE SCB_CDBLen; /*21 CDB Length */ +- UBYTE SCB_Ident; /*22 Identify */ +- UBYTE SCB_TagMsg; /*23 Tag Message */ +- UBYTE SCB_CDB[IMAX_CDB]; /*24 SCSI CDBs */ +- UBYTE SCB_ScbIdx; /*3C Index for this ORCSCB */ +- U32 SCB_SensePAddr; /*34 Sense Buffer physical Addr */ +- +- ESCB *SCB_EScb; /*38 Extended SCB Pointer */ +-#ifndef ALPHA +- UBYTE 
SCB_Reserved2[4]; /*3E Reserved for Driver use */ ++struct orc_scb { /* Scsi_Ctrl_Blk */ ++ u8 opcode; /*00 SCB command code&residual */ ++ u8 flags; /*01 SCB Flags */ ++ u8 target; /*02 Target Id */ ++ u8 lun; /*03 Lun */ ++ u32 reserved0; /*04 Reserved for ORCHID must 0 */ ++ u32 xferlen; /*08 Data Transfer Length */ ++ u32 reserved1; /*0C Reserved for ORCHID must 0 */ ++ u32 sg_len; /*10 SG list # * 8 */ ++ u32 sg_addr; /*14 SG List Buf physical Addr */ ++ u32 sg_addrhigh; /*18 SG Buffer high physical Addr */ ++ u8 hastat; /*1C Host Status */ ++ u8 tastat; /*1D Target Status */ ++ u8 status; /*1E SCB status */ ++ u8 link; /*1F Link pointer, default 0xFF */ ++ u8 sense_len; /*20 Sense Allocation Length */ ++ u8 cdb_len; /*21 CDB Length */ ++ u8 ident; /*22 Identify */ ++ u8 tag_msg; /*23 Tag Message */ ++ u8 cdb[IMAX_CDB]; /*24 SCSI CDBs */ ++ u8 scbidx; /*3C Index for this ORCSCB */ ++ u32 sense_addr; /*34 Sense Buffer physical Addr */ ++ ++ struct orc_extended_scb *escb; /*38 Extended SCB Pointer */ ++ /* 64bit pointer or 32bit pointer + reserved ? */ ++#ifndef CONFIG_64BIT ++ u8 reserved2[4]; /*3E Reserved for Driver use */ + #endif +-} ORC_SCB; ++}; + + /* Opcodes of ORCSCB_Opcode */ + #define ORC_EXECSCSI 0x00 /* SCSI initiator command with residual */ +@@ -239,13 +210,13 @@ + Target Device Control Structure + **********************************************************************/ + +-typedef struct ORC_Tar_Ctrl_Struc { +- UBYTE TCS_DrvDASD; /* 6 */ +- UBYTE TCS_DrvSCSI; /* 7 */ +- UBYTE TCS_DrvHead; /* 8 */ +- UWORD TCS_DrvFlags; /* 4 */ +- UBYTE TCS_DrvSector; /* 7 */ +-} ORC_TCS; ++struct orc_target { ++ u8 TCS_DrvDASD; /* 6 */ ++ u8 TCS_DrvSCSI; /* 7 */ ++ u8 TCS_DrvHead; /* 8 */ ++ u16 TCS_DrvFlags; /* 4 */ ++ u8 TCS_DrvSector; /* 7 */ ++}; + + /* Bit Definition for TCF_DrvFlags */ + #define TCS_DF_NODASD_SUPT 0x20 /* Suppress OS/2 DASD Mgr support */ +@@ -255,32 +226,23 @@ + /*********************************************************************** + Host Adapter Control Structure + ************************************************************************/ +-typedef struct ORC_Ha_Ctrl_Struc { +- USHORT HCS_Base; /* 00 */ +- UBYTE HCS_Index; /* 02 */ +- UBYTE HCS_Intr; /* 04 */ +- UBYTE HCS_SCSI_ID; /* 06 H/A SCSI ID */ +- UBYTE HCS_BIOS; /* 07 BIOS configuration */ +- +- UBYTE HCS_Flags; /* 0B */ +- UBYTE HCS_HAConfig1; /* 1B SCSI0MAXTags */ +- UBYTE HCS_MaxTar; /* 1B SCSI0MAXTags */ +- +- USHORT HCS_Units; /* Number of units this adapter */ +- USHORT HCS_AFlags; /* Adapter info. 
defined flags */ +- ULONG HCS_Timeout; /* Adapter timeout value */ +- ORC_SCB *HCS_virScbArray; /* 28 Virtual Pointer to SCB array */ +- dma_addr_t HCS_physScbArray; /* Scb Physical address */ +- ESCB *HCS_virEscbArray; /* Virtual pointer to ESCB Scatter list */ +- dma_addr_t HCS_physEscbArray; /* scatter list Physical address */ +- UBYTE TargetFlag[16]; /* 30 target configuration, TCF_EN_TAG */ +- UBYTE MaximumTags[16]; /* 40 ORC_MAX_SCBS */ +- UBYTE ActiveTags[16][16]; /* 50 */ +- ORC_TCS HCS_Tcs[16]; /* 28 */ +- U32 BitAllocFlag[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */ +- spinlock_t BitAllocFlagLock; ++struct orc_host { ++ unsigned long base; /* Base address */ ++ u8 index; /* Index (Channel)*/ ++ u8 scsi_id; /* H/A SCSI ID */ ++ u8 BIOScfg; /*BIOS configuration */ ++ u8 flags; ++ u8 max_targets; /* SCSI0MAXTags */ ++ struct orc_scb *scb_virt; /* Virtual Pointer to SCB array */ ++ dma_addr_t scb_phys; /* Scb Physical address */ ++ struct orc_extended_scb *escb_virt; /* Virtual pointer to ESCB Scatter list */ ++ dma_addr_t escb_phys; /* scatter list Physical address */ ++ u8 target_flag[16]; /* target configuration, TCF_EN_TAG */ ++ u8 max_tags[16]; /* ORC_MAX_SCBS */ ++ u32 allocation_map[MAX_CHANNELS][8]; /* Max STB is 256, So 256/32 */ ++ spinlock_t allocation_lock; + struct pci_dev *pdev; +-} ORC_HCS; ++}; + + /* Bit Definition for HCS_Flags */ + +@@ -301,79 +263,79 @@ + #define HCS_AF_DISABLE_RESET 0x10 /* Adapter disable reset */ + #define HCS_AF_DISABLE_ADPT 0x80 /* Adapter disable */ + +-typedef struct _NVRAM { ++struct orc_nvram { + /*----------header ---------------*/ +- UCHAR SubVendorID0; /* 00 - Sub Vendor ID */ +- UCHAR SubVendorID1; /* 00 - Sub Vendor ID */ +- UCHAR SubSysID0; /* 02 - Sub System ID */ +- UCHAR SubSysID1; /* 02 - Sub System ID */ +- UCHAR SubClass; /* 04 - Sub Class */ +- UCHAR VendorID0; /* 05 - Vendor ID */ +- UCHAR VendorID1; /* 05 - Vendor ID */ +- UCHAR DeviceID0; /* 07 - Device ID */ +- UCHAR DeviceID1; /* 07 - Device ID */ +- UCHAR Reserved0[2]; /* 09 - Reserved */ +- UCHAR Revision; /* 0B - Revision of data structure */ ++ u8 SubVendorID0; /* 00 - Sub Vendor ID */ ++ u8 SubVendorID1; /* 00 - Sub Vendor ID */ ++ u8 SubSysID0; /* 02 - Sub System ID */ ++ u8 SubSysID1; /* 02 - Sub System ID */ ++ u8 SubClass; /* 04 - Sub Class */ ++ u8 VendorID0; /* 05 - Vendor ID */ ++ u8 VendorID1; /* 05 - Vendor ID */ ++ u8 DeviceID0; /* 07 - Device ID */ ++ u8 DeviceID1; /* 07 - Device ID */ ++ u8 Reserved0[2]; /* 09 - Reserved */ ++ u8 revision; /* 0B - revision of data structure */ + /* ----Host Adapter Structure ---- */ +- UCHAR NumOfCh; /* 0C - Number of SCSI channel */ +- UCHAR BIOSConfig1; /* 0D - BIOS configuration 1 */ +- UCHAR BIOSConfig2; /* 0E - BIOS boot channel&target ID */ +- UCHAR BIOSConfig3; /* 0F - BIOS configuration 3 */ ++ u8 NumOfCh; /* 0C - Number of SCSI channel */ ++ u8 BIOSConfig1; /* 0D - BIOS configuration 1 */ ++ u8 BIOSConfig2; /* 0E - BIOS boot channel&target ID */ ++ u8 BIOSConfig3; /* 0F - BIOS configuration 3 */ + /* ----SCSI channel Structure ---- */ + /* from "CTRL-I SCSI Host Adapter SetUp menu " */ +- UCHAR SCSI0Id; /* 10 - Channel 0 SCSI ID */ +- UCHAR SCSI0Config; /* 11 - Channel 0 SCSI configuration */ +- UCHAR SCSI0MaxTags; /* 12 - Channel 0 Maximum tags */ +- UCHAR SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */ +- UCHAR ReservedforChannel0[2]; /* 14 - Reserved */ ++ u8 scsi_id; /* 10 - Channel 0 SCSI ID */ ++ u8 SCSI0Config; /* 11 - Channel 0 SCSI configuration */ ++ u8 SCSI0MaxTags; /* 12 - Channel 0 
Maximum tags */ ++ u8 SCSI0ResetTime; /* 13 - Channel 0 Reset recovering time */ ++ u8 ReservedforChannel0[2]; /* 14 - Reserved */ + + /* ----SCSI target Structure ---- */ + /* from "CTRL-I SCSI device SetUp menu " */ +- UCHAR Target00Config; /* 16 - Channel 0 Target 0 config */ +- UCHAR Target01Config; /* 17 - Channel 0 Target 1 config */ +- UCHAR Target02Config; /* 18 - Channel 0 Target 2 config */ +- UCHAR Target03Config; /* 19 - Channel 0 Target 3 config */ +- UCHAR Target04Config; /* 1A - Channel 0 Target 4 config */ +- UCHAR Target05Config; /* 1B - Channel 0 Target 5 config */ +- UCHAR Target06Config; /* 1C - Channel 0 Target 6 config */ +- UCHAR Target07Config; /* 1D - Channel 0 Target 7 config */ +- UCHAR Target08Config; /* 1E - Channel 0 Target 8 config */ +- UCHAR Target09Config; /* 1F - Channel 0 Target 9 config */ +- UCHAR Target0AConfig; /* 20 - Channel 0 Target A config */ +- UCHAR Target0BConfig; /* 21 - Channel 0 Target B config */ +- UCHAR Target0CConfig; /* 22 - Channel 0 Target C config */ +- UCHAR Target0DConfig; /* 23 - Channel 0 Target D config */ +- UCHAR Target0EConfig; /* 24 - Channel 0 Target E config */ +- UCHAR Target0FConfig; /* 25 - Channel 0 Target F config */ +- +- UCHAR SCSI1Id; /* 26 - Channel 1 SCSI ID */ +- UCHAR SCSI1Config; /* 27 - Channel 1 SCSI configuration */ +- UCHAR SCSI1MaxTags; /* 28 - Channel 1 Maximum tags */ +- UCHAR SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */ +- UCHAR ReservedforChannel1[2]; /* 2A - Reserved */ ++ u8 Target00Config; /* 16 - Channel 0 Target 0 config */ ++ u8 Target01Config; /* 17 - Channel 0 Target 1 config */ ++ u8 Target02Config; /* 18 - Channel 0 Target 2 config */ ++ u8 Target03Config; /* 19 - Channel 0 Target 3 config */ ++ u8 Target04Config; /* 1A - Channel 0 Target 4 config */ ++ u8 Target05Config; /* 1B - Channel 0 Target 5 config */ ++ u8 Target06Config; /* 1C - Channel 0 Target 6 config */ ++ u8 Target07Config; /* 1D - Channel 0 Target 7 config */ ++ u8 Target08Config; /* 1E - Channel 0 Target 8 config */ ++ u8 Target09Config; /* 1F - Channel 0 Target 9 config */ ++ u8 Target0AConfig; /* 20 - Channel 0 Target A config */ ++ u8 Target0BConfig; /* 21 - Channel 0 Target B config */ ++ u8 Target0CConfig; /* 22 - Channel 0 Target C config */ ++ u8 Target0DConfig; /* 23 - Channel 0 Target D config */ ++ u8 Target0EConfig; /* 24 - Channel 0 Target E config */ ++ u8 Target0FConfig; /* 25 - Channel 0 Target F config */ ++ ++ u8 SCSI1Id; /* 26 - Channel 1 SCSI ID */ ++ u8 SCSI1Config; /* 27 - Channel 1 SCSI configuration */ ++ u8 SCSI1MaxTags; /* 28 - Channel 1 Maximum tags */ ++ u8 SCSI1ResetTime; /* 29 - Channel 1 Reset recovering time */ ++ u8 ReservedforChannel1[2]; /* 2A - Reserved */ + + /* ----SCSI target Structure ---- */ + /* from "CTRL-I SCSI device SetUp menu " */ +- UCHAR Target10Config; /* 2C - Channel 1 Target 0 config */ +- UCHAR Target11Config; /* 2D - Channel 1 Target 1 config */ +- UCHAR Target12Config; /* 2E - Channel 1 Target 2 config */ +- UCHAR Target13Config; /* 2F - Channel 1 Target 3 config */ +- UCHAR Target14Config; /* 30 - Channel 1 Target 4 config */ +- UCHAR Target15Config; /* 31 - Channel 1 Target 5 config */ +- UCHAR Target16Config; /* 32 - Channel 1 Target 6 config */ +- UCHAR Target17Config; /* 33 - Channel 1 Target 7 config */ +- UCHAR Target18Config; /* 34 - Channel 1 Target 8 config */ +- UCHAR Target19Config; /* 35 - Channel 1 Target 9 config */ +- UCHAR Target1AConfig; /* 36 - Channel 1 Target A config */ +- UCHAR Target1BConfig; /* 37 - Channel 1 Target B config */ +- 
UCHAR Target1CConfig; /* 38 - Channel 1 Target C config */ +- UCHAR Target1DConfig; /* 39 - Channel 1 Target D config */ +- UCHAR Target1EConfig; /* 3A - Channel 1 Target E config */ +- UCHAR Target1FConfig; /* 3B - Channel 1 Target F config */ +- UCHAR reserved[3]; /* 3C - Reserved */ ++ u8 Target10Config; /* 2C - Channel 1 Target 0 config */ ++ u8 Target11Config; /* 2D - Channel 1 Target 1 config */ ++ u8 Target12Config; /* 2E - Channel 1 Target 2 config */ ++ u8 Target13Config; /* 2F - Channel 1 Target 3 config */ ++ u8 Target14Config; /* 30 - Channel 1 Target 4 config */ ++ u8 Target15Config; /* 31 - Channel 1 Target 5 config */ ++ u8 Target16Config; /* 32 - Channel 1 Target 6 config */ ++ u8 Target17Config; /* 33 - Channel 1 Target 7 config */ ++ u8 Target18Config; /* 34 - Channel 1 Target 8 config */ ++ u8 Target19Config; /* 35 - Channel 1 Target 9 config */ ++ u8 Target1AConfig; /* 36 - Channel 1 Target A config */ ++ u8 Target1BConfig; /* 37 - Channel 1 Target B config */ ++ u8 Target1CConfig; /* 38 - Channel 1 Target C config */ ++ u8 Target1DConfig; /* 39 - Channel 1 Target D config */ ++ u8 Target1EConfig; /* 3A - Channel 1 Target E config */ ++ u8 Target1FConfig; /* 3B - Channel 1 Target F config */ ++ u8 reserved[3]; /* 3C - Reserved */ + /* ---------- CheckSum ---------- */ +- UCHAR CheckSum; /* 3F - Checksum of NVRam */ +-} NVRAM, *PNVRAM; ++ u8 CheckSum; /* 3F - Checksum of NVRam */ ++}; + + /* Bios Configuration for nvram->BIOSConfig1 */ + #define NBC_BIOSENABLE 0x01 /* BIOS enable */ +@@ -407,10 +369,3 @@ + #define NCC_RESET_TIME 0x0A /* SCSI RESET recovering time */ + #define NTC_DEFAULT (NTC_1GIGA | NTC_NO_WIDESYNC | NTC_DISC_ENABLE) + +-#define ORC_RD(x,y) (UCHAR)(inb( (int)((ULONG)((ULONG)x+(UCHAR)y)) )) +-#define ORC_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) +-#define ORC_RDLONG(x,y) (long)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) +- +-#define ORC_WR( adr,data) outb( (UCHAR)(data), (int)(adr)) +-#define ORC_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr)) +-#define ORC_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr)) +diff -Nurb linux-2.6.22-570/drivers/scsi/a4000t.c linux-2.6.22-591/drivers/scsi/a4000t.c +--- linux-2.6.22-570/drivers/scsi/a4000t.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/a4000t.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,143 @@ ++/* ++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. ++ * Amiga Technologies A4000T SCSI controller. ++ * ++ * Written 1997 by Alan Hourihane ++ * plus modifications of the 53c7xx.c driver to support the Amiga. 
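++ *
++ * In rough outline (drawn from the probe routine below): the 53c700
++ * core does the real work; this file only fills in an
++ * NCR_700_Host_Parameters block, registers it with NCR_700_detect()
++ * and wires up the shared Amiga ports interrupt.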
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/amigahw.h>
++#include <asm/amigaints.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++
++static struct scsi_host_template a4000t_scsi_driver_template = {
++	.name		= "A4000T builtin SCSI",
++	.proc_name	= "A4000t",
++	.this_id	= 7,
++	.module		= THIS_MODULE,
++};
++
++static struct platform_device *a4000t_scsi_device;
++
++#define A4000T_SCSI_ADDR 0xdd0040
++
++static int __devinit a4000t_probe(struct device *dev)
++{
++	struct Scsi_Host * host = NULL;
++	struct NCR_700_Host_Parameters *hostdata;
++
++	if (!(MACH_IS_AMIGA && AMIGAHW_PRESENT(A4000_SCSI)))
++		goto out;
++
++	if (!request_mem_region(A4000T_SCSI_ADDR, 0x1000,
++				"A4000T builtin SCSI"))
++		goto out;
++
++	hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++	if (hostdata == NULL) {
++		printk(KERN_ERR "a4000t-scsi: Failed to allocate host data\n");
++		goto out_release;
++	}
++	memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++	/* Fill in the required pieces of hostdata */
++	hostdata->base = (void __iomem *)ZTWO_VADDR(A4000T_SCSI_ADDR);
++	hostdata->clock = 50;
++	hostdata->chip710 = 1;
++	hostdata->dmode_extra = DMODE_FC2;
++	hostdata->dcntl_extra = EA_710;
++
++	/* and register the chip */
++	host = NCR_700_detect(&a4000t_scsi_driver_template, hostdata, dev);
++	if (!host) {
++		printk(KERN_ERR "a4000t-scsi: No host detected; "
++				"board configuration problem?\n");
++		goto out_free;
++	}
++
++	host->this_id = 7;
++	host->base = A4000T_SCSI_ADDR;
++	host->irq = IRQ_AMIGA_PORTS;
++
++	if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "a4000t-scsi",
++			host)) {
++		printk(KERN_ERR "a4000t-scsi: request_irq failed\n");
++		goto out_put_host;
++	}
++
++	scsi_scan_host(host);
++
++	return 0;
++
++ out_put_host:
++	scsi_host_put(host);
++ out_free:
++	kfree(hostdata);
++ out_release:
++	release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++ out:
++	return -ENODEV;
++}
++
++static __devexit int a4000t_device_remove(struct device *dev)
++{
++	struct Scsi_Host *host = dev_to_shost(dev);
++	struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++	scsi_remove_host(host);
++
++	NCR_700_release(host);
++	kfree(hostdata);
++	free_irq(host->irq, host);
++	release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++
++	return 0;
++}
++
++static struct device_driver a4000t_scsi_driver = {
++	.name	= "a4000t-scsi",
++	.bus	= &platform_bus_type,
++	.probe	= a4000t_probe,
++	.remove	= __devexit_p(a4000t_device_remove),
++};
++
++static int __init a4000t_scsi_init(void)
++{
++	int err;
++
++	err = driver_register(&a4000t_scsi_driver);
++	if (err)
++		return err;
++
++	a4000t_scsi_device = platform_device_register_simple("a4000t-scsi",
++			-1, NULL, 0);
++	if (IS_ERR(a4000t_scsi_device)) {
++		driver_unregister(&a4000t_scsi_driver);
++		return PTR_ERR(a4000t_scsi_device);
++	}
++
++	return err;
++}
++
++static void __exit a4000t_scsi_exit(void)
++{
++	platform_device_unregister(a4000t_scsi_device);
++	driver_unregister(&a4000t_scsi_driver);
++}
++
++module_init(a4000t_scsi_init);
++module_exit(a4000t_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aachba.c linux-2.6.22-591/drivers/scsi/aacraid/aachba.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/aachba.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/aacraid/aachba.c	2007-12-21 15:36:12.000000000 -0500
+@@ -169,6
+169,18 @@ + module_param(acbsize, int, S_IRUGO|S_IWUSR); + MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware."); + ++int update_interval = 30 * 60; ++module_param(update_interval, int, S_IRUGO|S_IWUSR); ++MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter."); ++ ++int check_interval = 24 * 60 * 60; ++module_param(check_interval, int, S_IRUGO|S_IWUSR); ++MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks."); ++ ++int check_reset = 1; ++module_param(check_reset, int, S_IRUGO|S_IWUSR); ++MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter."); ++ + int expose_physicals = -1; + module_param(expose_physicals, int, S_IRUGO|S_IWUSR); + MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on"); +@@ -312,11 +324,10 @@ + + if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS) + maximum_num_containers = MAXIMUM_NUM_CONTAINERS; +- fsa_dev_ptr = kmalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers, ++ fsa_dev_ptr = kzalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers, + GFP_KERNEL); + if (!fsa_dev_ptr) + return -ENOMEM; +- memset(fsa_dev_ptr, 0, sizeof(*fsa_dev_ptr) * maximum_num_containers); + + dev->fsa_dev = fsa_dev_ptr; + dev->maximum_num_containers = maximum_num_containers; +@@ -344,20 +355,15 @@ + { + void *buf; + int transfer_len; +- struct scatterlist *sg = scsicmd->request_buffer; ++ struct scatterlist *sg = scsi_sglist(scsicmd); + +- if (scsicmd->use_sg) { + buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + transfer_len = min(sg->length, len + offset); +- } else { +- buf = scsicmd->request_buffer; +- transfer_len = min(scsicmd->request_bufflen, len + offset); +- } ++ + transfer_len -= offset; + if (buf && transfer_len > 0) + memcpy(buf + offset, data, transfer_len); + +- if (scsicmd->use_sg) + kunmap_atomic(buf - sg->offset, KM_IRQ0); + + } +@@ -451,7 +457,7 @@ + { + struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; + +- if (fsa_dev_ptr[scmd_id(scsicmd)].valid) ++ if ((fsa_dev_ptr[scmd_id(scsicmd)].valid & 1)) + return aac_scsi_cmd(scsicmd); + + scsicmd->result = DID_NO_CONNECT << 16; +@@ -459,18 +465,18 @@ + return 0; + } + +-static int _aac_probe_container2(void * context, struct fib * fibptr) ++static void _aac_probe_container2(void * context, struct fib * fibptr) + { + struct fsa_dev_info *fsa_dev_ptr; + int (*callback)(struct scsi_cmnd *); + struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context; + +- if (!aac_valid_context(scsicmd, fibptr)) +- return 0; + +- fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev; ++ if (!aac_valid_context(scsicmd, fibptr)) ++ return; + + scsicmd->SCp.Status = 0; ++ fsa_dev_ptr = fibptr->dev->fsa_dev; + if (fsa_dev_ptr) { + struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr); + fsa_dev_ptr += scmd_id(scsicmd); +@@ -493,10 +499,11 @@ + aac_fib_free(fibptr); + callback = (int (*)(struct scsi_cmnd *))(scsicmd->SCp.ptr); + scsicmd->SCp.ptr = NULL; +- return (*callback)(scsicmd); ++ (*callback)(scsicmd); ++ return; + } + +-static int _aac_probe_container1(void * context, struct fib * fibptr) ++static void _aac_probe_container1(void * context, struct fib * fibptr) + { + struct scsi_cmnd * scsicmd; + struct aac_mount * dresp; +@@ -506,13 +513,14 @@ + dresp = (struct aac_mount *) fib_data(fibptr); + 
dresp->mnt[0].capacityhigh = 0; + if ((le32_to_cpu(dresp->status) != ST_OK) || +- (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) +- return _aac_probe_container2(context, fibptr); ++ (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) { ++ _aac_probe_container2(context, fibptr); ++ return; ++ } + scsicmd = (struct scsi_cmnd *) context; +- scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL; + + if (!aac_valid_context(scsicmd, fibptr)) +- return 0; ++ return; + + aac_fib_init(fibptr); + +@@ -527,21 +535,18 @@ + sizeof(struct aac_query_mount), + FsaNormal, + 0, 1, +- (fib_callback) _aac_probe_container2, ++ _aac_probe_container2, + (void *) scsicmd); + /* + * Check that the command queued to the controller + */ +- if (status == -EINPROGRESS) { ++ if (status == -EINPROGRESS) + scsicmd->SCp.phase = AAC_OWNER_FIRMWARE; +- return 0; +- } +- if (status < 0) { ++ else if (status < 0) { + /* Inherit results from VM_NameServe, if any */ + dresp->status = cpu_to_le32(ST_OK); +- return _aac_probe_container2(context, fibptr); ++ _aac_probe_container2(context, fibptr); + } +- return 0; + } + + static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(struct scsi_cmnd *)) +@@ -566,7 +571,7 @@ + sizeof(struct aac_query_mount), + FsaNormal, + 0, 1, +- (fib_callback) _aac_probe_container1, ++ _aac_probe_container1, + (void *) scsicmd); + /* + * Check that the command queued to the controller +@@ -620,7 +625,7 @@ + return -ENOMEM; + } + scsicmd->list.next = NULL; +- scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))_aac_probe_container1; ++ scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))aac_probe_container_callback1; + + scsicmd->device = scsidev; + scsidev->sdev_state = 0; +@@ -825,7 +830,7 @@ + readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); + readcmd->count = cpu_to_le32(count<<9); + readcmd->cid = cpu_to_le16(scmd_id(cmd)); +- readcmd->flags = cpu_to_le16(1); ++ readcmd->flags = cpu_to_le16(IO_TYPE_READ); + readcmd->bpTotal = 0; + readcmd->bpComplete = 0; + +@@ -904,7 +909,7 @@ + (void *) cmd); + } + +-static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) ++static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) + { + u16 fibsize; + struct aac_raw_io *writecmd; +@@ -914,7 +919,9 @@ + writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32)); + writecmd->count = cpu_to_le32(count<<9); + writecmd->cid = cpu_to_le16(scmd_id(cmd)); +- writecmd->flags = 0; ++ writecmd->flags = fua ? 
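++		/* FUA set: IO_SUREWRITE asks the firmware to commit the
++		   write to media before completing */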
++ cpu_to_le16(IO_TYPE_WRITE|IO_SUREWRITE) : ++ cpu_to_le16(IO_TYPE_WRITE); + writecmd->bpTotal = 0; + writecmd->bpComplete = 0; + +@@ -933,7 +940,7 @@ + (void *) cmd); + } + +-static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) ++static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) + { + u16 fibsize; + struct aac_write64 *writecmd; +@@ -964,7 +971,7 @@ + (void *) cmd); + } + +-static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count) ++static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua) + { + u16 fibsize; + struct aac_write *writecmd; +@@ -1041,7 +1048,7 @@ + struct aac_srb * srbcmd = aac_scsi_common(fib, cmd); + + aac_build_sg64(cmd, (struct sgmap64*) &srbcmd->sg); +- srbcmd->count = cpu_to_le32(cmd->request_bufflen); ++ srbcmd->count = cpu_to_le32(scsi_bufflen(cmd)); + + memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb)); + memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len); +@@ -1069,7 +1076,7 @@ + struct aac_srb * srbcmd = aac_scsi_common(fib, cmd); + + aac_build_sg(cmd, (struct sgmap*)&srbcmd->sg); +- srbcmd->count = cpu_to_le32(cmd->request_bufflen); ++ srbcmd->count = cpu_to_le32(scsi_bufflen(cmd)); + + memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb)); + memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len); +@@ -1172,6 +1179,7 @@ + } + + if (!dev->in_reset) { ++ char buffer[16]; + tmp = le32_to_cpu(dev->adapter_info.kernelrev); + printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n", + dev->name, +@@ -1192,16 +1200,23 @@ + dev->name, dev->id, + tmp>>24,(tmp>>16)&0xff,tmp&0xff, + le32_to_cpu(dev->adapter_info.biosbuild)); +- if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) +- printk(KERN_INFO "%s%d: serial %x\n", +- dev->name, dev->id, +- le32_to_cpu(dev->adapter_info.serial[0])); ++ buffer[0] = '\0'; ++ if (aac_show_serial_number( ++ shost_to_class(dev->scsi_host_ptr), buffer)) ++ printk(KERN_INFO "%s%d: serial %s", ++ dev->name, dev->id, buffer); + if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) { + printk(KERN_INFO "%s%d: TSID %.*s\n", + dev->name, dev->id, + (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid), + dev->supplement_adapter_info.VpdInfo.Tsid); + } ++ if (!check_reset || ++ (dev->supplement_adapter_info.SupportedOptions2 & ++ le32_to_cpu(AAC_OPTION_IGNORE_RESET))) { ++ printk(KERN_INFO "%s%d: Reset Adapter Ignored\n", ++ dev->name, dev->id); ++ } + } + + dev->nondasd_support = 0; +@@ -1332,7 +1347,7 @@ + if (!aac_valid_context(scsicmd, fibptr)) + return; + +- dev = (struct aac_dev *)scsicmd->device->host->hostdata; ++ dev = fibptr->dev; + cid = scmd_id(scsicmd); + + if (nblank(dprintk(x))) { +@@ -1372,15 +1387,8 @@ + + BUG_ON(fibptr == NULL); + +- if(scsicmd->use_sg) +- pci_unmap_sg(dev->pdev, +- (struct scatterlist *)scsicmd->request_buffer, +- scsicmd->use_sg, +- scsicmd->sc_data_direction); +- else if(scsicmd->request_bufflen) +- pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, +- scsicmd->request_bufflen, +- scsicmd->sc_data_direction); ++ scsi_dma_unmap(scsicmd); ++ + readreply = (struct aac_read_reply *)fib_data(fibptr); + if (le32_to_cpu(readreply->status) == ST_OK) + scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; +@@ -1498,6 +1506,7 @@ + { + u64 lba; + u32 count; ++ int fua; + int status; + struct aac_dev *dev; + struct fib * cmd_fibcontext; +@@ -1512,6 +1521,7 @@ + count = scsicmd->cmnd[4]; + if (count == 0) + count = 256; ++ fua = 0; + } else if (scsicmd->cmnd[0] == WRITE_16) { /* 16 
byte command */ + dprintk((KERN_DEBUG "aachba: received a write(16) command on id %d.\n", scmd_id(scsicmd))); + +@@ -1524,6 +1534,7 @@ + (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9]; + count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16) | + (scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13]; ++ fua = scsicmd->cmnd[1] & 0x8; + } else if (scsicmd->cmnd[0] == WRITE_12) { /* 12 byte command */ + dprintk((KERN_DEBUG "aachba: received a write(12) command on id %d.\n", scmd_id(scsicmd))); + +@@ -1531,10 +1542,12 @@ + | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5]; + count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16) + | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9]; ++ fua = scsicmd->cmnd[1] & 0x8; + } else { + dprintk((KERN_DEBUG "aachba: received a write(10) command on id %d.\n", scmd_id(scsicmd))); + lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5]; + count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8]; ++ fua = scsicmd->cmnd[1] & 0x8; + } + dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n", + smp_processor_id(), (unsigned long long)lba, jiffies)); +@@ -1549,7 +1562,7 @@ + return 0; + } + +- status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count); ++ status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua); + + /* + * Check that the command queued to the controller +@@ -1592,7 +1605,7 @@ + COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; + else { + struct scsi_device *sdev = cmd->device; +- struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; ++ struct aac_dev *dev = fibptr->dev; + u32 cid = sdev_id(sdev); + printk(KERN_WARNING + "synchronize_callback: synchronize failed, status = %d\n", +@@ -1699,7 +1712,7 @@ + + int aac_scsi_cmd(struct scsi_cmnd * scsicmd) + { +- u32 cid = 0; ++ u32 cid; + struct Scsi_Host *host = scsicmd->device->host; + struct aac_dev *dev = (struct aac_dev *)host->hostdata; + struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev; +@@ -1711,15 +1724,15 @@ + * Test does not apply to ID 16, the pseudo id for the controller + * itself. + */ +- if (scmd_id(scsicmd) != host->this_id) { +- if ((scmd_channel(scsicmd) == CONTAINER_CHANNEL)) { +- if((scmd_id(scsicmd) >= dev->maximum_num_containers) || ++ cid = scmd_id(scsicmd); ++ if (cid != host->this_id) { ++ if (scmd_channel(scsicmd) == CONTAINER_CHANNEL) { ++ if((cid >= dev->maximum_num_containers) || + (scsicmd->device->lun != 0)) { + scsicmd->result = DID_NO_CONNECT << 16; + scsicmd->scsi_done(scsicmd); + return 0; + } +- cid = scmd_id(scsicmd); + + /* + * If the target container doesn't exist, it may have +@@ -1782,7 +1795,7 @@ + { + struct inquiry_data inq_data; + +- dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scmd_id(scsicmd))); ++ dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", cid)); + memset(&inq_data, 0, sizeof (struct inquiry_data)); + + inq_data.inqd_ver = 2; /* claim compliance to SCSI-2 */ +@@ -1794,7 +1807,7 @@ + * Set the Vendor, Product, and Revision Level + * see: .c i.e. 
aac.c + */ +- if (scmd_id(scsicmd) == host->this_id) { ++ if (cid == host->this_id) { + setinqstr(dev, (void *) (inq_data.inqd_vid), ARRAY_SIZE(container_types)); + inq_data.inqd_pdt = INQD_PDT_PROC; /* Processor device */ + aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data)); +@@ -1886,15 +1899,29 @@ + + case MODE_SENSE: + { +- char mode_buf[4]; ++ char mode_buf[7]; ++ int mode_buf_length = 4; + + dprintk((KERN_DEBUG "MODE SENSE command.\n")); + mode_buf[0] = 3; /* Mode data length */ + mode_buf[1] = 0; /* Medium type - default */ +- mode_buf[2] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */ ++ mode_buf[2] = 0; /* Device-specific param, ++ bit 8: 0/1 = write enabled/protected ++ bit 4: 0/1 = FUA enabled */ ++ if (dev->raw_io_interface) ++ mode_buf[2] = 0x10; + mode_buf[3] = 0; /* Block descriptor length */ +- +- aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf)); ++ if (((scsicmd->cmnd[2] & 0x3f) == 8) || ++ ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { ++ mode_buf[0] = 6; ++ mode_buf[4] = 8; ++ mode_buf[5] = 1; ++ mode_buf[6] = 0x04; /* WCE */ ++ mode_buf_length = 7; ++ if (mode_buf_length > scsicmd->cmnd[4]) ++ mode_buf_length = scsicmd->cmnd[4]; ++ } ++ aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length); + scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; + scsicmd->scsi_done(scsicmd); + +@@ -1902,18 +1929,33 @@ + } + case MODE_SENSE_10: + { +- char mode_buf[8]; ++ char mode_buf[11]; ++ int mode_buf_length = 8; + + dprintk((KERN_DEBUG "MODE SENSE 10 byte command.\n")); + mode_buf[0] = 0; /* Mode data length (MSB) */ + mode_buf[1] = 6; /* Mode data length (LSB) */ + mode_buf[2] = 0; /* Medium type - default */ +- mode_buf[3] = 0; /* Device-specific param, bit 8: 0/1 = write enabled/protected */ ++ mode_buf[3] = 0; /* Device-specific param, ++ bit 8: 0/1 = write enabled/protected ++ bit 4: 0/1 = FUA enabled */ ++ if (dev->raw_io_interface) ++ mode_buf[3] = 0x10; + mode_buf[4] = 0; /* reserved */ + mode_buf[5] = 0; /* reserved */ + mode_buf[6] = 0; /* Block descriptor length (MSB) */ + mode_buf[7] = 0; /* Block descriptor length (LSB) */ +- aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf)); ++ if (((scsicmd->cmnd[2] & 0x3f) == 8) || ++ ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) { ++ mode_buf[1] = 9; ++ mode_buf[8] = 8; ++ mode_buf[9] = 1; ++ mode_buf[10] = 0x04; /* WCE */ ++ mode_buf_length = 11; ++ if (mode_buf_length > scsicmd->cmnd[8]) ++ mode_buf_length = scsicmd->cmnd[8]; ++ } ++ aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length); + + scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD; + scsicmd->scsi_done(scsicmd); +@@ -2136,10 +2178,10 @@ + if (!aac_valid_context(scsicmd, fibptr)) + return; + +- dev = (struct aac_dev *)scsicmd->device->host->hostdata; +- + BUG_ON(fibptr == NULL); + ++ dev = fibptr->dev; ++ + srbreply = (struct aac_srb_reply *) fib_data(fibptr); + + scsicmd->sense_buffer[0] = '\0'; /* Initialize sense valid flag to false */ +@@ -2147,17 +2189,10 @@ + * Calculate resid for sg + */ + +- scsicmd->resid = scsicmd->request_bufflen - +- le32_to_cpu(srbreply->data_xfer_length); ++ scsi_set_resid(scsicmd, scsi_bufflen(scsicmd) ++ - le32_to_cpu(srbreply->data_xfer_length)); + +- if(scsicmd->use_sg) +- pci_unmap_sg(dev->pdev, +- (struct scatterlist *)scsicmd->request_buffer, +- scsicmd->use_sg, +- scsicmd->sc_data_direction); +- else if(scsicmd->request_bufflen) +- pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, scsicmd->request_bufflen, +- 
scsicmd->sc_data_direction); ++ scsi_dma_unmap(scsicmd); + + /* + * First check the fib status +@@ -2233,7 +2268,7 @@ + break; + + case SRB_STATUS_BUSY: +- scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8; ++ scsicmd->result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8; + break; + + case SRB_STATUS_BUS_RESET: +@@ -2343,34 +2378,33 @@ + { + struct aac_dev *dev; + unsigned long byte_count = 0; ++ int nseg; + + dev = (struct aac_dev *)scsicmd->device->host->hostdata; + // Get rid of old data + psg->count = 0; + psg->sg[0].addr = 0; + psg->sg[0].count = 0; +- if (scsicmd->use_sg) { ++ ++ nseg = scsi_dma_map(scsicmd); ++ BUG_ON(nseg < 0); ++ if (nseg) { + struct scatterlist *sg; + int i; +- int sg_count; +- sg = (struct scatterlist *) scsicmd->request_buffer; + +- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, +- scsicmd->sc_data_direction); +- psg->count = cpu_to_le32(sg_count); ++ psg->count = cpu_to_le32(nseg); + +- for (i = 0; i < sg_count; i++) { ++ scsi_for_each_sg(scsicmd, sg, nseg, i) { + psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg)); + psg->sg[i].count = cpu_to_le32(sg_dma_len(sg)); + byte_count += sg_dma_len(sg); +- sg++; + } + /* hba wants the size to be exact */ +- if(byte_count > scsicmd->request_bufflen){ ++ if (byte_count > scsi_bufflen(scsicmd)) { + u32 temp = le32_to_cpu(psg->sg[i-1].count) - +- (byte_count - scsicmd->request_bufflen); ++ (byte_count - scsi_bufflen(scsicmd)); + psg->sg[i-1].count = cpu_to_le32(temp); +- byte_count = scsicmd->request_bufflen; ++ byte_count = scsi_bufflen(scsicmd); + } + /* Check for command underflow */ + if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ +@@ -2378,18 +2412,6 @@ + byte_count, scsicmd->underflow); + } + } +- else if(scsicmd->request_bufflen) { +- u32 addr; +- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, +- scsicmd->request_buffer, +- scsicmd->request_bufflen, +- scsicmd->sc_data_direction); +- addr = scsicmd->SCp.dma_handle; +- psg->count = cpu_to_le32(1); +- psg->sg[0].addr = cpu_to_le32(addr); +- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen); +- byte_count = scsicmd->request_bufflen; +- } + return byte_count; + } + +@@ -2399,6 +2421,7 @@ + struct aac_dev *dev; + unsigned long byte_count = 0; + u64 addr; ++ int nseg; + + dev = (struct aac_dev *)scsicmd->device->host->hostdata; + // Get rid of old data +@@ -2406,31 +2429,28 @@ + psg->sg[0].addr[0] = 0; + psg->sg[0].addr[1] = 0; + psg->sg[0].count = 0; +- if (scsicmd->use_sg) { ++ ++ nseg = scsi_dma_map(scsicmd); ++ BUG_ON(nseg < 0); ++ if (nseg) { + struct scatterlist *sg; + int i; +- int sg_count; +- sg = (struct scatterlist *) scsicmd->request_buffer; +- +- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, +- scsicmd->sc_data_direction); + +- for (i = 0; i < sg_count; i++) { ++ scsi_for_each_sg(scsicmd, sg, nseg, i) { + int count = sg_dma_len(sg); + addr = sg_dma_address(sg); + psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff); + psg->sg[i].addr[1] = cpu_to_le32(addr>>32); + psg->sg[i].count = cpu_to_le32(count); + byte_count += count; +- sg++; + } +- psg->count = cpu_to_le32(sg_count); ++ psg->count = cpu_to_le32(nseg); + /* hba wants the size to be exact */ +- if(byte_count > scsicmd->request_bufflen){ ++ if (byte_count > scsi_bufflen(scsicmd)) { + u32 temp = le32_to_cpu(psg->sg[i-1].count) - +- (byte_count - scsicmd->request_bufflen); ++ (byte_count - scsi_bufflen(scsicmd)); + psg->sg[i-1].count = cpu_to_le32(temp); +- byte_count = scsicmd->request_bufflen; ++ byte_count = scsi_bufflen(scsicmd); + } + /* Check for 
command underflow */ + if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ +@@ -2438,26 +2458,13 @@ + byte_count, scsicmd->underflow); + } + } +- else if(scsicmd->request_bufflen) { +- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, +- scsicmd->request_buffer, +- scsicmd->request_bufflen, +- scsicmd->sc_data_direction); +- addr = scsicmd->SCp.dma_handle; +- psg->count = cpu_to_le32(1); +- psg->sg[0].addr[0] = cpu_to_le32(addr & 0xffffffff); +- psg->sg[0].addr[1] = cpu_to_le32(addr >> 32); +- psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen); +- byte_count = scsicmd->request_bufflen; +- } + return byte_count; + } + + static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg) + { +- struct Scsi_Host *host = scsicmd->device->host; +- struct aac_dev *dev = (struct aac_dev *)host->hostdata; + unsigned long byte_count = 0; ++ int nseg; + + // Get rid of old data + psg->count = 0; +@@ -2467,16 +2474,14 @@ + psg->sg[0].addr[1] = 0; + psg->sg[0].count = 0; + psg->sg[0].flags = 0; +- if (scsicmd->use_sg) { ++ ++ nseg = scsi_dma_map(scsicmd); ++ BUG_ON(nseg < 0); ++ if (nseg) { + struct scatterlist *sg; + int i; +- int sg_count; +- sg = (struct scatterlist *) scsicmd->request_buffer; + +- sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg, +- scsicmd->sc_data_direction); +- +- for (i = 0; i < sg_count; i++) { ++ scsi_for_each_sg(scsicmd, sg, nseg, i) { + int count = sg_dma_len(sg); + u64 addr = sg_dma_address(sg); + psg->sg[i].next = 0; +@@ -2486,15 +2491,14 @@ + psg->sg[i].count = cpu_to_le32(count); + psg->sg[i].flags = 0; + byte_count += count; +- sg++; + } +- psg->count = cpu_to_le32(sg_count); ++ psg->count = cpu_to_le32(nseg); + /* hba wants the size to be exact */ +- if(byte_count > scsicmd->request_bufflen){ ++ if (byte_count > scsi_bufflen(scsicmd)) { + u32 temp = le32_to_cpu(psg->sg[i-1].count) - +- (byte_count - scsicmd->request_bufflen); ++ (byte_count - scsi_bufflen(scsicmd)); + psg->sg[i-1].count = cpu_to_le32(temp); +- byte_count = scsicmd->request_bufflen; ++ byte_count = scsi_bufflen(scsicmd); + } + /* Check for command underflow */ + if(scsicmd->underflow && (byte_count < scsicmd->underflow)){ +@@ -2502,24 +2506,6 @@ + byte_count, scsicmd->underflow); + } + } +- else if(scsicmd->request_bufflen) { +- int count; +- u64 addr; +- scsicmd->SCp.dma_handle = pci_map_single(dev->pdev, +- scsicmd->request_buffer, +- scsicmd->request_bufflen, +- scsicmd->sc_data_direction); +- addr = scsicmd->SCp.dma_handle; +- count = scsicmd->request_bufflen; +- psg->count = cpu_to_le32(1); +- psg->sg[0].next = 0; +- psg->sg[0].prev = 0; +- psg->sg[0].addr[1] = cpu_to_le32((u32)(addr>>32)); +- psg->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff)); +- psg->sg[0].count = cpu_to_le32(count); +- psg->sg[0].flags = 0; +- byte_count = scsicmd->request_bufflen; +- } + return byte_count; + } + +diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h +--- linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aacraid/aacraid.h 2007-12-21 15:36:12.000000000 -0500 +@@ -12,8 +12,8 @@ + *----------------------------------------------------------------------------*/ + + #ifndef AAC_DRIVER_BUILD +-# define AAC_DRIVER_BUILD 2437 +-# define AAC_DRIVER_BRANCH "-mh4" ++# define AAC_DRIVER_BUILD 2447 ++# define AAC_DRIVER_BRANCH "-ms" + #endif + #define MAXIMUM_NUM_CONTAINERS 32 + +@@ -464,12 +464,12 @@ + int (*adapter_restart)(struct aac_dev *dev, int bled); 
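++	/*
++	 * Illustrative note, not part of the original header: adapter_write
++	 * below now also carries the CDB's force-unit-access bit, decoded
++	 * in aac_write() as
++	 *
++	 *	fua = scsicmd->cmnd[1] & 0x8;
++	 *
++	 * and handed on via aac_adapter_write(fib, cmd, lba, count, fua).
++	 */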
+ /* Transport operations */ + int (*adapter_ioremap)(struct aac_dev * dev, u32 size); +- irqreturn_t (*adapter_intr)(int irq, void *dev_id); ++ irq_handler_t adapter_intr; + /* Packet operations */ + int (*adapter_deliver)(struct fib * fib); + int (*adapter_bounds)(struct aac_dev * dev, struct scsi_cmnd * cmd, u64 lba); + int (*adapter_read)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count); +- int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count); ++ int (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua); + int (*adapter_scsi)(struct fib * fib, struct scsi_cmnd * cmd); + /* Administrative operations */ + int (*adapter_comm)(struct aac_dev * dev, int comm); +@@ -860,10 +860,12 @@ + __le32 FlashFirmwareBootBuild; + u8 MfgPcbaSerialNo[12]; + u8 MfgWWNName[8]; +- __le32 MoreFeatureBits; ++ __le32 SupportedOptions2; + __le32 ReservedGrowth[1]; + }; + #define AAC_FEATURE_FALCON 0x00000010 ++#define AAC_OPTION_MU_RESET 0x00000001 ++#define AAC_OPTION_IGNORE_RESET 0x00000002 + #define AAC_SIS_VERSION_V3 3 + #define AAC_SIS_SLOT_UNKNOWN 0xFF + +@@ -1054,8 +1056,8 @@ + #define aac_adapter_read(fib,cmd,lba,count) \ + ((fib)->dev)->a_ops.adapter_read(fib,cmd,lba,count) + +-#define aac_adapter_write(fib,cmd,lba,count) \ +- ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count) ++#define aac_adapter_write(fib,cmd,lba,count,fua) \ ++ ((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count,fua) + + #define aac_adapter_scsi(fib,cmd) \ + ((fib)->dev)->a_ops.adapter_scsi(fib,cmd) +@@ -1213,6 +1215,9 @@ + __le32 block; + __le16 pad; + __le16 flags; ++#define IO_TYPE_WRITE 0x00000000 ++#define IO_TYPE_READ 0x00000001 ++#define IO_SUREWRITE 0x00000008 + struct sgmap64 sg; // Must be last in struct because it is variable + }; + struct aac_write_reply +@@ -1257,6 +1262,19 @@ + u8 data[16]; + }; + ++#define CT_PAUSE_IO 65 ++#define CT_RELEASE_IO 66 ++struct aac_pause { ++ __le32 command; /* VM_ContainerConfig */ ++ __le32 type; /* CT_PAUSE_IO */ ++ __le32 timeout; /* 10ms ticks */ ++ __le32 min; ++ __le32 noRescan; ++ __le32 parm3; ++ __le32 parm4; ++ __le32 count; /* sizeof(((struct aac_pause_reply *)NULL)->data) */ ++}; ++ + struct aac_srb + { + __le32 function; +@@ -1804,6 +1822,10 @@ + int aac_get_containers(struct aac_dev *dev); + int aac_scsi_cmd(struct scsi_cmnd *cmd); + int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg); ++#ifndef shost_to_class ++#define shost_to_class(shost) &shost->shost_classdev ++#endif ++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf); + int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg); + int aac_rx_init(struct aac_dev *dev); + int aac_rkt_init(struct aac_dev *dev); +@@ -1813,6 +1835,7 @@ + unsigned int aac_response_normal(struct aac_queue * q); + unsigned int aac_command_normal(struct aac_queue * q); + unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index); ++int aac_reset_adapter(struct aac_dev * dev, int forced); + int aac_check_health(struct aac_dev * dev); + int aac_command_thread(void *data); + int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx); +@@ -1832,3 +1855,6 @@ + extern int expose_physicals; + extern int aac_reset_devices; + extern int aac_commit; ++extern int update_interval; ++extern int check_interval; ++extern int check_reset; +diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/commsup.c linux-2.6.22-591/drivers/scsi/aacraid/commsup.c +--- linux-2.6.22-570/drivers/scsi/aacraid/commsup.c 2007-07-08 
19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aacraid/commsup.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1021,7 +1021,7 @@ + + } + +-static int _aac_reset_adapter(struct aac_dev *aac) ++static int _aac_reset_adapter(struct aac_dev *aac, int forced) + { + int index, quirks; + int retval; +@@ -1029,25 +1029,32 @@ + struct scsi_device *dev; + struct scsi_cmnd *command; + struct scsi_cmnd *command_list; ++ int jafo = 0; + + /* + * Assumptions: +- * - host is locked. ++ * - host is locked, unless called by the aacraid thread. ++ * (a matter of convenience, due to legacy issues surrounding ++ * eh_host_adapter_reset). + * - in_reset is asserted, so no new i/o is getting to the + * card. +- * - The card is dead. ++ * - The card is dead, or will be very shortly ;-/ so no new ++ * commands are completing in the interrupt service. + */ + host = aac->scsi_host_ptr; + scsi_block_requests(host); + aac_adapter_disable_int(aac); ++ if (aac->thread->pid != current->pid) { + spin_unlock_irq(host->host_lock); + kthread_stop(aac->thread); ++ jafo = 1; ++ } + + /* + * If a positive health, means in a known DEAD PANIC + * state and the adapter could be reset to `try again'. + */ +- retval = aac_adapter_restart(aac, aac_adapter_check_health(aac)); ++ retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac)); + + if (retval) + goto out; +@@ -1104,11 +1111,13 @@ + if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT) + if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK))) + goto out; ++ if (jafo) { + aac->thread = kthread_run(aac_command_thread, aac, aac->name); + if (IS_ERR(aac->thread)) { + retval = PTR_ERR(aac->thread); + goto out; + } ++ } + (void)aac_get_adapter_info(aac); + quirks = aac_get_driver_ident(index)->quirks; + if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) { +@@ -1150,7 +1159,98 @@ + out: + aac->in_reset = 0; + scsi_unblock_requests(host); ++ if (jafo) { + spin_lock_irq(host->host_lock); ++ } ++ return retval; ++} ++ ++int aac_reset_adapter(struct aac_dev * aac, int forced) ++{ ++ unsigned long flagv = 0; ++ int retval; ++ struct Scsi_Host * host; ++ ++ if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0) ++ return -EBUSY; ++ ++ if (aac->in_reset) { ++ spin_unlock_irqrestore(&aac->fib_lock, flagv); ++ return -EBUSY; ++ } ++ aac->in_reset = 1; ++ spin_unlock_irqrestore(&aac->fib_lock, flagv); ++ ++ /* ++ * Wait for all commands to complete to this specific ++ * target (block maximum 60 seconds). Although not necessary, ++ * it does make us a good storage citizen. 
++ */ ++ host = aac->scsi_host_ptr; ++ scsi_block_requests(host); ++ if (forced < 2) for (retval = 60; retval; --retval) { ++ struct scsi_device * dev; ++ struct scsi_cmnd * command; ++ int active = 0; ++ ++ __shost_for_each_device(dev, host) { ++ spin_lock_irqsave(&dev->list_lock, flagv); ++ list_for_each_entry(command, &dev->cmd_list, list) { ++ if (command->SCp.phase == AAC_OWNER_FIRMWARE) { ++ active++; ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&dev->list_lock, flagv); ++ if (active) ++ break; ++ ++ } ++ /* ++ * We can exit If all the commands are complete ++ */ ++ if (active == 0) ++ break; ++ ssleep(1); ++ } ++ ++ /* Quiesce build, flush cache, write through mode */ ++ aac_send_shutdown(aac); ++ spin_lock_irqsave(host->host_lock, flagv); ++ retval = _aac_reset_adapter(aac, forced); ++ spin_unlock_irqrestore(host->host_lock, flagv); ++ ++ if (retval == -ENODEV) { ++ /* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */ ++ struct fib * fibctx = aac_fib_alloc(aac); ++ if (fibctx) { ++ struct aac_pause *cmd; ++ int status; ++ ++ aac_fib_init(fibctx); ++ ++ cmd = (struct aac_pause *) fib_data(fibctx); ++ ++ cmd->command = cpu_to_le32(VM_ContainerConfig); ++ cmd->type = cpu_to_le32(CT_PAUSE_IO); ++ cmd->timeout = cpu_to_le32(1); ++ cmd->min = cpu_to_le32(1); ++ cmd->noRescan = cpu_to_le32(1); ++ cmd->count = cpu_to_le32(0); ++ ++ status = aac_fib_send(ContainerCommand, ++ fibctx, ++ sizeof(struct aac_pause), ++ FsaNormal, ++ -2 /* Timeout silently */, 1, ++ NULL, NULL); ++ ++ if (status >= 0) ++ aac_fib_complete(fibctx); ++ aac_fib_free(fibctx); ++ } ++ } ++ + return retval; + } + +@@ -1270,9 +1370,14 @@ + + printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED); + ++ if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 & ++ le32_to_cpu(AAC_OPTION_IGNORE_RESET))) ++ goto out; + host = aac->scsi_host_ptr; ++ if (aac->thread->pid != current->pid) + spin_lock_irqsave(host->host_lock, flagv); +- BlinkLED = _aac_reset_adapter(aac); ++ BlinkLED = _aac_reset_adapter(aac, 0); ++ if (aac->thread->pid != current->pid) + spin_unlock_irqrestore(host->host_lock, flagv); + return BlinkLED; + +@@ -1300,6 +1405,9 @@ + struct aac_fib_context *fibctx; + unsigned long flags; + DECLARE_WAITQUEUE(wait, current); ++ unsigned long next_jiffies = jiffies + HZ; ++ unsigned long next_check_jiffies = next_jiffies; ++ long difference = HZ; + + /* + * We can only have one thread per adapter for AIF's. 
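+/*
+ * [Editorial aside; not part of the patch.] aac_reset_adapter() above
+ * quiesces the host with a bounded poll: once per second it rescans each
+ * device's command list for commands still owned by the firmware
+ * (SCp.phase == AAC_OWNER_FIRMWARE), and it gives up after 60 scans so a
+ * wedged adapter cannot stall the reset forever.  The skeleton of that
+ * pattern, with a hypothetical host_has_inflight() standing in for the
+ * per-device scan:
+ *
+ *	#include <linux/delay.h>
+ *	#include <linux/errno.h>
+ *	#include <scsi/scsi_host.h>
+ *
+ *	static int drain_sketch(struct Scsi_Host *host,
+ *				int (*host_has_inflight)(struct Scsi_Host *))
+ *	{
+ *		int secs;
+ *
+ *		for (secs = 60; secs; --secs) {	// block at most ~60 seconds
+ *			if (!host_has_inflight(host))
+ *				return 0;	// bus is quiet, reset cleanly
+ *			ssleep(1);
+ *		}
+ *		return -ETIMEDOUT;	// the patch resets anyway at this point
+ *	}
+ */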
+@@ -1507,11 +1615,79 @@ + * There are no more AIF's + */ + spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags); +- schedule(); + +- if (kthread_should_stop()) ++ /* ++ * Background activity ++ */ ++ if ((time_before(next_check_jiffies,next_jiffies)) ++ && ((difference = next_check_jiffies - jiffies) <= 0)) { ++ next_check_jiffies = next_jiffies; ++ if (aac_check_health(dev) == 0) { ++ difference = ((long)(unsigned)check_interval) ++ * HZ; ++ next_check_jiffies = jiffies + difference; ++ } else if (!dev->queues) + break; ++ } ++ if (!time_before(next_check_jiffies,next_jiffies) ++ && ((difference = next_jiffies - jiffies) <= 0)) { ++ struct timeval now; ++ int ret; ++ ++ /* Don't even try to talk to adapter if its sick */ ++ ret = aac_check_health(dev); ++ if (!ret && !dev->queues) ++ break; ++ next_check_jiffies = jiffies ++ + ((long)(unsigned)check_interval) ++ * HZ; ++ do_gettimeofday(&now); ++ ++ /* Synchronize our watches */ ++ if (((1000000 - (1000000 / HZ)) > now.tv_usec) ++ && (now.tv_usec > (1000000 / HZ))) ++ difference = (((1000000 - now.tv_usec) * HZ) ++ + 500000) / 1000000; ++ else if (ret == 0) { ++ struct fib *fibptr; ++ ++ if ((fibptr = aac_fib_alloc(dev))) { ++ u32 * info; ++ ++ aac_fib_init(fibptr); ++ ++ info = (u32 *) fib_data(fibptr); ++ if (now.tv_usec > 500000) ++ ++now.tv_sec; ++ ++ *info = cpu_to_le32(now.tv_sec); ++ ++ (void)aac_fib_send(SendHostTime, ++ fibptr, ++ sizeof(*info), ++ FsaNormal, ++ 1, 1, ++ NULL, ++ NULL); ++ aac_fib_complete(fibptr); ++ aac_fib_free(fibptr); ++ } ++ difference = (long)(unsigned)update_interval*HZ; ++ } else { ++ /* retry shortly */ ++ difference = 10 * HZ; ++ } ++ next_jiffies = jiffies + difference; ++ if (time_before(next_check_jiffies,next_jiffies)) ++ difference = next_check_jiffies - jiffies; ++ } ++ if (difference <= 0) ++ difference = 1; + set_current_state(TASK_INTERRUPTIBLE); ++ schedule_timeout(difference); ++ ++ if (kthread_should_stop()) ++ break; + } + if (dev->queues) + remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait); +diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/linit.c linux-2.6.22-591/drivers/scsi/aacraid/linit.c +--- linux-2.6.22-570/drivers/scsi/aacraid/linit.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/aacraid/linit.c 2007-12-21 15:36:12.000000000 -0500 +@@ -39,10 +39,8 @@ + #include + #include + #include +-#include + #include + #include +-#include + #include + #include + +@@ -223,12 +221,12 @@ + { aac_rx_init, "percraid", "DELL ", "PERC 320/DC ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Perc 320/DC*/ + { aac_sa_init, "aacraid", "ADAPTEC ", "Adaptec 5400S ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/ + { aac_sa_init, "aacraid", "ADAPTEC ", "AAC-364 ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/ +- { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell PERC2/QC */ ++ { aac_sa_init, "percraid", "DELL ", "PERCRAID ", 4, AAC_QUIRK_34SG }, /* Dell PERC2/QC */ + { aac_sa_init, "hpnraid", "HP ", "NetRAID ", 4, AAC_QUIRK_34SG }, /* HP NetRAID-4M */ + + { aac_rx_init, "aacraid", "DELL ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell Catchall */ + { aac_rx_init, "aacraid", "Legend ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend Catchall */ +- { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec Catch All */ ++ { aac_rx_init, "aacraid", "ADAPTEC ", "RAID ", 2 }, /* Adaptec Catch All */ + { aac_rkt_init, "aacraid", "ADAPTEC ", 
"RAID ", 2 }, /* Adaptec Rocket Catch All */ + { aac_nark_init, "aacraid", "ADAPTEC ", "RAID ", 2 } /* Adaptec NEMER/ARK Catch All */ + }; +@@ -403,10 +401,6 @@ + + static int aac_slave_configure(struct scsi_device *sdev) + { +- if (sdev_channel(sdev) == CONTAINER_CHANNEL) { +- sdev->skip_ms_page_8 = 1; +- sdev->skip_ms_page_3f = 1; +- } + if ((sdev->type == TYPE_DISK) && + (sdev_channel(sdev) != CONTAINER_CHANNEL)) { + if (expose_physicals == 0) +@@ -450,6 +444,43 @@ + return 0; + } + ++/** ++ * aac_change_queue_depth - alter queue depths ++ * @sdev: SCSI device we are considering ++ * @depth: desired queue depth ++ * ++ * Alters queue depths for target device based on the host adapter's ++ * total capacity and the queue depth supported by the target device. ++ */ ++ ++static int aac_change_queue_depth(struct scsi_device *sdev, int depth) ++{ ++ if (sdev->tagged_supported && (sdev->type == TYPE_DISK) && ++ (sdev_channel(sdev) == CONTAINER_CHANNEL)) { ++ struct scsi_device * dev; ++ struct Scsi_Host *host = sdev->host; ++ unsigned num = 0; ++ ++ __shost_for_each_device(dev, host) { ++ if (dev->tagged_supported && (dev->type == TYPE_DISK) && ++ (sdev_channel(dev) == CONTAINER_CHANNEL)) ++ ++num; ++ ++num; ++ } ++ if (num >= host->can_queue) ++ num = host->can_queue - 1; ++ if (depth > (host->can_queue - num)) ++ depth = host->can_queue - num; ++ if (depth > 256) ++ depth = 256; ++ else if (depth < 2) ++ depth = 2; ++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth); ++ } else ++ scsi_adjust_queue_depth(sdev, 0, 1); ++ return sdev->queue_depth; ++} ++ + static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg) + { + struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata; +@@ -548,6 +579,14 @@ + ssleep(1); + } + printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME); ++ /* ++ * This adapter needs a blind reset, only do so for Adapters that ++ * support a register, instead of a commanded, reset. 
++ */ ++ if ((aac->supplement_adapter_info.SupportedOptions2 & ++ le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) == ++ le32_to_cpu(AAC_OPTION_MU_RESET)) ++ aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */ + return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */ + } + +@@ -735,15 +774,21 @@ + return len; + } + +-static ssize_t aac_show_serial_number(struct class_device *class_dev, +- char *buf) ++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf) + { + struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata; + int len = 0; + + if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0) +- len = snprintf(buf, PAGE_SIZE, "%x\n", ++ len = snprintf(buf, PAGE_SIZE, "%06X\n", + le32_to_cpu(dev->adapter_info.serial[0])); ++ if (len && ++ !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[ ++ sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)+2-len], ++ buf, len)) ++ len = snprintf(buf, PAGE_SIZE, "%.*s\n", ++ (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo), ++ dev->supplement_adapter_info.MfgPcbaSerialNo); + return len; + } + +@@ -759,6 +804,31 @@ + class_to_shost(class_dev)->max_id); + } + ++static ssize_t aac_store_reset_adapter(struct class_device *class_dev, ++ const char *buf, size_t count) ++{ ++ int retval = -EACCES; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return retval; ++ retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!'); ++ if (retval >= 0) ++ retval = count; ++ return retval; ++} ++ ++static ssize_t aac_show_reset_adapter(struct class_device *class_dev, ++ char *buf) ++{ ++ struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata; ++ int len, tmp; ++ ++ tmp = aac_adapter_check_health(dev); ++ if ((tmp == 0) && dev->in_reset) ++ tmp = -EBUSY; ++ len = snprintf(buf, PAGE_SIZE, "0x%x", tmp); ++ return len; ++} + + static struct class_device_attribute aac_model = { + .attr = { +@@ -816,6 +886,14 @@ + }, + .show = aac_show_max_id, + }; ++static struct class_device_attribute aac_reset = { ++ .attr = { ++ .name = "reset_host", ++ .mode = S_IWUSR|S_IRUGO, ++ }, ++ .store = aac_store_reset_adapter, ++ .show = aac_show_reset_adapter, ++}; + + static struct class_device_attribute *aac_attrs[] = { + &aac_model, +@@ -826,6 +904,7 @@ + &aac_serial_number, + &aac_max_channel, + &aac_max_id, ++ &aac_reset, + NULL + }; + +@@ -852,6 +931,7 @@ + .bios_param = aac_biosparm, + .shost_attrs = aac_attrs, + .slave_configure = aac_slave_configure, ++ .change_queue_depth = aac_change_queue_depth, + .eh_abort_handler = aac_eh_abort, + .eh_host_reset_handler = aac_eh_reset, + .can_queue = AAC_NUM_IO_FIB, +@@ -1090,7 +1170,7 @@ + { + int error; + +- printk(KERN_INFO "Adaptec %s driver (%s)\n", ++ printk(KERN_INFO "Adaptec %s driver %s\n", + AAC_DRIVERNAME, aac_driver_version); + + error = pci_register_driver(&aac_pci_driver); +diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/rx.c linux-2.6.22-591/drivers/scsi/aacraid/rx.c +--- linux-2.6.22-570/drivers/scsi/aacraid/rx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aacraid/rx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -464,6 +464,8 @@ + { + u32 var; + ++ if (!(dev->supplement_adapter_info.SupportedOptions2 & ++ le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) { + if (bled) + printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n", + dev->name, dev->id, bled); +@@ -479,6 +481,7 @@ + + if (bled && (bled != -ETIMEDOUT)) + return -EINVAL; ++ 
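+/*
+ * [Editorial aside; not part of the patch.] Both aac_eh_reset() above and
+ * this rx.c restart path key off firmware capability bits carried in the
+ * little-endian SupportedOptions2 word.  An equivalent, arguably clearer
+ * test converts the field to CPU order once; this helper is a sketch, not
+ * code from the patch:
+ *
+ *	#include "aacraid.h"
+ *
+ *	static int supports_blind_reset_sketch(struct aac_dev *dev)
+ *	{
+ *		u32 opts = le32_to_cpu(
+ *			dev->supplement_adapter_info.SupportedOptions2);
+ *
+ *		// register-based reset present and not administratively masked
+ *		return (opts & AAC_OPTION_MU_RESET) &&
+ *		       !(opts & AAC_OPTION_IGNORE_RESET);
+ *	}
+ */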
} + if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */ + rx_writel(dev, MUnit.reserved2, 3); + msleep(5000); /* Delay 5 seconds */ +@@ -596,7 +599,7 @@ + } + msleep(1); + } +- if (restart) ++ if (restart && aac_commit) + aac_commit = 1; + /* + * Fill in the common function dispatch table. +diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.c linux-2.6.22-591/drivers/scsi/advansys.c +--- linux-2.6.22-570/drivers/scsi/advansys.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/advansys.c 2007-12-21 15:36:12.000000000 -0500 +@@ -798,7 +798,6 @@ + #include + #include + #include +-#include "advansys.h" + #ifdef CONFIG_PCI + #include + #endif /* CONFIG_PCI */ +@@ -2014,7 +2013,7 @@ + STATIC void AscEnableIsaDma(uchar); + #endif /* CONFIG_ISA */ + STATIC ASC_DCNT AscGetMaxDmaCount(ushort); +- ++static const char *advansys_info(struct Scsi_Host *shp); + + /* + * --- Adv Library Constants and Macros +@@ -3970,10 +3969,6 @@ + ASC_IS_PCI, + }; + +-/* +- * Used with the LILO 'advansys' option to eliminate or +- * limit I/O port probing at boot time, cf. advansys_setup(). +- */ + STATIC int asc_iopflag = ASC_FALSE; + STATIC int asc_ioport[ASC_NUM_IOPORT_PROBE] = { 0, 0, 0, 0 }; + +@@ -4055,10 +4050,6 @@ + #endif /* ADVANSYS_DEBUG */ + + +-/* +- * --- Linux 'struct scsi_host_template' and advansys_setup() Functions +- */ +- + #ifdef CONFIG_PROC_FS + /* + * advansys_proc_info() - /proc/scsi/advansys/[0-(ASC_NUM_BOARD_SUPPORTED-1)] +@@ -4080,7 +4071,7 @@ + * if 'prtbuf' is too small it will not be overwritten. Instead the + * user just won't get all the available statistics. + */ +-int ++static int + advansys_proc_info(struct Scsi_Host *shost, char *buffer, char **start, + off_t offset, int length, int inout) + { +@@ -4296,7 +4287,7 @@ + * it must not call SCSI mid-level functions including scsi_malloc() + * and scsi_free(). + */ +-int __init ++static int __init + advansys_detect(struct scsi_host_template *tpnt) + { + static int detect_called = ASC_FALSE; +@@ -5428,7 +5419,7 @@ + * + * Release resources allocated for a single AdvanSys adapter. + */ +-int ++static int + advansys_release(struct Scsi_Host *shp) + { + asc_board_t *boardp; +@@ -5475,7 +5466,7 @@ + * Note: The information line should not exceed ASC_INFO_SIZE bytes, + * otherwise the static 'info' array will be overrun. + */ +-const char * ++static const char * + advansys_info(struct Scsi_Host *shp) + { + static char info[ASC_INFO_SIZE]; +@@ -5568,7 +5559,7 @@ + * This function always returns 0. Command return status is saved + * in the 'scp' result field. + */ +-int ++static int + advansys_queuecommand(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *)) + { + struct Scsi_Host *shp; +@@ -5656,7 +5647,7 @@ + * sleeping is allowed and no locking other than for host structures is + * required. Returns SUCCESS or FAILED. + */ +-int ++static int + advansys_reset(struct scsi_cmnd *scp) + { + struct Scsi_Host *shp; +@@ -5841,7 +5832,7 @@ + * ip[1]: sectors + * ip[2]: cylinders + */ +-int ++static int + advansys_biosparam(struct scsi_device *sdev, struct block_device *bdev, + sector_t capacity, int ip[]) + { +@@ -5875,82 +5866,6 @@ + } + + /* +- * advansys_setup() +- * +- * This function is called from init/main.c at boot time. +- * It it passed LILO parameters that can be set from the +- * LILO command line or in /etc/lilo.conf. +- * +- * It is used by the AdvanSys driver to either disable I/O +- * port scanning or to limit scanning to 1 - 4 I/O ports. 
+- * Regardless of the option setting EISA and PCI boards +- * will still be searched for and detected. This option +- * only affects searching for ISA and VL boards. +- * +- * If ADVANSYS_DEBUG is defined the driver debug level may +- * be set using the 5th (ASC_NUM_IOPORT_PROBE + 1) I/O Port. +- * +- * Examples: +- * 1. Eliminate I/O port scanning: +- * boot: linux advansys= +- * or +- * boot: linux advansys=0x0 +- * 2. Limit I/O port scanning to one I/O port: +- * boot: linux advansys=0x110 +- * 3. Limit I/O port scanning to four I/O ports: +- * boot: linux advansys=0x110,0x210,0x230,0x330 +- * 4. If ADVANSYS_DEBUG, limit I/O port scanning to four I/O ports and +- * set the driver debug level to 2. +- * boot: linux advansys=0x110,0x210,0x230,0x330,0xdeb2 +- * +- * ints[0] - number of arguments +- * ints[1] - first argument +- * ints[2] - second argument +- * ... +- */ +-void __init +-advansys_setup(char *str, int *ints) +-{ +- int i; +- +- if (asc_iopflag == ASC_TRUE) { +- printk("AdvanSys SCSI: 'advansys' LILO option may appear only once\n"); +- return; +- } +- +- asc_iopflag = ASC_TRUE; +- +- if (ints[0] > ASC_NUM_IOPORT_PROBE) { +-#ifdef ADVANSYS_DEBUG +- if ((ints[0] == ASC_NUM_IOPORT_PROBE + 1) && +- (ints[ASC_NUM_IOPORT_PROBE + 1] >> 4 == 0xdeb)) { +- asc_dbglvl = ints[ASC_NUM_IOPORT_PROBE + 1] & 0xf; +- } else { +-#endif /* ADVANSYS_DEBUG */ +- printk("AdvanSys SCSI: only %d I/O ports accepted\n", +- ASC_NUM_IOPORT_PROBE); +-#ifdef ADVANSYS_DEBUG +- } +-#endif /* ADVANSYS_DEBUG */ +- } +- +-#ifdef ADVANSYS_DEBUG +- ASC_DBG1(1, "advansys_setup: ints[0] %d\n", ints[0]); +- for (i = 1; i < ints[0]; i++) { +- ASC_DBG2(1, " ints[%d] 0x%x", i, ints[i]); +- } +- ASC_DBG(1, "\n"); +-#endif /* ADVANSYS_DEBUG */ +- +- for (i = 1; i <= ints[0] && i <= ASC_NUM_IOPORT_PROBE; i++) { +- asc_ioport[i-1] = ints[i]; +- ASC_DBG2(1, "advansys_setup: asc_ioport[%d] 0x%x\n", +- i - 1, asc_ioport[i-1]); +- } +-} +- +- +-/* + * --- Loadable Driver Support + */ + +diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.h linux-2.6.22-591/drivers/scsi/advansys.h +--- linux-2.6.22-570/drivers/scsi/advansys.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/advansys.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,36 +0,0 @@ +-/* +- * advansys.h - Linux Host Driver for AdvanSys SCSI Adapters +- * +- * Copyright (c) 1995-2000 Advanced System Products, Inc. +- * Copyright (c) 2000-2001 ConnectCom Solutions, Inc. +- * All Rights Reserved. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that redistributions of source +- * code retain the above copyright notice and this comment without +- * modification. +- * +- * As of March 8, 2000 Advanced System Products, Inc. (AdvanSys) +- * changed its name to ConnectCom Solutions, Inc. +- * +- */ +- +-#ifndef _ADVANSYS_H +-#define _ADVANSYS_H +- +-/* +- * struct scsi_host_template function prototypes. 
+- */ +-int advansys_detect(struct scsi_host_template *); +-int advansys_release(struct Scsi_Host *); +-const char *advansys_info(struct Scsi_Host *); +-int advansys_queuecommand(struct scsi_cmnd *, void (* done)(struct scsi_cmnd *)); +-int advansys_reset(struct scsi_cmnd *); +-int advansys_biosparam(struct scsi_device *, struct block_device *, +- sector_t, int[]); +-static int advansys_slave_configure(struct scsi_device *); +- +-/* init/main.c setup function */ +-void advansys_setup(char *, int *); +- +-#endif /* _ADVANSYS_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/aha152x.c linux-2.6.22-591/drivers/scsi/aha152x.c +--- linux-2.6.22-570/drivers/scsi/aha152x.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aha152x.c 2007-12-21 15:36:12.000000000 -0500 +@@ -240,6 +240,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -253,7 +254,6 @@ + #include + #include + #include +-#include + #include + + #include "scsi.h" +@@ -551,7 +551,7 @@ + */ + struct aha152x_scdata { + Scsi_Cmnd *next; /* next sc in queue */ +- struct semaphore *sem; /* semaphore to block on */ ++ struct completion *done;/* semaphore to block on */ + unsigned char cmd_len; + unsigned char cmnd[MAX_COMMAND_SIZE]; + unsigned short use_sg; +@@ -608,7 +608,7 @@ + + #define SCDATA(SCpnt) ((struct aha152x_scdata *) (SCpnt)->host_scribble) + #define SCNEXT(SCpnt) SCDATA(SCpnt)->next +-#define SCSEM(SCpnt) SCDATA(SCpnt)->sem ++#define SCSEM(SCpnt) SCDATA(SCpnt)->done + + #define SG_ADDRESS(buffer) ((char *) (page_address((buffer)->page)+(buffer)->offset)) + +@@ -969,7 +969,8 @@ + /* + * Queue a command and setup interrupts for a free bus. + */ +-static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct semaphore *sem, int phase, void (*done)(Scsi_Cmnd *)) ++static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct completion *complete, ++ int phase, void (*done)(Scsi_Cmnd *)) + { + struct Scsi_Host *shpnt = SCpnt->device->host; + unsigned long flags; +@@ -1013,7 +1014,7 @@ + } + + SCNEXT(SCpnt) = NULL; +- SCSEM(SCpnt) = sem; ++ SCSEM(SCpnt) = complete; + + /* setup scratch area + SCp.ptr : buffer pointer +@@ -1084,9 +1085,9 @@ + DPRINTK(debug_eh, INFO_LEAD "reset_done called\n", CMDINFO(SCpnt)); + #endif + if(SCSEM(SCpnt)) { +- up(SCSEM(SCpnt)); ++ complete(SCSEM(SCpnt)); + } else { +- printk(KERN_ERR "aha152x: reset_done w/o semaphore\n"); ++ printk(KERN_ERR "aha152x: reset_done w/o completion\n"); + } + } + +@@ -1139,21 +1140,6 @@ + return FAILED; + } + +-static void timer_expired(unsigned long p) +-{ +- Scsi_Cmnd *SCp = (Scsi_Cmnd *)p; +- struct semaphore *sem = SCSEM(SCp); +- struct Scsi_Host *shpnt = SCp->device->host; +- unsigned long flags; +- +- /* remove command from issue queue */ +- DO_LOCK(flags); +- remove_SC(&ISSUE_SC, SCp); +- DO_UNLOCK(flags); +- +- up(sem); +-} +- + /* + * Reset a device + * +@@ -1161,14 +1147,14 @@ + static int aha152x_device_reset(Scsi_Cmnd * SCpnt) + { + struct Scsi_Host *shpnt = SCpnt->device->host; +- DECLARE_MUTEX_LOCKED(sem); +- struct timer_list timer; ++ DECLARE_COMPLETION(done); + int ret, issued, disconnected; + unsigned char old_cmd_len = SCpnt->cmd_len; + unsigned short old_use_sg = SCpnt->use_sg; + void *old_buffer = SCpnt->request_buffer; + unsigned old_bufflen = SCpnt->request_bufflen; + unsigned long flags; ++ unsigned long timeleft; + + #if defined(AHA152X_DEBUG) + if(HOSTDATA(shpnt)->debug & debug_eh) { +@@ -1192,15 +1178,15 @@ + SCpnt->request_buffer = NULL; + SCpnt->request_bufflen = 0; + +- init_timer(&timer); +- 
timer.data = (unsigned long) SCpnt; +- timer.expires = jiffies + 100*HZ; /* 10s */ +- timer.function = (void (*)(unsigned long)) timer_expired; +- +- aha152x_internal_queue(SCpnt, &sem, resetting, reset_done); +- add_timer(&timer); +- down(&sem); +- del_timer(&timer); ++ aha152x_internal_queue(SCpnt, &done, resetting, reset_done); ++ ++ timeleft = wait_for_completion_timeout(&done, 100*HZ); ++ if (!timeleft) { ++ /* remove command from issue queue */ ++ DO_LOCK(flags); ++ remove_SC(&ISSUE_SC, SCpnt); ++ DO_UNLOCK(flags); ++ } + + SCpnt->cmd_len = old_cmd_len; + SCpnt->use_sg = old_use_sg; +diff -Nurb linux-2.6.22-570/drivers/scsi/aha1740.c linux-2.6.22-591/drivers/scsi/aha1740.c +--- linux-2.6.22-570/drivers/scsi/aha1740.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aha1740.c 2007-12-21 15:36:12.000000000 -0500 +@@ -271,19 +271,7 @@ + continue; + } + sgptr = (struct aha1740_sg *) SCtmp->host_scribble; +- if (SCtmp->use_sg) { +- /* We used scatter-gather. +- Do the unmapping dance. */ +- dma_unmap_sg (&edev->dev, +- (struct scatterlist *) SCtmp->request_buffer, +- SCtmp->use_sg, +- SCtmp->sc_data_direction); +- } else { +- dma_unmap_single (&edev->dev, +- sgptr->buf_dma_addr, +- SCtmp->request_bufflen, +- DMA_BIDIRECTIONAL); +- } ++ scsi_dma_unmap(SCtmp); + + /* Free the sg block */ + dma_free_coherent (&edev->dev, +@@ -349,11 +337,9 @@ + unchar target = scmd_id(SCpnt); + struct aha1740_hostdata *host = HOSTDATA(SCpnt->device->host); + unsigned long flags; +- void *buff = SCpnt->request_buffer; +- int bufflen = SCpnt->request_bufflen; + dma_addr_t sg_dma; + struct aha1740_sg *sgptr; +- int ecbno; ++ int ecbno, nseg; + DEB(int i); + + if(*cmd == REQUEST_SENSE) { +@@ -424,23 +410,22 @@ + sgptr = (struct aha1740_sg *) SCpnt->host_scribble; + sgptr->sg_dma_addr = sg_dma; + +- if (SCpnt->use_sg) { +- struct scatterlist * sgpnt; ++ nseg = scsi_dma_map(SCpnt); ++ BUG_ON(nseg < 0); ++ if (nseg) { ++ struct scatterlist *sg; + struct aha1740_chain * cptr; +- int i, count; ++ int i; + DEB(unsigned char * ptr); + + host->ecb[ecbno].sg = 1; /* SCSI Initiator Command + * w/scatter-gather*/ +- sgpnt = (struct scatterlist *) SCpnt->request_buffer; + cptr = sgptr->sg_chain; +- count = dma_map_sg (&host->edev->dev, sgpnt, SCpnt->use_sg, +- SCpnt->sc_data_direction); +- for(i=0; i < count; i++) { +- cptr[i].datalen = sg_dma_len (sgpnt + i); +- cptr[i].dataptr = sg_dma_address (sgpnt + i); ++ scsi_for_each_sg(SCpnt, sg, nseg, i) { ++ cptr[i].datalen = sg_dma_len (sg); ++ cptr[i].dataptr = sg_dma_address (sg); + } +- host->ecb[ecbno].datalen = count*sizeof(struct aha1740_chain); ++ host->ecb[ecbno].datalen = nseg * sizeof(struct aha1740_chain); + host->ecb[ecbno].dataptr = sg_dma; + #ifdef DEBUG + printk("cptr %x: ",cptr); +@@ -448,11 +433,8 @@ + for(i=0;i<24;i++) printk("%02x ", ptr[i]); + #endif + } else { +- host->ecb[ecbno].datalen = bufflen; +- sgptr->buf_dma_addr = dma_map_single (&host->edev->dev, +- buff, bufflen, +- DMA_BIDIRECTIONAL); +- host->ecb[ecbno].dataptr = sgptr->buf_dma_addr; ++ host->ecb[ecbno].datalen = 0; ++ host->ecb[ecbno].dataptr = 0; + } + host->ecb[ecbno].lun = SCpnt->device->lun; + host->ecb[ecbno].ses = 1; /* Suppress underrun errors */ +diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c +--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -376,21 +376,10 @@ 
+ ahd_linux_unmap_scb(struct ahd_softc *ahd, struct scb *scb) + { + struct scsi_cmnd *cmd; +- int direction; + + cmd = scb->io_ctx; +- direction = cmd->sc_data_direction; + ahd_sync_sglist(ahd, scb, BUS_DMASYNC_POSTWRITE); +- if (cmd->use_sg != 0) { +- struct scatterlist *sg; +- +- sg = (struct scatterlist *)cmd->request_buffer; +- pci_unmap_sg(ahd->dev_softc, sg, cmd->use_sg, direction); +- } else if (cmd->request_bufflen != 0) { +- pci_unmap_single(ahd->dev_softc, +- scb->platform_data->buf_busaddr, +- cmd->request_bufflen, direction); +- } ++ scsi_dma_unmap(cmd); + } + + /******************************** Macros **************************************/ +@@ -1422,6 +1411,7 @@ + u_int col_idx; + uint16_t mask; + unsigned long flags; ++ int nseg; + + ahd_lock(ahd, &flags); + +@@ -1494,18 +1484,17 @@ + ahd_set_residual(scb, 0); + ahd_set_sense_residual(scb, 0); + scb->sg_count = 0; +- if (cmd->use_sg != 0) { +- void *sg; ++ ++ nseg = scsi_dma_map(cmd); ++ BUG_ON(nseg < 0); ++ if (nseg > 0) { ++ void *sg = scb->sg_list; + struct scatterlist *cur_seg; +- u_int nseg; +- int dir; ++ int i; + +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- dir = cmd->sc_data_direction; +- nseg = pci_map_sg(ahd->dev_softc, cur_seg, +- cmd->use_sg, dir); + scb->platform_data->xfer_len = 0; +- for (sg = scb->sg_list; nseg > 0; nseg--, cur_seg++) { ++ ++ scsi_for_each_sg(cmd, cur_seg, nseg, i) { + dma_addr_t addr; + bus_size_t len; + +@@ -1513,22 +1502,8 @@ + len = sg_dma_len(cur_seg); + scb->platform_data->xfer_len += len; + sg = ahd_sg_setup(ahd, scb, sg, addr, len, +- /*last*/nseg == 1); ++ i == (nseg - 1)); + } +- } else if (cmd->request_bufflen != 0) { +- void *sg; +- dma_addr_t addr; +- int dir; +- +- sg = scb->sg_list; +- dir = cmd->sc_data_direction; +- addr = pci_map_single(ahd->dev_softc, +- cmd->request_buffer, +- cmd->request_bufflen, dir); +- scb->platform_data->xfer_len = cmd->request_bufflen; +- scb->platform_data->buf_busaddr = addr; +- sg = ahd_sg_setup(ahd, scb, sg, addr, +- cmd->request_bufflen, /*last*/TRUE); + } + + LIST_INSERT_HEAD(&ahd->pending_scbs, scb, pending_links); +diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h +--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic79xx_osm.h 2007-12-21 15:36:12.000000000 -0500 +@@ -781,7 +781,7 @@ + static __inline + void ahd_set_residual(struct scb *scb, u_long resid) + { +- scb->io_ctx->resid = resid; ++ scsi_set_resid(scb->io_ctx, resid); + } + + static __inline +@@ -793,7 +793,7 @@ + static __inline + u_long ahd_get_residual(struct scb *scb) + { +- return (scb->io_ctx->resid); ++ return scsi_get_resid(scb->io_ctx); + } + + static __inline +diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c +--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -402,18 +402,8 @@ + + cmd = scb->io_ctx; + ahc_sync_sglist(ahc, scb, BUS_DMASYNC_POSTWRITE); +- if (cmd->use_sg != 0) { +- struct scatterlist *sg; + +- sg = (struct scatterlist *)cmd->request_buffer; +- pci_unmap_sg(ahc->dev_softc, sg, cmd->use_sg, +- cmd->sc_data_direction); +- } else if (cmd->request_bufflen != 0) { +- pci_unmap_single(ahc->dev_softc, +- scb->platform_data->buf_busaddr, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- } ++ 
scsi_dma_unmap(cmd); + } + + static __inline int +@@ -1381,6 +1371,7 @@ + struct ahc_tmode_tstate *tstate; + uint16_t mask; + struct scb_tailq *untagged_q = NULL; ++ int nseg; + + /* + * Schedule us to run later. The only reason we are not +@@ -1472,23 +1463,21 @@ + ahc_set_residual(scb, 0); + ahc_set_sense_residual(scb, 0); + scb->sg_count = 0; +- if (cmd->use_sg != 0) { ++ ++ nseg = scsi_dma_map(cmd); ++ BUG_ON(nseg < 0); ++ if (nseg > 0) { + struct ahc_dma_seg *sg; + struct scatterlist *cur_seg; +- struct scatterlist *end_seg; +- int nseg; ++ int i; + +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- nseg = pci_map_sg(ahc->dev_softc, cur_seg, cmd->use_sg, +- cmd->sc_data_direction); +- end_seg = cur_seg + nseg; + /* Copy the segments into the SG list. */ + sg = scb->sg_list; + /* + * The sg_count may be larger than nseg if + * a transfer crosses a 32bit page. + */ +- while (cur_seg < end_seg) { ++ scsi_for_each_sg(cmd, cur_seg, nseg, i) { + dma_addr_t addr; + bus_size_t len; + int consumed; +@@ -1499,7 +1488,6 @@ + sg, addr, len); + sg += consumed; + scb->sg_count += consumed; +- cur_seg++; + } + sg--; + sg->len |= ahc_htole32(AHC_DMA_LAST_SEG); +@@ -1516,33 +1504,6 @@ + */ + scb->hscb->dataptr = scb->sg_list->addr; + scb->hscb->datacnt = scb->sg_list->len; +- } else if (cmd->request_bufflen != 0) { +- struct ahc_dma_seg *sg; +- dma_addr_t addr; +- +- sg = scb->sg_list; +- addr = pci_map_single(ahc->dev_softc, +- cmd->request_buffer, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- scb->platform_data->buf_busaddr = addr; +- scb->sg_count = ahc_linux_map_seg(ahc, scb, +- sg, addr, +- cmd->request_bufflen); +- sg->len |= ahc_htole32(AHC_DMA_LAST_SEG); +- +- /* +- * Reset the sg list pointer. +- */ +- scb->hscb->sgptr = +- ahc_htole32(scb->sg_list_phys | SG_FULL_RESID); +- +- /* +- * Copy the first SG into the "current" +- * data pointer area. 
+- */ +- scb->hscb->dataptr = sg->addr; +- scb->hscb->datacnt = sg->len; + } else { + scb->hscb->sgptr = ahc_htole32(SG_LIST_NULL); + scb->hscb->dataptr = 0; +diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h +--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aic7xxx/aic7xxx_osm.h 2007-12-21 15:36:12.000000000 -0500 +@@ -751,7 +751,7 @@ + static __inline + void ahc_set_residual(struct scb *scb, u_long resid) + { +- scb->io_ctx->resid = resid; ++ scsi_set_resid(scb->io_ctx, resid); + } + + static __inline +@@ -763,7 +763,7 @@ + static __inline + u_long ahc_get_residual(struct scb *scb) + { +- return (scb->io_ctx->resid); ++ return scsi_get_resid(scb->io_ctx); + } + + static __inline +diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx_old.c linux-2.6.22-591/drivers/scsi/aic7xxx_old.c +--- linux-2.6.22-570/drivers/scsi/aic7xxx_old.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/aic7xxx_old.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2690,17 +2690,8 @@ + struct aic7xxx_scb *scbp; + unsigned char queue_depth; + +- if (cmd->use_sg > 1) +- { +- struct scatterlist *sg; ++ scsi_dma_unmap(cmd); + +- sg = (struct scatterlist *)cmd->request_buffer; +- pci_unmap_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction); +- } +- else if (cmd->request_bufflen) +- pci_unmap_single(p->pdev, aic7xxx_mapping(cmd), +- cmd->request_bufflen, +- cmd->sc_data_direction); + if (scb->flags & SCB_SENSE) + { + pci_unmap_single(p->pdev, +@@ -3869,7 +3860,7 @@ + * the mid layer didn't check residual data counts to see if the + * command needs retried. + */ +- cmd->resid = scb->sg_length - actual; ++ scsi_set_resid(cmd, scb->sg_length - actual); + aic7xxx_status(cmd) = hscb->target_status; + } + } +@@ -10137,6 +10128,7 @@ + struct scsi_device *sdptr = cmd->device; + unsigned char tindex = TARGET_INDEX(cmd); + struct request *req = cmd->request; ++ int use_sg; + + mask = (0x01 << tindex); + hscb = scb->hscb; +@@ -10209,8 +10201,10 @@ + memcpy(scb->cmnd, cmd->cmnd, cmd->cmd_len); + hscb->SCSI_cmd_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, scb->cmnd)); + +- if (cmd->use_sg) +- { ++ use_sg = scsi_dma_map(cmd); ++ BUG_ON(use_sg < 0); ++ ++ if (use_sg) { + struct scatterlist *sg; /* Must be mid-level SCSI code scatterlist */ + + /* +@@ -10219,11 +10213,11 @@ + * differences and the kernel SG list uses virtual addresses where + * we need physical addresses. + */ +- int i, use_sg; ++ int i; + +- sg = (struct scatterlist *)cmd->request_buffer; + scb->sg_length = 0; +- use_sg = pci_map_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction); ++ ++ + /* + * Copy the segments into the SG array. NOTE!!! - We used to + * have the first entry both in the data_pointer area and the first +@@ -10231,10 +10225,9 @@ + * entry in both places, but now we download the address of + * scb->sg_list[1] instead of 0 to the sg pointer in the hscb. 
+ */ +- for (i = 0; i < use_sg; i++) +- { +- unsigned int len = sg_dma_len(sg+i); +- scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg+i)); ++ scsi_for_each_sg(cmd, sg, use_sg, i) { ++ unsigned int len = sg_dma_len(sg); ++ scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg)); + scb->sg_list[i].length = cpu_to_le32(len); + scb->sg_length += len; + } +@@ -10244,26 +10237,7 @@ + scb->sg_count = i; + hscb->SG_segment_count = i; + hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[1])); +- } +- else +- { +- if (cmd->request_bufflen) +- { +- unsigned int address = pci_map_single(p->pdev, cmd->request_buffer, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- aic7xxx_mapping(cmd) = address; +- scb->sg_list[0].address = cpu_to_le32(address); +- scb->sg_list[0].length = cpu_to_le32(cmd->request_bufflen); +- scb->sg_count = 1; +- scb->sg_length = cmd->request_bufflen; +- hscb->SG_segment_count = 1; +- hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[0])); +- hscb->data_count = scb->sg_list[0].length; +- hscb->data_pointer = scb->sg_list[0].address; +- } +- else +- { ++ } else { + scb->sg_count = 0; + scb->sg_length = 0; + hscb->SG_segment_count = 0; +@@ -10271,7 +10245,6 @@ + hscb->data_count = 0; + hscb->data_pointer = 0; + } +- } + } + + /*+F************************************************************************* +diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.c linux-2.6.22-591/drivers/scsi/amiga7xx.c +--- linux-2.6.22-570/drivers/scsi/amiga7xx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/amiga7xx.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,138 +0,0 @@ +-/* +- * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. +- * Amiga MacroSystemUS WarpEngine SCSI controller. +- * Amiga Technologies A4000T SCSI controller. +- * Amiga Technologies/DKB A4091 SCSI controller. +- * +- * Written 1997 by Alan Hourihane +- * plus modifications of the 53c7xx.c driver to support the Amiga. 
+- */ +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "scsi.h" +-#include +-#include "53c7xx.h" +-#include "amiga7xx.h" +- +- +-static int amiga7xx_register_one(struct scsi_host_template *tpnt, +- unsigned long address) +-{ +- long long options; +- int clock; +- +- if (!request_mem_region(address, 0x1000, "ncr53c710")) +- return 0; +- +- address = (unsigned long)z_ioremap(address, 0x1000); +- options = OPTION_MEMORY_MAPPED | OPTION_DEBUG_TEST1 | OPTION_INTFLY | +- OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS | +- OPTION_DISCONNECT; +- clock = 50000000; /* 50 MHz SCSI Clock */ +- ncr53c7xx_init(tpnt, 0, 710, address, 0, IRQ_AMIGA_PORTS, DMA_NONE, +- options, clock); +- return 1; +-} +- +- +-#ifdef CONFIG_ZORRO +- +-static struct { +- zorro_id id; +- unsigned long offset; +- int absolute; /* offset is absolute address */ +-} amiga7xx_table[] = { +- { .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, .offset = 0xf40000, +- .absolute = 1 }, +- { .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, .offset = 0x40000 }, +- { .id = ZORRO_PROD_CBM_A4091_1, .offset = 0x800000 }, +- { .id = ZORRO_PROD_CBM_A4091_2, .offset = 0x800000 }, +- { .id = ZORRO_PROD_GVP_GFORCE_040_060, .offset = 0x40000 }, +- { 0 } +-}; +- +-static int __init amiga7xx_zorro_detect(struct scsi_host_template *tpnt) +-{ +- int num = 0, i; +- struct zorro_dev *z = NULL; +- unsigned long address; +- +- while ((z = zorro_find_device(ZORRO_WILDCARD, z))) { +- for (i = 0; amiga7xx_table[i].id; i++) +- if (z->id == amiga7xx_table[i].id) +- break; +- if (!amiga7xx_table[i].id) +- continue; +- if (amiga7xx_table[i].absolute) +- address = amiga7xx_table[i].offset; +- else +- address = z->resource.start + amiga7xx_table[i].offset; +- num += amiga7xx_register_one(tpnt, address); +- } +- return num; +-} +- +-#endif /* CONFIG_ZORRO */ +- +- +-int __init amiga7xx_detect(struct scsi_host_template *tpnt) +-{ +- static unsigned char called = 0; +- int num = 0; +- +- if (called || !MACH_IS_AMIGA) +- return 0; +- +- tpnt->proc_name = "Amiga7xx"; +- +- if (AMIGAHW_PRESENT(A4000_SCSI)) +- num += amiga7xx_register_one(tpnt, 0xdd0040); +- +-#ifdef CONFIG_ZORRO +- num += amiga7xx_zorro_detect(tpnt); +-#endif +- +- called = 1; +- return num; +-} +- +-static int amiga7xx_release(struct Scsi_Host *shost) +-{ +- if (shost->irq) +- free_irq(shost->irq, NULL); +- if (shost->dma_channel != 0xff) +- free_dma(shost->dma_channel); +- if (shost->io_port && shost->n_io_port) +- release_region(shost->io_port, shost->n_io_port); +- scsi_unregister(shost); +- return 0; +-} +- +-static struct scsi_host_template driver_template = { +- .name = "Amiga NCR53c710 SCSI", +- .detect = amiga7xx_detect, +- .release = amiga7xx_release, +- .queuecommand = NCR53c7xx_queue_command, +- .abort = NCR53c7xx_abort, +- .reset = NCR53c7xx_reset, +- .can_queue = 24, +- .this_id = 7, +- .sg_tablesize = 63, +- .cmd_per_lun = 3, +- .use_clustering = DISABLE_CLUSTERING +-}; +- +- +-#include "scsi_module.c" +diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.h linux-2.6.22-591/drivers/scsi/amiga7xx.h +--- linux-2.6.22-570/drivers/scsi/amiga7xx.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/amiga7xx.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,23 +0,0 @@ +-#ifndef AMIGA7XX_H +- +-#include +- +-int amiga7xx_detect(struct scsi_host_template *); +-const char *NCR53c7x0_info(void); +-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); +-int NCR53c7xx_abort(Scsi_Cmnd 
*); +-int NCR53c7x0_release (struct Scsi_Host *); +-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); +-void NCR53c7x0_intr(int irq, void *dev_id); +- +-#ifndef CMD_PER_LUN +-#define CMD_PER_LUN 3 +-#endif +- +-#ifndef CAN_QUEUE +-#define CAN_QUEUE 24 +-#endif +- +-#include +- +-#endif /* AMIGA7XX_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h +--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr.h 2007-12-21 15:36:12.000000000 -0500 +@@ -48,9 +48,10 @@ + + #define ARCMSR_MAX_OUTSTANDING_CMD 256 + #define ARCMSR_MAX_FREECCB_NUM 288 +-#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.13" ++#define ARCMSR_DRIVER_VERSION "Driver Version 1.20.00.14" + #define ARCMSR_SCSI_INITIATOR_ID 255 + #define ARCMSR_MAX_XFER_SECTORS 512 ++#define ARCMSR_MAX_XFER_SECTORS_B 4096 + #define ARCMSR_MAX_TARGETID 17 + #define ARCMSR_MAX_TARGETLUN 8 + #define ARCMSR_MAX_CMD_PERLUN ARCMSR_MAX_OUTSTANDING_CMD +@@ -469,4 +470,3 @@ + extern struct class_device_attribute *arcmsr_host_attrs[]; + extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb); + void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb); +- +diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c +--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_attr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -59,8 +59,9 @@ + struct class_device_attribute *arcmsr_host_attrs[]; + + static ssize_t +-arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++arcmsr_sysfs_iop_message_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); +@@ -105,8 +106,9 @@ + } + + static ssize_t +-arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++arcmsr_sysfs_iop_message_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); +@@ -152,8 +154,9 @@ + } + + static ssize_t +-arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++arcmsr_sysfs_iop_message_clear(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *host = class_to_shost(cdev); +@@ -188,7 +191,6 @@ + .attr = { + .name = "mu_read", + .mode = S_IRUSR , +- .owner = THIS_MODULE, + }, + .size = 1032, + .read = arcmsr_sysfs_iop_message_read, +@@ -198,7 +200,6 @@ + .attr = { + .name = "mu_write", + .mode = S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 1032, + .write = arcmsr_sysfs_iop_message_write, +@@ -208,7 +209,6 @@ + .attr = { + .name = "mu_clear", + .mode = S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 1, + .write = arcmsr_sysfs_iop_message_clear, +diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c +--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/arcmsr/arcmsr_hba.c 2007-12-21 
15:36:12.000000000 -0500 +@@ -57,6 +57,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -71,7 +72,7 @@ + #include "arcmsr.h" + + MODULE_AUTHOR("Erich Chen "); +-MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter"); ++MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter"); + MODULE_LICENSE("Dual BSD/GPL"); + MODULE_VERSION(ARCMSR_DRIVER_VERSION); + +@@ -93,7 +94,9 @@ + static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb); + static const char *arcmsr_info(struct Scsi_Host *); + static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb); +- ++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev, ++ pci_channel_state_t state); ++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev); + static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth) + { + if (queue_depth > ARCMSR_MAX_CMD_PERLUN) +@@ -104,7 +107,8 @@ + + static struct scsi_host_template arcmsr_scsi_host_template = { + .module = THIS_MODULE, +- .name = "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION, ++ .name = "ARCMSR ARECA SATA/SAS RAID HOST Adapter" ++ ARCMSR_DRIVER_VERSION, + .info = arcmsr_info, + .queuecommand = arcmsr_queue_command, + .eh_abort_handler = arcmsr_abort, +@@ -119,6 +123,10 @@ + .use_clustering = ENABLE_CLUSTERING, + .shost_attrs = arcmsr_host_attrs, + }; ++static struct pci_error_handlers arcmsr_pci_error_handlers = { ++ .error_detected = arcmsr_pci_error_detected, ++ .slot_reset = arcmsr_pci_slot_reset, ++}; + + static struct pci_device_id arcmsr_device_id_table[] = { + {PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)}, +@@ -144,7 +152,8 @@ + .id_table = arcmsr_device_id_table, + .probe = arcmsr_probe, + .remove = arcmsr_remove, +- .shutdown = arcmsr_shutdown ++ .shutdown = arcmsr_shutdown, ++ .err_handler = &arcmsr_pci_error_handlers, + }; + + static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id) +@@ -328,6 +337,8 @@ + + arcmsr_iop_init(acb); + pci_set_drvdata(pdev, host); ++ if (strncmp(acb->firm_version, "V1.42", 5) >= 0) ++ host->max_sectors= ARCMSR_MAX_XFER_SECTORS_B; + + error = scsi_add_host(host, &pdev->dev); + if (error) +@@ -338,6 +349,7 @@ + goto out_free_sysfs; + + scsi_scan_host(host); ++ pci_enable_pcie_error_reporting(pdev); + return 0; + out_free_sysfs: + out_free_irq: +@@ -369,19 +381,9 @@ + + static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb) + { +- struct AdapterControlBlock *acb = ccb->acb; + struct scsi_cmnd *pcmd = ccb->pcmd; + +- if (pcmd->use_sg != 0) { +- struct scatterlist *sl; +- +- sl = (struct scatterlist *)pcmd->request_buffer; +- pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction); +- } +- else if (pcmd->request_bufflen != 0) +- pci_unmap_single(acb->pdev, +- pcmd->SCp.dma_handle, +- pcmd->request_bufflen, pcmd->sc_data_direction); ++ scsi_dma_unmap(pcmd); + } + + static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag) +@@ -498,7 +500,7 @@ + + static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb) + { +- struct MessageUnit __iomem *reg=acb->pmu; ++ struct MessageUnit __iomem *reg = acb->pmu; + + writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, ®->inbound_msgaddr0); + if (arcmsr_wait_msgint_ready(acb)) +@@ -551,6 +553,7 @@ + int8_t *psge = (int8_t *)&arcmsr_cdb->u; + uint32_t address_lo, address_hi; + int arccdbsize = 0x30; ++ int nseg; + + ccb->pcmd = pcmd; + memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB)); +@@ -561,20 +564,20 @@ 
+ arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len; + arcmsr_cdb->Context = (unsigned long)arcmsr_cdb; + memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len); +- if (pcmd->use_sg) { +- int length, sgcount, i, cdb_sgcount = 0; +- struct scatterlist *sl; +- +- /* Get Scatter Gather List from scsiport. */ +- sl = (struct scatterlist *) pcmd->request_buffer; +- sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg, +- pcmd->sc_data_direction); ++ ++ nseg = scsi_dma_map(pcmd); ++ BUG_ON(nseg < 0); ++ ++ if (nseg) { ++ int length, i, cdb_sgcount = 0; ++ struct scatterlist *sg; ++ + /* map stor port SG list to our iop SG List. */ +- for (i = 0; i < sgcount; i++) { ++ scsi_for_each_sg(pcmd, sg, nseg, i) { + /* Get the physical address of the current data pointer */ +- length = cpu_to_le32(sg_dma_len(sl)); +- address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl))); +- address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl))); ++ length = cpu_to_le32(sg_dma_len(sg)); ++ address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sg))); ++ address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sg))); + if (address_hi == 0) { + struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; + +@@ -591,32 +594,12 @@ + psge += sizeof (struct SG64ENTRY); + arccdbsize += sizeof (struct SG64ENTRY); + } +- sl++; + cdb_sgcount++; + } + arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount; +- arcmsr_cdb->DataLength = pcmd->request_bufflen; ++ arcmsr_cdb->DataLength = scsi_bufflen(pcmd); + if ( arccdbsize > 256) + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE; +- } else if (pcmd->request_bufflen) { +- dma_addr_t dma_addr; +- dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer, +- pcmd->request_bufflen, pcmd->sc_data_direction); +- pcmd->SCp.dma_handle = dma_addr; +- address_lo = cpu_to_le32(dma_addr_lo32(dma_addr)); +- address_hi = cpu_to_le32(dma_addr_hi32(dma_addr)); +- if (address_hi == 0) { +- struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge; +- pdma_sg->address = address_lo; +- pdma_sg->length = pcmd->request_bufflen; +- } else { +- struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge; +- pdma_sg->addresshigh = address_hi; +- pdma_sg->address = address_lo; +- pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR; +- } +- arcmsr_cdb->sgcount = 1; +- arcmsr_cdb->DataLength = pcmd->request_bufflen; + } + if (pcmd->sc_data_direction == DMA_TO_DEVICE ) { + arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE; +@@ -758,20 +741,20 @@ + (flag_ccb << 5)); + if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) { + if (ccb->startdone == ARCMSR_CCB_ABORTED) { +- struct scsi_cmnd *abortcmd=ccb->pcmd; ++ struct scsi_cmnd *abortcmd = ccb->pcmd; + if (abortcmd) { + abortcmd->result |= DID_ABORT >> 16; + arcmsr_ccb_complete(ccb, 1); + printk(KERN_NOTICE +- "arcmsr%d: ccb='0x%p' isr got aborted command \n" ++ "arcmsr%d: ccb ='0x%p' isr got aborted command \n" + , acb->host->host_no, ccb); + } + continue; + } + printk(KERN_NOTICE +- "arcmsr%d: isr get an illegal ccb command done acb='0x%p'" +- "ccb='0x%p' ccbacb='0x%p' startdone = 0x%x" +- " ccboutstandingcount=%d \n" ++ "arcmsr%d: isr get an illegal ccb command done acb = '0x%p'" ++ "ccb = '0x%p' ccbacb = '0x%p' startdone = 0x%x" ++ " ccboutstandingcount = %d \n" + , acb->host->host_no + , acb + , ccb +@@ -791,7 +774,7 @@ + switch(ccb->arcmsr_cdb.DeviceStatus) { + case ARCMSR_DEV_SELECT_TIMEOUT: { + acb->devstate[id][lun] = ARECA_RAID_GONE; +- ccb->pcmd->result = DID_TIME_OUT << 16; ++ ccb->pcmd->result = DID_NO_CONNECT << 16; + arcmsr_ccb_complete(ccb, 1); + } + break; +@@ -810,8 +793,8 @@ + 
break; + default: + printk(KERN_NOTICE +- "arcmsr%d: scsi id=%d lun=%d" +- " isr get command error done," ++ "arcmsr%d: scsi id = %d lun = %d" ++ " isr get command error done, " + "but got unknown DeviceStatus = 0x%x \n" + , acb->host->host_no + , id +@@ -848,24 +831,21 @@ + struct CMD_MESSAGE_FIELD *pcmdmessagefld; + int retvalue = 0, transfer_len = 0; + char *buffer; ++ struct scatterlist *sg; + uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 | + (uint32_t ) cmd->cmnd[6] << 16 | + (uint32_t ) cmd->cmnd[7] << 8 | + (uint32_t ) cmd->cmnd[8]; + /* 4 bytes: Areca io control code */ +- if (cmd->use_sg) { +- struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; + ++ sg = scsi_sglist(cmd); + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; +- if (cmd->use_sg > 1) { ++ if (scsi_sg_count(cmd) > 1) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; + } + transfer_len += sg->length; +- } else { +- buffer = cmd->request_buffer; +- transfer_len = cmd->request_bufflen; +- } ++ + if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) { + retvalue = ARCMSR_MESSAGE_FAIL; + goto message_out; +@@ -1057,12 +1037,9 @@ + retvalue = ARCMSR_MESSAGE_FAIL; + } + message_out: +- if (cmd->use_sg) { +- struct scatterlist *sg; +- +- sg = (struct scatterlist *) cmd->request_buffer; ++ sg = scsi_sglist(cmd); + kunmap_atomic(buffer - sg->offset, KM_IRQ0); +- } ++ + return retvalue; + } + +@@ -1085,6 +1062,7 @@ + case INQUIRY: { + unsigned char inqdata[36]; + char *buffer; ++ struct scatterlist *sg; + + if (cmd->device->lun) { + cmd->result = (DID_TIME_OUT << 16); +@@ -1096,7 +1074,7 @@ + inqdata[1] = 0; + /* rem media bit & Dev Type Modifier */ + inqdata[2] = 0; +- /* ISO,ECMA,& ANSI versions */ ++ /* ISO, ECMA, & ANSI versions */ + inqdata[4] = 31; + /* length of additional data */ + strncpy(&inqdata[8], "Areca ", 8); +@@ -1104,21 +1082,14 @@ + strncpy(&inqdata[16], "RAID controller ", 16); + /* Product Identification */ + strncpy(&inqdata[32], "R001", 4); /* Product Revision */ +- if (cmd->use_sg) { +- struct scatterlist *sg; + +- sg = (struct scatterlist *) cmd->request_buffer; ++ sg = scsi_sglist(cmd); + buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; +- } else { +- buffer = cmd->request_buffer; +- } +- memcpy(buffer, inqdata, sizeof(inqdata)); +- if (cmd->use_sg) { +- struct scatterlist *sg; + +- sg = (struct scatterlist *) cmd->request_buffer; ++ memcpy(buffer, inqdata, sizeof(inqdata)); ++ sg = scsi_sglist(cmd); + kunmap_atomic(buffer - sg->offset, KM_IRQ0); +- } ++ + cmd->scsi_done(cmd); + } + break; +@@ -1153,7 +1124,7 @@ + , acb->host->host_no); + return SCSI_MLQUEUE_HOST_BUSY; + } +- if(target == 16) { ++ if (target == 16) { + /* virtual device for iop message transfer */ + arcmsr_handle_virtual_command(acb, cmd); + return 0; +@@ -1166,7 +1137,7 @@ + printk(KERN_NOTICE + "arcmsr%d: block 'read/write'" + "command with gone raid volume" +- " Cmd=%2x, TargetId=%d, Lun=%d \n" ++ " Cmd = %2x, TargetId = %d, Lun = %d \n" + , acb->host->host_no + , cmd->cmnd[0] + , target, lun); +@@ -1257,7 +1228,7 @@ + if ((ccb->startdone == ARCMSR_CCB_ABORTED) || + (ccb == poll_ccb)) { + printk(KERN_NOTICE +- "arcmsr%d: scsi id=%d lun=%d ccb='0x%p'" ++ "arcmsr%d: scsi id = %d lun = %d ccb = '0x%p'" + " poll command abort successfully \n" + , acb->host->host_no + , ccb->pcmd->device->id +@@ -1270,8 +1241,8 @@ + } + printk(KERN_NOTICE + "arcmsr%d: polling get an illegal ccb" +- " command done ccb='0x%p'" +- "ccboutstandingcount=%d \n" ++ " command done ccb ='0x%p'" ++ "ccboutstandingcount = %d \n" + 
			, acb->host->host_no
+ 			, ccb
+ 			, atomic_read(&acb->ccboutstandingcount));
+@@ -1288,7 +1259,7 @@
+ 	switch(ccb->arcmsr_cdb.DeviceStatus) {
+ 	case ARCMSR_DEV_SELECT_TIMEOUT: {
+ 		acb->devstate[id][lun] = ARECA_RAID_GONE;
+-		ccb->pcmd->result = DID_TIME_OUT << 16;
++		ccb->pcmd->result = DID_NO_CONNECT << 16;
+ 		arcmsr_ccb_complete(ccb, 1);
+ 		}
+ 		break;
+@@ -1307,7 +1278,7 @@
+ 		break;
+ 	default:
+ 		printk(KERN_NOTICE
+-			"arcmsr%d: scsi id=%d lun=%d"
++			"arcmsr%d: scsi id = %d lun = %d"
+ 			" polling and getting command error done"
+ 			"but got unknown DeviceStatus = 0x%x \n"
+ 			, acb->host->host_no
+@@ -1322,6 +1293,94 @@
+ 		}
+ 	}
+ }
++static void arcmsr_done4_abort_postqueue(struct AdapterControlBlock *acb)
++{
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++	/*clear and abort all outbound posted Q*/
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)){
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (ccb){
++			if ((ccb->acb != acb) ||
++				(ccb->startdone != ARCMSR_CCB_START)){
++				printk(KERN_NOTICE "arcmsr%d: polling get an illegal ccb"
++					" command done ccb = '0x%p' ccboutstandingcount = %d \n",
++					acb->host->host_no, ccb,
++					atomic_read(&acb->ccboutstandingcount));
++				continue;
++			}
++
++			id = ccb->pcmd->device->id;
++			lun = ccb->pcmd->device->lun;
++			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)){
++				if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++					acb->devstate[id][lun] = ARECA_RAID_GOOD;
++				ccb->pcmd->result = DID_OK << 16;
++				arcmsr_ccb_complete(ccb, 1);
++			}
++			else {
++				switch(ccb->arcmsr_cdb.DeviceStatus) {
++				case ARCMSR_DEV_SELECT_TIMEOUT: {
++					acb->devstate[id][lun] = ARECA_RAID_GONE;
++					ccb->pcmd->result = DID_NO_CONNECT << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_ABORTED:
++
++				case ARCMSR_DEV_INIT_FAIL: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_CHECK_CONDITION: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GOOD;
++					arcmsr_report_sense_info(ccb);
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				default:
++					printk(KERN_NOTICE
++						"arcmsr%d: scsi id = %d"
++						" lun = %d polling and"
++						" getting command error done"
++						" but got unknown"
++						" DeviceStatus = 0x%x \n",
++						acb->host->host_no, id,
++						lun, ccb->arcmsr_cdb.DeviceStatus);
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					break;
++				}
++			}
++			found = 1;
++		}
++	}
++	if (found){
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/*clear interrupt*/
++	}
++	return;
++}
++
+ 
+ static void arcmsr_iop_init(struct AdapterControlBlock *acb)
+ {
+@@ -1355,7 +1414,6 @@
+ 
+ static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+ {
+-	struct MessageUnit __iomem *reg = acb->pmu;
+ 	struct CommandControlBlock *ccb;
+ 	uint32_t intmask_org;
+ 	int i = 0;
+@@ -1368,21 +1426,17 @@
+ 	/* disable all outbound interrupt */
+ 	intmask_org = arcmsr_disable_outbound_ints(acb);
+ 	/* clear all outbound posted Q */
+-	for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+-		readl(&reg->outbound_queueport);
++	arcmsr_done4_abort_postqueue(acb);
+ 	for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+ 		ccb = acb->pccb_pool[i];
+-		if ((ccb->startdone ==
ARCMSR_CCB_START) || +- (ccb->startdone == ARCMSR_CCB_ABORTED)) { ++ if (ccb->startdone == ARCMSR_CCB_START) { + ccb->startdone = ARCMSR_CCB_ABORTED; +- ccb->pcmd->result = DID_ABORT << 16; +- arcmsr_ccb_complete(ccb, 1); + } + } + /* enable all outbound interrupt */ + arcmsr_enable_outbound_ints(acb, intmask_org); + } +- atomic_set(&acb->ccboutstandingcount, 0); ++ + } + + static int arcmsr_bus_reset(struct scsi_cmnd *cmd) +@@ -1428,10 +1482,9 @@ + int i = 0; + + printk(KERN_NOTICE +- "arcmsr%d: abort device command of scsi id=%d lun=%d \n", ++ "arcmsr%d: abort device command of scsi id = %d lun = %d \n", + acb->host->host_no, cmd->device->id, cmd->device->lun); + acb->num_aborts++; +- + /* + ************************************************ + ** the all interrupt service routine is locked +@@ -1492,4 +1545,300 @@ + return buf; + } + ++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev) ++{ ++ struct Scsi_Host *host; ++ struct AdapterControlBlock *acb; ++ uint8_t bus, dev_fun; ++ int error; ++ ++ error = pci_enable_device(pdev); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; ++ pci_set_master(pdev); ++ ++ host = scsi_host_alloc(&arcmsr_scsi_host_template, sizeof \ ++(struct AdapterControlBlock)); ++ if (!host) ++ return PCI_ERS_RESULT_DISCONNECT; ++ acb = (struct AdapterControlBlock *)host->hostdata; ++ memset(acb, 0, sizeof (struct AdapterControlBlock)); ++ ++ error = pci_set_dma_mask(pdev, DMA_64BIT_MASK); ++ if (error) { ++ error = pci_set_dma_mask(pdev, DMA_32BIT_MASK); ++ if (error) { ++ printk(KERN_WARNING ++ "scsi%d: No suitable DMA mask available\n", ++ host->host_no); ++ return PCI_ERS_RESULT_DISCONNECT; ++ } ++ } ++ bus = pdev->bus->number; ++ dev_fun = pdev->devfn; ++ acb = (struct AdapterControlBlock *) host->hostdata; ++ memset(acb, 0, sizeof(struct AdapterControlBlock)); ++ acb->pdev = pdev; ++ acb->host = host; ++ host->max_sectors = ARCMSR_MAX_XFER_SECTORS; ++ host->max_lun = ARCMSR_MAX_TARGETLUN; ++ host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/ ++ host->max_cmd_len = 16; /*this is issue of 64bit LBA, over 2T byte*/ ++ host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES; ++ host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */ ++ host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN; ++ host->this_id = ARCMSR_SCSI_INITIATOR_ID; ++ host->unique_id = (bus << 8) | dev_fun; ++ host->irq = pdev->irq; ++ error = pci_request_regions(pdev, "arcmsr"); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; + ++ acb->pmu = ioremap(pci_resource_start(pdev, 0), ++ pci_resource_len(pdev, 0)); ++ if (!acb->pmu) { ++ printk(KERN_NOTICE "arcmsr%d: memory" ++ " mapping region fail \n", acb->host->host_no); ++ return PCI_ERS_RESULT_DISCONNECT; ++ } ++ acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED | ++ ACB_F_MESSAGE_RQBUFFER_CLEARED | ++ ACB_F_MESSAGE_WQBUFFER_READED); ++ acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER; ++ INIT_LIST_HEAD(&acb->ccb_free_list); ++ ++ error = arcmsr_alloc_ccb_pool(acb); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ error = request_irq(pdev->irq, arcmsr_do_interrupt, ++ IRQF_DISABLED | IRQF_SHARED, "arcmsr", acb); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ arcmsr_iop_init(acb); ++ if (strncmp(acb->firm_version, "V1.42", 5) >= 0) ++ host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B; ++ ++ pci_set_drvdata(pdev, host); ++ ++ error = scsi_add_host(host, &pdev->dev); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ error = arcmsr_alloc_sysfs_attr(acb); ++ if (error) ++ return PCI_ERS_RESULT_DISCONNECT; ++ ++ scsi_scan_host(host); ++ 
	return PCI_ERS_RESULT_RECOVERED;
++}
++
++static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host = pci_get_drvdata(pdev);
++	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++	/*clear and abort all outbound posted Q*/
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)){
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset
++			+ (flag_ccb << 5));
++		if (ccb){
++			if ((ccb->acb != acb)||(ccb->startdone !=
++				ARCMSR_CCB_START)){
++				printk(KERN_NOTICE "arcmsr%d: polling get"
++					" an illegal ccb command done ccb = '0x%p'"
++					" ccboutstandingcount = %d \n",
++					acb->host->host_no, ccb,
++					atomic_read(&acb->ccboutstandingcount));
++				continue;
++			}
++
++			id = ccb->pcmd->device->id;
++			lun = ccb->pcmd->device->lun;
++			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++				if (acb->devstate[id][lun] ==
++					ARECA_RAID_GONE)
++					acb->devstate[id][lun] =
++						ARECA_RAID_GOOD;
++				ccb->pcmd->result = DID_OK << 16;
++				arcmsr_ccb_complete(ccb, 1);
++			}
++			else {
++				switch(ccb->arcmsr_cdb.DeviceStatus) {
++				case ARCMSR_DEV_SELECT_TIMEOUT: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_NO_CONNECT << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_ABORTED:
++
++				case ARCMSR_DEV_INIT_FAIL: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_CHECK_CONDITION: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GOOD;
++					arcmsr_report_sense_info(ccb);
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				default:
++					printk(KERN_NOTICE
++						"arcmsr%d: scsi"
++						" id = %d lun = %d"
++						" polling and"
++						" getting command"
++						" error done"
++						" but got unknown"
++						" DeviceStatus = 0x%x \n"
++						, acb->host->host_no,
++						id, lun,
++						ccb->arcmsr_cdb.DeviceStatus);
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					break;
++				}
++			}
++			found = 1;
++		}
++	}
++	if (found){
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/*clear interrupt*/
++	}
++	return;
++}
++
++
++static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host = pci_get_drvdata(pdev);
++	struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++	/*clear and abort all outbound posted Q*/
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)){
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (ccb){
++			if ((ccb->acb != acb)||(ccb->startdone !=
++				ARCMSR_CCB_START)){
++				printk(KERN_NOTICE
++					"arcmsr%d: polling get an illegal ccb"
++					" command done ccb = '0x%p'"
++					" ccboutstandingcount = %d \n",
++					acb->host->host_no, ccb,
++					atomic_read(&acb->ccboutstandingcount));
++				continue;
++			}
++
++			id = ccb->pcmd->device->id;
++			lun = ccb->pcmd->device->lun;
++			if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++				if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++					acb->devstate[id][lun] = ARECA_RAID_GOOD;
++				ccb->pcmd->result = DID_OK << 16;
++				arcmsr_ccb_complete(ccb, 1);
++			}
++			else {
++				switch(ccb->arcmsr_cdb.DeviceStatus) {
++				case ARCMSR_DEV_SELECT_TIMEOUT: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_NO_CONNECT << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_ABORTED:
++
++				case ARCMSR_DEV_INIT_FAIL: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				case ARCMSR_DEV_CHECK_CONDITION: {
++					acb->devstate[id][lun] =
++						ARECA_RAID_GOOD;
++					arcmsr_report_sense_info(ccb);
++					arcmsr_ccb_complete(ccb, 1);
++					}
++					break;
++
++				default:
++					printk(KERN_NOTICE "arcmsr%d:"
++						" scsi id = %d lun = %d"
++						" polling and"
++						" getting command error done"
++						" but got unknown"
++						" DeviceStatus = 0x%x \n"
++						, acb->host->host_no,
++						id, lun, ccb->arcmsr_cdb.DeviceStatus);
++					acb->devstate[id][lun] =
++						ARECA_RAID_GONE;
++					ccb->pcmd->result =
++						DID_BAD_TARGET << 16;
++					arcmsr_ccb_complete(ccb, 1);
++					break;
++				}
++			}
++			found = 1;
++		}
++	}
++	if (found){
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/*clear interrupt*/
++	}
++	return;
++}
++
++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
++					pci_channel_state_t state)
++{
++	switch (state) {
++	case pci_channel_io_frozen:
++		arcmsr_pci_ers_need_reset_forepart(pdev);
++		return PCI_ERS_RESULT_NEED_RESET;
++	case pci_channel_io_perm_failure:
++		arcmsr_pci_ers_disconnect_forepart(pdev);
++		return PCI_ERS_RESULT_DISCONNECT;
++		break;
++	default:
++		return PCI_ERS_RESULT_NEED_RESET;
++	}
++}
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.c linux-2.6.22-591/drivers/scsi/bvme6000.c
+--- linux-2.6.22-570/drivers/scsi/bvme6000.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/bvme6000.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,76 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux.
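
The arcmsr ERS callbacks added above only take effect once they are referenced from a struct pci_error_handlers table, which the PCI core reaches through pci_driver->err_handler. That hookup is not visible in this excerpt; the sketch below shows the usual wiring, and the instance name arcmsr_pci_error_handlers plus the surrounding pci_driver fields are illustrative assumptions, not lines from this patch.

	/* sketch only: how the handlers above would typically be registered */
	static struct pci_error_handlers arcmsr_pci_error_handlers = {
		.error_detected	= arcmsr_pci_error_detected,	/* added above */
		.slot_reset	= arcmsr_pci_slot_reset,	/* added above */
	};

	static struct pci_driver arcmsr_pci_driver = {
		.name		= "arcmsr",
		/* .id_table, .probe, .remove as already defined by the driver */
		.err_handler	= &arcmsr_pci_error_handlers,	/* enables recovery */
	};
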
+- * +- * Based on work by Alan Hourihane +- */ +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include "scsi.h" +-#include +-#include "53c7xx.h" +-#include "bvme6000.h" +- +-#include +- +- +-int bvme6000_scsi_detect(struct scsi_host_template *tpnt) +-{ +- static unsigned char called = 0; +- int clock; +- long long options; +- +- if (called) +- return 0; +- if (!MACH_IS_BVME6000) +- return 0; +- +- tpnt->proc_name = "BVME6000"; +- +- options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT; +- +- clock = 40000000; /* 66MHz SCSI Clock */ +- +- ncr53c7xx_init(tpnt, 0, 710, (unsigned long)BVME_NCR53C710_BASE, +- 0, BVME_IRQ_SCSI, DMA_NONE, +- options, clock); +- called = 1; +- return 1; +-} +- +-static int bvme6000_scsi_release(struct Scsi_Host *shost) +-{ +- if (shost->irq) +- free_irq(shost->irq, NULL); +- if (shost->dma_channel != 0xff) +- free_dma(shost->dma_channel); +- if (shost->io_port && shost->n_io_port) +- release_region(shost->io_port, shost->n_io_port); +- scsi_unregister(shost); +- return 0; +-} +- +-static struct scsi_host_template driver_template = { +- .name = "BVME6000 NCR53c710 SCSI", +- .detect = bvme6000_scsi_detect, +- .release = bvme6000_scsi_release, +- .queuecommand = NCR53c7xx_queue_command, +- .abort = NCR53c7xx_abort, +- .reset = NCR53c7xx_reset, +- .can_queue = 24, +- .this_id = 7, +- .sg_tablesize = 63, +- .cmd_per_lun = 3, +- .use_clustering = DISABLE_CLUSTERING +-}; +- +- +-#include "scsi_module.c" +diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.h linux-2.6.22-591/drivers/scsi/bvme6000.h +--- linux-2.6.22-570/drivers/scsi/bvme6000.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/bvme6000.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,24 +0,0 @@ +-#ifndef BVME6000_SCSI_H +-#define BVME6000_SCSI_H +- +-#include +- +-int bvme6000_scsi_detect(struct scsi_host_template *); +-const char *NCR53c7x0_info(void); +-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); +-int NCR53c7xx_abort(Scsi_Cmnd *); +-int NCR53c7x0_release (struct Scsi_Host *); +-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); +-void NCR53c7x0_intr(int irq, void *dev_id); +- +-#ifndef CMD_PER_LUN +-#define CMD_PER_LUN 3 +-#endif +- +-#ifndef CAN_QUEUE +-#define CAN_QUEUE 24 +-#endif +- +-#include +- +-#endif /* BVME6000_SCSI_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c +--- linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/bvme6000_scsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,135 @@ ++/* ++ * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux. 
++ * ++ * Based on work by Alan Hourihane and Kars de Jong ++ * ++ * Rewritten to use 53c700.c by Richard Hirst ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "53c700.h" ++ ++MODULE_AUTHOR("Richard Hirst "); ++MODULE_DESCRIPTION("BVME6000 NCR53C710 driver"); ++MODULE_LICENSE("GPL"); ++ ++static struct scsi_host_template bvme6000_scsi_driver_template = { ++ .name = "BVME6000 NCR53c710 SCSI", ++ .proc_name = "BVME6000", ++ .this_id = 7, ++ .module = THIS_MODULE, ++}; ++ ++static struct platform_device *bvme6000_scsi_device; ++ ++static __devinit int ++bvme6000_probe(struct device *dev) ++{ ++ struct Scsi_Host * host = NULL; ++ struct NCR_700_Host_Parameters *hostdata; ++ ++ if (!MACH_IS_BVME6000) ++ goto out; ++ ++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); ++ if (hostdata == NULL) { ++ printk(KERN_ERR "bvme6000-scsi: " ++ "Failed to allocate host data\n"); ++ goto out; ++ } ++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); ++ ++ /* Fill in the required pieces of hostdata */ ++ hostdata->base = (void __iomem *)BVME_NCR53C710_BASE; ++ hostdata->clock = 40; /* XXX - depends on the CPU clock! */ ++ hostdata->chip710 = 1; ++ hostdata->dmode_extra = DMODE_FC2; ++ hostdata->dcntl_extra = EA_710; ++ hostdata->ctest7_extra = CTEST7_TT1; ++ ++ /* and register the chip */ ++ host = NCR_700_detect(&bvme6000_scsi_driver_template, hostdata, dev); ++ if (!host) { ++ printk(KERN_ERR "bvme6000-scsi: No host detected; " ++ "board configuration problem?\n"); ++ goto out_free; ++ } ++ host->base = BVME_NCR53C710_BASE; ++ host->this_id = 7; ++ host->irq = BVME_IRQ_SCSI; ++ if (request_irq(BVME_IRQ_SCSI, NCR_700_intr, 0, "bvme6000-scsi", ++ host)) { ++ printk(KERN_ERR "bvme6000-scsi: request_irq failed\n"); ++ goto out_put_host; ++ } ++ ++ scsi_scan_host(host); ++ ++ return 0; ++ ++ out_put_host: ++ scsi_host_put(host); ++ out_free: ++ kfree(hostdata); ++ out: ++ return -ENODEV; ++} ++ ++static __devexit int ++bvme6000_device_remove(struct device *dev) ++{ ++ struct Scsi_Host *host = dev_to_shost(dev); ++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host); ++ ++ scsi_remove_host(host); ++ NCR_700_release(host); ++ kfree(hostdata); ++ free_irq(host->irq, host); ++ ++ return 0; ++} ++ ++static struct device_driver bvme6000_scsi_driver = { ++ .name = "bvme6000-scsi", ++ .bus = &platform_bus_type, ++ .probe = bvme6000_probe, ++ .remove = __devexit_p(bvme6000_device_remove), ++}; ++ ++static int __init bvme6000_scsi_init(void) ++{ ++ int err; ++ ++ err = driver_register(&bvme6000_scsi_driver); ++ if (err) ++ return err; ++ ++ bvme6000_scsi_device = platform_device_register_simple("bvme6000-scsi", ++ -1, NULL, 0); ++ if (IS_ERR(bvme6000_scsi_device)) { ++ driver_unregister(&bvme6000_scsi_driver); ++ return PTR_ERR(bvme6000_scsi_device); ++ } ++ ++ return 0; ++} ++ ++static void __exit bvme6000_scsi_exit(void) ++{ ++ platform_device_unregister(bvme6000_scsi_device); ++ driver_unregister(&bvme6000_scsi_driver); ++} ++ ++module_init(bvme6000_scsi_init); ++module_exit(bvme6000_scsi_exit); +diff -Nurb linux-2.6.22-570/drivers/scsi/dpt_i2o.c linux-2.6.22-591/drivers/scsi/dpt_i2o.c +--- linux-2.6.22-570/drivers/scsi/dpt_i2o.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/dpt_i2o.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2078,12 +2078,13 @@ + u32 *lenptr; + int direction; + int scsidir; ++ int nseg; + u32 len; + u32 reqlen; + s32 rcode; + + memset(msg, 0 
, sizeof(msg)); +- len = cmd->request_bufflen; ++ len = scsi_bufflen(cmd); + direction = 0x00000000; + + scsidir = 0x00000000; // DATA NO XFER +@@ -2140,21 +2141,21 @@ + lenptr=mptr++; /* Remember me - fill in when we know */ + reqlen = 14; // SINGLE SGE + /* Now fill in the SGList and command */ +- if(cmd->use_sg) { +- struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer; +- int sg_count = pci_map_sg(pHba->pDev, sg, cmd->use_sg, +- cmd->sc_data_direction); + ++ nseg = scsi_dma_map(cmd); ++ BUG_ON(nseg < 0); ++ if (nseg) { ++ struct scatterlist *sg; + + len = 0; +- for(i = 0 ; i < sg_count; i++) { ++ scsi_for_each_sg(cmd, sg, nseg, i) { + *mptr++ = direction|0x10000000|sg_dma_len(sg); + len+=sg_dma_len(sg); + *mptr++ = sg_dma_address(sg); +- sg++; +- } + /* Make this an end of list */ +- mptr[-2] = direction|0xD0000000|sg_dma_len(sg-1); ++ if (i == nseg - 1) ++ mptr[-2] = direction|0xD0000000|sg_dma_len(sg); ++ } + reqlen = mptr - msg; + *lenptr = len; + +@@ -2163,16 +2164,8 @@ + len, cmd->underflow); + } + } else { +- *lenptr = len = cmd->request_bufflen; +- if(len == 0) { ++ *lenptr = len = 0; + reqlen = 12; +- } else { +- *mptr++ = 0xD0000000|direction|cmd->request_bufflen; +- *mptr++ = pci_map_single(pHba->pDev, +- cmd->request_buffer, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- } + } + + /* Stick the headers on */ +@@ -2232,7 +2225,7 @@ + hba_status = detailed_status >> 8; + + // calculate resid for sg +- cmd->resid = cmd->request_bufflen - readl(reply+5); ++ scsi_set_resid(cmd, scsi_bufflen(cmd) - readl(reply+5)); + + pHba = (adpt_hba*) cmd->device->host->hostdata[0]; + +diff -Nurb linux-2.6.22-570/drivers/scsi/eata.c linux-2.6.22-591/drivers/scsi/eata.c +--- linux-2.6.22-570/drivers/scsi/eata.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/eata.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1609,8 +1609,9 @@ + + static void map_dma(unsigned int i, struct hostdata *ha) + { +- unsigned int k, count, pci_dir; +- struct scatterlist *sgpnt; ++ unsigned int k, pci_dir; ++ int count; ++ struct scatterlist *sg; + struct mscp *cpp; + struct scsi_cmnd *SCpnt; + +@@ -1625,38 +1626,19 @@ + + cpp->sense_len = sizeof SCpnt->sense_buffer; + +- if (!SCpnt->use_sg) { +- +- /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */ +- if (!SCpnt->request_bufflen) +- pci_dir = PCI_DMA_BIDIRECTIONAL; +- +- if (SCpnt->request_buffer) +- cpp->data_address = H2DEV(pci_map_single(ha->pdev, +- SCpnt-> +- request_buffer, +- SCpnt-> +- request_bufflen, +- pci_dir)); +- +- cpp->data_len = H2DEV(SCpnt->request_bufflen); +- return; +- } +- +- sgpnt = (struct scatterlist *)SCpnt->request_buffer; +- count = pci_map_sg(ha->pdev, sgpnt, SCpnt->use_sg, pci_dir); +- +- for (k = 0; k < count; k++) { +- cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k])); +- cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k])); ++ count = scsi_dma_map(SCpnt); ++ BUG_ON(count < 0); ++ scsi_for_each_sg(SCpnt, sg, count, k) { ++ cpp->sglist[k].address = H2DEV(sg_dma_address(sg)); ++ cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg)); + } + + cpp->sg = 1; + cpp->data_address = H2DEV(pci_map_single(ha->pdev, cpp->sglist, +- SCpnt->use_sg * ++ scsi_sg_count(SCpnt) * + sizeof(struct sg_list), + pci_dir)); +- cpp->data_len = H2DEV((SCpnt->use_sg * sizeof(struct sg_list))); ++ cpp->data_len = H2DEV((scsi_sg_count(SCpnt) * sizeof(struct sg_list))); + } + + static void unmap_dma(unsigned int i, struct hostdata *ha) +@@ -1673,9 +1655,7 @@ + pci_unmap_single(ha->pdev, 
DEV2H(cpp->sense_addr), + DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); + +- if (SCpnt->use_sg) +- pci_unmap_sg(ha->pdev, SCpnt->request_buffer, SCpnt->use_sg, +- pci_dir); ++ scsi_dma_unmap(SCpnt); + + if (!DEV2H(cpp->data_len)) + pci_dir = PCI_DMA_BIDIRECTIONAL; +@@ -1700,9 +1680,9 @@ + DEV2H(cpp->sense_len), + PCI_DMA_FROMDEVICE); + +- if (SCpnt->use_sg) +- pci_dma_sync_sg_for_cpu(ha->pdev, SCpnt->request_buffer, +- SCpnt->use_sg, pci_dir); ++ if (scsi_sg_count(SCpnt)) ++ pci_dma_sync_sg_for_cpu(ha->pdev, scsi_sglist(SCpnt), ++ scsi_sg_count(SCpnt), pci_dir); + + if (!DEV2H(cpp->data_len)) + pci_dir = PCI_DMA_BIDIRECTIONAL; +diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.c linux-2.6.22-591/drivers/scsi/esp_scsi.c +--- linux-2.6.22-570/drivers/scsi/esp_scsi.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/esp_scsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -324,17 +324,14 @@ + static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd) + { + struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd); +- struct scatterlist *sg = cmd->request_buffer; ++ struct scatterlist *sg = scsi_sglist(cmd); + int dir = cmd->sc_data_direction; + int total, i; + + if (dir == DMA_NONE) + return; + +- BUG_ON(cmd->use_sg == 0); +- +- spriv->u.num_sg = esp->ops->map_sg(esp, sg, +- cmd->use_sg, dir); ++ spriv->u.num_sg = esp->ops->map_sg(esp, sg, scsi_sg_count(cmd), dir); + spriv->cur_residue = sg_dma_len(sg); + spriv->cur_sg = sg; + +@@ -407,8 +404,7 @@ + if (dir == DMA_NONE) + return; + +- esp->ops->unmap_sg(esp, cmd->request_buffer, +- spriv->u.num_sg, dir); ++ esp->ops->unmap_sg(esp, scsi_sglist(cmd), spriv->u.num_sg, dir); + } + + static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent) +@@ -921,7 +917,7 @@ + static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) + { + struct scsi_device *dev = cmd->device; +- struct esp *esp = host_to_esp(dev->host); ++ struct esp *esp = shost_priv(dev->host); + struct esp_cmd_priv *spriv; + struct esp_cmd_entry *ent; + +@@ -2358,7 +2354,7 @@ + + static int esp_slave_alloc(struct scsi_device *dev) + { +- struct esp *esp = host_to_esp(dev->host); ++ struct esp *esp = shost_priv(dev->host); + struct esp_target_data *tp = &esp->target[dev->id]; + struct esp_lun_data *lp; + +@@ -2382,7 +2378,7 @@ + + static int esp_slave_configure(struct scsi_device *dev) + { +- struct esp *esp = host_to_esp(dev->host); ++ struct esp *esp = shost_priv(dev->host); + struct esp_target_data *tp = &esp->target[dev->id]; + int goal_tags, queue_depth; + +@@ -2424,7 +2420,7 @@ + + static int esp_eh_abort_handler(struct scsi_cmnd *cmd) + { +- struct esp *esp = host_to_esp(cmd->device->host); ++ struct esp *esp = shost_priv(cmd->device->host); + struct esp_cmd_entry *ent, *tmp; + struct completion eh_done; + unsigned long flags; +@@ -2540,7 +2536,7 @@ + + static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd) + { +- struct esp *esp = host_to_esp(cmd->device->host); ++ struct esp *esp = shost_priv(cmd->device->host); + struct completion eh_reset; + unsigned long flags; + +@@ -2576,7 +2572,7 @@ + /* All bets are off, reset the entire device. 
*/ + static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd) + { +- struct esp *esp = host_to_esp(cmd->device->host); ++ struct esp *esp = shost_priv(cmd->device->host); + unsigned long flags; + + spin_lock_irqsave(esp->host->host_lock, flags); +@@ -2616,7 +2612,7 @@ + + static void esp_get_signalling(struct Scsi_Host *host) + { +- struct esp *esp = host_to_esp(host); ++ struct esp *esp = shost_priv(host); + enum spi_signal_type type; + + if (esp->flags & ESP_FLAG_DIFFERENTIAL) +@@ -2630,7 +2626,7 @@ + static void esp_set_offset(struct scsi_target *target, int offset) + { + struct Scsi_Host *host = dev_to_shost(target->dev.parent); +- struct esp *esp = host_to_esp(host); ++ struct esp *esp = shost_priv(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_offset = offset; +@@ -2640,7 +2636,7 @@ + static void esp_set_period(struct scsi_target *target, int period) + { + struct Scsi_Host *host = dev_to_shost(target->dev.parent); +- struct esp *esp = host_to_esp(host); ++ struct esp *esp = shost_priv(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_period = period; +@@ -2650,7 +2646,7 @@ + static void esp_set_width(struct scsi_target *target, int width) + { + struct Scsi_Host *host = dev_to_shost(target->dev.parent); +- struct esp *esp = host_to_esp(host); ++ struct esp *esp = shost_priv(host); + struct esp_target_data *tp = &esp->target[target->id]; + + tp->nego_goal_width = (width ? 1 : 0); +diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.h linux-2.6.22-591/drivers/scsi/esp_scsi.h +--- linux-2.6.22-570/drivers/scsi/esp_scsi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/esp_scsi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -517,8 +517,6 @@ + struct sbus_dma *dma; + }; + +-#define host_to_esp(host) ((struct esp *)(host)->hostdata) +- + /* A front-end driver for the ESP chip should do the following in + * it's device probe routine: + * 1) Allocate the host and private area using scsi_host_alloc() +diff -Nurb linux-2.6.22-570/drivers/scsi/fdomain.c linux-2.6.22-591/drivers/scsi/fdomain.c +--- linux-2.6.22-570/drivers/scsi/fdomain.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/fdomain.c 2007-12-21 15:36:12.000000000 -0500 +@@ -410,6 +410,8 @@ + static char * fdomain = NULL; + module_param(fdomain, charp, 0); + ++#ifndef PCMCIA ++ + static unsigned long addresses[] = { + 0xc8000, + 0xca000, +@@ -426,6 +428,8 @@ + + static unsigned short ints[] = { 3, 5, 10, 11, 12, 14, 15, 0 }; + ++#endif /* !PCMCIA */ ++ + /* + + READ THIS BEFORE YOU ADD A SIGNATURE! +@@ -458,6 +462,8 @@ + + */ + ++#ifndef PCMCIA ++ + static struct signature { + const char *signature; + int sig_offset; +@@ -503,6 +509,8 @@ + + #define SIGNATURE_COUNT ARRAY_SIZE(signatures) + ++#endif /* !PCMCIA */ ++ + static void print_banner( struct Scsi_Host *shpnt ) + { + if (!shpnt) return; /* This won't ever happen */ +@@ -633,6 +641,8 @@ + return 0; + } + ++#ifndef PCMCIA ++ + /* fdomain_get_irq assumes that we have a valid MCA ID for a + TMC-1660/TMC-1680 Future Domain board. Now, check to be sure the + bios_base matches these ports. 
If someone was unlucky enough to have +@@ -667,7 +677,6 @@ + + static int fdomain_isa_detect( int *irq, int *iobase ) + { +-#ifndef PCMCIA + int i, j; + int base = 0xdeadbeef; + int flag = 0; +@@ -786,11 +795,22 @@ + *iobase = base; + + return 1; /* success */ +-#else ++} ++ ++#else /* PCMCIA */ ++ ++static int fdomain_isa_detect( int *irq, int *iobase ) ++{ ++ if (irq) ++ *irq = 0; ++ if (iobase) ++ *iobase = 0; + return 0; +-#endif + } + ++#endif /* !PCMCIA */ ++ ++ + /* PCI detection function: int fdomain_pci_bios_detect(int* irq, int* + iobase) This function gets the Interrupt Level and I/O base address from + the PCI configuration registers. */ +@@ -1345,16 +1365,15 @@ + + #if ERRORS_ONLY + if (current_SC->cmnd[0] == REQUEST_SENSE && !current_SC->SCp.Status) { +- if ((unsigned char)(*((char *)current_SC->request_buffer+2)) & 0x0f) { ++ char *buf = scsi_sglist(current_SC); ++ if ((unsigned char)(*(buf + 2)) & 0x0f) { + unsigned char key; + unsigned char code; + unsigned char qualifier; + +- key = (unsigned char)(*((char *)current_SC->request_buffer + 2)) +- & 0x0f; +- code = (unsigned char)(*((char *)current_SC->request_buffer + 12)); +- qualifier = (unsigned char)(*((char *)current_SC->request_buffer +- + 13)); ++ key = (unsigned char)(*(buf + 2)) & 0x0f; ++ code = (unsigned char)(*(buf + 12)); ++ qualifier = (unsigned char)(*(buf + 13)); + + if (key != UNIT_ATTENTION + && !(key == NOT_READY +@@ -1405,8 +1424,8 @@ + printk( "queue: target = %d cmnd = 0x%02x pieces = %d size = %u\n", + SCpnt->target, + *(unsigned char *)SCpnt->cmnd, +- SCpnt->use_sg, +- SCpnt->request_bufflen ); ++ scsi_sg_count(SCpnt), ++ scsi_bufflen(SCpnt)); + #endif + + fdomain_make_bus_idle(); +@@ -1416,20 +1435,19 @@ + + /* Initialize static data */ + +- if (current_SC->use_sg) { +- current_SC->SCp.buffer = +- (struct scatterlist *)current_SC->request_buffer; +- current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset; ++ if (scsi_sg_count(current_SC)) { ++ current_SC->SCp.buffer = scsi_sglist(current_SC); ++ current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page) ++ + current_SC->SCp.buffer->offset; + current_SC->SCp.this_residual = current_SC->SCp.buffer->length; +- current_SC->SCp.buffers_residual = current_SC->use_sg - 1; ++ current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1; + } else { +- current_SC->SCp.ptr = (char *)current_SC->request_buffer; +- current_SC->SCp.this_residual = current_SC->request_bufflen; ++ current_SC->SCp.ptr = 0; ++ current_SC->SCp.this_residual = 0; + current_SC->SCp.buffer = NULL; + current_SC->SCp.buffers_residual = 0; + } + +- + current_SC->SCp.Status = 0; + current_SC->SCp.Message = 0; + current_SC->SCp.have_data_in = 0; +@@ -1472,8 +1490,8 @@ + SCpnt->SCp.phase, + SCpnt->device->id, + *(unsigned char *)SCpnt->cmnd, +- SCpnt->use_sg, +- SCpnt->request_bufflen ); ++ scsi_sg_count(SCpnt), ++ scsi_bufflen(SCpnt)); + printk( "sent_command = %d, have_data_in = %d, timeout = %d\n", + SCpnt->SCp.sent_command, + SCpnt->SCp.have_data_in, +diff -Nurb linux-2.6.22-570/drivers/scsi/gdth.c linux-2.6.22-591/drivers/scsi/gdth.c +--- linux-2.6.22-570/drivers/scsi/gdth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/gdth.c 2007-12-21 15:36:12.000000000 -0500 +@@ -876,7 +876,7 @@ + /* Vortex only makes RAID controllers. + * We do not really want to specify all 550 ids here, so wildcard match. 
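
The dpt_i2o, eata, esp, and fdomain hunks above all apply the same mechanical conversion: direct use of cmd->use_sg, cmd->request_buffer, and cmd->request_bufflen is replaced by the scsi_cmnd accessor helpers, under which every request is a scatterlist and the two-path (sg versus single-buffer) code collapses into one. A condensed sketch of the converted pattern follows; example_build_sg is an illustrative name and the descriptor-programming body is hardware-specific, but the helpers are the real midlayer API used throughout this patch.

	/* sketch of the new-style mapping path; replaces the old
	 * if (cmd->use_sg) { pci_map_sg(...) } else { pci_map_single(...) }
	 * branching that the hunks above delete */
	static int example_build_sg(struct scsi_cmnd *cmd)
	{
		struct scatterlist *sg;
		int i, nseg;

		nseg = scsi_dma_map(cmd);  /* <0 = mapping failure, 0 = no data */
		BUG_ON(nseg < 0);          /* the converted drivers assert, as above */
		scsi_for_each_sg(cmd, sg, nseg, i) {
			/* program one descriptor from sg_dma_address(sg)
			 * and sg_dma_len(sg) */
		}
		return nseg;
	}

	/* completion side: scsi_dma_unmap(cmd); scsi_bufflen(cmd) gives the
	 * total transfer length and scsi_set_resid() reports the residue. */
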
+ */ +-static struct pci_device_id gdthtable[] __attribute_used__ = { ++static struct pci_device_id gdthtable[] __maybe_unused = { + {PCI_VENDOR_ID_VORTEX,PCI_ANY_ID,PCI_ANY_ID, PCI_ANY_ID}, + {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC,PCI_ANY_ID,PCI_ANY_ID}, + {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC_XSCALE,PCI_ANY_ID,PCI_ANY_ID}, +@@ -1955,7 +1955,7 @@ + for (j = 0; j < 12; ++j) + rtc[j] = CMOS_READ(j); + } while (rtc[0] != CMOS_READ(0)); +- spin_lock_irqrestore(&rtc_lock, flags); ++ spin_unlock_irqrestore(&rtc_lock, flags); + TRACE2(("gdth_search_drives(): RTC: %x/%x/%x\n",*(ulong32 *)&rtc[0], + *(ulong32 *)&rtc[4], *(ulong32 *)&rtc[8])); + /* 3. send to controller firmware */ +diff -Nurb linux-2.6.22-570/drivers/scsi/hptiop.c linux-2.6.22-591/drivers/scsi/hptiop.c +--- linux-2.6.22-570/drivers/scsi/hptiop.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/hptiop.c 2007-12-21 15:36:12.000000000 -0500 +@@ -339,20 +339,8 @@ + + scp = hba->reqs[tag].scp; + +- if (HPT_SCP(scp)->mapped) { +- if (scp->use_sg) +- pci_unmap_sg(hba->pcidev, +- (struct scatterlist *)scp->request_buffer, +- scp->use_sg, +- scp->sc_data_direction +- ); +- else +- pci_unmap_single(hba->pcidev, +- HPT_SCP(scp)->dma_handle, +- scp->request_bufflen, +- scp->sc_data_direction +- ); +- } ++ if (HPT_SCP(scp)->mapped) ++ scsi_dma_unmap(scp); + + switch (le32_to_cpu(req->header.result)) { + case IOP_RESULT_SUCCESS: +@@ -449,43 +437,26 @@ + { + struct Scsi_Host *host = scp->device->host; + struct hptiop_hba *hba = (struct hptiop_hba *)host->hostdata; +- struct scatterlist *sglist = (struct scatterlist *)scp->request_buffer; ++ struct scatterlist *sg; ++ int idx, nseg; + +- /* +- * though we'll not get non-use_sg fields anymore, +- * keep use_sg checking anyway +- */ +- if (scp->use_sg) { +- int idx; +- +- HPT_SCP(scp)->sgcnt = pci_map_sg(hba->pcidev, +- sglist, scp->use_sg, +- scp->sc_data_direction); ++ nseg = scsi_dma_map(scp); ++ BUG_ON(nseg < 0); ++ if (!nseg) ++ return 0; ++ ++ HPT_SCP(scp)->sgcnt = nseg; + HPT_SCP(scp)->mapped = 1; ++ + BUG_ON(HPT_SCP(scp)->sgcnt > hba->max_sg_descriptors); + +- for (idx = 0; idx < HPT_SCP(scp)->sgcnt; idx++) { +- psg[idx].pci_address = +- cpu_to_le64(sg_dma_address(&sglist[idx])); +- psg[idx].size = cpu_to_le32(sg_dma_len(&sglist[idx])); ++ scsi_for_each_sg(scp, sg, HPT_SCP(scp)->sgcnt, idx) { ++ psg[idx].pci_address = cpu_to_le64(sg_dma_address(sg)); ++ psg[idx].size = cpu_to_le32(sg_dma_len(sg)); + psg[idx].eot = (idx == HPT_SCP(scp)->sgcnt - 1) ? 
+ cpu_to_le32(1) : 0; + } +- + return HPT_SCP(scp)->sgcnt; +- } else { +- HPT_SCP(scp)->dma_handle = pci_map_single( +- hba->pcidev, +- scp->request_buffer, +- scp->request_bufflen, +- scp->sc_data_direction +- ); +- HPT_SCP(scp)->mapped = 1; +- psg->pci_address = cpu_to_le64(HPT_SCP(scp)->dma_handle); +- psg->size = cpu_to_le32(scp->request_bufflen); +- psg->eot = cpu_to_le32(1); +- return 1; +- } + } + + static int hptiop_queuecommand(struct scsi_cmnd *scp, +@@ -530,9 +501,8 @@ + req = (struct hpt_iop_request_scsi_command *)_req->req_virt; + + /* build S/G table */ +- if (scp->request_bufflen) + sg_count = hptiop_buildsgl(scp, req->sg_list); +- else ++ if (!sg_count) + HPT_SCP(scp)->mapped = 0; + + req->header.flags = cpu_to_le32(IOP_REQUEST_FLAG_OUTPUT_CONTEXT); +@@ -541,7 +511,7 @@ + req->header.context = cpu_to_le32(IOPMU_QUEUE_ADDR_HOST_BIT | + (u32)_req->index); + req->header.context_hi32 = 0; +- req->dataxfer_length = cpu_to_le32(scp->request_bufflen); ++ req->dataxfer_length = cpu_to_le32(scsi_bufflen(scp)); + req->channel = scp->device->channel; + req->target = scp->device->id; + req->lun = scp->device->lun; +diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.c linux-2.6.22-591/drivers/scsi/ibmmca.c +--- linux-2.6.22-570/drivers/scsi/ibmmca.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ibmmca.c 2007-12-21 15:36:12.000000000 -0500 +@@ -31,14 +31,21 @@ + #include + #include + #include +-#include + + #include + #include + + #include "scsi.h" + #include +-#include "ibmmca.h" ++ ++/* Common forward declarations for all Linux-versions: */ ++static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *)); ++static int ibmmca_abort (Scsi_Cmnd *); ++static int ibmmca_host_reset (Scsi_Cmnd *); ++static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *); ++static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout); ++ ++ + + /* current version of this driver-source: */ + #define IBMMCA_SCSI_DRIVER_VERSION "4.0b-ac" +@@ -65,11 +72,11 @@ + #define IM_DEBUG_CMD_DEVICE TYPE_TAPE + + /* relative addresses of hardware registers on a subsystem */ +-#define IM_CMD_REG(hi) (hosts[(hi)]->io_port) /*Command Interface, (4 bytes long) */ +-#define IM_ATTN_REG(hi) (hosts[(hi)]->io_port+4) /*Attention (1 byte) */ +-#define IM_CTR_REG(hi) (hosts[(hi)]->io_port+5) /*Basic Control (1 byte) */ +-#define IM_INTR_REG(hi) (hosts[(hi)]->io_port+6) /*Interrupt Status (1 byte, r/o) */ +-#define IM_STAT_REG(hi) (hosts[(hi)]->io_port+7) /*Basic Status (1 byte, read only) */ ++#define IM_CMD_REG(h) ((h)->io_port) /*Command Interface, (4 bytes long) */ ++#define IM_ATTN_REG(h) ((h)->io_port+4) /*Attention (1 byte) */ ++#define IM_CTR_REG(h) ((h)->io_port+5) /*Basic Control (1 byte) */ ++#define IM_INTR_REG(h) ((h)->io_port+6) /*Interrupt Status (1 byte, r/o) */ ++#define IM_STAT_REG(h) ((h)->io_port+7) /*Basic Status (1 byte, read only) */ + + /* basic I/O-port of first adapter */ + #define IM_IO_PORT 0x3540 +@@ -266,30 +273,36 @@ + if ((display_mode & LED_ACTIVITY)||(!display_mode)) \ + outb(inb(PS2_SYS_CTR) & 0x3f, PS2_SYS_CTR); } + +-/*list of supported subsystems */ +-struct subsys_list_struct { +- unsigned short mca_id; +- char *description; +-}; +- + /* types of different supported hardware that goes to hostdata special */ + #define IBM_SCSI2_FW 0 + #define IBM_7568_WCACHE 1 + #define IBM_EXP_UNIT 2 + #define IBM_SCSI_WCACHE 3 + #define IBM_SCSI 4 ++#define IBM_INTEGSCSI 5 + + /* other 
special flags for hostdata structure */ + #define FORCED_DETECTION 100 + #define INTEGRATED_SCSI 101 + + /* List of possible IBM-SCSI-adapters */ +-static struct subsys_list_struct subsys_list[] = { +- {0x8efc, "IBM SCSI-2 F/W Adapter"}, /* special = 0 */ +- {0x8efd, "IBM 7568 Industrial Computer SCSI Adapter w/Cache"}, /* special = 1 */ +- {0x8ef8, "IBM Expansion Unit SCSI Controller"}, /* special = 2 */ +- {0x8eff, "IBM SCSI Adapter w/Cache"}, /* special = 3 */ +- {0x8efe, "IBM SCSI Adapter"}, /* special = 4 */ ++static short ibmmca_id_table[] = { ++ 0x8efc, ++ 0x8efd, ++ 0x8ef8, ++ 0x8eff, ++ 0x8efe, ++ /* No entry for integrated SCSI, that's part of the register */ ++ 0 ++}; ++ ++static const char *ibmmca_description[] = { ++ "IBM SCSI-2 F/W Adapter", /* special = 0 */ ++ "IBM 7568 Industrial Computer SCSI Adapter w/Cache", /* special = 1 */ ++ "IBM Expansion Unit SCSI Controller", /* special = 2 */ ++ "IBM SCSI Adapter w/Cache", /* special = 3 */ ++ "IBM SCSI Adapter", /* special = 4 */ ++ "IBM Integrated SCSI Controller", /* special = 5 */ + }; + + /* Max number of logical devices (can be up from 0 to 14). 15 is the address +@@ -375,30 +388,30 @@ + }; + + /* macros to access host data structure */ +-#define subsystem_pun(hi) (hosts[(hi)]->this_id) +-#define subsystem_maxid(hi) (hosts[(hi)]->max_id) +-#define ld(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_ld) +-#define get_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_ldn) +-#define get_scsi(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_scsi) +-#define local_checking_phase_flag(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_local_checking_phase_flag) +-#define got_interrupt(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_got_interrupt) +-#define stat_result(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_stat_result) +-#define reset_status(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_reset_status) +-#define last_scsi_command(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_command) +-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type) +-#define last_scsi_blockcount(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_blockcount) +-#define last_scsi_logical_block(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_logical_block) +-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type) +-#define next_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_next_ldn) +-#define IBM_DS(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_IBM_DS) +-#define special(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_special) +-#define subsystem_connector_size(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_connector_size) +-#define adapter_speed(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_adapter_speed) +-#define pos2(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[2]) +-#define pos3(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[3]) +-#define pos4(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[4]) +-#define pos5(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[5]) +-#define pos6(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[6]) ++#define subsystem_pun(h) ((h)->this_id) ++#define subsystem_maxid(h) ((h)->max_id) ++#define ld(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_ld) ++#define get_ldn(h) 
(((struct ibmmca_hostdata *) (h)->hostdata)->_get_ldn) ++#define get_scsi(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_scsi) ++#define local_checking_phase_flag(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_local_checking_phase_flag) ++#define got_interrupt(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_got_interrupt) ++#define stat_result(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_stat_result) ++#define reset_status(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_reset_status) ++#define last_scsi_command(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_command) ++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type) ++#define last_scsi_blockcount(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_blockcount) ++#define last_scsi_logical_block(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_logical_block) ++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type) ++#define next_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_next_ldn) ++#define IBM_DS(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_IBM_DS) ++#define special(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_special) ++#define subsystem_connector_size(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_connector_size) ++#define adapter_speed(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_adapter_speed) ++#define pos2(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[2]) ++#define pos3(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[3]) ++#define pos4(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[4]) ++#define pos5(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[5]) ++#define pos6(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[6]) + + /* Define a arbitrary number as subsystem-marker-type. This number is, as + described in the ANSI-SCSI-standard, not occupied by other device-types. */ +@@ -459,11 +472,6 @@ + /*counter of concurrent disk read/writes, to turn on/off disk led */ + static int disk_rw_in_progress = 0; + +-/* host information */ +-static int found = 0; +-static struct Scsi_Host *hosts[IM_MAX_HOSTS + 1] = { +- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +-}; + static unsigned int pos[8]; /* whole pos register-line for diagnosis */ + /* Taking into account the additions, made by ZP Gu. 
+ * This selects now the preset value from the configfile and +@@ -474,70 +482,68 @@ + static char ibm_ansi_order = 0; + #endif + +-static void issue_cmd(int, unsigned long, unsigned char); ++static void issue_cmd(struct Scsi_Host *, unsigned long, unsigned char); + static void internal_done(Scsi_Cmnd * cmd); +-static void check_devices(int, int); +-static int immediate_assign(int, unsigned int, unsigned int, unsigned int, unsigned int); +-static int immediate_feature(int, unsigned int, unsigned int); ++static void check_devices(struct Scsi_Host *, int); ++static int immediate_assign(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, unsigned int); ++static int immediate_feature(struct Scsi_Host *, unsigned int, unsigned int); + #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET +-static int immediate_reset(int, unsigned int); ++static int immediate_reset(struct Scsi_Host *, unsigned int); + #endif +-static int device_inquiry(int, int); +-static int read_capacity(int, int); +-static int get_pos_info(int); ++static int device_inquiry(struct Scsi_Host *, int); ++static int read_capacity(struct Scsi_Host *, int); ++static int get_pos_info(struct Scsi_Host *); + static char *ti_p(int); + static char *ti_l(int); + static char *ibmrate(unsigned int, int); + static int probe_display(int); +-static int probe_bus_mode(int); +-static int device_exists(int, int, int *, int *); +-static struct Scsi_Host *ibmmca_register(struct scsi_host_template *, int, int, int, char *); ++static int probe_bus_mode(struct Scsi_Host *); ++static int device_exists(struct Scsi_Host *, int, int *, int *); + static int option_setup(char *); + /* local functions needed for proc_info */ +-static int ldn_access_load(int, int); +-static int ldn_access_total_read_write(int); ++static int ldn_access_load(struct Scsi_Host *, int); ++static int ldn_access_total_read_write(struct Scsi_Host *); + + static irqreturn_t interrupt_handler(int irq, void *dev_id) + { +- int host_index, ihost_index; + unsigned int intr_reg; + unsigned int cmd_result; + unsigned int ldn; ++ unsigned long flags; + Scsi_Cmnd *cmd; + int lastSCSI; +- struct Scsi_Host *dev = dev_id; ++ struct device *dev = dev_id; ++ struct Scsi_Host *shpnt = dev_get_drvdata(dev); ++ ++ spin_lock_irqsave(shpnt->host_lock, flags); + +- spin_lock(dev->host_lock); +- /* search for one adapter-response on shared interrupt */ +- for (host_index = 0; hosts[host_index] && !(inb(IM_STAT_REG(host_index)) & IM_INTR_REQUEST); host_index++); +- /* return if some other device on this IRQ caused the interrupt */ +- if (!hosts[host_index]) { +- spin_unlock(dev->host_lock); ++ if(!(inb(IM_STAT_REG(shpnt)) & IM_INTR_REQUEST)) { ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_NONE; + } + + /* the reset-function already did all the job, even ints got + renabled on the subsystem, so just return */ +- if ((reset_status(host_index) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(host_index) == IM_RESET_FINISHED_OK_NO_INT)) { +- reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS; +- spin_unlock(dev->host_lock); ++ if ((reset_status(shpnt) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(shpnt) == IM_RESET_FINISHED_OK_NO_INT)) { ++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS; ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } + + /*must wait for attention reg not busy, then send EOI to subsystem */ + while (1) { +- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) ++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) + break; + cpu_relax(); + } +- ihost_index 
= host_index; ++ + /*get command result and logical device */ +- intr_reg = (unsigned char) (inb(IM_INTR_REG(ihost_index))); ++ intr_reg = (unsigned char) (inb(IM_INTR_REG(shpnt))); + cmd_result = intr_reg & 0xf0; + ldn = intr_reg & 0x0f; + /* get the last_scsi_command here */ +- lastSCSI = last_scsi_command(ihost_index)[ldn]; +- outb(IM_EOI | ldn, IM_ATTN_REG(ihost_index)); ++ lastSCSI = last_scsi_command(shpnt)[ldn]; ++ outb(IM_EOI | ldn, IM_ATTN_REG(shpnt)); + + /*these should never happen (hw fails, or a local programming bug) */ + if (!global_command_error_excuse) { +@@ -547,38 +553,38 @@ + case IM_SOFTWARE_SEQUENCING_ERROR: + case IM_CMD_ERROR: + printk(KERN_ERR "IBM MCA SCSI: Fatal Subsystem ERROR!\n"); +- printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(ihost_index)[ldn].scb.enable); +- if (ld(ihost_index)[ldn].cmd) +- printk("%ld/%ld,", (long) (ld(ihost_index)[ldn].cmd->request_bufflen), (long) (ld(ihost_index)[ldn].scb.sys_buf_length)); ++ printk(KERN_ERR " Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(shpnt)[ldn].scb.enable); ++ if (ld(shpnt)[ldn].cmd) ++ printk("%ld/%ld,", (long) (scsi_bufflen(ld(shpnt)[ldn].cmd)), (long) (ld(shpnt)[ldn].scb.sys_buf_length)); + else + printk("none,"); +- if (ld(ihost_index)[ldn].cmd) +- printk("Blocksize=%d", ld(ihost_index)[ldn].scb.u2.blk.length); ++ if (ld(shpnt)[ldn].cmd) ++ printk("Blocksize=%d", ld(shpnt)[ldn].scb.u2.blk.length); + else + printk("Blocksize=none"); +- printk(", host=0x%x, ldn=0x%x\n", ihost_index, ldn); +- if (ld(ihost_index)[ldn].cmd) { +- printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u2.blk.count); +- printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u1.log_blk_adr); ++ printk(", host=%p, ldn=0x%x\n", shpnt, ldn); ++ if (ld(shpnt)[ldn].cmd) { ++ printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(shpnt)[ldn], ld(shpnt)[ldn].scb.u2.blk.count); ++ printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(shpnt)[ldn], ld(shpnt)[ldn].scb.u1.log_blk_adr); + } + printk(KERN_ERR "Reason given: %s\n", (cmd_result == IM_ADAPTER_HW_FAILURE) ? "HARDWARE FAILURE" : (cmd_result == IM_SOFTWARE_SEQUENCING_ERROR) ? "SOFTWARE SEQUENCING ERROR" : (cmd_result == IM_CMD_ERROR) ? 
"COMMAND ERROR" : "UNKNOWN"); + /* if errors appear, enter this section to give detailed info */ + printk(KERN_ERR "IBM MCA SCSI: Subsystem Error-Status follows:\n"); +- printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(ihost_index)[ldn]); +- printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(ihost_index))); +- printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(ihost_index))); ++ printk(KERN_ERR " Command Type................: %x\n", last_scsi_type(shpnt)[ldn]); ++ printk(KERN_ERR " Attention Register..........: %x\n", inb(IM_ATTN_REG(shpnt))); ++ printk(KERN_ERR " Basic Control Register......: %x\n", inb(IM_CTR_REG(shpnt))); + printk(KERN_ERR " Interrupt Status Register...: %x\n", intr_reg); +- printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(ihost_index))); +- if ((last_scsi_type(ihost_index)[ldn] == IM_SCB) || (last_scsi_type(ihost_index)[ldn] == IM_LONG_SCB)) { +- printk(KERN_ERR " SCB-Command.................: %x\n", ld(ihost_index)[ldn].scb.command); +- printk(KERN_ERR " SCB-Enable..................: %x\n", ld(ihost_index)[ldn].scb.enable); +- printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(ihost_index)[ldn].scb.u1.log_blk_adr); +- printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_adr); +- printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_length); +- printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(ihost_index)[ldn].scb.tsb_adr); +- printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(ihost_index)[ldn].scb.scb_chain_adr); +- printk(KERN_ERR " SCB-block count.............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.count); +- printk(KERN_ERR " SCB-block length............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.length); ++ printk(KERN_ERR " Basic Status Register.......: %x\n", inb(IM_STAT_REG(shpnt))); ++ if ((last_scsi_type(shpnt)[ldn] == IM_SCB) || (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB)) { ++ printk(KERN_ERR " SCB-Command.................: %x\n", ld(shpnt)[ldn].scb.command); ++ printk(KERN_ERR " SCB-Enable..................: %x\n", ld(shpnt)[ldn].scb.enable); ++ printk(KERN_ERR " SCB-logical block address...: %lx\n", ld(shpnt)[ldn].scb.u1.log_blk_adr); ++ printk(KERN_ERR " SCB-system buffer address...: %lx\n", ld(shpnt)[ldn].scb.sys_buf_adr); ++ printk(KERN_ERR " SCB-system buffer length....: %lx\n", ld(shpnt)[ldn].scb.sys_buf_length); ++ printk(KERN_ERR " SCB-tsb address.............: %lx\n", ld(shpnt)[ldn].scb.tsb_adr); ++ printk(KERN_ERR " SCB-Chain address...........: %lx\n", ld(shpnt)[ldn].scb.scb_chain_adr); ++ printk(KERN_ERR " SCB-block count.............: %x\n", ld(shpnt)[ldn].scb.u2.blk.count); ++ printk(KERN_ERR " SCB-block length............: %x\n", ld(shpnt)[ldn].scb.u2.blk.length); + } + printk(KERN_ERR " Send this report to the maintainer.\n"); + panic("IBM MCA SCSI: Fatal error message from the subsystem (0x%X,0x%X)!\n", lastSCSI, cmd_result); +@@ -600,72 +606,73 @@ + } + } + /* if no panic appeared, increase the interrupt-counter */ +- IBM_DS(ihost_index).total_interrupts++; ++ IBM_DS(shpnt).total_interrupts++; + /*only for local checking phase */ +- if (local_checking_phase_flag(ihost_index)) { +- stat_result(ihost_index) = cmd_result; +- got_interrupt(ihost_index) = 1; +- reset_status(ihost_index) = IM_RESET_FINISHED_OK; +- last_scsi_command(ihost_index)[ldn] = NO_SCSI; +- spin_unlock(dev->host_lock); ++ if (local_checking_phase_flag(shpnt)) { ++ stat_result(shpnt) = cmd_result; 
++ got_interrupt(shpnt) = 1; ++ reset_status(shpnt) = IM_RESET_FINISHED_OK; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } + /* handling of commands coming from upper level of scsi driver */ +- if (last_scsi_type(ihost_index)[ldn] == IM_IMM_CMD) { ++ if (last_scsi_type(shpnt)[ldn] == IM_IMM_CMD) { + /* verify ldn, and may handle rare reset immediate command */ +- if ((reset_status(ihost_index) == IM_RESET_IN_PROGRESS) && (last_scsi_command(ihost_index)[ldn] == IM_RESET_IMM_CMD)) { ++ if ((reset_status(shpnt) == IM_RESET_IN_PROGRESS) && (last_scsi_command(shpnt)[ldn] == IM_RESET_IMM_CMD)) { + if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) { + disk_rw_in_progress = 0; + PS2_DISK_LED_OFF(); +- reset_status(ihost_index) = IM_RESET_FINISHED_FAIL; ++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL; + } else { + /*reset disk led counter, turn off disk led */ + disk_rw_in_progress = 0; + PS2_DISK_LED_OFF(); +- reset_status(ihost_index) = IM_RESET_FINISHED_OK; ++ reset_status(shpnt) = IM_RESET_FINISHED_OK; + } +- stat_result(ihost_index) = cmd_result; +- last_scsi_command(ihost_index)[ldn] = NO_SCSI; +- last_scsi_type(ihost_index)[ldn] = 0; +- spin_unlock(dev->host_lock); ++ stat_result(shpnt) = cmd_result; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; ++ last_scsi_type(shpnt)[ldn] = 0; ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; +- } else if (last_scsi_command(ihost_index)[ldn] == IM_ABORT_IMM_CMD) { ++ } else if (last_scsi_command(shpnt)[ldn] == IM_ABORT_IMM_CMD) { + /* react on SCSI abort command */ + #ifdef IM_DEBUG_PROBE + printk("IBM MCA SCSI: Interrupt from SCSI-abort.\n"); + #endif + disk_rw_in_progress = 0; + PS2_DISK_LED_OFF(); +- cmd = ld(ihost_index)[ldn].cmd; +- ld(ihost_index)[ldn].cmd = NULL; ++ cmd = ld(shpnt)[ldn].cmd; ++ ld(shpnt)[ldn].cmd = NULL; + if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) + cmd->result = DID_NO_CONNECT << 16; + else + cmd->result = DID_ABORT << 16; +- stat_result(ihost_index) = cmd_result; +- last_scsi_command(ihost_index)[ldn] = NO_SCSI; +- last_scsi_type(ihost_index)[ldn] = 0; ++ stat_result(shpnt) = cmd_result; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; ++ last_scsi_type(shpnt)[ldn] = 0; + if (cmd->scsi_done) + (cmd->scsi_done) (cmd); /* should be the internal_done */ +- spin_unlock(dev->host_lock); ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } else { + disk_rw_in_progress = 0; + PS2_DISK_LED_OFF(); +- reset_status(ihost_index) = IM_RESET_FINISHED_OK; +- stat_result(ihost_index) = cmd_result; +- last_scsi_command(ihost_index)[ldn] = NO_SCSI; +- spin_unlock(dev->host_lock); ++ reset_status(shpnt) = IM_RESET_FINISHED_OK; ++ stat_result(shpnt) = cmd_result; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } + } +- last_scsi_command(ihost_index)[ldn] = NO_SCSI; +- last_scsi_type(ihost_index)[ldn] = 0; +- cmd = ld(ihost_index)[ldn].cmd; +- ld(ihost_index)[ldn].cmd = NULL; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; ++ last_scsi_type(shpnt)[ldn] = 0; ++ cmd = ld(shpnt)[ldn].cmd; ++ ld(shpnt)[ldn].cmd = NULL; + #ifdef IM_DEBUG_TIMEOUT + if (cmd) { + if ((cmd->target == TIMEOUT_PUN) && (cmd->device->lun == TIMEOUT_LUN)) { ++ spin_unlock_irqsave(shpnt->host_lock, flags); + printk("IBM MCA SCSI: Ignoring interrupt from pun=%x, lun=%x.\n", cmd->target, cmd->device->lun); + return IRQ_HANDLED; + } +@@ -674,15 +681,15 @@ + /*if no command structure, just return, 
else clear cmd */ + if (!cmd) + { +- spin_unlock(dev->host_lock); ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } + + #ifdef IM_DEBUG_INT +- printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(ihost_index)[ldn].tsb.dev_status, ld(ihost_index)[ldn].tsb.cmd_status, ld(ihost_index)[ldn].tsb.dev_error, ld(ihost_index)[ldn].tsb.cmd_error); ++ printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(shpnt)[ldn].tsb.dev_status, ld(shpnt)[ldn].tsb.cmd_status, ld(shpnt)[ldn].tsb.dev_error, ld(shpnt)[ldn].tsb.cmd_error); + #endif + /*if this is end of media read/write, may turn off PS/2 disk led */ +- if ((ld(ihost_index)[ldn].device_type != TYPE_NO_LUN) && (ld(ihost_index)[ldn].device_type != TYPE_NO_DEVICE)) { ++ if ((ld(shpnt)[ldn].device_type != TYPE_NO_LUN) && (ld(shpnt)[ldn].device_type != TYPE_NO_DEVICE)) { + /* only access this, if there was a valid device addressed */ + if (--disk_rw_in_progress == 0) + PS2_DISK_LED_OFF(); +@@ -693,8 +700,8 @@ + * adapters do not support CMD_TERMINATED, TASK_SET_FULL and + * ACA_ACTIVE as returning statusbyte information. (ML) */ + if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) { +- cmd->result = (unsigned char) (ld(ihost_index)[ldn].tsb.dev_status & 0x1e); +- IBM_DS(ihost_index).total_errors++; ++ cmd->result = (unsigned char) (ld(shpnt)[ldn].tsb.dev_status & 0x1e); ++ IBM_DS(shpnt).total_errors++; + } else + cmd->result = 0; + /* write device status into cmd->result, and call done function */ +@@ -705,24 +712,25 @@ + cmd->result |= DID_OK << 16; + if (cmd->scsi_done) + (cmd->scsi_done) (cmd); +- spin_unlock(dev->host_lock); ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + return IRQ_HANDLED; + } + +-static void issue_cmd(int host_index, unsigned long cmd_reg, unsigned char attn_reg) ++static void issue_cmd(struct Scsi_Host *shpnt, unsigned long cmd_reg, ++ unsigned char attn_reg) + { + unsigned long flags; + /* must wait for attention reg not busy */ + while (1) { +- spin_lock_irqsave(hosts[host_index]->host_lock, flags); +- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) ++ spin_lock_irqsave(shpnt->host_lock, flags); ++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) + break; +- spin_unlock_irqrestore(hosts[host_index]->host_lock, flags); ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + } + /* write registers and enable system interrupts */ +- outl(cmd_reg, IM_CMD_REG(host_index)); +- outb(attn_reg, IM_ATTN_REG(host_index)); +- spin_unlock_irqrestore(hosts[host_index]->host_lock, flags); ++ outl(cmd_reg, IM_CMD_REG(shpnt)); ++ outb(attn_reg, IM_ATTN_REG(shpnt)); ++ spin_unlock_irqrestore(shpnt->host_lock, flags); + } + + static void internal_done(Scsi_Cmnd * cmd) +@@ -732,34 +740,34 @@ + } + + /* SCSI-SCB-command for device_inquiry */ +-static int device_inquiry(int host_index, int ldn) ++static int device_inquiry(struct Scsi_Host *shpnt, int ldn) + { + int retr; + struct im_scb *scb; + struct im_tsb *tsb; + unsigned char *buf; + +- scb = &(ld(host_index)[ldn].scb); +- tsb = &(ld(host_index)[ldn].tsb); +- buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); +- ld(host_index)[ldn].tsb.dev_status = 0; /* prepare statusblock */ ++ scb = &(ld(shpnt)[ldn].scb); ++ tsb = &(ld(shpnt)[ldn].tsb); ++ buf = (unsigned char *) (&(ld(shpnt)[ldn].buf)); ++ ld(shpnt)[ldn].tsb.dev_status = 0; /* prepare statusblock */ + for (retr = 0; retr < 3; retr++) { + /* fill scb with inquiry command */ + scb->command = IM_DEVICE_INQUIRY_CMD | IM_NO_DISCONNECT; + scb->enable = 
IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; +- last_scsi_command(host_index)[ldn] = IM_DEVICE_INQUIRY_CMD; +- last_scsi_type(host_index)[ldn] = IM_SCB; ++ last_scsi_command(shpnt)[ldn] = IM_DEVICE_INQUIRY_CMD; ++ last_scsi_type(shpnt)[ldn] = IM_SCB; + scb->sys_buf_adr = isa_virt_to_bus(buf); + scb->sys_buf_length = 255; /* maximum bufferlength gives max info */ + scb->tsb_adr = isa_virt_to_bus(tsb); + /* issue scb to passed ldn, and busy wait for interrupt */ +- got_interrupt(host_index) = 0; +- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); +- while (!got_interrupt(host_index)) ++ got_interrupt(shpnt) = 0; ++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); ++ while (!got_interrupt(shpnt)) + barrier(); + + /*if command successful, break */ +- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) ++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + return 1; + } + /*if all three retries failed, return "no device at this ldn" */ +@@ -769,34 +777,34 @@ + return 1; + } + +-static int read_capacity(int host_index, int ldn) ++static int read_capacity(struct Scsi_Host *shpnt, int ldn) + { + int retr; + struct im_scb *scb; + struct im_tsb *tsb; + unsigned char *buf; + +- scb = &(ld(host_index)[ldn].scb); +- tsb = &(ld(host_index)[ldn].tsb); +- buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); +- ld(host_index)[ldn].tsb.dev_status = 0; ++ scb = &(ld(shpnt)[ldn].scb); ++ tsb = &(ld(shpnt)[ldn].tsb); ++ buf = (unsigned char *) (&(ld(shpnt)[ldn].buf)); ++ ld(shpnt)[ldn].tsb.dev_status = 0; + for (retr = 0; retr < 3; retr++) { + /*fill scb with read capacity command */ + scb->command = IM_READ_CAPACITY_CMD; + scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; +- last_scsi_command(host_index)[ldn] = IM_READ_CAPACITY_CMD; +- last_scsi_type(host_index)[ldn] = IM_SCB; ++ last_scsi_command(shpnt)[ldn] = IM_READ_CAPACITY_CMD; ++ last_scsi_type(shpnt)[ldn] = IM_SCB; + scb->sys_buf_adr = isa_virt_to_bus(buf); + scb->sys_buf_length = 8; + scb->tsb_adr = isa_virt_to_bus(tsb); + /*issue scb to passed ldn, and busy wait for interrupt */ +- got_interrupt(host_index) = 0; +- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); +- while (!got_interrupt(host_index)) ++ got_interrupt(shpnt) = 0; ++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); ++ while (!got_interrupt(shpnt)) + barrier(); + + /*if got capacity, get block length and return one device found */ +- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) ++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + return 1; + } + /*if all three retries failed, return "no device at this ldn" */ +@@ -806,39 +814,39 @@ + return 1; + } + +-static int get_pos_info(int host_index) ++static int get_pos_info(struct Scsi_Host *shpnt) + { + int retr; + struct im_scb *scb; + struct im_tsb *tsb; + unsigned char *buf; + +- scb = &(ld(host_index)[MAX_LOG_DEV].scb); +- tsb = &(ld(host_index)[MAX_LOG_DEV].tsb); +- buf = (unsigned char *) (&(ld(host_index)[MAX_LOG_DEV].buf)); +- ld(host_index)[MAX_LOG_DEV].tsb.dev_status = 0; ++ scb = &(ld(shpnt)[MAX_LOG_DEV].scb); ++ tsb = &(ld(shpnt)[MAX_LOG_DEV].tsb); ++ buf = (unsigned char *) (&(ld(shpnt)[MAX_LOG_DEV].buf)); ++ 
ld(shpnt)[MAX_LOG_DEV].tsb.dev_status = 0; + for (retr = 0; retr < 3; retr++) { + /*fill scb with get_pos_info command */ + scb->command = IM_GET_POS_INFO_CMD; + scb->enable = IM_READ_CONTROL | IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE | IM_BYPASS_BUFFER; +- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD; +- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_SCB; ++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD; ++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_SCB; + scb->sys_buf_adr = isa_virt_to_bus(buf); +- if (special(host_index) == IBM_SCSI2_FW) ++ if (special(shpnt) == IBM_SCSI2_FW) + scb->sys_buf_length = 256; /* get all info from F/W adapter */ + else + scb->sys_buf_length = 18; /* get exactly 18 bytes for other SCSI */ + scb->tsb_adr = isa_virt_to_bus(tsb); + /*issue scb to ldn=15, and busy wait for interrupt */ +- got_interrupt(host_index) = 0; +- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV); ++ got_interrupt(shpnt) = 0; ++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV); + + /* FIXME: timeout */ +- while (!got_interrupt(host_index)) ++ while (!got_interrupt(shpnt)) + barrier(); + + /*if got POS-stuff, get block length and return one device found */ +- if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) ++ if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES)) + return 1; + } + /* if all three retries failed, return "no device at this ldn" */ +@@ -851,14 +859,16 @@ + /* SCSI-immediate-command for assign. This functions maps/unmaps specific + ldn-numbers on SCSI (PUN,LUN). It is needed for presetting of the + subsystem and for dynamical remapping od ldns. */ +-static int immediate_assign(int host_index, unsigned int pun, unsigned int lun, unsigned int ldn, unsigned int operation) ++static int immediate_assign(struct Scsi_Host *shpnt, unsigned int pun, ++ unsigned int lun, unsigned int ldn, ++ unsigned int operation) + { + int retr; + unsigned long imm_cmd; + + for (retr = 0; retr < 3; retr++) { + /* select mutation level of the SCSI-adapter */ +- switch (special(host_index)) { ++ switch (special(shpnt)) { + case IBM_SCSI2_FW: + imm_cmd = (unsigned long) (IM_ASSIGN_IMM_CMD); + imm_cmd |= (unsigned long) ((lun & 7) << 24); +@@ -867,7 +877,7 @@ + imm_cmd |= (unsigned long) ((ldn & 15) << 16); + break; + default: +- imm_cmd = inl(IM_CMD_REG(host_index)); ++ imm_cmd = inl(IM_CMD_REG(shpnt)); + imm_cmd &= (unsigned long) (0xF8000000); /* keep reserved bits */ + imm_cmd |= (unsigned long) (IM_ASSIGN_IMM_CMD); + imm_cmd |= (unsigned long) ((lun & 7) << 24); +@@ -876,15 +886,15 @@ + imm_cmd |= (unsigned long) ((ldn & 15) << 16); + break; + } +- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD; +- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD; +- got_interrupt(host_index) = 0; +- issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); +- while (!got_interrupt(host_index)) ++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD; ++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD; ++ got_interrupt(shpnt) = 0; ++ issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); ++ while (!got_interrupt(shpnt)) + barrier(); + + /*if command successful, break */ +- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) ++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) + return 1; + } + if (retr >= 3) +@@ -893,7 +903,7 @@ + return 1; + } + +-static int 
immediate_feature(int host_index, unsigned int speed, unsigned int timeout) ++static int immediate_feature(struct Scsi_Host *shpnt, unsigned int speed, unsigned int timeout) + { + int retr; + unsigned long imm_cmd; +@@ -903,16 +913,16 @@ + imm_cmd = IM_FEATURE_CTR_IMM_CMD; + imm_cmd |= (unsigned long) ((speed & 0x7) << 29); + imm_cmd |= (unsigned long) ((timeout & 0x1fff) << 16); +- last_scsi_command(host_index)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD; +- last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD; +- got_interrupt(host_index) = 0; ++ last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD; ++ last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD; ++ got_interrupt(shpnt) = 0; + /* we need to run into command errors in order to probe for the + * right speed! */ + global_command_error_excuse = 1; +- issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); ++ issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV); + + /* FIXME: timeout */ +- while (!got_interrupt(host_index)) ++ while (!got_interrupt(shpnt)) + barrier(); + if (global_command_error_excuse == CMD_FAIL) { + global_command_error_excuse = 0; +@@ -920,7 +930,7 @@ + } else + global_command_error_excuse = 0; + /*if command successful, break */ +- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) ++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) + return 1; + } + if (retr >= 3) +@@ -930,35 +940,35 @@ + } + + #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET +-static int immediate_reset(int host_index, unsigned int ldn) ++static int immediate_reset(struct Scsi_Host *shpnt, unsigned int ldn) + { + int retries; + int ticks; + unsigned long imm_command; + + for (retries = 0; retries < 3; retries++) { +- imm_command = inl(IM_CMD_REG(host_index)); ++ imm_command = inl(IM_CMD_REG(shpnt)); + imm_command &= (unsigned long) (0xFFFF0000); /* keep reserved bits */ + imm_command |= (unsigned long) (IM_RESET_IMM_CMD); +- last_scsi_command(host_index)[ldn] = IM_RESET_IMM_CMD; +- last_scsi_type(host_index)[ldn] = IM_IMM_CMD; +- got_interrupt(host_index) = 0; +- reset_status(host_index) = IM_RESET_IN_PROGRESS; +- issue_cmd(host_index, (unsigned long) (imm_command), IM_IMM_CMD | ldn); ++ last_scsi_command(shpnt)[ldn] = IM_RESET_IMM_CMD; ++ last_scsi_type(shpnt)[ldn] = IM_IMM_CMD; ++ got_interrupt(shpnt) = 0; ++ reset_status(shpnt) = IM_RESET_IN_PROGRESS; ++ issue_cmd(shpnt, (unsigned long) (imm_command), IM_IMM_CMD | ldn); + ticks = IM_RESET_DELAY * HZ; +- while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks) { ++ while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks) { + udelay((1 + 999 / HZ) * 1000); + barrier(); + } + /* if reset did not complete, just complain */ + if (!ticks) { + printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY); +- reset_status(host_index) = IM_RESET_FINISHED_OK; ++ reset_status(shpnt) = IM_RESET_FINISHED_OK; + /* did not work, finish */ + return 1; + } + /*if command successful, break */ +- if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED) ++ if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED) + return 1; + } + if (retries >= 3) +@@ -1060,35 +1070,35 @@ + return 0; + } + +-static int probe_bus_mode(int host_index) ++static int probe_bus_mode(struct Scsi_Host *shpnt) + { + struct im_pos_info *info; + int num_bus = 0; + int ldn; + +- info = (struct im_pos_info *) (&(ld(host_index)[MAX_LOG_DEV].buf)); +- if (get_pos_info(host_index)) { ++ info = (struct im_pos_info *) (&(ld(shpnt)[MAX_LOG_DEV].buf)); ++ if 
(get_pos_info(shpnt)) { + if (info->connector_size & 0xf000) +- subsystem_connector_size(host_index) = 16; ++ subsystem_connector_size(shpnt) = 16; + else +- subsystem_connector_size(host_index) = 32; ++ subsystem_connector_size(shpnt) = 32; + num_bus |= (info->pos_4b & 8) >> 3; + for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { +- if ((special(host_index) == IBM_SCSI_WCACHE) || (special(host_index) == IBM_7568_WCACHE)) { ++ if ((special(shpnt) == IBM_SCSI_WCACHE) || (special(shpnt) == IBM_7568_WCACHE)) { + if (!((info->cache_stat >> ldn) & 1)) +- ld(host_index)[ldn].cache_flag = 0; ++ ld(shpnt)[ldn].cache_flag = 0; + } + if (!((info->retry_stat >> ldn) & 1)) +- ld(host_index)[ldn].retry_flag = 0; ++ ld(shpnt)[ldn].retry_flag = 0; + } + #ifdef IM_DEBUG_PROBE + printk("IBM MCA SCSI: SCSI-Cache bits: "); + for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { +- printk("%d", ld(host_index)[ldn].cache_flag); ++ printk("%d", ld(shpnt)[ldn].cache_flag); + } + printk("\nIBM MCA SCSI: SCSI-Retry bits: "); + for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { +- printk("%d", ld(host_index)[ldn].retry_flag); ++ printk("%d", ld(shpnt)[ldn].retry_flag); + } + printk("\n"); + #endif +@@ -1097,7 +1107,7 @@ + } + + /* probing scsi devices */ +-static void check_devices(int host_index, int adaptertype) ++static void check_devices(struct Scsi_Host *shpnt, int adaptertype) + { + int id, lun, ldn, ticks; + int count_devices; /* local counter for connected device */ +@@ -1108,24 +1118,24 @@ + /* assign default values to certain variables */ + ticks = 0; + count_devices = 0; +- IBM_DS(host_index).dyn_flag = 0; /* normally no need for dynamical ldn management */ +- IBM_DS(host_index).total_errors = 0; /* set errorcounter to 0 */ +- next_ldn(host_index) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */ ++ IBM_DS(shpnt).dyn_flag = 0; /* normally no need for dynamical ldn management */ ++ IBM_DS(shpnt).total_errors = 0; /* set errorcounter to 0 */ ++ next_ldn(shpnt) = 7; /* next ldn to be assigned is 7, because 0-6 is 'hardwired' */ + + /* initialize the very important driver-informational arrays/structs */ +- memset(ld(host_index), 0, sizeof(ld(host_index))); ++ memset(ld(shpnt), 0, sizeof(ld(shpnt))); + for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) { +- last_scsi_command(host_index)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */ +- last_scsi_type(host_index)[ldn] = 0; +- ld(host_index)[ldn].cache_flag = 1; +- ld(host_index)[ldn].retry_flag = 1; ++ last_scsi_command(shpnt)[ldn] = NO_SCSI; /* emptify last SCSI-command storage */ ++ last_scsi_type(shpnt)[ldn] = 0; ++ ld(shpnt)[ldn].cache_flag = 1; ++ ld(shpnt)[ldn].retry_flag = 1; + } +- memset(get_ldn(host_index), TYPE_NO_DEVICE, sizeof(get_ldn(host_index))); /* this is essential ! */ +- memset(get_scsi(host_index), TYPE_NO_DEVICE, sizeof(get_scsi(host_index))); /* this is essential ! */ ++ memset(get_ldn(shpnt), TYPE_NO_DEVICE, sizeof(get_ldn(shpnt))); /* this is essential ! */ ++ memset(get_scsi(shpnt), TYPE_NO_DEVICE, sizeof(get_scsi(shpnt))); /* this is essential ! */ + for (lun = 0; lun < 8; lun++) { + /* mark the adapter at its pun on all luns */ +- get_scsi(host_index)[subsystem_pun(host_index)][lun] = TYPE_IBM_SCSI_ADAPTER; +- get_ldn(host_index)[subsystem_pun(host_index)][lun] = MAX_LOG_DEV; /* make sure, the subsystem ++ get_scsi(shpnt)[subsystem_pun(shpnt)][lun] = TYPE_IBM_SCSI_ADAPTER; ++ get_ldn(shpnt)[subsystem_pun(shpnt)][lun] = MAX_LOG_DEV; /* make sure, the subsystem + ldn is active for all + luns. 
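/*
 * A minimal sketch of the command/interrupt handshake the hunks above
 * convert: issue_cmd() takes host_lock only around the busy-test and the
 * register writes, the caller then busy-waits on got_interrupt(shpnt),
 * and the interrupt handler sets that flag under the same lock before
 * releasing it with spin_unlock_irqrestore(). Names and the helper below
 * are illustrative, not code from this patch; the unbounded wait is the
 * spot the source itself marks "FIXME: timeout".
 */
static int example_issue_and_wait(struct Scsi_Host *shpnt,
				  struct im_scb *scb, int ldn)
{
	got_interrupt(shpnt) = 0;
	issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
	while (!got_interrupt(shpnt))
		barrier();	/* keep the compiler re-reading the flag */
	return (stat_result(shpnt) == IM_SCB_CMD_COMPLETED) ||
	       (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES);
}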
*/ + } +@@ -1134,9 +1144,9 @@ + /* monitor connected on model XX95. */ + + /* STEP 1: */ +- adapter_speed(host_index) = global_adapter_speed; +- speedrun = adapter_speed(host_index); +- while (immediate_feature(host_index, speedrun, adapter_timeout) == 2) { ++ adapter_speed(shpnt) = global_adapter_speed; ++ speedrun = adapter_speed(shpnt); ++ while (immediate_feature(shpnt, speedrun, adapter_timeout) == 2) { + probe_display(1); + if (speedrun == 7) + panic("IBM MCA SCSI: Cannot set Synchronous-Transfer-Rate!\n"); +@@ -1144,30 +1154,30 @@ + if (speedrun > 7) + speedrun = 7; + } +- adapter_speed(host_index) = speedrun; ++ adapter_speed(shpnt) = speedrun; + /* Get detailed information about the current adapter, necessary for + * device operations: */ +- num_bus = probe_bus_mode(host_index); ++ num_bus = probe_bus_mode(shpnt); + + /* num_bus contains only valid data for the F/W adapter! */ + if (adaptertype == IBM_SCSI2_FW) { /* F/W SCSI adapter: */ + /* F/W adapter PUN-space extension evaluation: */ + if (num_bus) { + printk(KERN_INFO "IBM MCA SCSI: Separate bus mode (wide-addressing enabled)\n"); +- subsystem_maxid(host_index) = 16; ++ subsystem_maxid(shpnt) = 16; + } else { + printk(KERN_INFO "IBM MCA SCSI: Combined bus mode (wide-addressing disabled)\n"); +- subsystem_maxid(host_index) = 8; ++ subsystem_maxid(shpnt) = 8; + } + printk(KERN_INFO "IBM MCA SCSI: Sync.-Rate (F/W: 20, Int.: 10, Ext.: %s) MBytes/s\n", ibmrate(speedrun, adaptertype)); + } else /* all other IBM SCSI adapters: */ + printk(KERN_INFO "IBM MCA SCSI: Synchronous-SCSI-Transfer-Rate: %s MBytes/s\n", ibmrate(speedrun, adaptertype)); + + /* assign correct PUN device space */ +- max_pun = subsystem_maxid(host_index); ++ max_pun = subsystem_maxid(shpnt); + + #ifdef IM_DEBUG_PROBE +- printk("IBM MCA SCSI: Current SCSI-host index: %d\n", host_index); ++ printk("IBM MCA SCSI: Current SCSI-host index: %d\n", shpnt); + printk("IBM MCA SCSI: Removing default logical SCSI-device mapping."); + #else + printk(KERN_INFO "IBM MCA SCSI: Dev. Order: %s, Mapping (takes <2min): ", (ibm_ansi_order) ? 
"ANSI" : "New"); +@@ -1177,7 +1187,7 @@ + #ifdef IM_DEBUG_PROBE + printk("."); + #endif +- immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */ ++ immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN); /* remove ldn (wherever) */ + } + lun = 0; /* default lun is 0 */ + #ifndef IM_DEBUG_PROBE +@@ -1196,18 +1206,18 @@ + #ifdef IM_DEBUG_PROBE + printk("."); + #endif +- if (id != subsystem_pun(host_index)) { ++ if (id != subsystem_pun(shpnt)) { + /* if pun is not the adapter: */ + /* set ldn=0 to pun,lun */ +- immediate_assign(host_index, id, lun, PROBE_LDN, SET_LDN); +- if (device_inquiry(host_index, PROBE_LDN)) { /* probe device */ +- get_scsi(host_index)[id][lun] = (unsigned char) (ld(host_index)[PROBE_LDN].buf[0]); ++ immediate_assign(shpnt, id, lun, PROBE_LDN, SET_LDN); ++ if (device_inquiry(shpnt, PROBE_LDN)) { /* probe device */ ++ get_scsi(shpnt)[id][lun] = (unsigned char) (ld(shpnt)[PROBE_LDN].buf[0]); + /* entry, even for NO_LUN */ +- if (ld(host_index)[PROBE_LDN].buf[0] != TYPE_NO_LUN) ++ if (ld(shpnt)[PROBE_LDN].buf[0] != TYPE_NO_LUN) + count_devices++; /* a existing device is found */ + } + /* remove ldn */ +- immediate_assign(host_index, id, lun, PROBE_LDN, REMOVE_LDN); ++ immediate_assign(shpnt, id, lun, PROBE_LDN, REMOVE_LDN); + } + } + #ifndef IM_DEBUG_PROBE +@@ -1227,16 +1237,16 @@ + #ifdef IM_DEBUG_PROBE + printk("."); + #endif +- if (id != subsystem_pun(host_index)) { +- if (get_scsi(host_index)[id][lun] != TYPE_NO_LUN && get_scsi(host_index)[id][lun] != TYPE_NO_DEVICE) { ++ if (id != subsystem_pun(shpnt)) { ++ if (get_scsi(shpnt)[id][lun] != TYPE_NO_LUN && get_scsi(shpnt)[id][lun] != TYPE_NO_DEVICE) { + /* Only map if accepted type. Always enter for + lun == 0 to get no gaps into ldn-mapping for ldn<7. */ +- immediate_assign(host_index, id, lun, ldn, SET_LDN); +- get_ldn(host_index)[id][lun] = ldn; /* map ldn */ +- if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) { ++ immediate_assign(shpnt, id, lun, ldn, SET_LDN); ++ get_ldn(shpnt)[id][lun] = ldn; /* map ldn */ ++ if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) { + #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET + printk("resetting device at ldn=%x ... ", ldn); +- immediate_reset(host_index, ldn); ++ immediate_reset(shpnt, ldn); + #endif + ldn++; + } else { +@@ -1244,15 +1254,15 @@ + * handle it or because it has problems */ + if (lun > 0) { + /* remove mapping */ +- get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE; +- immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN); ++ get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE; ++ immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN); + } else + ldn++; + } + } else if (lun == 0) { + /* map lun == 0, even if no device exists */ +- immediate_assign(host_index, id, lun, ldn, SET_LDN); +- get_ldn(host_index)[id][lun] = ldn; /* map ldn */ ++ immediate_assign(shpnt, id, lun, ldn, SET_LDN); ++ get_ldn(shpnt)[id][lun] = ldn; /* map ldn */ + ldn++; + } + } +@@ -1262,14 +1272,14 @@ + /* map remaining ldns to non-existing devices */ + for (lun = 1; lun < 8 && ldn < MAX_LOG_DEV; lun++) + for (id = 0; id < max_pun && ldn < MAX_LOG_DEV; id++) { +- if (get_scsi(host_index)[id][lun] == TYPE_NO_LUN || get_scsi(host_index)[id][lun] == TYPE_NO_DEVICE) { ++ if (get_scsi(shpnt)[id][lun] == TYPE_NO_LUN || get_scsi(shpnt)[id][lun] == TYPE_NO_DEVICE) { + probe_display(1); + /* Map remaining ldns only to NON-existing pun,lun + combinations to make sure an inquiry will fail. 
+ For MULTI_LUN, it is needed to avoid adapter autonome + SCSI-remapping. */ +- immediate_assign(host_index, id, lun, ldn, SET_LDN); +- get_ldn(host_index)[id][lun] = ldn; ++ immediate_assign(shpnt, id, lun, ldn, SET_LDN); ++ get_ldn(shpnt)[id][lun] = ldn; + ldn++; + } + } +@@ -1292,51 +1302,51 @@ + for (id = 0; id < max_pun; id++) { + printk("%2d ", id); + for (lun = 0; lun < 8; lun++) +- printk("%2s ", ti_p(get_scsi(host_index)[id][lun])); ++ printk("%2s ", ti_p(get_scsi(shpnt)[id][lun])); + printk(" %2d ", id); + for (lun = 0; lun < 8; lun++) +- printk("%2s ", ti_l(get_ldn(host_index)[id][lun])); ++ printk("%2s ", ti_l(get_ldn(shpnt)[id][lun])); + printk("\n"); + } + #endif + + /* assign total number of found SCSI-devices to the statistics struct */ +- IBM_DS(host_index).total_scsi_devices = count_devices; ++ IBM_DS(shpnt).total_scsi_devices = count_devices; + + /* decide for output in /proc-filesystem, if the configuration of + SCSI-devices makes dynamical reassignment of devices necessary */ + if (count_devices >= MAX_LOG_DEV) +- IBM_DS(host_index).dyn_flag = 1; /* dynamical assignment is necessary */ ++ IBM_DS(shpnt).dyn_flag = 1; /* dynamical assignment is necessary */ + else +- IBM_DS(host_index).dyn_flag = 0; /* dynamical assignment is not necessary */ ++ IBM_DS(shpnt).dyn_flag = 0; /* dynamical assignment is not necessary */ + + /* If no SCSI-devices are assigned, return 1 in order to cause message. */ + if (ldn == 0) + printk("IBM MCA SCSI: Warning: No SCSI-devices found/assigned!\n"); + + /* reset the counters for statistics on the current adapter */ +- IBM_DS(host_index).scbs = 0; +- IBM_DS(host_index).long_scbs = 0; +- IBM_DS(host_index).total_accesses = 0; +- IBM_DS(host_index).total_interrupts = 0; +- IBM_DS(host_index).dynamical_assignments = 0; +- memset(IBM_DS(host_index).ldn_access, 0x0, sizeof(IBM_DS(host_index).ldn_access)); +- memset(IBM_DS(host_index).ldn_read_access, 0x0, sizeof(IBM_DS(host_index).ldn_read_access)); +- memset(IBM_DS(host_index).ldn_write_access, 0x0, sizeof(IBM_DS(host_index).ldn_write_access)); +- memset(IBM_DS(host_index).ldn_inquiry_access, 0x0, sizeof(IBM_DS(host_index).ldn_inquiry_access)); +- memset(IBM_DS(host_index).ldn_modeselect_access, 0x0, sizeof(IBM_DS(host_index).ldn_modeselect_access)); +- memset(IBM_DS(host_index).ldn_assignments, 0x0, sizeof(IBM_DS(host_index).ldn_assignments)); ++ IBM_DS(shpnt).scbs = 0; ++ IBM_DS(shpnt).long_scbs = 0; ++ IBM_DS(shpnt).total_accesses = 0; ++ IBM_DS(shpnt).total_interrupts = 0; ++ IBM_DS(shpnt).dynamical_assignments = 0; ++ memset(IBM_DS(shpnt).ldn_access, 0x0, sizeof(IBM_DS(shpnt).ldn_access)); ++ memset(IBM_DS(shpnt).ldn_read_access, 0x0, sizeof(IBM_DS(shpnt).ldn_read_access)); ++ memset(IBM_DS(shpnt).ldn_write_access, 0x0, sizeof(IBM_DS(shpnt).ldn_write_access)); ++ memset(IBM_DS(shpnt).ldn_inquiry_access, 0x0, sizeof(IBM_DS(shpnt).ldn_inquiry_access)); ++ memset(IBM_DS(shpnt).ldn_modeselect_access, 0x0, sizeof(IBM_DS(shpnt).ldn_modeselect_access)); ++ memset(IBM_DS(shpnt).ldn_assignments, 0x0, sizeof(IBM_DS(shpnt).ldn_assignments)); + probe_display(0); + return; + } + +-static int device_exists(int host_index, int ldn, int *block_length, int *device_type) ++static int device_exists(struct Scsi_Host *shpnt, int ldn, int *block_length, int *device_type) + { + unsigned char *buf; + /* if no valid device found, return immediately with 0 */ +- if (!(device_inquiry(host_index, ldn))) ++ if (!(device_inquiry(shpnt, ldn))) + return 0; +- buf = (unsigned char *) (&(ld(host_index)[ldn].buf)); ++ buf = 
(unsigned char *) (&(ld(shpnt)[ldn].buf)); + if (*buf == TYPE_ROM) { + *device_type = TYPE_ROM; + *block_length = 2048; /* (standard blocksize for yellow-/red-book) */ +@@ -1349,7 +1359,7 @@ + } + if (*buf == TYPE_DISK) { + *device_type = TYPE_DISK; +- if (read_capacity(host_index, ldn)) { ++ if (read_capacity(shpnt, ldn)) { + *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24); + return 1; + } else +@@ -1357,7 +1367,7 @@ + } + if (*buf == TYPE_MOD) { + *device_type = TYPE_MOD; +- if (read_capacity(host_index, ldn)) { ++ if (read_capacity(shpnt, ldn)) { + *block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24); + return 1; + } else +@@ -1430,6 +1440,9 @@ + return; + } + ++#if 0 ++ FIXME NEED TO MOVE TO SYSFS ++ + static int ibmmca_getinfo(char *buf, int slot, void *dev_id) + { + struct Scsi_Host *shpnt; +@@ -1480,58 +1493,34 @@ + + return len; + } ++#endif + +-int ibmmca_detect(struct scsi_host_template * scsi_template) ++static struct scsi_host_template ibmmca_driver_template = { ++ .proc_name = "ibmmca", ++ .proc_info = ibmmca_proc_info, ++ .name = "IBM SCSI-Subsystem", ++ .queuecommand = ibmmca_queuecommand, ++ .eh_abort_handler = ibmmca_abort, ++ .eh_host_reset_handler = ibmmca_host_reset, ++ .bios_param = ibmmca_biosparam, ++ .can_queue = 16, ++ .this_id = 7, ++ .sg_tablesize = 16, ++ .cmd_per_lun = 1, ++ .use_clustering = ENABLE_CLUSTERING, ++}; ++ ++static int ibmmca_probe(struct device *dev) + { + struct Scsi_Host *shpnt; +- int port, id, i, j, k, slot; +- int devices_on_irq_11 = 0; +- int devices_on_irq_14 = 0; +- int IRQ14_registered = 0; +- int IRQ11_registered = 0; +- +- found = 0; /* make absolutely sure, that found is set to 0 */ ++ int port, id, i, j, k, irq, enabled, ret = -EINVAL; ++ struct mca_device *mca_dev = to_mca_device(dev); ++ const char *description = ibmmca_description[mca_dev->index]; + + /* First of all, print the version number of the driver. This is + * important to allow better user bugreports in case of already + * having problems with the MCA_bus probing. 
*/ + printk(KERN_INFO "IBM MCA SCSI: Version %s\n", IBMMCA_SCSI_DRIVER_VERSION); +- /* if this is not MCA machine, return "nothing found" */ +- if (!MCA_bus) { +- printk(KERN_INFO "IBM MCA SCSI: No Microchannel-bus present --> Aborting.\n" " This machine does not have any IBM MCA-bus\n" " or the MCA-Kernel-support is not enabled!\n"); +- return 0; +- } +- +-#ifdef MODULE +- /* If the driver is run as module, read from conf.modules or cmd-line */ +- if (boot_options) +- option_setup(boot_options); +-#endif +- +- /* get interrupt request level */ +- if (request_irq(IM_IRQ, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) { +- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ); +- return 0; +- } else +- IRQ14_registered++; +- +- /* if ibmmcascsi setup option was passed to kernel, return "found" */ +- for (i = 0; i < IM_MAX_HOSTS; i++) +- if (io_port[i] > 0 && scsi_id[i] >= 0 && scsi_id[i] < 8) { +- printk("IBM MCA SCSI: forced detected SCSI Adapter, io=0x%x, scsi id=%d.\n", io_port[i], scsi_id[i]); +- if ((shpnt = ibmmca_register(scsi_template, io_port[i], scsi_id[i], FORCED_DETECTION, "forced detected SCSI Adapter"))) { +- for (k = 2; k < 7; k++) +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = 0; +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = FORCED_DETECTION; +- mca_set_adapter_name(MCA_INTEGSCSI, "forced detected SCSI Adapter"); +- mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); +- mca_mark_as_used(MCA_INTEGSCSI); +- devices_on_irq_14++; +- } +- } +- if (found) +- return found; +- + /* The POS2-register of all PS/2 model SCSI-subsystems has the following + * interpretation of bits: + * Bit 7 - 4 : Chip Revision ID (Release) +@@ -1558,7 +1547,14 @@ + + /* first look for the IBM SCSI integrated subsystem on the motherboard */ + for (j = 0; j < 8; j++) /* read the pos-information */ +- pos[j] = mca_read_stored_pos(MCA_INTEGSCSI, j); ++ pos[j] = mca_device_read_pos(mca_dev, j); ++ id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */ ++ enabled = (pos[2] &0x01); ++ if (!enabled) { ++ printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); ++ printk(KERN_WARNING " SCSI-operations may not work.\n"); ++ } ++ + /* pos2 = pos3 = 0xff if there is no integrated SCSI-subsystem present, but + * if we ignore the settings of all surrounding pos registers, it is not + * completely sufficient to only check pos2 and pos3. */ +@@ -1566,232 +1562,137 @@ + * make sure, we see a real integrated onboard SCSI-interface and no + * internal system information, which gets mapped to some pos registers + * on models 95xx. */ +- if ((!pos[0] && !pos[1] && pos[2] > 0 && pos[3] > 0 && !pos[4] && !pos[5] && !pos[6] && !pos[7]) || (pos[0] == 0xff && pos[1] == 0xff && pos[2] < 0xff && pos[3] < 0xff && pos[4] == 0xff && pos[5] == 0xff && pos[6] == 0xff && pos[7] == 0xff)) { +- if ((pos[2] & 1) == 1) /* is the subsystem chip enabled ? */ ++ if (mca_dev->slot == MCA_INTEGSCSI && ++ ((!pos[0] && !pos[1] && pos[2] > 0 && ++ pos[3] > 0 && !pos[4] && !pos[5] && ++ !pos[6] && !pos[7]) || ++ (pos[0] == 0xff && pos[1] == 0xff && ++ pos[2] < 0xff && pos[3] < 0xff && ++ pos[4] == 0xff && pos[5] == 0xff && ++ pos[6] == 0xff && pos[7] == 0xff))) { ++ irq = IM_IRQ; + port = IM_IO_PORT; +- else { /* if disabled, no IRQs will be generated, as the chip won't +- * listen to the incoming commands and will do really nothing, +- * except for listening to the pos-register settings. 
If this +- * happens, I need to hugely think about it, as one has to +- * write something to the MCA-Bus pos register in order to +- * enable the chip. Normally, IBM-SCSI won't pass the POST, +- * when the chip is disabled (see IBM tech. ref.). */ +- port = IM_IO_PORT; /* anyway, set the portnumber and warn */ +- printk("IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n" " SCSI-operations may not work.\n"); +- } +- id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */ +- /* give detailed information on the subsystem. This helps me +- * additionally during debugging and analyzing bug-reports. */ +- printk(KERN_INFO "IBM MCA SCSI: IBM Integrated SCSI Controller ffound, io=0x%x, scsi id=%d,\n", port, id); +- printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled."); +- +- /* register the found integrated SCSI-subsystem */ +- if ((shpnt = ibmmca_register(scsi_template, port, id, INTEGRATED_SCSI, "IBM Integrated SCSI Controller"))) +- { +- for (k = 2; k < 7; k++) +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI; +- mca_set_adapter_name(MCA_INTEGSCSI, "IBM Integrated SCSI Controller"); +- mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); +- mca_mark_as_used(MCA_INTEGSCSI); +- devices_on_irq_14++; +- } +- } +- +- /* now look for other adapters in MCA slots, */ +- /* determine the number of known IBM-SCSI-subsystem types */ +- /* see the pos[2] dependence to get the adapter port-offset. */ +- for (i = 0; i < ARRAY_SIZE(subsys_list); i++) { +- /* scan each slot for a fitting adapter id */ +- slot = 0; /* start at slot 0 */ +- while ((slot = mca_find_adapter(subsys_list[i].mca_id, slot)) +- != MCA_NOTFOUND) { /* scan through all slots */ +- for (j = 0; j < 8; j++) /* read the pos-information */ +- pos[j] = mca_read_stored_pos(slot, j); +- if ((pos[2] & 1) == 1) +- /* is the subsystem chip enabled ? 
*/ +- /* (explanations see above) */ +- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); +- else { +- /* anyway, set the portnumber and warn */ +- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); +- printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); +- printk(KERN_WARNING " SCSI-operations may not work.\n"); +- } +- if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) { ++ } else { ++ irq = IM_IRQ; ++ port = IM_IO_PORT + ((pos[2] &0x0e) << 2); ++ if ((mca_dev->index == IBM_SCSI2_FW) && (pos[6] != 0)) { + printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n"); + printk(KERN_ERR " Impossible to determine adapter PUN!\n"); + printk(KERN_ERR " Guessing adapter PUN = 7.\n"); + id = 7; + } else { + id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */ +- if (i == IBM_SCSI2_FW) { ++ if (mca_dev->index == IBM_SCSI2_FW) { + id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit + * for F/W adapters */ + } + } +- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) { ++ if ((mca_dev->index == IBM_SCSI2_FW) && ++ (pos[4] & 0x01) && (pos[6] == 0)) { + /* IRQ11 is used by SCSI-2 F/W Adapter/A */ + printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n"); +- /* get interrupt request level */ +- if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) { +- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW); +- } else +- IRQ11_registered++; ++ irq = IM_IRQ_FW; + } +- printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id); +- if ((pos[2] & 0xf0) == 0xf0) +- printk(KERN_DEBUG" ROM Addr.=off,"); +- else +- printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000); +- printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled."); +- +- /* register the hostadapter */ +- if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) { +- for (k = 2; k < 8; k++) +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i; +- mca_set_adapter_name(slot, subsys_list[i].description); +- mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt); +- mca_mark_as_used(slot); +- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) +- devices_on_irq_11++; +- else +- devices_on_irq_14++; +- } +- slot++; /* advance to next slot */ +- } /* advance to next adapter id in the list of IBM-SCSI-subsystems */ + } + +- /* now check for SCSI-adapters, mapped to the integrated SCSI +- * area. E.g. a W/Cache in MCA-slot 9(!). Do the check correct here, +- * as this is a known effect on some models 95xx. */ +- for (i = 0; i < ARRAY_SIZE(subsys_list); i++) { +- /* scan each slot for a fitting adapter id */ +- slot = mca_find_adapter(subsys_list[i].mca_id, MCA_INTEGSCSI); +- if (slot != MCA_NOTFOUND) { /* scan through all slots */ +- for (j = 0; j < 8; j++) /* read the pos-information */ +- pos[j] = mca_read_stored_pos(slot, j); +- if ((pos[2] & 1) == 1) { /* is the subsystem chip enabled ? 
*/ +- /* (explanations see above) */ +- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); +- } else { /* anyway, set the portnumber and warn */ +- port = IM_IO_PORT + ((pos[2] & 0x0e) << 2); +- printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n"); +- printk(KERN_WARNING " SCSI-operations may not work.\n"); +- } +- if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) { +- printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n"); +- printk(KERN_ERR " Impossible to determine adapter PUN!\n"); +- printk(KERN_ERR " Guessing adapter PUN = 7.\n"); +- id = 7; +- } else { +- id = (pos[3] & 0xe0) >> 5; /* get subsystem PUN */ +- if (i == IBM_SCSI2_FW) +- id |= (pos[3] & 0x10) >> 1; /* get subsystem PUN high-bit +- * for F/W adapters */ +- } +- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) { +- /* IRQ11 is used by SCSI-2 F/W Adapter/A */ +- printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n"); +- /* get interrupt request level */ +- if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) +- printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW); +- else +- IRQ11_registered++; +- } +- printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id); ++ ++ ++ /* give detailed information on the subsystem. This helps me ++ * additionally during debugging and analyzing bug-reports. */ ++ printk(KERN_INFO "IBM MCA SCSI: %s found, io=0x%x, scsi id=%d,\n", ++ description, port, id); ++ if (mca_dev->slot == MCA_INTEGSCSI) ++ printk(KERN_INFO " chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled."); ++ else { + if ((pos[2] & 0xf0) == 0xf0) + printk(KERN_DEBUG " ROM Addr.=off,"); + else + printk(KERN_DEBUG " ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000); +- printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled."); + +- /* register the hostadapter */ +- if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) { +- for (k = 2; k < 7; k++) +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; +- ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i; +- mca_set_adapter_name(slot, subsys_list[i].description); +- mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt); +- mca_mark_as_used(slot); +- if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) +- devices_on_irq_11++; +- else +- devices_on_irq_14++; +- } +- slot++; /* advance to next slot */ +- } /* advance to next adapter id in the list of IBM-SCSI-subsystems */ ++ printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled."); + } +- if (IRQ11_registered && !devices_on_irq_11) +- free_irq(IM_IRQ_FW, hosts); /* no devices on IRQ 11 */ +- if (IRQ14_registered && !devices_on_irq_14) +- free_irq(IM_IRQ, hosts); /* no devices on IRQ 14 */ +- if (!devices_on_irq_11 && !devices_on_irq_14) +- printk(KERN_WARNING "IBM MCA SCSI: No IBM SCSI-subsystem adapter attached.\n"); +- return found; /* return the number of found SCSI hosts. Should be 1 or 0. 
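/*
 * The probe paths above decode the adapter's POS registers with the same
 * bit operations in several places; gathered here in two illustrative
 * helpers for clarity (these are not part of the driver). Bit 0 of POS 2
 * is the enable bit, and (pos[2] & 0xf0) >> 4 is the chip revision.
 */
static int example_pos_port(unsigned char pos2)
{
	return IM_IO_PORT + ((pos2 & 0x0e) << 2);	/* per-slot I/O offset */
}
static int example_pos_pun(unsigned char pos3, int fw_adapter)
{
	int id = (pos3 & 0xe0) >> 5;	/* subsystem PUN in bits 7-5 */
	if (fw_adapter)
		id |= (pos3 & 0x10) >> 1;	/* F/W adapters add a high bit */
	return id;
}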
*/ +-} +- +-static struct Scsi_Host *ibmmca_register(struct scsi_host_template * scsi_template, int port, int id, int adaptertype, char *hostname) +-{ +- struct Scsi_Host *shpnt; +- int i, j; +- unsigned int ctrl; + + /* check I/O region */ +- if (!request_region(port, IM_N_IO_PORT, hostname)) { ++ if (!request_region(port, IM_N_IO_PORT, description)) { + printk(KERN_ERR "IBM MCA SCSI: Unable to get I/O region 0x%x-0x%x (%d ports).\n", port, port + IM_N_IO_PORT - 1, IM_N_IO_PORT); +- return NULL; ++ goto out_fail; + } + + /* register host */ +- shpnt = scsi_register(scsi_template, sizeof(struct ibmmca_hostdata)); ++ shpnt = scsi_host_alloc(&ibmmca_driver_template, ++ sizeof(struct ibmmca_hostdata)); + if (!shpnt) { + printk(KERN_ERR "IBM MCA SCSI: Unable to register host.\n"); +- release_region(port, IM_N_IO_PORT); +- return NULL; ++ goto out_release; ++ } ++ ++ dev_set_drvdata(dev, shpnt); ++ if(request_irq(irq, interrupt_handler, IRQF_SHARED, description, dev)) { ++ printk(KERN_ERR "IBM MCA SCSI: failed to request interrupt %d\n", irq); ++ goto out_free_host; + } + + /* request I/O region */ +- hosts[found] = shpnt; /* add new found hostadapter to the list */ +- special(found) = adaptertype; /* important assignment or else crash! */ +- subsystem_connector_size(found) = 0; /* preset slot-size */ +- shpnt->irq = IM_IRQ; /* assign necessary stuff for the adapter */ ++ special(shpnt) = mca_dev->index; /* important assignment or else crash! */ ++ subsystem_connector_size(shpnt) = 0; /* preset slot-size */ ++ shpnt->irq = irq; /* assign necessary stuff for the adapter */ + shpnt->io_port = port; + shpnt->n_io_port = IM_N_IO_PORT; + shpnt->this_id = id; + shpnt->max_id = 8; /* 8 PUNs are default */ + /* now, the SCSI-subsystem is connected to Linux */ + +- ctrl = (unsigned int) (inb(IM_CTR_REG(found))); /* get control-register status */ + #ifdef IM_DEBUG_PROBE ++ ctrl = (unsigned int) (inb(IM_CTR_REG(found))); /* get control-register status */ + printk("IBM MCA SCSI: Control Register contents: %x, status: %x\n", ctrl, inb(IM_STAT_REG(found))); + printk("IBM MCA SCSI: This adapters' POS-registers: "); + for (i = 0; i < 8; i++) + printk("%x ", pos[i]); + printk("\n"); + #endif +- reset_status(found) = IM_RESET_NOT_IN_PROGRESS; ++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS; + + for (i = 0; i < 16; i++) /* reset the tables */ + for (j = 0; j < 8; j++) +- get_ldn(found)[i][j] = MAX_LOG_DEV; ++ get_ldn(shpnt)[i][j] = MAX_LOG_DEV; + + /* check which logical devices exist */ + /* after this line, local interrupting is possible: */ +- local_checking_phase_flag(found) = 1; +- check_devices(found, adaptertype); /* call by value, using the global variable hosts */ +- local_checking_phase_flag(found) = 0; +- found++; /* now increase index to be prepared for next found subsystem */ ++ local_checking_phase_flag(shpnt) = 1; ++ check_devices(shpnt, mca_dev->index); /* call by value, using the global variable hosts */ ++ local_checking_phase_flag(shpnt) = 0; ++ + /* an ibm mca subsystem has been detected */ +- return shpnt; ++ ++ for (k = 2; k < 7; k++) ++ ((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k]; ++ ((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI; ++ mca_device_set_name(mca_dev, description); ++ /* FIXME: NEED TO REPLUMB TO SYSFS ++ mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt); ++ */ ++ mca_device_set_claim(mca_dev, 1); ++ if (scsi_add_host(shpnt, dev)) { ++ dev_printk(KERN_ERR, dev, "IBM MCA SCSI: scsi_add_host failed\n"); ++ goto 
out_free_host; ++ } ++ scsi_scan_host(shpnt); ++ ++ return 0; ++ out_free_host: ++ scsi_host_put(shpnt); ++ out_release: ++ release_region(port, IM_N_IO_PORT); ++ out_fail: ++ return ret; + } + +-static int ibmmca_release(struct Scsi_Host *shpnt) ++static int __devexit ibmmca_remove(struct device *dev) + { ++ struct Scsi_Host *shpnt = dev_get_drvdata(dev); ++ scsi_remove_host(shpnt); + release_region(shpnt->io_port, shpnt->n_io_port); +- if (!(--found)) +- free_irq(shpnt->irq, hosts); ++ free_irq(shpnt->irq, dev); + return 0; + } + +@@ -1805,33 +1706,24 @@ + int current_ldn; + int id, lun; + int target; +- int host_index; + int max_pun; + int i; +- struct scatterlist *sl; ++ struct scatterlist *sg; + + shpnt = cmd->device->host; +- /* search for the right hostadapter */ +- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); + +- if (!hosts[host_index]) { /* invalid hostadapter descriptor address */ +- cmd->result = DID_NO_CONNECT << 16; +- if (done) +- done(cmd); +- return 0; +- } +- max_pun = subsystem_maxid(host_index); ++ max_pun = subsystem_maxid(shpnt); + if (ibm_ansi_order) { + target = max_pun - 1 - cmd->device->id; +- if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index))) ++ if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt))) + target--; +- else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index))) ++ else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt))) + target++; + } else + target = cmd->device->id; + + /* if (target,lun) is NO LUN or not existing at all, return error */ +- if ((get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_DEVICE)) { ++ if ((get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_DEVICE)) { + cmd->result = DID_NO_CONNECT << 16; + if (done) + done(cmd); +@@ -1839,16 +1731,16 @@ + } + + /*if (target,lun) unassigned, do further checks... */ +- ldn = get_ldn(host_index)[target][cmd->device->lun]; ++ ldn = get_ldn(shpnt)[target][cmd->device->lun]; + if (ldn >= MAX_LOG_DEV) { /* on invalid ldn do special stuff */ + if (ldn > MAX_LOG_DEV) { /* dynamical remapping if ldn unassigned */ +- current_ldn = next_ldn(host_index); /* stop-value for one circle */ +- while (ld(host_index)[next_ldn(host_index)].cmd) { /* search for a occupied, but not in */ ++ current_ldn = next_ldn(shpnt); /* stop-value for one circle */ ++ while (ld(shpnt)[next_ldn(shpnt)].cmd) { /* search for a occupied, but not in */ + /* command-processing ldn. */ +- next_ldn(host_index)++; +- if (next_ldn(host_index) >= MAX_LOG_DEV) +- next_ldn(host_index) = 7; +- if (current_ldn == next_ldn(host_index)) { /* One circle done ? */ ++ next_ldn(shpnt)++; ++ if (next_ldn(shpnt) >= MAX_LOG_DEV) ++ next_ldn(shpnt) = 7; ++ if (current_ldn == next_ldn(shpnt)) { /* One circle done ? 
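/*
 * The new ibmmca_probe()/ibmmca_remove() pair above follows the usual
 * hotplug ordering: claim resources, allocate the host, hook the IRQ,
 * then scsi_add_host() and scsi_scan_host() only once everything is set
 * up, with error labels unwinding in reverse. A condensed sketch of that
 * skeleton (illustrative names and parameters; note this sketch also
 * frees the IRQ on the scsi_add_host() failure path, which the hunk
 * above does not):
 */
static int example_probe(struct device *dev, int port, int irq)
{
	struct Scsi_Host *shpnt;

	if (!request_region(port, IM_N_IO_PORT, "ibmmca-example"))
		return -EBUSY;
	shpnt = scsi_host_alloc(&ibmmca_driver_template,
				sizeof(struct ibmmca_hostdata));
	if (!shpnt)
		goto out_release;
	dev_set_drvdata(dev, shpnt);
	if (request_irq(irq, interrupt_handler, IRQF_SHARED,
			"ibmmca-example", dev))
		goto out_put;
	/* ... fill shpnt->io_port/irq/this_id, probe attached devices ... */
	if (scsi_add_host(shpnt, dev))
		goto out_irq;
	scsi_scan_host(shpnt);	/* scan only after scsi_add_host() succeeds */
	return 0;
 out_irq:
	free_irq(irq, dev);
 out_put:
	scsi_host_put(shpnt);
 out_release:
	release_region(port, IM_N_IO_PORT);
	return -ENODEV;
}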
*/ + /* no non-processing ldn found */ + scmd_printk(KERN_WARNING, cmd, + "IBM MCA SCSI: Cannot assign SCSI-device dynamically!\n" +@@ -1864,56 +1756,56 @@ + /* unmap non-processing ldn */ + for (id = 0; id < max_pun; id++) + for (lun = 0; lun < 8; lun++) { +- if (get_ldn(host_index)[id][lun] == next_ldn(host_index)) { +- get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE; +- get_scsi(host_index)[id][lun] = TYPE_NO_DEVICE; ++ if (get_ldn(shpnt)[id][lun] == next_ldn(shpnt)) { ++ get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE; ++ get_scsi(shpnt)[id][lun] = TYPE_NO_DEVICE; + /* unmap entry */ + } + } + /* set reduced interrupt_handler-mode for checking */ +- local_checking_phase_flag(host_index) = 1; ++ local_checking_phase_flag(shpnt) = 1; + /* map found ldn to pun,lun */ +- get_ldn(host_index)[target][cmd->device->lun] = next_ldn(host_index); ++ get_ldn(shpnt)[target][cmd->device->lun] = next_ldn(shpnt); + /* change ldn to the right value, that is now next_ldn */ +- ldn = next_ldn(host_index); ++ ldn = next_ldn(shpnt); + /* unassign all ldns (pun,lun,ldn does not matter for remove) */ +- immediate_assign(host_index, 0, 0, 0, REMOVE_LDN); ++ immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN); + /* set only LDN for remapped device */ +- immediate_assign(host_index, target, cmd->device->lun, ldn, SET_LDN); ++ immediate_assign(shpnt, target, cmd->device->lun, ldn, SET_LDN); + /* get device information for ld[ldn] */ +- if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) { +- ld(host_index)[ldn].cmd = NULL; /* To prevent panic set 0, because ++ if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) { ++ ld(shpnt)[ldn].cmd = NULL; /* To prevent panic set 0, because + devices that were not assigned, + should have nothing in progress. 
*/ +- get_scsi(host_index)[target][cmd->device->lun] = ld(host_index)[ldn].device_type; ++ get_scsi(shpnt)[target][cmd->device->lun] = ld(shpnt)[ldn].device_type; + /* increase assignment counters for statistics in /proc */ +- IBM_DS(host_index).dynamical_assignments++; +- IBM_DS(host_index).ldn_assignments[ldn]++; ++ IBM_DS(shpnt).dynamical_assignments++; ++ IBM_DS(shpnt).ldn_assignments[ldn]++; + } else + /* panic here, because a device, found at boottime has + vanished */ + panic("IBM MCA SCSI: ldn=0x%x, SCSI-device on (%d,%d) vanished!\n", ldn, target, cmd->device->lun); + /* unassign again all ldns (pun,lun,ldn does not matter for remove) */ +- immediate_assign(host_index, 0, 0, 0, REMOVE_LDN); ++ immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN); + /* remap all ldns, as written in the pun/lun table */ + lun = 0; + #ifdef CONFIG_SCSI_MULTI_LUN + for (lun = 0; lun < 8; lun++) + #endif + for (id = 0; id < max_pun; id++) { +- if (get_ldn(host_index)[id][lun] <= MAX_LOG_DEV) +- immediate_assign(host_index, id, lun, get_ldn(host_index)[id][lun], SET_LDN); ++ if (get_ldn(shpnt)[id][lun] <= MAX_LOG_DEV) ++ immediate_assign(shpnt, id, lun, get_ldn(shpnt)[id][lun], SET_LDN); + } + /* set back to normal interrupt_handling */ +- local_checking_phase_flag(host_index) = 0; ++ local_checking_phase_flag(shpnt) = 0; + #ifdef IM_DEBUG_PROBE + /* Information on syslog terminal */ + printk("IBM MCA SCSI: ldn=0x%x dynamically reassigned to (%d,%d).\n", ldn, target, cmd->device->lun); + #endif + /* increase next_ldn for next dynamical assignment */ +- next_ldn(host_index)++; +- if (next_ldn(host_index) >= MAX_LOG_DEV) +- next_ldn(host_index) = 7; ++ next_ldn(shpnt)++; ++ if (next_ldn(shpnt) >= MAX_LOG_DEV) ++ next_ldn(shpnt) = 7; + } else { /* wall against Linux accesses to the subsystem adapter */ + cmd->result = DID_BAD_TARGET << 16; + if (done) +@@ -1923,34 +1815,32 @@ + } + + /*verify there is no command already in progress for this log dev */ +- if (ld(host_index)[ldn].cmd) ++ if (ld(shpnt)[ldn].cmd) + panic("IBM MCA SCSI: cmd already in progress for this ldn.\n"); + + /*save done in cmd, and save cmd for the interrupt handler */ + cmd->scsi_done = done; +- ld(host_index)[ldn].cmd = cmd; ++ ld(shpnt)[ldn].cmd = cmd; + + /*fill scb information independent of the scsi command */ +- scb = &(ld(host_index)[ldn].scb); +- ld(host_index)[ldn].tsb.dev_status = 0; ++ scb = &(ld(shpnt)[ldn].scb); ++ ld(shpnt)[ldn].tsb.dev_status = 0; + scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE; +- scb->tsb_adr = isa_virt_to_bus(&(ld(host_index)[ldn].tsb)); ++ scb->tsb_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].tsb)); + scsi_cmd = cmd->cmnd[0]; + +- if (cmd->use_sg) { +- i = cmd->use_sg; +- sl = (struct scatterlist *) (cmd->request_buffer); +- if (i > 16) +- panic("IBM MCA SCSI: scatter-gather list too long.\n"); +- while (--i >= 0) { +- ld(host_index)[ldn].sge[i].address = (void *) (isa_page_to_bus(sl[i].page) + sl[i].offset); +- ld(host_index)[ldn].sge[i].byte_length = sl[i].length; ++ if (scsi_sg_count(cmd)) { ++ BUG_ON(scsi_sg_count(cmd) > 16); ++ ++ scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { ++ ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset); ++ ld(shpnt)[ldn].sge[i].byte_length = sg->length; + } + scb->enable |= IM_POINTER_TO_LIST; +- scb->sys_buf_adr = isa_virt_to_bus(&(ld(host_index)[ldn].sge[0])); +- scb->sys_buf_length = cmd->use_sg * sizeof(struct im_sge); ++ scb->sys_buf_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].sge[0])); ++ scb->sys_buf_length = scsi_sg_count(cmd) * 
sizeof(struct im_sge); + } else { +- scb->sys_buf_adr = isa_virt_to_bus(cmd->request_buffer); ++ scb->sys_buf_adr = isa_virt_to_bus(scsi_sglist(cmd)); + /* recent Linux midlevel SCSI places 1024 byte for inquiry + * command. Far too much for old PS/2 hardware. */ + switch (scsi_cmd) { +@@ -1961,16 +1851,16 @@ + case REQUEST_SENSE: + case MODE_SENSE: + case MODE_SELECT: +- if (cmd->request_bufflen > 255) ++ if (scsi_bufflen(cmd) > 255) + scb->sys_buf_length = 255; + else +- scb->sys_buf_length = cmd->request_bufflen; ++ scb->sys_buf_length = scsi_bufflen(cmd); + break; + case TEST_UNIT_READY: + scb->sys_buf_length = 0; + break; + default: +- scb->sys_buf_length = cmd->request_bufflen; ++ scb->sys_buf_length = scsi_bufflen(cmd); + break; + } + } +@@ -1982,16 +1872,16 @@ + + /* for specific device-type debugging: */ + #ifdef IM_DEBUG_CMD_SPEC_DEV +- if (ld(host_index)[ldn].device_type == IM_DEBUG_CMD_DEVICE) +- printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(host_index)[ldn].device_type, scsi_cmd, ldn); ++ if (ld(shpnt)[ldn].device_type == IM_DEBUG_CMD_DEVICE) ++ printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(shpnt)[ldn].device_type, scsi_cmd, ldn); + #endif + + /* for possible panics store current command */ +- last_scsi_command(host_index)[ldn] = scsi_cmd; +- last_scsi_type(host_index)[ldn] = IM_SCB; ++ last_scsi_command(shpnt)[ldn] = scsi_cmd; ++ last_scsi_type(shpnt)[ldn] = IM_SCB; + /* update statistical info */ +- IBM_DS(host_index).total_accesses++; +- IBM_DS(host_index).ldn_access[ldn]++; ++ IBM_DS(shpnt).total_accesses++; ++ IBM_DS(shpnt).ldn_access[ldn]++; + + switch (scsi_cmd) { + case READ_6: +@@ -2003,17 +1893,17 @@ + /* Distinguish between disk and other devices. Only disks (that are the + most frequently accessed devices) should be supported by the + IBM-SCSI-Subsystem commands. */ +- switch (ld(host_index)[ldn].device_type) { ++ switch (ld(shpnt)[ldn].device_type) { + case TYPE_DISK: /* for harddisks enter here ... */ + case TYPE_MOD: /* ... try it also for MO-drives (send flames as */ + /* you like, if this won't work.) */ + if (scsi_cmd == READ_6 || scsi_cmd == READ_10 || scsi_cmd == READ_12) { + /* read command preparations */ + scb->enable |= IM_READ_CONTROL; +- IBM_DS(host_index).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */ ++ IBM_DS(shpnt).ldn_read_access[ldn]++; /* increase READ-access on ldn stat. */ + scb->command = IM_READ_DATA_CMD | IM_NO_DISCONNECT; + } else { /* write command preparations */ +- IBM_DS(host_index).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */ ++ IBM_DS(shpnt).ldn_write_access[ldn]++; /* increase write-count on ldn stat. */ + scb->command = IM_WRITE_DATA_CMD | IM_NO_DISCONNECT; + } + if (scsi_cmd == READ_6 || scsi_cmd == WRITE_6) { +@@ -2023,9 +1913,9 @@ + scb->u1.log_blk_adr = (((unsigned) cmd->cmnd[5]) << 0) | (((unsigned) cmd->cmnd[4]) << 8) | (((unsigned) cmd->cmnd[3]) << 16) | (((unsigned) cmd->cmnd[2]) << 24); + scb->u2.blk.count = (((unsigned) cmd->cmnd[8]) << 0) | (((unsigned) cmd->cmnd[7]) << 8); + } +- last_scsi_logical_block(host_index)[ldn] = scb->u1.log_blk_adr; +- last_scsi_blockcount(host_index)[ldn] = scb->u2.blk.count; +- scb->u2.blk.length = ld(host_index)[ldn].block_length; ++ last_scsi_logical_block(shpnt)[ldn] = scb->u1.log_blk_adr; ++ last_scsi_blockcount(shpnt)[ldn] = scb->u2.blk.count; ++ scb->u2.blk.length = ld(shpnt)[ldn].block_length; + break; + /* for other devices, enter here. Other types are not known by + Linux! 
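/*
 * The scatter-gather hunk above replaces manual indexing of
 * cmd->request_buffer with the midlayer accessors scsi_sg_count(),
 * scsi_sglist() and scsi_bufflen() that this tree provides. Shape of the
 * scsi_for_each_sg() idiom, extracted into an illustrative helper:
 */
static void example_map_sg(struct Scsi_Host *shpnt, Scsi_Cmnd *cmd, int ldn)
{
	struct scatterlist *sg;
	int i;

	scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
		ld(shpnt)[ldn].sge[i].address =
			(void *) (isa_page_to_bus(sg->page) + sg->offset);
		ld(shpnt)[ldn].sge[i].byte_length = sg->length;
	}
}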
TYPE_NO_LUN is forbidden as valid device. */ +@@ -2046,14 +1936,14 @@ + scb->enable |= IM_BYPASS_BUFFER; + scb->u1.scsi_cmd_length = cmd->cmd_len; + memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); +- last_scsi_type(host_index)[ldn] = IM_LONG_SCB; ++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; + /* Read/write on this non-disk devices is also displayworthy, + so flash-up the LED/display. */ + break; + } + break; + case INQUIRY: +- IBM_DS(host_index).ldn_inquiry_access[ldn]++; ++ IBM_DS(shpnt).ldn_inquiry_access[ldn]++; + scb->command = IM_DEVICE_INQUIRY_CMD; + scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; + scb->u1.log_blk_adr = 0; +@@ -2064,7 +1954,7 @@ + scb->u1.log_blk_adr = 0; + scb->u1.scsi_cmd_length = 6; + memcpy(scb->u2.scsi_command, cmd->cmnd, 6); +- last_scsi_type(host_index)[ldn] = IM_LONG_SCB; ++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; + break; + case READ_CAPACITY: + /* the length of system memory buffer must be exactly 8 bytes */ +@@ -2081,12 +1971,12 @@ + /* Commands that need write-only-mode (system -> device): */ + case MODE_SELECT: + case MODE_SELECT_10: +- IBM_DS(host_index).ldn_modeselect_access[ldn]++; ++ IBM_DS(shpnt).ldn_modeselect_access[ldn]++; + scb->command = IM_OTHER_SCSI_CMD_CMD; + scb->enable |= IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; /*Select needs WRITE-enabled */ + scb->u1.scsi_cmd_length = cmd->cmd_len; + memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); +- last_scsi_type(host_index)[ldn] = IM_LONG_SCB; ++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; + break; + /* For other commands, read-only is useful. Most other commands are + running without an input-data-block. */ +@@ -2095,19 +1985,19 @@ + scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER; + scb->u1.scsi_cmd_length = cmd->cmd_len; + memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len); +- last_scsi_type(host_index)[ldn] = IM_LONG_SCB; ++ last_scsi_type(shpnt)[ldn] = IM_LONG_SCB; + break; + } + /*issue scb command, and return */ + if (++disk_rw_in_progress == 1) + PS2_DISK_LED_ON(shpnt->host_no, target); + +- if (last_scsi_type(host_index)[ldn] == IM_LONG_SCB) { +- issue_cmd(host_index, isa_virt_to_bus(scb), IM_LONG_SCB | ldn); +- IBM_DS(host_index).long_scbs++; ++ if (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB) { ++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_LONG_SCB | ldn); ++ IBM_DS(shpnt).long_scbs++; + } else { +- issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn); +- IBM_DS(host_index).scbs++; ++ issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn); ++ IBM_DS(shpnt).scbs++; + } + return 0; + } +@@ -2122,7 +2012,6 @@ + unsigned int ldn; + void (*saved_done) (Scsi_Cmnd *); + int target; +- int host_index; + int max_pun; + unsigned long imm_command; + +@@ -2131,35 +2020,23 @@ + #endif + + shpnt = cmd->device->host; +- /* search for the right hostadapter */ +- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); + +- if (!hosts[host_index]) { /* invalid hostadapter descriptor address */ +- cmd->result = DID_NO_CONNECT << 16; +- if (cmd->scsi_done) +- (cmd->scsi_done) (cmd); +- shpnt = cmd->device->host; +-#ifdef IM_DEBUG_PROBE +- printk(KERN_DEBUG "IBM MCA SCSI: Abort adapter selection failed!\n"); +-#endif +- return SUCCESS; +- } +- max_pun = subsystem_maxid(host_index); ++ max_pun = subsystem_maxid(shpnt); + if (ibm_ansi_order) { + target = max_pun - 1 - cmd->device->id; +- if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index))) 
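/*
 * Both the queuecommand and abort paths renumber targets when
 * ibm_ansi_order is set: IDs count down from max_pun - 1, stepping over
 * the adapter's own PUN. The driver open-codes this in each path; an
 * illustrative helper showing the same mapping in one place:
 */
static int example_map_target(struct Scsi_Host *shpnt, int id, int max_pun)
{
	int target;

	if (!ibm_ansi_order)
		return id;
	target = max_pun - 1 - id;
	if ((target <= subsystem_pun(shpnt)) && (id <= subsystem_pun(shpnt)))
		target--;	/* hop over the subsystem's own PUN */
	else if ((target >= subsystem_pun(shpnt)) && (id >= subsystem_pun(shpnt)))
		target++;
	return target;
}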
++ if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt))) + target--; +- else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index))) ++ else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt))) + target++; + } else + target = cmd->device->id; + + /* get logical device number, and disable system interrupts */ + printk(KERN_WARNING "IBM MCA SCSI: Sending abort to device pun=%d, lun=%d.\n", target, cmd->device->lun); +- ldn = get_ldn(host_index)[target][cmd->device->lun]; ++ ldn = get_ldn(shpnt)[target][cmd->device->lun]; + + /*if cmd for this ldn has already finished, no need to abort */ +- if (!ld(host_index)[ldn].cmd) { ++ if (!ld(shpnt)[ldn].cmd) { + return SUCCESS; + } + +@@ -2170,20 +2047,20 @@ + saved_done = cmd->scsi_done; + cmd->scsi_done = internal_done; + cmd->SCp.Status = 0; +- last_scsi_command(host_index)[ldn] = IM_ABORT_IMM_CMD; +- last_scsi_type(host_index)[ldn] = IM_IMM_CMD; +- imm_command = inl(IM_CMD_REG(host_index)); ++ last_scsi_command(shpnt)[ldn] = IM_ABORT_IMM_CMD; ++ last_scsi_type(shpnt)[ldn] = IM_IMM_CMD; ++ imm_command = inl(IM_CMD_REG(shpnt)); + imm_command &= (unsigned long) (0xffff0000); /* mask reserved stuff */ + imm_command |= (unsigned long) (IM_ABORT_IMM_CMD); + /* must wait for attention reg not busy */ + /* FIXME - timeout, politeness */ + while (1) { +- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) ++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) + break; + } + /* write registers and enable system interrupts */ +- outl(imm_command, IM_CMD_REG(host_index)); +- outb(IM_IMM_CMD | ldn, IM_ATTN_REG(host_index)); ++ outl(imm_command, IM_CMD_REG(shpnt)); ++ outb(IM_IMM_CMD | ldn, IM_ATTN_REG(shpnt)); + #ifdef IM_DEBUG_PROBE + printk("IBM MCA SCSI: Abort queued to adapter...\n"); + #endif +@@ -2202,7 +2079,7 @@ + cmd->result |= DID_ABORT << 16; + if (cmd->scsi_done) + (cmd->scsi_done) (cmd); +- ld(host_index)[ldn].cmd = NULL; ++ ld(shpnt)[ldn].cmd = NULL; + #ifdef IM_DEBUG_PROBE + printk("IBM MCA SCSI: Abort finished with success.\n"); + #endif +@@ -2211,7 +2088,7 @@ + cmd->result |= DID_NO_CONNECT << 16; + if (cmd->scsi_done) + (cmd->scsi_done) (cmd); +- ld(host_index)[ldn].cmd = NULL; ++ ld(shpnt)[ldn].cmd = NULL; + #ifdef IM_DEBUG_PROBE + printk("IBM MCA SCSI: Abort failed.\n"); + #endif +@@ -2236,71 +2113,65 @@ + struct Scsi_Host *shpnt; + Scsi_Cmnd *cmd_aid; + int ticks, i; +- int host_index; + unsigned long imm_command; + + BUG_ON(cmd == NULL); + + ticks = IM_RESET_DELAY * HZ; + shpnt = cmd->device->host; +- /* search for the right hostadapter */ +- for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++); + +- if (!hosts[host_index]) /* invalid hostadapter descriptor address */ +- return FAILED; +- +- if (local_checking_phase_flag(host_index)) { ++ if (local_checking_phase_flag(shpnt)) { + printk(KERN_WARNING "IBM MCA SCSI: unable to reset while checking devices.\n"); + return FAILED; + } + + /* issue reset immediate command to subsystem, and wait for interrupt */ + printk("IBM MCA SCSI: resetting all devices.\n"); +- reset_status(host_index) = IM_RESET_IN_PROGRESS; +- last_scsi_command(host_index)[0xf] = IM_RESET_IMM_CMD; +- last_scsi_type(host_index)[0xf] = IM_IMM_CMD; +- imm_command = inl(IM_CMD_REG(host_index)); ++ reset_status(shpnt) = IM_RESET_IN_PROGRESS; ++ last_scsi_command(shpnt)[0xf] = IM_RESET_IMM_CMD; ++ last_scsi_type(shpnt)[0xf] = IM_IMM_CMD; ++ imm_command = inl(IM_CMD_REG(shpnt)); + imm_command &= 
(unsigned long) (0xffff0000); /* mask reserved stuff */ + imm_command |= (unsigned long) (IM_RESET_IMM_CMD); + /* must wait for attention reg not busy */ + while (1) { +- if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY)) ++ if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY)) + break; + spin_unlock_irq(shpnt->host_lock); + yield(); + spin_lock_irq(shpnt->host_lock); + } + /*write registers and enable system interrupts */ +- outl(imm_command, IM_CMD_REG(host_index)); +- outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(host_index)); ++ outl(imm_command, IM_CMD_REG(shpnt)); ++ outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(shpnt)); + /* wait for interrupt finished or intr_stat register to be set, as the + * interrupt will not be executed, while we are in here! */ + + /* FIXME: This is really really icky we so want a sleeping version of this ! */ +- while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(host_index)) & 0x8f) != 0x8f)) { ++ while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(shpnt)) & 0x8f) != 0x8f)) { + udelay((1 + 999 / HZ) * 1000); + barrier(); + } + /* if reset did not complete, just return an error */ + if (!ticks) { + printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY); +- reset_status(host_index) = IM_RESET_FINISHED_FAIL; ++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL; + return FAILED; + } + +- if ((inb(IM_INTR_REG(host_index)) & 0x8f) == 0x8f) { ++ if ((inb(IM_INTR_REG(shpnt)) & 0x8f) == 0x8f) { + /* analysis done by this routine and not by the intr-routine */ +- if (inb(IM_INTR_REG(host_index)) == 0xaf) +- reset_status(host_index) = IM_RESET_FINISHED_OK_NO_INT; +- else if (inb(IM_INTR_REG(host_index)) == 0xcf) +- reset_status(host_index) = IM_RESET_FINISHED_FAIL; ++ if (inb(IM_INTR_REG(shpnt)) == 0xaf) ++ reset_status(shpnt) = IM_RESET_FINISHED_OK_NO_INT; ++ else if (inb(IM_INTR_REG(shpnt)) == 0xcf) ++ reset_status(shpnt) = IM_RESET_FINISHED_FAIL; + else /* failed, 4get it */ +- reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS_NO_INT; +- outb(IM_EOI | 0xf, IM_ATTN_REG(host_index)); ++ reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS_NO_INT; ++ outb(IM_EOI | 0xf, IM_ATTN_REG(shpnt)); + } + + /* if reset failed, just return an error */ +- if (reset_status(host_index) == IM_RESET_FINISHED_FAIL) { ++ if (reset_status(shpnt) == IM_RESET_FINISHED_FAIL) { + printk(KERN_ERR "IBM MCA SCSI: reset failed.\n"); + return FAILED; + } +@@ -2308,9 +2179,9 @@ + /* so reset finished ok - call outstanding done's, and return success */ + printk(KERN_INFO "IBM MCA SCSI: Reset successfully completed.\n"); + for (i = 0; i < MAX_LOG_DEV; i++) { +- cmd_aid = ld(host_index)[i].cmd; ++ cmd_aid = ld(shpnt)[i].cmd; + if (cmd_aid && cmd_aid->scsi_done) { +- ld(host_index)[i].cmd = NULL; ++ ld(shpnt)[i].cmd = NULL; + cmd_aid->result = DID_RESET << 16; + } + } +@@ -2351,46 +2222,46 @@ + } + + /* calculate percentage of total accesses on a ldn */ +-static int ldn_access_load(int host_index, int ldn) ++static int ldn_access_load(struct Scsi_Host *shpnt, int ldn) + { +- if (IBM_DS(host_index).total_accesses == 0) ++ if (IBM_DS(shpnt).total_accesses == 0) + return (0); +- if (IBM_DS(host_index).ldn_access[ldn] == 0) ++ if (IBM_DS(shpnt).ldn_access[ldn] == 0) + return (0); +- return (IBM_DS(host_index).ldn_access[ldn] * 100) / IBM_DS(host_index).total_accesses; ++ return (IBM_DS(shpnt).ldn_access[ldn] * 100) / IBM_DS(shpnt).total_accesses; + } + + /* calculate total amount of r/w-accesses */ +-static int ldn_access_total_read_write(int 
host_index) ++static int ldn_access_total_read_write(struct Scsi_Host *shpnt) + { + int a; + int i; + + a = 0; + for (i = 0; i <= MAX_LOG_DEV; i++) +- a += IBM_DS(host_index).ldn_read_access[i] + IBM_DS(host_index).ldn_write_access[i]; ++ a += IBM_DS(shpnt).ldn_read_access[i] + IBM_DS(shpnt).ldn_write_access[i]; + return (a); + } + +-static int ldn_access_total_inquiry(int host_index) ++static int ldn_access_total_inquiry(struct Scsi_Host *shpnt) + { + int a; + int i; + + a = 0; + for (i = 0; i <= MAX_LOG_DEV; i++) +- a += IBM_DS(host_index).ldn_inquiry_access[i]; ++ a += IBM_DS(shpnt).ldn_inquiry_access[i]; + return (a); + } + +-static int ldn_access_total_modeselect(int host_index) ++static int ldn_access_total_modeselect(struct Scsi_Host *shpnt) + { + int a; + int i; + + a = 0; + for (i = 0; i <= MAX_LOG_DEV; i++) +- a += IBM_DS(host_index).ldn_modeselect_access[i]; ++ a += IBM_DS(shpnt).ldn_modeselect_access[i]; + return (a); + } + +@@ -2398,19 +2269,14 @@ + static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout) + { + int len = 0; +- int i, id, lun, host_index; ++ int i, id, lun; + unsigned long flags; + int max_pun; + +- for (i = 0; hosts[i] && hosts[i] != shpnt; i++); + +- spin_lock_irqsave(hosts[i]->host_lock, flags); /* Check it */ +- host_index = i; +- if (!shpnt) { +- len += sprintf(buffer + len, "\nIBM MCA SCSI: Can't find adapter"); +- return len; +- } +- max_pun = subsystem_maxid(host_index); ++ spin_lock_irqsave(shpnt->host_lock, flags); /* Check it */ ++ ++ max_pun = subsystem_maxid(shpnt); + + len += sprintf(buffer + len, "\n IBM-SCSI-Subsystem-Linux-Driver, Version %s\n\n\n", IBMMCA_SCSI_DRIVER_VERSION); + len += sprintf(buffer + len, " SCSI Access-Statistics:\n"); +@@ -2421,40 +2287,40 @@ + len += sprintf(buffer + len, " Multiple LUN probing.....: No\n"); + #endif + len += sprintf(buffer + len, " This Hostnumber..........: %d\n", shpnt->host_no); +- len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(host_index))); ++ len += sprintf(buffer + len, " Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(shpnt))); + len += sprintf(buffer + len, " (Shared) IRQ.............: %d\n", IM_IRQ); +- len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(host_index).total_interrupts); +- len += sprintf(buffer + len, " Total SCSI Accesses......: %d\n", IBM_DS(host_index).total_accesses); +- len += sprintf(buffer + len, " Total short SCBs.........: %d\n", IBM_DS(host_index).scbs); +- len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(host_index).long_scbs); +- len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(host_index)); +- len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(host_index)); +- len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(host_index)); +- len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(host_index).total_accesses - ldn_access_total_read_write(host_index) +- - ldn_access_total_modeselect(host_index) +- - ldn_access_total_inquiry(host_index)); +- len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(host_index).total_errors); ++ len += sprintf(buffer + len, " Total Interrupts.........: %d\n", IBM_DS(shpnt).total_interrupts); ++ len += sprintf(buffer + len, " Total SCSI Accesses......: %d\n", IBM_DS(shpnt).total_accesses); ++ len += sprintf(buffer + len, " Total 
short SCBs.........: %d\n", IBM_DS(shpnt).scbs); ++ len += sprintf(buffer + len, " Total long SCBs..........: %d\n", IBM_DS(shpnt).long_scbs); ++ len += sprintf(buffer + len, " Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(shpnt)); ++ len += sprintf(buffer + len, " Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(shpnt)); ++ len += sprintf(buffer + len, " Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(shpnt)); ++ len += sprintf(buffer + len, " Total SCSI other cmds..: %d\n", IBM_DS(shpnt).total_accesses - ldn_access_total_read_write(shpnt) ++ - ldn_access_total_modeselect(shpnt) ++ - ldn_access_total_inquiry(shpnt)); ++ len += sprintf(buffer + len, " Total SCSI command fails.: %d\n\n", IBM_DS(shpnt).total_errors); + len += sprintf(buffer + len, " Logical-Device-Number (LDN) Access-Statistics:\n"); + len += sprintf(buffer + len, " LDN | Accesses [%%] | READ | WRITE | ASSIGNMENTS\n"); + len += sprintf(buffer + len, " -----|--------------|-----------|-----------|--------------\n"); + for (i = 0; i <= MAX_LOG_DEV; i++) +- len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(host_index, i), IBM_DS(host_index).ldn_read_access[i], IBM_DS(host_index).ldn_write_access[i], IBM_DS(host_index).ldn_assignments[i]); ++ len += sprintf(buffer + len, " %2X | %3d | %8d | %8d | %8d\n", i, ldn_access_load(shpnt, i), IBM_DS(shpnt).ldn_read_access[i], IBM_DS(shpnt).ldn_write_access[i], IBM_DS(shpnt).ldn_assignments[i]); + len += sprintf(buffer + len, " -----------------------------------------------------------\n\n"); + len += sprintf(buffer + len, " Dynamical-LDN-Assignment-Statistics:\n"); +- len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(host_index).total_scsi_devices); +- len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(host_index).dyn_flag ? "Yes" : "No "); +- len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(host_index)); +- len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(host_index).dynamical_assignments); ++ len += sprintf(buffer + len, " Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(shpnt).total_scsi_devices); ++ len += sprintf(buffer + len, " Dynamical Assignment necessary...: %s\n", IBM_DS(shpnt).dyn_flag ? 
"Yes" : "No "); ++ len += sprintf(buffer + len, " Next LDN to be assigned..........: 0x%x\n", next_ldn(shpnt)); ++ len += sprintf(buffer + len, " Dynamical assignments done yet...: %d\n", IBM_DS(shpnt).dynamical_assignments); + len += sprintf(buffer + len, "\n Current SCSI-Device-Mapping:\n"); + len += sprintf(buffer + len, " Physical SCSI-Device Map Logical SCSI-Device Map\n"); + len += sprintf(buffer + len, " ID\\LUN 0 1 2 3 4 5 6 7 ID\\LUN 0 1 2 3 4 5 6 7\n"); + for (id = 0; id < max_pun; id++) { + len += sprintf(buffer + len, " %2d ", id); + for (lun = 0; lun < 8; lun++) +- len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(host_index)[id][lun])); ++ len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(shpnt)[id][lun])); + len += sprintf(buffer + len, " %2d ", id); + for (lun = 0; lun < 8; lun++) +- len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(host_index)[id][lun])); ++ len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(shpnt)[id][lun])); + len += sprintf(buffer + len, "\n"); + } + +@@ -2488,20 +2354,31 @@ + + __setup("ibmmcascsi=", option_setup); + +-static struct scsi_host_template driver_template = { +- .proc_name = "ibmmca", +- .proc_info = ibmmca_proc_info, +- .name = "IBM SCSI-Subsystem", +- .detect = ibmmca_detect, +- .release = ibmmca_release, +- .queuecommand = ibmmca_queuecommand, +- .eh_abort_handler = ibmmca_abort, +- .eh_host_reset_handler = ibmmca_host_reset, +- .bios_param = ibmmca_biosparam, +- .can_queue = 16, +- .this_id = 7, +- .sg_tablesize = 16, +- .cmd_per_lun = 1, +- .use_clustering = ENABLE_CLUSTERING, ++static struct mca_driver ibmmca_driver = { ++ .id_table = ibmmca_id_table, ++ .driver = { ++ .name = "ibmmca", ++ .bus = &mca_bus_type, ++ .probe = ibmmca_probe, ++ .remove = __devexit_p(ibmmca_remove), ++ }, + }; +-#include "scsi_module.c" ++ ++static int __init ibmmca_init(void) ++{ ++#ifdef MODULE ++ /* If the driver is run as module, read from conf.modules or cmd-line */ ++ if (boot_options) ++ option_setup(boot_options); ++#endif ++ ++ return mca_register_driver_integrated(&ibmmca_driver, MCA_INTEGSCSI); ++} ++ ++static void __exit ibmmca_exit(void) ++{ ++ mca_unregister_driver(&ibmmca_driver); ++} ++ ++module_init(ibmmca_init); ++module_exit(ibmmca_exit); +diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.h linux-2.6.22-591/drivers/scsi/ibmmca.h +--- linux-2.6.22-570/drivers/scsi/ibmmca.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ibmmca.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,21 +0,0 @@ +-/* +- * Low Level Driver for the IBM Microchannel SCSI Subsystem +- * (Headerfile, see Documentation/scsi/ibmmca.txt for description of the +- * IBM MCA SCSI-driver. +- * For use under the GNU General Public License within the Linux-kernel project. +- * This include file works only correctly with kernel 2.4.0 or higher!!! 
*/ +- +-#ifndef _IBMMCA_H +-#define _IBMMCA_H +- +-/* Common forward declarations for all Linux-versions: */ +- +-/* Interfaces to the midlevel Linux SCSI driver */ +-static int ibmmca_detect (struct scsi_host_template *); +-static int ibmmca_release (struct Scsi_Host *); +-static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *)); +-static int ibmmca_abort (Scsi_Cmnd *); +-static int ibmmca_host_reset (Scsi_Cmnd *); +-static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *); +- +-#endif /* _IBMMCA_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c +--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -173,8 +173,7 @@ + } + } + if (in_use) +- printk(KERN_WARNING +- "ibmvscsi: releasing event pool with %d " ++ dev_warn(hostdata->dev, "releasing event pool with %d " + "events still in use?\n", in_use); + kfree(pool->events); + dma_free_coherent(hostdata->dev, +@@ -210,14 +209,12 @@ + struct srp_event_struct *evt) + { + if (!valid_event_struct(pool, evt)) { +- printk(KERN_ERR +- "ibmvscsi: Freeing invalid event_struct %p " ++ dev_err(evt->hostdata->dev, "Freeing invalid event_struct %p " + "(not in pool %p)\n", evt, pool->events); + return; + } + if (atomic_inc_return(&evt->free) != 1) { +- printk(KERN_ERR +- "ibmvscsi: Freeing event_struct %p " ++ dev_err(evt->hostdata->dev, "Freeing event_struct %p " + "which is not in use!\n", evt); + return; + } +@@ -408,13 +405,6 @@ + return 1; + } + +- if (sg_mapped > SG_ALL) { +- printk(KERN_ERR +- "ibmvscsi: More than %d mapped sg entries, got %d\n", +- SG_ALL, sg_mapped); +- return 0; +- } +- + indirect->table_desc.va = 0; + indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf); + indirect->table_desc.key = 0; +@@ -433,10 +423,9 @@ + SG_ALL * sizeof(struct srp_direct_buf), + &evt_struct->ext_list_token, 0); + if (!evt_struct->ext_list) { +- printk(KERN_ERR +- "ibmvscsi: Can't allocate memory for indirect table\n"); ++ sdev_printk(KERN_ERR, cmd->device, ++ "Can't allocate memory for indirect table\n"); + return 0; +- + } + } + +@@ -471,8 +460,8 @@ + cmd->request_bufflen, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(data->va)) { +- printk(KERN_ERR +- "ibmvscsi: Unable to map request_buffer for command!\n"); ++ sdev_printk(KERN_ERR, cmd->device, ++ "Unable to map request_buffer for command!\n"); + return 0; + } + data->len = cmd->request_bufflen; +@@ -503,12 +492,12 @@ + case DMA_NONE: + return 1; + case DMA_BIDIRECTIONAL: +- printk(KERN_ERR +- "ibmvscsi: Can't map DMA_BIDIRECTIONAL to read/write\n"); ++ sdev_printk(KERN_ERR, cmd->device, ++ "Can't map DMA_BIDIRECTIONAL to read/write\n"); + return 0; + default: +- printk(KERN_ERR +- "ibmvscsi: Unknown data direction 0x%02x; can't map!\n", ++ sdev_printk(KERN_ERR, cmd->device, ++ "Unknown data direction 0x%02x; can't map!\n", + cmd->sc_data_direction); + return 0; + } +@@ -520,6 +509,70 @@ + return map_single_data(cmd, srp_cmd, dev); + } + ++/** ++ * purge_requests: Our virtual adapter just shut down. 
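Most of the ibmvscsi churn above is mechanical: bare printk(KERN_ERR "ibmvscsi: ...") calls become dev_err()/dev_warn()/dev_info() on hostdata->dev (or sdev_printk() where a scsi_device is at hand), so every message carries the device's name automatically. The shape of the rewrite, shown on a hypothetical message rather than a hunk from this patch:

        #include <linux/kernel.h>
        #include <linux/device.h>

        static void example_report(struct device *dev, int rc)
        {
                /* old style: driver prefix spelled out by hand */
                printk(KERN_ERR "ibmvscsi: request failed, rc=%d\n", rc);

                /* new style: the core prefixes "driver bus-id: " itself */
                dev_err(dev, "request failed, rc=%d\n", rc);
        }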
purge any sent requests ++ * @hostdata: the adapter ++ */ ++static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) ++{ ++ struct srp_event_struct *tmp_evt, *pos; ++ unsigned long flags; ++ ++ spin_lock_irqsave(hostdata->host->host_lock, flags); ++ list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) { ++ list_del(&tmp_evt->list); ++ del_timer(&tmp_evt->timer); ++ if (tmp_evt->cmnd) { ++ tmp_evt->cmnd->result = (error_code << 16); ++ unmap_cmd_data(&tmp_evt->iu.srp.cmd, ++ tmp_evt, ++ tmp_evt->hostdata->dev); ++ if (tmp_evt->cmnd_done) ++ tmp_evt->cmnd_done(tmp_evt->cmnd); ++ } else if (tmp_evt->done) ++ tmp_evt->done(tmp_evt); ++ free_event_struct(&tmp_evt->hostdata->pool, tmp_evt); ++ } ++ spin_unlock_irqrestore(hostdata->host->host_lock, flags); ++} ++ ++/** ++ * ibmvscsi_reset_host - Reset the connection to the server ++ * @hostdata: struct ibmvscsi_host_data to reset ++*/ ++static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata) ++{ ++ scsi_block_requests(hostdata->host); ++ atomic_set(&hostdata->request_limit, 0); ++ ++ purge_requests(hostdata, DID_ERROR); ++ if ((ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata)) || ++ (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0)) || ++ (vio_enable_interrupts(to_vio_dev(hostdata->dev)))) { ++ atomic_set(&hostdata->request_limit, -1); ++ dev_err(hostdata->dev, "error after reset\n"); ++ } ++ ++ scsi_unblock_requests(hostdata->host); ++} ++ ++/** ++ * ibmvscsi_timeout - Internal command timeout handler ++ * @evt_struct: struct srp_event_struct that timed out ++ * ++ * Called when an internally generated command times out ++*/ ++static void ibmvscsi_timeout(struct srp_event_struct *evt_struct) ++{ ++ struct ibmvscsi_host_data *hostdata = evt_struct->hostdata; ++ ++ dev_err(hostdata->dev, "Command timed out (%x). Resetting connection\n", ++ evt_struct->iu.srp.cmd.opcode); ++ ++ ibmvscsi_reset_host(hostdata); ++} ++ ++ + /* ------------------------------------------------------------ + * Routines for sending and receiving SRPs + */ +@@ -527,12 +580,14 @@ + * ibmvscsi_send_srp_event: - Transforms event to u64 array and calls send_crq() + * @evt_struct: evt_struct to be sent + * @hostdata: ibmvscsi_host_data of host ++ * @timeout: timeout in seconds - 0 means do not time command + * + * Returns the value returned from ibmvscsi_send_crq(). 
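ibmvscsi_timeout() above is armed through the classic 2.6.22 timer API: the event pointer is smuggled through timer.data and the handler cast to void (*)(unsigned long). A minimal sketch of that arm/disarm pattern with illustrative names (a handler with the native signature needs no cast):

        #include <linux/timer.h>
        #include <linux/jiffies.h>

        struct example_req {
                struct timer_list timer;
                /* ... request state ... */
        };

        static void example_timeout(unsigned long data)
        {
                struct example_req *req = (struct example_req *)data;

                printk(KERN_ERR "request %p timed out\n", req);
                /* then recover, e.g. reset the connection owning req */
        }

        static void example_arm(struct example_req *req, unsigned long secs)
        {
                init_timer(&req->timer);
                req->timer.data = (unsigned long)req;
                req->timer.function = example_timeout;
                req->timer.expires = jiffies + secs * HZ;
                add_timer(&req->timer);
        }

Every completion path then has to del_timer() the request, exactly as the send-failure and response paths in this patch do.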
(Zero for success) + * Note that this routine assumes that host_lock is held for synchronization + */ + static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct, +- struct ibmvscsi_host_data *hostdata) ++ struct ibmvscsi_host_data *hostdata, ++ unsigned long timeout) + { + u64 *crq_as_u64 = (u64 *) &evt_struct->crq; + int request_status; +@@ -588,12 +643,20 @@ + */ + list_add_tail(&evt_struct->list, &hostdata->sent); + ++ init_timer(&evt_struct->timer); ++ if (timeout) { ++ evt_struct->timer.data = (unsigned long) evt_struct; ++ evt_struct->timer.expires = jiffies + (timeout * HZ); ++ evt_struct->timer.function = (void (*)(unsigned long))ibmvscsi_timeout; ++ add_timer(&evt_struct->timer); ++ } ++ + if ((rc = + ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) { + list_del(&evt_struct->list); ++ del_timer(&evt_struct->timer); + +- printk(KERN_ERR "ibmvscsi: send error %d\n", +- rc); ++ dev_err(hostdata->dev, "send error %d\n", rc); + atomic_inc(&hostdata->request_limit); + goto send_error; + } +@@ -634,9 +697,8 @@ + + if (unlikely(rsp->opcode != SRP_RSP)) { + if (printk_ratelimit()) +- printk(KERN_WARNING +- "ibmvscsi: bad SRP RSP type %d\n", +- rsp->opcode); ++ dev_warn(evt_struct->hostdata->dev, ++ "bad SRP RSP type %d\n", rsp->opcode); + } + + if (cmnd) { +@@ -697,7 +759,7 @@ + srp_cmd->lun = ((u64) lun) << 48; + + if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) { +- printk(KERN_ERR "ibmvscsi: couldn't convert cmd to srp_cmd\n"); ++ sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n"); + free_event_struct(&hostdata->pool, evt_struct); + return SCSI_MLQUEUE_HOST_BUSY; + } +@@ -722,7 +784,7 @@ + offsetof(struct srp_indirect_buf, desc_list); + } + +- return ibmvscsi_send_srp_event(evt_struct, hostdata); ++ return ibmvscsi_send_srp_event(evt_struct, hostdata, 0); + } + + /* ------------------------------------------------------------ +@@ -744,10 +806,10 @@ + DMA_BIDIRECTIONAL); + + if (evt_struct->xfer_iu->mad.adapter_info.common.status) { +- printk("ibmvscsi: error %d getting adapter info\n", ++ dev_err(hostdata->dev, "error %d getting adapter info\n", + evt_struct->xfer_iu->mad.adapter_info.common.status); + } else { +- printk("ibmvscsi: host srp version: %s, " ++ dev_info(hostdata->dev, "host srp version: %s, " + "host partition %s (%d), OS %d, max io %u\n", + hostdata->madapter_info.srp_version, + hostdata->madapter_info.partition_name, +@@ -761,10 +823,9 @@ + + if (hostdata->madapter_info.os_type == 3 && + strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) { +- printk("ibmvscsi: host (Ver. %s) doesn't support large" +- "transfers\n", ++ dev_err(hostdata->dev, "host (Ver. 
%s) doesn't support large transfers\n", + hostdata->madapter_info.srp_version); +- printk("ibmvscsi: limiting scatterlists to %d\n", ++ dev_err(hostdata->dev, "limiting scatterlists to %d\n", + MAX_INDIRECT_BUFS); + hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; + } +@@ -784,12 +845,13 @@ + { + struct viosrp_adapter_info *req; + struct srp_event_struct *evt_struct; ++ unsigned long flags; + dma_addr_t addr; + + evt_struct = get_event_struct(&hostdata->pool); + if (!evt_struct) { +- printk(KERN_ERR "ibmvscsi: couldn't allocate an event " +- "for ADAPTER_INFO_REQ!\n"); ++ dev_err(hostdata->dev, ++ "couldn't allocate an event for ADAPTER_INFO_REQ!\n"); + return; + } + +@@ -809,20 +871,20 @@ + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(req->buffer)) { +- printk(KERN_ERR +- "ibmvscsi: Unable to map request_buffer " +- "for adapter_info!\n"); ++ dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n"); + free_event_struct(&hostdata->pool, evt_struct); + return; + } + +- if (ibmvscsi_send_srp_event(evt_struct, hostdata)) { +- printk(KERN_ERR "ibmvscsi: couldn't send ADAPTER_INFO_REQ!\n"); ++ spin_lock_irqsave(hostdata->host->host_lock, flags); ++ if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) { ++ dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n"); + dma_unmap_single(hostdata->dev, + addr, + sizeof(hostdata->madapter_info), + DMA_BIDIRECTIONAL); + } ++ spin_unlock_irqrestore(hostdata->host->host_lock, flags); + }; + + /** +@@ -839,24 +901,23 @@ + case SRP_LOGIN_RSP: /* it worked! */ + break; + case SRP_LOGIN_REJ: /* refused! */ +- printk(KERN_INFO "ibmvscsi: SRP_LOGIN_REJ reason %u\n", ++ dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n", + evt_struct->xfer_iu->srp.login_rej.reason); + /* Login failed. */ + atomic_set(&hostdata->request_limit, -1); + return; + default: +- printk(KERN_ERR +- "ibmvscsi: Invalid login response typecode 0x%02x!\n", ++ dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n", + evt_struct->xfer_iu->srp.login_rsp.opcode); + /* Login failed. */ + atomic_set(&hostdata->request_limit, -1); + return; + } + +- printk(KERN_INFO "ibmvscsi: SRP_LOGIN succeeded\n"); ++ dev_info(hostdata->dev, "SRP_LOGIN succeeded\n"); + + if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0) +- printk(KERN_ERR "ibmvscsi: Invalid request_limit.\n"); ++ dev_err(hostdata->dev, "Invalid request_limit.\n"); + + /* Now we know what the real request-limit is. 
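The extra argument comes with a locking contract: ibmvscsi_send_srp_event() documents that host_lock must already be held, so internally generated requests such as the adapter-info MAD above now take it explicitly around the call. Inside the driver that reduces to the sketch below (init_timeout is the driver's existing module parameter; the timeout is in seconds, 0 disables it):

        #include <linux/spinlock.h>

        static int example_send(struct srp_event_struct *evt_struct,
                                struct ibmvscsi_host_data *hostdata)
        {
                unsigned long flags;
                int rc;

                spin_lock_irqsave(hostdata->host->host_lock, flags);
                rc = ibmvscsi_send_srp_event(evt_struct, hostdata,
                                             init_timeout * 2);
                spin_unlock_irqrestore(hostdata->host->host_lock, flags);
                return rc;
        }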
+ * This value is set rather than added to request_limit because +@@ -885,8 +946,7 @@ + struct srp_login_req *login; + struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool); + if (!evt_struct) { +- printk(KERN_ERR +- "ibmvscsi: couldn't allocate an event for login req!\n"); ++ dev_err(hostdata->dev, "couldn't allocate an event for login req!\n"); + return FAILED; + } + +@@ -907,9 +967,9 @@ + */ + atomic_set(&hostdata->request_limit, 1); + +- rc = ibmvscsi_send_srp_event(evt_struct, hostdata); ++ rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +- printk("ibmvscsic: sent SRP login\n"); ++ dev_info(hostdata->dev, "sent SRP login\n"); + return rc; + }; + +@@ -958,13 +1018,13 @@ + + if (!found_evt) { + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +- return FAILED; ++ return SUCCESS; + } + + evt = get_event_struct(&hostdata->pool); + if (evt == NULL) { + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +- printk(KERN_ERR "ibmvscsi: failed to allocate abort event\n"); ++ sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n"); + return FAILED; + } + +@@ -982,15 +1042,16 @@ + tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK; + tsk_mgmt->task_tag = (u64) found_evt; + +- printk(KERN_INFO "ibmvscsi: aborting command. lun 0x%lx, tag 0x%lx\n", ++ sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n", + tsk_mgmt->lun, tsk_mgmt->task_tag); + + evt->sync_srp = &srp_rsp; + init_completion(&evt->comp); +- rsp_rc = ibmvscsi_send_srp_event(evt, hostdata); ++ rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + if (rsp_rc != 0) { +- printk(KERN_ERR "ibmvscsi: failed to send abort() event\n"); ++ sdev_printk(KERN_ERR, cmd->device, ++ "failed to send abort() event. 
rc=%d\n", rsp_rc); + return FAILED; + } + +@@ -999,8 +1060,7 @@ + /* make sure we got a good response */ + if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) { + if (printk_ratelimit()) +- printk(KERN_WARNING +- "ibmvscsi: abort bad SRP RSP type %d\n", ++ sdev_printk(KERN_WARNING, cmd->device, "abort bad SRP RSP type %d\n", + srp_rsp.srp.rsp.opcode); + return FAILED; + } +@@ -1012,10 +1072,9 @@ + + if (rsp_rc) { + if (printk_ratelimit()) +- printk(KERN_WARNING +- "ibmvscsi: abort code %d for task tag 0x%lx\n", +- rsp_rc, +- tsk_mgmt->task_tag); ++ sdev_printk(KERN_WARNING, cmd->device, ++ "abort code %d for task tag 0x%lx\n", ++ rsp_rc, tsk_mgmt->task_tag); + return FAILED; + } + +@@ -1034,14 +1093,12 @@ + + if (found_evt == NULL) { + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +- printk(KERN_INFO +- "ibmvscsi: aborted task tag 0x%lx completed\n", ++ sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%lx completed\n", + tsk_mgmt->task_tag); + return SUCCESS; + } + +- printk(KERN_INFO +- "ibmvscsi: successfully aborted task tag 0x%lx\n", ++ sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%lx\n", + tsk_mgmt->task_tag); + + cmd->result = (DID_ABORT << 16); +@@ -1076,7 +1133,7 @@ + evt = get_event_struct(&hostdata->pool); + if (evt == NULL) { + spin_unlock_irqrestore(hostdata->host->host_lock, flags); +- printk(KERN_ERR "ibmvscsi: failed to allocate reset event\n"); ++ sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n"); + return FAILED; + } + +@@ -1093,15 +1150,16 @@ + tsk_mgmt->lun = ((u64) lun) << 48; + tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET; + +- printk(KERN_INFO "ibmvscsi: resetting device. lun 0x%lx\n", ++ sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n", + tsk_mgmt->lun); + + evt->sync_srp = &srp_rsp; + init_completion(&evt->comp); +- rsp_rc = ibmvscsi_send_srp_event(evt, hostdata); ++ rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + if (rsp_rc != 0) { +- printk(KERN_ERR "ibmvscsi: failed to send reset event\n"); ++ sdev_printk(KERN_ERR, cmd->device, ++ "failed to send reset event. rc=%d\n", rsp_rc); + return FAILED; + } + +@@ -1110,8 +1168,7 @@ + /* make sure we got a good response */ + if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) { + if (printk_ratelimit()) +- printk(KERN_WARNING +- "ibmvscsi: reset bad SRP RSP type %d\n", ++ sdev_printk(KERN_WARNING, cmd->device, "reset bad SRP RSP type %d\n", + srp_rsp.srp.rsp.opcode); + return FAILED; + } +@@ -1123,8 +1180,8 @@ + + if (rsp_rc) { + if (printk_ratelimit()) +- printk(KERN_WARNING +- "ibmvscsi: reset code %d for task tag 0x%lx\n", ++ sdev_printk(KERN_WARNING, cmd->device, ++ "reset code %d for task tag 0x%lx\n", + rsp_rc, tsk_mgmt->task_tag); + return FAILED; + } +@@ -1154,32 +1211,30 @@ + } + + /** +- * purge_requests: Our virtual adapter just shut down. 
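One behavioural change above is easy to miss among the message rewrites: when eh_abort finds the command is no longer on the sent list it has already completed, so the handler now reports SUCCESS instead of FAILED and spares the mid-layer a pointless escalation. Condensed into an illustrative helper, not the patch's exact code:

        static int example_abort_early_out(struct ibmvscsi_host_data *hostdata,
                                           struct srp_event_struct *found_evt,
                                           unsigned long flags)
        {
                /* host_lock held on entry; found_evt is the result of
                 * scanning hostdata->sent for the command to abort */
                if (!found_evt) {
                        /* completed before the abort could be sent */
                        spin_unlock_irqrestore(hostdata->host->host_lock, flags);
                        return SUCCESS;         /* old code said FAILED */
                }
                return -1;      /* caller goes on to send SRP_TSK_ABORT_TASK */
        }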
purge any sent requests +- * @hostdata: the adapter +- */ +-static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code) ++ * ibmvscsi_eh_host_reset_handler - Reset the connection to the server ++ * @cmd: struct scsi_cmnd having problems ++*/ ++static int ibmvscsi_eh_host_reset_handler(struct scsi_cmnd *cmd) + { +- struct srp_event_struct *tmp_evt, *pos; +- unsigned long flags; ++ unsigned long wait_switch = 0; ++ struct ibmvscsi_host_data *hostdata = ++ (struct ibmvscsi_host_data *)cmd->device->host->hostdata; + +- spin_lock_irqsave(hostdata->host->host_lock, flags); +- list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) { +- list_del(&tmp_evt->list); +- if (tmp_evt->cmnd) { +- tmp_evt->cmnd->result = (error_code << 16); +- unmap_cmd_data(&tmp_evt->iu.srp.cmd, +- tmp_evt, +- tmp_evt->hostdata->dev); +- if (tmp_evt->cmnd_done) +- tmp_evt->cmnd_done(tmp_evt->cmnd); +- } else { +- if (tmp_evt->done) { +- tmp_evt->done(tmp_evt); +- } +- } +- free_event_struct(&tmp_evt->hostdata->pool, tmp_evt); ++ dev_err(hostdata->dev, "Resetting connection due to error recovery\n"); ++ ++ ibmvscsi_reset_host(hostdata); ++ ++ for (wait_switch = jiffies + (init_timeout * HZ); ++ time_before(jiffies, wait_switch) && ++ atomic_read(&hostdata->request_limit) < 2;) { ++ ++ msleep(10); + } +- spin_unlock_irqrestore(hostdata->host->host_lock, flags); ++ ++ if (atomic_read(&hostdata->request_limit) <= 0) ++ return FAILED; ++ ++ return SUCCESS; + } + + /** +@@ -1191,6 +1246,7 @@ + void ibmvscsi_handle_crq(struct viosrp_crq *crq, + struct ibmvscsi_host_data *hostdata) + { ++ long rc; + unsigned long flags; + struct srp_event_struct *evt_struct = + (struct srp_event_struct *)crq->IU_data_ptr; +@@ -1198,27 +1254,25 @@ + case 0xC0: /* initialization */ + switch (crq->format) { + case 0x01: /* Initialization message */ +- printk(KERN_INFO "ibmvscsi: partner initialized\n"); ++ dev_info(hostdata->dev, "partner initialized\n"); + /* Send back a response */ +- if (ibmvscsi_send_crq(hostdata, +- 0xC002000000000000LL, 0) == 0) { ++ if ((rc = ibmvscsi_send_crq(hostdata, ++ 0xC002000000000000LL, 0)) == 0) { + /* Now login */ + send_srp_login(hostdata); + } else { +- printk(KERN_ERR +- "ibmvscsi: Unable to send init rsp\n"); ++ dev_err(hostdata->dev, "Unable to send init rsp. 
rc=%ld\n", rc); + } + + break; + case 0x02: /* Initialization response */ +- printk(KERN_INFO +- "ibmvscsi: partner initialization complete\n"); ++ dev_info(hostdata->dev, "partner initialization complete\n"); + + /* Now login */ + send_srp_login(hostdata); + break; + default: +- printk(KERN_ERR "ibmvscsi: unknown crq message type\n"); ++ dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format); + } + return; + case 0xFF: /* Hypervisor telling us the connection is closed */ +@@ -1226,8 +1280,7 @@ + atomic_set(&hostdata->request_limit, 0); + if (crq->format == 0x06) { + /* We need to re-setup the interpartition connection */ +- printk(KERN_INFO +- "ibmvscsi: Re-enabling adapter!\n"); ++ dev_info(hostdata->dev, "Re-enabling adapter!\n"); + purge_requests(hostdata, DID_REQUEUE); + if ((ibmvscsi_reenable_crq_queue(&hostdata->queue, + hostdata)) || +@@ -1235,13 +1288,10 @@ + 0xC001000000000000LL, 0))) { + atomic_set(&hostdata->request_limit, + -1); +- printk(KERN_ERR +- "ibmvscsi: error after" +- " enable\n"); ++ dev_err(hostdata->dev, "error after enable\n"); + } + } else { +- printk(KERN_INFO +- "ibmvscsi: Virtual adapter failed rc %d!\n", ++ dev_err(hostdata->dev, "Virtual adapter failed rc %d!\n", + crq->format); + + purge_requests(hostdata, DID_ERROR); +@@ -1251,8 +1301,7 @@ + 0xC001000000000000LL, 0))) { + atomic_set(&hostdata->request_limit, + -1); +- printk(KERN_ERR +- "ibmvscsi: error after reset\n"); ++ dev_err(hostdata->dev, "error after reset\n"); + } + } + scsi_unblock_requests(hostdata->host); +@@ -1260,8 +1309,7 @@ + case 0x80: /* real payload */ + break; + default: +- printk(KERN_ERR +- "ibmvscsi: got an invalid message type 0x%02x\n", ++ dev_err(hostdata->dev, "got an invalid message type 0x%02x\n", + crq->valid); + return; + } +@@ -1271,15 +1319,13 @@ + * actually sent + */ + if (!valid_event_struct(&hostdata->pool, evt_struct)) { +- printk(KERN_ERR +- "ibmvscsi: returned correlation_token 0x%p is invalid!\n", ++ dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n", + (void *)crq->IU_data_ptr); + return; + } + + if (atomic_read(&evt_struct->free)) { +- printk(KERN_ERR +- "ibmvscsi: received duplicate correlation_token 0x%p!\n", ++ dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n", + (void *)crq->IU_data_ptr); + return; + } +@@ -1288,11 +1334,12 @@ + atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta, + &hostdata->request_limit); + ++ del_timer(&evt_struct->timer); ++ + if (evt_struct->done) + evt_struct->done(evt_struct); + else +- printk(KERN_ERR +- "ibmvscsi: returned done() is NULL; not running it!\n"); ++ dev_err(hostdata->dev, "returned done() is NULL; not running it!\n"); + + /* + * Lock the host_lock before messing with these structures, since we +@@ -1313,13 +1360,13 @@ + { + struct viosrp_host_config *host_config; + struct srp_event_struct *evt_struct; ++ unsigned long flags; + dma_addr_t addr; + int rc; + + evt_struct = get_event_struct(&hostdata->pool); + if (!evt_struct) { +- printk(KERN_ERR +- "ibmvscsi: could't allocate event for HOST_CONFIG!\n"); ++ dev_err(hostdata->dev, "couldn't allocate event for HOST_CONFIG!\n"); + return -1; + } + +@@ -1339,14 +1386,15 @@ + DMA_BIDIRECTIONAL); + + if (dma_mapping_error(host_config->buffer)) { +- printk(KERN_ERR +- "ibmvscsi: dma_mapping error " "getting host config\n"); ++ dev_err(hostdata->dev, "dma_mapping error getting host config\n"); + free_event_struct(&hostdata->pool, evt_struct); + return -1; + } + + init_completion(&evt_struct->comp); +- rc = 
ibmvscsi_send_srp_event(evt_struct, hostdata); ++ spin_lock_irqsave(hostdata->host->host_lock, flags); ++ rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2); ++ spin_unlock_irqrestore(hostdata->host->host_lock, flags); + if (rc == 0) + wait_for_completion(&evt_struct->comp); + dma_unmap_single(hostdata->dev, addr, length, DMA_BIDIRECTIONAL); +@@ -1375,6 +1423,23 @@ + return 0; + } + ++/** ++ * ibmvscsi_change_queue_depth - Change the device's queue depth ++ * @sdev: scsi device struct ++ * @qdepth: depth to set ++ * ++ * Return value: ++ * actual depth set ++ **/ ++static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth) ++{ ++ if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN) ++ qdepth = IBMVSCSI_MAX_CMDS_PER_LUN; ++ ++ scsi_adjust_queue_depth(sdev, 0, qdepth); ++ return sdev->queue_depth; ++} ++ + /* ------------------------------------------------------------ + * sysfs attributes + */ +@@ -1520,7 +1585,9 @@ + .queuecommand = ibmvscsi_queuecommand, + .eh_abort_handler = ibmvscsi_eh_abort_handler, + .eh_device_reset_handler = ibmvscsi_eh_device_reset_handler, ++ .eh_host_reset_handler = ibmvscsi_eh_host_reset_handler, + .slave_configure = ibmvscsi_slave_configure, ++ .change_queue_depth = ibmvscsi_change_queue_depth, + .cmd_per_lun = 16, + .can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT, + .this_id = -1, +@@ -1545,7 +1612,7 @@ + driver_template.can_queue = max_requests; + host = scsi_host_alloc(&driver_template, sizeof(*hostdata)); + if (!host) { +- printk(KERN_ERR "ibmvscsi: couldn't allocate host data\n"); ++ dev_err(&vdev->dev, "couldn't allocate host data\n"); + goto scsi_host_alloc_failed; + } + +@@ -1559,11 +1626,11 @@ + + rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_requests); + if (rc != 0 && rc != H_RESOURCE) { +- printk(KERN_ERR "ibmvscsi: couldn't initialize crq\n"); ++ dev_err(&vdev->dev, "couldn't initialize crq. 
rc=%d\n", rc); + goto init_crq_failed; + } + if (initialize_event_pool(&hostdata->pool, max_requests, hostdata) != 0) { +- printk(KERN_ERR "ibmvscsi: couldn't initialize event pool\n"); ++ dev_err(&vdev->dev, "couldn't initialize event pool\n"); + goto init_pool_failed; + } + +diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h +--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/ibmvscsi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -45,6 +45,7 @@ + #define MAX_INDIRECT_BUFS 10 + + #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100 ++#define IBMVSCSI_MAX_CMDS_PER_LUN 64 + + /* ------------------------------------------------------------ + * Data Structures +@@ -69,6 +70,7 @@ + union viosrp_iu iu; + void (*cmnd_done) (struct scsi_cmnd *); + struct completion comp; ++ struct timer_list timer; + union viosrp_iu *sync_srp; + struct srp_direct_buf *ext_list; + dma_addr_t ext_list_token; +diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c +--- linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ibmvscsi/rpa_vscsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -177,7 +177,7 @@ + memset(&hostdata->madapter_info, 0x00, + sizeof(hostdata->madapter_info)); + +- printk(KERN_INFO "rpa_vscsi: SPR_VERSION: %s\n", SRP_VERSION); ++ dev_info(hostdata->dev, "SRP_VERSION: %s\n", SRP_VERSION); + strcpy(hostdata->madapter_info.srp_version, SRP_VERSION); + + strncpy(hostdata->madapter_info.partition_name, partition_name, +@@ -232,25 +232,24 @@ + + if (rc == 2) { + /* Adapter is good, but other end is not ready */ +- printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n"); ++ dev_warn(hostdata->dev, "Partner adapter not ready\n"); + retrc = 0; + } else if (rc != 0) { +- printk(KERN_WARNING "ibmvscsi: Error %d opening adapter\n", rc); ++ dev_warn(hostdata->dev, "Error %d opening adapter\n", rc); + goto reg_crq_failed; + } + + if (request_irq(vdev->irq, + ibmvscsi_handle_event, + 0, "ibmvscsi", (void *)hostdata) != 0) { +- printk(KERN_ERR "ibmvscsi: couldn't register irq 0x%x\n", ++ dev_err(hostdata->dev, "couldn't register irq 0x%x\n", + vdev->irq); + goto req_irq_failed; + } + + rc = vio_enable_interrupts(vdev); + if (rc != 0) { +- printk(KERN_ERR "ibmvscsi: Error %d enabling interrupts!!!\n", +- rc); ++ dev_err(hostdata->dev, "Error %d enabling interrupts!!!\n", rc); + goto req_irq_failed; + } + +@@ -294,7 +293,7 @@ + } while ((rc == H_IN_PROGRESS) || (rc == H_BUSY) || (H_IS_LONG_BUSY(rc))); + + if (rc) +- printk(KERN_ERR "ibmvscsi: Error %d enabling adapter\n", rc); ++ dev_err(hostdata->dev, "Error %d enabling adapter\n", rc); + return rc; + } + +@@ -327,10 +326,9 @@ + queue->msg_token, PAGE_SIZE); + if (rc == 2) { + /* Adapter is good, but other end is not ready */ +- printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n"); ++ dev_warn(hostdata->dev, "Partner adapter not ready\n"); + } else if (rc != 0) { +- printk(KERN_WARNING +- "ibmvscsi: couldn't register crq--rc 0x%x\n", rc); ++ dev_warn(hostdata->dev, "couldn't register crq--rc 0x%x\n", rc); + } + return rc; + } +diff -Nurb linux-2.6.22-570/drivers/scsi/initio.c linux-2.6.22-591/drivers/scsi/initio.c +--- linux-2.6.22-570/drivers/scsi/initio.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/initio.c 2007-12-21 15:36:12.000000000 -0500 +@@ -3,7 +3,8 @@ + * + * Copyright 
(c) 1994-1998 Initio Corporation + * Copyright (c) 1998 Bas Vermeulen +- * All rights reserved. ++ * Copyright (c) 2004 Christoph Hellwig ++ * Copyright (c) 2007 Red Hat + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by +@@ -19,38 +20,6 @@ + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * +- * -------------------------------------------------------------------------- +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions, and the following disclaimer, +- * without modification, immediately at the beginning of the file. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. The name of the author may not be used to endorse or promote products +- * derived from this software without specific prior written permission. +- * +- * Where this Software is combined with software released under the terms of +- * the GNU General Public License ("GPL") and the terms of the GPL would require the +- * combined work to also be released under the terms of the GPL, the terms +- * and conditions of this License will apply in addition to those of the +- * GPL with the exception of any terms or conditions of this License that +- * conflict with, or are expressly prohibited by, the GPL. +- * +- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. + * + ************************************************************************* + * +@@ -70,14 +39,14 @@ + * - Fix memory allocation problem + * 03/04/98 hc - v1.01l + * - Fix tape rewind which will hang the system problem +- * - Set can_queue to tul_num_scb ++ * - Set can_queue to initio_num_scb + * 06/25/98 hc - v1.01m + * - Get it work for kernel version >= 2.1.75 +- * - Dynamic assign SCSI bus reset holding time in init_tulip() ++ * - Dynamic assign SCSI bus reset holding time in initio_init() + * 07/02/98 hc - v1.01n + * - Support 0002134A + * 08/07/98 hc - v1.01o +- * - Change the tul_abort_srb routine to use scsi_done. <01> ++ * - Change the initio_abort_srb routine to use scsi_done. 
<01> + * 09/07/98 hl - v1.02 + * - Change the INI9100U define and proc_dir_entry to + * reflect the newer Kernel 2.1.118, but the v1.o1o +@@ -150,23 +119,13 @@ + static unsigned int i91u_debug = DEBUG_DEFAULT; + #endif + +-#define TUL_RDWORD(x,y) (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) )) +- +-typedef struct PCI_ID_Struc { +- unsigned short vendor_id; +- unsigned short device_id; +-} PCI_ID; +- +-static int tul_num_ch = 4; /* Maximum 4 adapters */ +-static int tul_num_scb; +-static int tul_tag_enable = 1; +-static SCB *tul_scb; ++static int initio_tag_enable = 1; + + #ifdef DEBUG_i91u + static int setup_debug = 0; + #endif + +-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb); ++static void i91uSCBPost(u8 * pHcb, u8 * pScb); + + /* PCI Devices supported by this driver */ + static struct pci_device_id i91u_pci_devices[] = { +@@ -184,74 +143,66 @@ + #define DEBUG_STATE 0 + #define INT_DISC 0 + +-/*--- external functions --*/ +-static void tul_se2_wait(void); ++/*--- forward references ---*/ ++static struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun); ++static struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host); ++ ++static int tulip_main(struct initio_host * host); ++ ++static int initio_next_state(struct initio_host * host); ++static int initio_state_1(struct initio_host * host); ++static int initio_state_2(struct initio_host * host); ++static int initio_state_3(struct initio_host * host); ++static int initio_state_4(struct initio_host * host); ++static int initio_state_5(struct initio_host * host); ++static int initio_state_6(struct initio_host * host); ++static int initio_state_7(struct initio_host * host); ++static int initio_xfer_data_in(struct initio_host * host); ++static int initio_xfer_data_out(struct initio_host * host); ++static int initio_xpad_in(struct initio_host * host); ++static int initio_xpad_out(struct initio_host * host); ++static int initio_status_msg(struct initio_host * host); ++ ++static int initio_msgin(struct initio_host * host); ++static int initio_msgin_sync(struct initio_host * host); ++static int initio_msgin_accept(struct initio_host * host); ++static int initio_msgout_reject(struct initio_host * host); ++static int initio_msgin_extend(struct initio_host * host); ++ ++static int initio_msgout_ide(struct initio_host * host); ++static int initio_msgout_abort_targ(struct initio_host * host); ++static int initio_msgout_abort_tag(struct initio_host * host); ++ ++static int initio_bus_device_reset(struct initio_host * host); ++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb); ++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb); ++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb); ++static int int_initio_busfree(struct initio_host * host); ++static int int_initio_scsi_rst(struct initio_host * host); ++static int int_initio_bad_seq(struct initio_host * host); ++static int int_initio_resel(struct initio_host * host); ++static int initio_sync_done(struct initio_host * host); ++static int wdtr_done(struct initio_host * host); ++static int wait_tulip(struct initio_host * host); ++static int initio_wait_done_disc(struct initio_host * host); ++static int initio_wait_disc(struct initio_host * host); ++static void tulip_scsi(struct initio_host * host); ++static int initio_post_scsi_rst(struct initio_host * host); ++ ++static void initio_se2_ew_en(unsigned long base); ++static void initio_se2_ew_ds(unsigned long 
base); ++static int initio_se2_rd_all(unsigned long base); ++static void initio_se2_update_all(unsigned long base); /* setup default pattern */ ++static void initio_read_eeprom(unsigned long base); + +-/*--- forward refrence ---*/ +-static SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun); +-static SCB *tul_find_done_scb(HCS * pCurHcb); +- +-static int tulip_main(HCS * pCurHcb); +- +-static int tul_next_state(HCS * pCurHcb); +-static int tul_state_1(HCS * pCurHcb); +-static int tul_state_2(HCS * pCurHcb); +-static int tul_state_3(HCS * pCurHcb); +-static int tul_state_4(HCS * pCurHcb); +-static int tul_state_5(HCS * pCurHcb); +-static int tul_state_6(HCS * pCurHcb); +-static int tul_state_7(HCS * pCurHcb); +-static int tul_xfer_data_in(HCS * pCurHcb); +-static int tul_xfer_data_out(HCS * pCurHcb); +-static int tul_xpad_in(HCS * pCurHcb); +-static int tul_xpad_out(HCS * pCurHcb); +-static int tul_status_msg(HCS * pCurHcb); +- +-static int tul_msgin(HCS * pCurHcb); +-static int tul_msgin_sync(HCS * pCurHcb); +-static int tul_msgin_accept(HCS * pCurHcb); +-static int tul_msgout_reject(HCS * pCurHcb); +-static int tul_msgin_extend(HCS * pCurHcb); +- +-static int tul_msgout_ide(HCS * pCurHcb); +-static int tul_msgout_abort_targ(HCS * pCurHcb); +-static int tul_msgout_abort_tag(HCS * pCurHcb); +- +-static int tul_bus_device_reset(HCS * pCurHcb); +-static void tul_select_atn(HCS * pCurHcb, SCB * pCurScb); +-static void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb); +-static void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb); +-static int int_tul_busfree(HCS * pCurHcb); +-static int int_tul_scsi_rst(HCS * pCurHcb); +-static int int_tul_bad_seq(HCS * pCurHcb); +-static int int_tul_resel(HCS * pCurHcb); +-static int tul_sync_done(HCS * pCurHcb); +-static int wdtr_done(HCS * pCurHcb); +-static int wait_tulip(HCS * pCurHcb); +-static int tul_wait_done_disc(HCS * pCurHcb); +-static int tul_wait_disc(HCS * pCurHcb); +-static void tulip_scsi(HCS * pCurHcb); +-static int tul_post_scsi_rst(HCS * pCurHcb); +- +-static void tul_se2_ew_en(WORD CurBase); +-static void tul_se2_ew_ds(WORD CurBase); +-static int tul_se2_rd_all(WORD CurBase); +-static void tul_se2_update_all(WORD CurBase); /* setup default pattern */ +-static void tul_read_eeprom(WORD CurBase); +- +- /* ---- INTERNAL VARIABLES ---- */ +-static HCS tul_hcs[MAX_SUPPORTED_ADAPTERS]; +-static INI_ADPT_STRUCT i91u_adpt[MAX_SUPPORTED_ADAPTERS]; ++/* ---- INTERNAL VARIABLES ---- */ + +-/*NVRAM nvram, *nvramp = &nvram; */ + static NVRAM i91unvram; + static NVRAM *i91unvramp; + +- +- +-static UCHAR i91udftNvRam[64] = ++static u8 i91udftNvRam[64] = + { +-/*----------- header -----------*/ ++ /*----------- header -----------*/ + 0x25, 0xc9, /* Signature */ + 0x40, /* Size */ + 0x01, /* Revision */ +@@ -289,7 +240,7 @@ + 0, 0}; /* - CheckSum - */ + + +-static UCHAR tul_rate_tbl[8] = /* fast 20 */ ++static u8 initio_rate_tbl[8] = /* fast 20 */ + { + /* nanosecond devide by 4 */ + 12, /* 50ns, 20M */ +@@ -302,53 +253,17 @@ + 62 /* 250ns, 4M */ + }; + +-static void tul_do_pause(unsigned amount) +-{ /* Pause for amount jiffies */ ++static void initio_do_pause(unsigned amount) ++{ ++ /* Pause for amount jiffies */ + unsigned long the_time = jiffies + amount; + +- while (time_before_eq(jiffies, the_time)); ++ while (time_before_eq(jiffies, the_time)) ++ cpu_relax(); + } + + /*-- forward reference --*/ + +-/******************************************************************* +- Use memeory refresh time ~ 15us * 2 
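The tul_se2_wait() being deleted here timed the EEPROM bitbang off the port-0x61 refresh toggle (roughly 15us per half period, as the old comment says); the rewrite drops all of that for a flat udelay(30) between edges. A clock pulse in the new style is just the following, an illustrative helper using this driver's constants:

        #include <linux/delay.h>
        #include <linux/types.h>
        #include <asm/io.h>

        /* pulse SE2CLK once, keeping chip-select/data bits in 'b' steady */
        static void example_se2_pulse(unsigned long base, u8 b)
        {
                outb(b, base + TUL_NVRAM);              /* -CLK */
                udelay(30);
                outb(b | SE2CLK, base + TUL_NVRAM);     /* +CLK */
                udelay(30);
        }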
+-********************************************************************/ +-void tul_se2_wait(void) +-{ +-#if 1 +- udelay(30); +-#else +- UCHAR readByte; +- +- readByte = TUL_RD(0, 0x61); +- if ((readByte & 0x10) == 0x10) { +- for (;;) { +- readByte = TUL_RD(0, 0x61); +- if ((readByte & 0x10) == 0x10) +- break; +- } +- for (;;) { +- readByte = TUL_RD(0, 0x61); +- if ((readByte & 0x10) != 0x10) +- break; +- } +- } else { +- for (;;) { +- readByte = TUL_RD(0, 0x61); +- if ((readByte & 0x10) == 0x10) +- break; +- } +- for (;;) { +- readByte = TUL_RD(0, 0x61); +- if ((readByte & 0x10) != 0x10) +- break; +- } +- } +-#endif +-} +- +- + /****************************************************************** + Input: instruction for Serial E2PROM + +@@ -379,1174 +294,1019 @@ + + + ******************************************************************/ +-static void tul_se2_instr(WORD CurBase, UCHAR instr) ++ ++/** ++ * initio_se2_instr - bitbang an instruction ++ * @base: Base of InitIO controller ++ * @instr: Instruction for serial E2PROM ++ * ++ * Bitbang an instruction out to the serial E2Prom ++ */ ++ ++static void initio_se2_instr(unsigned long base, u8 instr) + { + int i; +- UCHAR b; ++ u8 b; + +- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO); /* cs+start bit */ +- tul_se2_wait(); +- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK | SE2DO); /* +CLK */ +- tul_se2_wait(); ++ outb(SE2CS | SE2DO, base + TUL_NVRAM); /* cs+start bit */ ++ udelay(30); ++ outb(SE2CS | SE2CLK | SE2DO, base + TUL_NVRAM); /* +CLK */ ++ udelay(30); + + for (i = 0; i < 8; i++) { + if (instr & 0x80) + b = SE2CS | SE2DO; /* -CLK+dataBit */ + else + b = SE2CS; /* -CLK */ +- TUL_WR(CurBase + TUL_NVRAM, b); +- tul_se2_wait(); +- TUL_WR(CurBase + TUL_NVRAM, b | SE2CLK); /* +CLK */ +- tul_se2_wait(); ++ outb(b, base + TUL_NVRAM); ++ udelay(30); ++ outb(b | SE2CLK, base + TUL_NVRAM); /* +CLK */ ++ udelay(30); + instr <<= 1; + } +- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ +- tul_se2_wait(); +- return; ++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */ ++ udelay(30); + } + + +-/****************************************************************** +- Function name : tul_se2_ew_en +- Description : Enable erase/write state of serial EEPROM +-******************************************************************/ +-void tul_se2_ew_en(WORD CurBase) ++/** ++ * initio_se2_ew_en - Enable erase/write ++ * @base: Base address of InitIO controller ++ * ++ * Enable erase/write state of serial EEPROM ++ */ ++void initio_se2_ew_en(unsigned long base) + { +- tul_se2_instr(CurBase, 0x30); /* EWEN */ +- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ +- tul_se2_wait(); +- return; ++ initio_se2_instr(base, 0x30); /* EWEN */ ++ outb(0, base + TUL_NVRAM); /* -CS */ ++ udelay(30); + } + + +-/************************************************************************ +- Disable erase/write state of serial EEPROM +-*************************************************************************/ +-void tul_se2_ew_ds(WORD CurBase) +-{ +- tul_se2_instr(CurBase, 0); /* EWDS */ +- TUL_WR(CurBase + TUL_NVRAM, 0); /* -CS */ +- tul_se2_wait(); +- return; ++/** ++ * initio_se2_ew_ds - Disable erase/write ++ * @base: Base address of InitIO controller ++ * ++ * Disable erase/write state of serial EEPROM ++ */ ++void initio_se2_ew_ds(unsigned long base) ++{ ++ initio_se2_instr(base, 0); /* EWDS */ ++ outb(0, base + TUL_NVRAM); /* -CS */ ++ udelay(30); + } + + +-/****************************************************************** +- Input :address of Serial E2PROM +- Output :value stored in Serial E2PROM 
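The instruction byte initio_se2_instr() shifts out is the opcode folded into the address bits: 0x80|addr reads a word, 0x40|addr writes one, 0x30 is EWEN and 0x00 EWDS. Reading configuration word 5, say, therefore comes down to this usage sketch inside the driver:

        static u16 example_read_word5(unsigned long base)
        {
                /* initio_se2_rd() bitbangs the instruction out, then
                 * samples the 16 data bits back in, MSB first */
                return initio_se2_rd(base, 5);  /* instruction byte 0x85 */
        }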
+-*******************************************************************/ +-static USHORT tul_se2_rd(WORD CurBase, ULONG adr) ++/** ++ * initio_se2_rd - read E2PROM word ++ * @base: Base of InitIO controller ++ * @addr: Address of word in E2PROM ++ * ++ * Read a word from the NV E2PROM device ++ */ ++static u16 initio_se2_rd(unsigned long base, u8 addr) + { +- UCHAR instr, readByte; +- USHORT readWord; ++ u8 instr, rb; ++ u16 val = 0; + int i; + +- instr = (UCHAR) (adr | 0x80); +- tul_se2_instr(CurBase, instr); /* READ INSTR */ +- readWord = 0; ++ instr = (u8) (addr | 0x80); ++ initio_se2_instr(base, instr); /* READ INSTR */ + + for (i = 15; i >= 0; i--) { +- TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK); /* +CLK */ +- tul_se2_wait(); +- TUL_WR(CurBase + TUL_NVRAM, SE2CS); /* -CLK */ ++ outb(SE2CS | SE2CLK, base + TUL_NVRAM); /* +CLK */ ++ udelay(30); ++ outb(SE2CS, base + TUL_NVRAM); /* -CLK */ + + /* sample data after the following edge of clock */ +- readByte = TUL_RD(CurBase, TUL_NVRAM); +- readByte &= SE2DI; +- readWord += (readByte << i); +- tul_se2_wait(); /* 6/20/95 */ ++ rb = inb(base + TUL_NVRAM); ++ rb &= SE2DI; ++ val += (rb << i); ++ udelay(30); /* 6/20/95 */ + } + +- TUL_WR(CurBase + TUL_NVRAM, 0); /* no chip select */ +- tul_se2_wait(); +- return readWord; ++ outb(0, base + TUL_NVRAM); /* no chip select */ ++ udelay(30); ++ return val; + } + +- +-/****************************************************************** +- Input: new value in Serial E2PROM, address of Serial E2PROM +-*******************************************************************/ +-static void tul_se2_wr(WORD CurBase, UCHAR adr, USHORT writeWord) ++/** ++ * initio_se2_wr - read E2PROM word ++ * @base: Base of InitIO controller ++ * @addr: Address of word in E2PROM ++ * @val: Value to write ++ * ++ * Write a word to the NV E2PROM device. Used when recovering from ++ * a problem with the NV. 
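initio_se2_wr() below clocks out the WRITE instruction plus 16 data bits, then polls SE2DI until the part signals completion. Writes only take effect between an EWEN and an EWDS instruction, which is why initio_se2_update_all() brackets its updates with the enable/disable pair; updating a single word by hand would look like this usage sketch:

        static void example_write_word(unsigned long base, u8 addr, u16 val)
        {
                initio_se2_ew_en(base);         /* EWEN: unlock writes */
                initio_se2_wr(base, addr, val); /* busy-polls until done */
                initio_se2_ew_ds(base);         /* EWDS: lock again */
        }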
+-
+-/******************************************************************
+- Input: new value in Serial E2PROM, address of Serial E2PROM
+-*******************************************************************/
+-static void tul_se2_wr(WORD CurBase, UCHAR adr, USHORT writeWord)
++/**
++ * initio_se2_wr - write E2PROM word
++ * @base: Base of InitIO controller
++ * @addr: Address of word in E2PROM
++ * @val: Value to write
++ *
++ * Write a word to the NV E2PROM device. Used when recovering from
++ * a problem with the NV.
++ */
++static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
+ {
+-	UCHAR readByte;
+-	UCHAR instr;
++	u8 rb;
++	u8 instr;
+	int i;
+
+-	instr = (UCHAR) (adr | 0x40);
+-	tul_se2_instr(CurBase, instr);	/* WRITE INSTR */
++	instr = (u8) (addr | 0x40);
++	initio_se2_instr(base, instr);	/* WRITE INSTR */
+	for (i = 15; i >= 0; i--) {
+-		if (writeWord & 0x8000)
+-			TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO);	/* -CLK+dataBit 1 */
++		if (val & 0x8000)
++			outb(SE2CS | SE2DO, base + TUL_NVRAM);	/* -CLK+dataBit 1 */
+		else
+-			TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK+dataBit 0 */
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
+-		writeWord <<= 1;
+-	}
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
+-	tul_se2_wait();
+-	TUL_WR(CurBase + TUL_NVRAM, 0);	/* -CS */
+-	tul_se2_wait();
++			outb(SE2CS, base + TUL_NVRAM);	/* -CLK+dataBit 0 */
++		udelay(30);
++		outb(SE2CS | SE2CLK, base + TUL_NVRAM);	/* +CLK */
++		udelay(30);
++		val <<= 1;
++	}
++	outb(SE2CS, base + TUL_NVRAM);	/* -CLK */
++	udelay(30);
++	outb(0, base + TUL_NVRAM);	/* -CS */
++	udelay(30);
+
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* +CS */
+-	tul_se2_wait();
++	outb(SE2CS, base + TUL_NVRAM);	/* +CS */
++	udelay(30);
+
+	for (;;) {
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
+-		tul_se2_wait();
+-		if ((readByte = TUL_RD(CurBase, TUL_NVRAM)) & SE2DI)
++		outb(SE2CS | SE2CLK, base + TUL_NVRAM);	/* +CLK */
++		udelay(30);
++		outb(SE2CS, base + TUL_NVRAM);	/* -CLK */
++		udelay(30);
++		if ((rb = inb(base + TUL_NVRAM)) & SE2DI)
+			break;	/* write complete */
+	}
+-	TUL_WR(CurBase + TUL_NVRAM, 0);	/* -CS */
+-	return;
++	outb(0, base + TUL_NVRAM);	/* -CS */
+ }
+
++/**
++ * initio_se2_rd_all - read host adapter NV configuration
++ * @base: Base address of InitIO controller
++ *
++ * Reads the E2PROM data into main memory. Ensures that the checksum
++ * and header marker are valid. Returns 1 on success, -1 on error.
++ */
+
+-/***********************************************************************
+- Read SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-int tul_se2_rd_all(WORD CurBase)
++static int initio_se2_rd_all(unsigned long base)
+ {
+	int i;
+-	ULONG chksum = 0;
+-	USHORT *np;
++	u16 chksum = 0;
++	u16 *np;
+
+	i91unvramp = &i91unvram;
+-	np = (USHORT *) i91unvramp;
+-	for (i = 0; i < 32; i++) {
+-		*np++ = tul_se2_rd(CurBase, i);
+-	}
++	np = (u16 *) i91unvramp;
++	for (i = 0; i < 32; i++)
++		*np++ = initio_se2_rd(base, i);
+
+-/*--------------------Is signature "ini" ok ? ----------------*/
++	/* Is signature "ini" ok ? */
+	if (i91unvramp->NVM_Signature != INI_SIGNATURE)
+		return -1;
+-/*---------------------- Is ckecksum ok ? ----------------------*/
+-	np = (USHORT *) i91unvramp;
++	/* Is checksum ok ? */
++	np = (u16 *) i91unvramp;
+	for (i = 0; i < 31; i++)
+		chksum += *np++;
+-	if (i91unvramp->NVM_CheckSum != (USHORT) chksum)
++	if (i91unvramp->NVM_CheckSum != chksum)
+		return -1;
+	return 1;
+ }
+
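initio_se2_rd_all() accepts the image only if word 0 carries the "ini" signature and the last word equals the 16-bit additive sum of the preceding 31 words. A minimal model of that check (the signature value below is made up; the driver compares against its INI_SIGNATURE constant):

#include <stdio.h>

#define NVM_WORDS 32

/* Accept the image only if the last word matches the 16-bit additive
 * sum of the first 31, as initio_se2_rd_all() requires. */
static int nvram_checksum_ok(const unsigned short nv[NVM_WORDS])
{
	unsigned short chksum = 0;
	int i;

	for (i = 0; i < NVM_WORDS - 1; i++)
		chksum += nv[i];	/* wraps naturally at 16 bits */
	return chksum == nv[NVM_WORDS - 1];
}

int main(void)
{
	unsigned short nv[NVM_WORDS] = { 0x6e69, 0x0069 };	/* fake signature */
	unsigned short sum = 0;
	int i;

	for (i = 0; i < NVM_WORDS - 1; i++)
		sum += nv[i];
	nv[NVM_WORDS - 1] = sum;	/* make the image self-consistent */
	printf("checksum ok: %d\n", nvram_checksum_ok(nv));	/* 1 */
	return 0;
}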
+-
+-/***********************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-void tul_se2_update_all(WORD CurBase)
++/**
++ * initio_se2_update_all - Update E2PROM
++ * @base: Base of InitIO controller
++ *
++ * Update the E2PROM by writing any changes into the E2PROM
++ * chip, rewriting the checksum.
++ */
++static void initio_se2_update_all(unsigned long base)
+ {				/* setup default pattern */
+	int i;
+-	ULONG chksum = 0;
+-	USHORT *np, *np1;
++	u16 chksum = 0;
++	u16 *np, *np1;
+
+	i91unvramp = &i91unvram;
+	/* Calculate checksum first */
+-	np = (USHORT *) i91udftNvRam;
++	np = (u16 *) i91udftNvRam;
+	for (i = 0; i < 31; i++)
+		chksum += *np++;
+-	*np = (USHORT) chksum;
+-	tul_se2_ew_en(CurBase);	/* Enable write */
++	*np = chksum;
++	initio_se2_ew_en(base);	/* Enable write */
+
+-	np = (USHORT *) i91udftNvRam;
+-	np1 = (USHORT *) i91unvramp;
++	np = (u16 *) i91udftNvRam;
++	np1 = (u16 *) i91unvramp;
+	for (i = 0; i < 32; i++, np++, np1++) {
+-		if (*np != *np1) {
+-			tul_se2_wr(CurBase, i, *np);
+-		}
++		if (*np != *np1)
++			initio_se2_wr(base, i, *np);
+	}
+-
+-	tul_se2_ew_ds(CurBase);	/* Disable write */
+-	return;
++	initio_se2_ew_ds(base);	/* Disable write */
+ }
+
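initio_se2_update_all() recomputes the checksum over the default pattern, then rewrites only the words that differ from what was read back, which keeps programming time and EEPROM wear down. A sketch of that flow under stated assumptions: the device model here is hypothetical, and the completion poll is bounded instead of the driver's unbounded one.

#include <stdio.h>

#define NVM_WORDS 32

/* Hypothetical device model: a write is "done" after a few polls. */
static int busy_clocks;
static void eeprom_write_word(int addr, unsigned short val)
{
	printf("write word %2d = 0x%04x\n", addr, val);
	busy_clocks = 3;	/* part now needs 3 poll cycles */
}
static int eeprom_ready(void)
{
	return busy_clocks-- <= 0;
}

int main(void)
{
	unsigned short live[NVM_WORDS] = { 0x6e69, 0x1234 };	/* as read back */
	unsigned short dflt[NVM_WORDS] = { 0x6e69, 0xabcd };	/* defaults */
	int i, t;

	/* Only rewrite the words that differ, as initio_se2_update_all()
	 * does, and bound the completion poll instead of spinning forever. */
	for (i = 0; i < NVM_WORDS; i++) {
		if (dflt[i] == live[i])
			continue;
		eeprom_write_word(i, dflt[i]);
		for (t = 0; t < 1000 && !eeprom_ready(); t++)
			;
		if (t == 1000)
			return 1;	/* device never signalled done */
	}
	return 0;
}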
+-/*************************************************************************
+- Function name : read_eeprom
+-**************************************************************************/
+-void tul_read_eeprom(WORD CurBase)
+-{
+-	UCHAR gctrl;
+-
+-	i91unvramp = &i91unvram;
+-/*------Enable EEProm programming ---*/
+-	gctrl = TUL_RD(CurBase, TUL_GCTRL);
+-	TUL_WR(CurBase + TUL_GCTRL, gctrl | TUL_GCTRL_EEPROM_BIT);
+-	if (tul_se2_rd_all(CurBase) != 1) {
+-		tul_se2_update_all(CurBase);	/* setup default pattern */
+-		tul_se2_rd_all(CurBase);	/* load again */
+-	}
+-/*------ Disable EEProm programming ---*/
+-	gctrl = TUL_RD(CurBase, TUL_GCTRL);
+-	TUL_WR(CurBase + TUL_GCTRL, gctrl & ~TUL_GCTRL_EEPROM_BIT);
+-}				/* read_eeprom */
++/**
++ * initio_read_eeprom - Retrieve configuration
++ * @base: Base of InitIO Host Adapter
++ *
++ * Retrieve the host adapter configuration data from the E2PROM. If the
++ * data is invalid then the defaults are used and are also restored
++ * into the E2PROM. This forms the access point for the SCSI driver
++ * into the E2PROM layer; the other E2PROM functions are all for
++ * internal use.
++ *
++ * Must be called single threaded, uses a shared global area.
++ */
+
+-static int Addi91u_into_Adapter_table(WORD wBIOS, WORD wBASE, BYTE bInterrupt,
+-				       BYTE bBus, BYTE bDevice)
++static void initio_read_eeprom(unsigned long base)
+ {
+-	int i, j;
++	u8 gctrl;
+
+-	for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) {
+-		if (i91u_adpt[i].ADPT_BIOS < wBIOS)
+-			continue;
+-		if (i91u_adpt[i].ADPT_BIOS == wBIOS) {
+-			if (i91u_adpt[i].ADPT_BASE == wBASE) {
+-				if (i91u_adpt[i].ADPT_Bus != 0xFF)
+-					return 1;
+-			} else if (i91u_adpt[i].ADPT_BASE < wBASE)
+-				continue;
+-		}
+-		for (j = MAX_SUPPORTED_ADAPTERS - 1; j > i; j--) {
+-			i91u_adpt[j].ADPT_BASE = i91u_adpt[j - 1].ADPT_BASE;
+-			i91u_adpt[j].ADPT_INTR = i91u_adpt[j - 1].ADPT_INTR;
+-			i91u_adpt[j].ADPT_BIOS = i91u_adpt[j - 1].ADPT_BIOS;
+-			i91u_adpt[j].ADPT_Bus = i91u_adpt[j - 1].ADPT_Bus;
+-			i91u_adpt[j].ADPT_Device = i91u_adpt[j - 1].ADPT_Device;
+-		}
+-		i91u_adpt[i].ADPT_BASE = wBASE;
+-		i91u_adpt[i].ADPT_INTR = bInterrupt;
+-		i91u_adpt[i].ADPT_BIOS = wBIOS;
+-		i91u_adpt[i].ADPT_Bus = bBus;
+-		i91u_adpt[i].ADPT_Device = bDevice;
+-		return 0;
+-	}
+-	return 1;
++	i91unvramp = &i91unvram;
++	/* Enable EEProm programming */
++	gctrl = inb(base + TUL_GCTRL);
++	outb(gctrl | TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
++	if (initio_se2_rd_all(base) != 1) {
++		initio_se2_update_all(base);	/* setup default pattern */
++		initio_se2_rd_all(base);	/* load again */
++	}
++	/* Disable EEProm programming */
++	gctrl = inb(base + TUL_GCTRL);
++	outb(gctrl & ~TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
+ }
+
+-static void init_i91uAdapter_table(void)
+-{
+-	int i;
+-
+-	for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) {	/* Initialize adapter structure */
+-		i91u_adpt[i].ADPT_BIOS = 0xffff;
+-		i91u_adpt[i].ADPT_BASE = 0xffff;
+-		i91u_adpt[i].ADPT_INTR = 0xff;
+-		i91u_adpt[i].ADPT_Bus = 0xff;
+-		i91u_adpt[i].ADPT_Device = 0xff;
+-	}
+-	return;
+-}
++/**
++ * initio_stop_bm - stop bus master
++ * @host: InitIO host we are stopping
++ *
++ * Stop any pending DMA operation, aborting the DMA if necessary
++ */
+
+-static void tul_stop_bm(HCS * pCurHcb)
++static void initio_stop_bm(struct initio_host * host)
+ {
+
+-	if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO);
++	if (inb(host->addr + TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
++		outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd);
+		/* wait Abort DMA xfer done */
+-		while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0);
++		while ((inb(host->addr + TUL_Int) & XABT) == 0)
++			cpu_relax();
+	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++	outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ }
+
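Note the loop shape the patch introduces in initio_stop_bm(): instead of a bare `while (...);`, each poll now executes cpu_relax(), which tells the CPU (and, on SMP, a sibling hyperthread) that it is busy-waiting rather than doing work. A userspace mock of the same pattern, with the register read and cpu_relax() stubbed out:

#include <stdio.h>

/* Userspace stand-ins for the pieces the kernel loop relies on. */
#define XABT 0x01
static int pending = 5;
static unsigned char read_int_reg(void) { return --pending ? 0 : XABT; }
static void cpu_relax(void) { /* would be a pause/yield hint in-kernel */ }

int main(void)
{
	/* Same shape as initio_stop_bm(): spin until the abort bit
	 * latches, hinting to the CPU that this is a busy-wait. */
	while ((read_int_reg() & XABT) == 0)
		cpu_relax();
	printf("DMA abort acknowledged\n");
	return 0;
}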
+-/***************************************************************************/
+-static void get_tulipPCIConfig(HCS * pCurHcb, int ch_idx)
+-{
+-	pCurHcb->HCS_Base = i91u_adpt[ch_idx].ADPT_BASE;	/* Supply base address */
+-	pCurHcb->HCS_BIOS = i91u_adpt[ch_idx].ADPT_BIOS;	/* Supply BIOS address */
+-	pCurHcb->HCS_Intr = i91u_adpt[ch_idx].ADPT_INTR;	/* Supply interrupt line */
+-	return;
+-}
++/**
++ * initio_reset_scsi - Reset SCSI host controller
++ * @host: InitIO host to reset
++ * @seconds: Recovery time
++ *
++ * Perform a full reset of the SCSI subsystem.
++ */
+
+-/***************************************************************************/
+-static int tul_reset_scsi(HCS * pCurHcb, int seconds)
++static int initio_reset_scsi(struct initio_host * host, int seconds)
+ {
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_BUS);
++	outb(TSC_RST_BUS, host->addr + TUL_SCtrl0);
+
+-	while (!((pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt)) & TSS_SCSIRST_INT));
+-	/* reset tulip chip */
++	while (!((host->jsint = inb(host->addr + TUL_SInt)) & TSS_SCSIRST_INT))
++		cpu_relax();
+
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, 0);
++	/* reset tulip chip */
++	outb(0, host->addr + TUL_SSignal);
+
+	/* Stall for a while, wait for the target's firmware to be ready, make it 2 sec ! */
+	/* SONY 5200 tape drive won't work if only stall for 1 sec */
+-	tul_do_pause(seconds * HZ);
+-
+-	TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++	/* FIXME: this is a very long busy wait right now */
++	initio_do_pause(seconds * HZ);
+
+-	return (SCSI_RESET_SUCCESS);
++	inb(host->addr + TUL_SInt);
++	return SCSI_RESET_SUCCESS;
+ }
+
+-/***************************************************************************/
+-static int init_tulip(HCS * pCurHcb, SCB * scbp, int tul_num_scb,
+-		      BYTE * pbBiosAdr, int seconds)
++/**
++ * initio_init - set up an InitIO host adapter
++ * @host: InitIO host adapter
++ * @bios_addr: BIOS address
++ *
++ * Set up the host adapter and devices according to the configuration
++ * retrieved from the E2PROM.
++ *
++ * Locking: Calls E2PROM layer code which is not re-entrant so must
++ * run single threaded for now.
++ */
++
++static void initio_init(struct initio_host * host, u8 *bios_addr)
+ {
+	int i;
+-	BYTE *pwFlags;
+-	BYTE *pbHeads;
+-	SCB *pTmpScb, *pPrevScb = NULL;
+-
+-	pCurHcb->HCS_NumScbs = tul_num_scb;
+-	pCurHcb->HCS_Semaph = 1;
+-	spin_lock_init(&pCurHcb->HCS_SemaphLock);
+-	pCurHcb->HCS_JSStatus0 = 0;
+-	pCurHcb->HCS_Scb = scbp;
+-	pCurHcb->HCS_NxtPend = scbp;
+-	pCurHcb->HCS_NxtAvail = scbp;
+-	for (i = 0, pTmpScb = scbp; i < tul_num_scb; i++, pTmpScb++) {
+-		pTmpScb->SCB_TagId = i;
+-		if (i != 0)
+-			pPrevScb->SCB_NxtScb = pTmpScb;
+-		pPrevScb = pTmpScb;
+-	}
+-	pPrevScb->SCB_NxtScb = NULL;
+-	pCurHcb->HCS_ScbEnd = pTmpScb;
+-	pCurHcb->HCS_FirstAvail = scbp;
+-	pCurHcb->HCS_LastAvail = pPrevScb;
+-	spin_lock_init(&pCurHcb->HCS_AvailLock);
+-	pCurHcb->HCS_FirstPend = NULL;
+-	pCurHcb->HCS_LastPend = NULL;
+-	pCurHcb->HCS_FirstBusy = NULL;
+-	pCurHcb->HCS_LastBusy = NULL;
+-	pCurHcb->HCS_FirstDone = NULL;
+-	pCurHcb->HCS_LastDone = NULL;
+-	pCurHcb->HCS_ActScb = NULL;
+-	pCurHcb->HCS_ActTcs = NULL;
++	u8 *flags;
++	u8 *heads;
+
+-	tul_read_eeprom(pCurHcb->HCS_Base);
+-/*---------- get H/A configuration -------------*/
++	/* Get E2PROM configuration */
++	initio_read_eeprom(host->addr);
+	if (i91unvramp->NVM_SCSIInfo[0].NVM_NumOfTarg == 8)
+-		pCurHcb->HCS_MaxTar = 8;
++		host->max_tar = 8;
+	else
+-		pCurHcb->HCS_MaxTar = 16;
++		host->max_tar = 16;
+
+-	pCurHcb->HCS_Config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
++	host->config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
+
+-	pCurHcb->HCS_SCSI_ID = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
+-	pCurHcb->HCS_IdMask = ~(1 << pCurHcb->HCS_SCSI_ID);
++	host->scsi_id = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
++	host->idmask = ~(1 << host->scsi_id);
+
+ #ifdef CHK_PARITY
+	/* Enable parity error response */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_PCMD, TUL_RD(pCurHcb->HCS_Base, TUL_PCMD) | 0x40);
++	outb(inb(host->addr + TUL_PCMD) | 0x40, host->addr + TUL_PCMD);
+ #endif
+
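host->idmask, set a few lines up, is a bitmap of every SCSI ID other than the adapter's own, built with `~(1 << scsi_id)`. The arithmetic is easy to sanity-check in isolation:

#include <stdio.h>

int main(void)
{
	unsigned int scsi_id = 7;	/* a typical host adapter ID */
	unsigned int idmask = ~(1u << scsi_id) & 0xffff;

	/* Every ID except 7 stays usable for targets. */
	printf("idmask = 0x%04x\n", idmask);	/* 0xff7f */
	printf("id 7 allowed? %s\n", (idmask & (1u << 7)) ? "yes" : "no");
	printf("id 2 allowed? %s\n", (idmask & (1u << 2)) ? "yes" : "no");
	return 0;
}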
+ /* Mask all the interrupt */ +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); ++ outb(0x1F, host->addr + TUL_Mask); + +- tul_stop_bm(pCurHcb); ++ initio_stop_bm(host); + /* --- Initialize the tulip --- */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_CHIP); ++ outb(TSC_RST_CHIP, host->addr + TUL_SCtrl0); + + /* program HBA's SCSI ID */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, pCurHcb->HCS_SCSI_ID << 4); ++ outb(host->scsi_id << 4, host->addr + TUL_SScsiId); + + /* Enable Initiator Mode ,phase latch,alternate sync period mode, + disable SCSI reset */ +- if (pCurHcb->HCS_Config & HCC_EN_PAR) +- pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR); ++ if (host->config & HCC_EN_PAR) ++ host->sconf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR); + else +- pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_SConf1); ++ host->sconf1 = (TSC_INITDEFAULT); ++ outb(host->sconf1, host->addr + TUL_SConfig); + + /* Enable HW reselect */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, 0); ++ outb(0, host->addr + TUL_SPeriod); + + /* selection time out = 250 ms */ +- TUL_WR(pCurHcb->HCS_Base + TUL_STimeOut, 153); ++ outb(153, host->addr + TUL_STimeOut); + +-/*--------- Enable SCSI terminator -----*/ +- TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, (pCurHcb->HCS_Config & (HCC_ACT_TERM1 | HCC_ACT_TERM2))); +- TUL_WR(pCurHcb->HCS_Base + TUL_GCTRL1, +- ((pCurHcb->HCS_Config & HCC_AUTO_TERM) >> 4) | (TUL_RD(pCurHcb->HCS_Base, TUL_GCTRL1) & 0xFE)); ++ /* Enable SCSI terminator */ ++ outb((host->config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)), ++ host->addr + TUL_XCtrl); ++ outb(((host->config & HCC_AUTO_TERM) >> 4) | ++ (inb(host->addr + TUL_GCTRL1) & 0xFE), ++ host->addr + TUL_GCTRL1); + + for (i = 0, +- pwFlags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config), +- pbHeads = pbBiosAdr + 0x180; +- i < pCurHcb->HCS_MaxTar; +- i++, pwFlags++) { +- pCurHcb->HCS_Tcs[i].TCS_Flags = *pwFlags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE); +- if (pCurHcb->HCS_Tcs[i].TCS_Flags & TCF_EN_255) +- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63; ++ flags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config), ++ heads = bios_addr + 0x180; ++ i < host->max_tar; ++ i++, flags++) { ++ host->targets[i].flags = *flags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE); ++ if (host->targets[i].flags & TCF_EN_255) ++ host->targets[i].drv_flags = TCF_DRV_255_63; + else +- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0; +- pCurHcb->HCS_Tcs[i].TCS_JS_Period = 0; +- pCurHcb->HCS_Tcs[i].TCS_SConfig0 = pCurHcb->HCS_SConf1; +- pCurHcb->HCS_Tcs[i].TCS_DrvHead = *pbHeads++; +- if (pCurHcb->HCS_Tcs[i].TCS_DrvHead == 255) +- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63; ++ host->targets[i].drv_flags = 0; ++ host->targets[i].js_period = 0; ++ host->targets[i].sconfig0 = host->sconf1; ++ host->targets[i].heads = *heads++; ++ if (host->targets[i].heads == 255) ++ host->targets[i].drv_flags = TCF_DRV_255_63; + else +- pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0; +- pCurHcb->HCS_Tcs[i].TCS_DrvSector = *pbHeads++; +- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY; +- pCurHcb->HCS_ActTags[i] = 0; +- pCurHcb->HCS_MaxTags[i] = 0xFF; ++ host->targets[i].drv_flags = 0; ++ host->targets[i].sectors = *heads++; ++ host->targets[i].flags &= ~TCF_BUSY; ++ host->act_tags[i] = 0; ++ host->max_tags[i] = 0xFF; + } /* for */ + printk("i91u: PCI Base=0x%04X, IRQ=%d, BIOS=0x%04X0, SCSI ID=%d\n", +- pCurHcb->HCS_Base, pCurHcb->HCS_Intr, +- pCurHcb->HCS_BIOS, 
pCurHcb->HCS_SCSI_ID); +-/*------------------- reset SCSI Bus ---------------------------*/ +- if (pCurHcb->HCS_Config & HCC_SCSI_RESET) { +- printk("i91u: Reset SCSI Bus ... \n"); +- tul_reset_scsi(pCurHcb, seconds); +- } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCFG1, 0x17); +- TUL_WR(pCurHcb->HCS_Base + TUL_SIntEnable, 0xE9); +- return (0); ++ host->addr, host->irq, ++ host->bios_addr, host->scsi_id); ++ /* Reset SCSI Bus */ ++ if (host->config & HCC_SCSI_RESET) { ++ printk(KERN_INFO "i91u: Reset SCSI Bus ... \n"); ++ initio_reset_scsi(host, 10); ++ } ++ outb(0x17, host->addr + TUL_SCFG1); ++ outb(0xE9, host->addr + TUL_SIntEnable); + } + +-/***************************************************************************/ +-static SCB *tul_alloc_scb(HCS * hcsp) ++/** ++ * initio_alloc_scb - Allocate an SCB ++ * @host: InitIO host we are allocating for ++ * ++ * Walk the SCB list for the controller and allocate a free SCB if ++ * one exists. ++ */ ++static struct scsi_ctrl_blk *initio_alloc_scb(struct initio_host *host) + { +- SCB *pTmpScb; +- ULONG flags; +- spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags); +- if ((pTmpScb = hcsp->HCS_FirstAvail) != NULL) { ++ struct scsi_ctrl_blk *scb; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&host->avail_lock, flags); ++ if ((scb = host->first_avail) != NULL) { + #if DEBUG_QUEUE +- printk("find scb at %08lx\n", (ULONG) pTmpScb); ++ printk("find scb at %p\n", scb); + #endif +- if ((hcsp->HCS_FirstAvail = pTmpScb->SCB_NxtScb) == NULL) +- hcsp->HCS_LastAvail = NULL; +- pTmpScb->SCB_NxtScb = NULL; +- pTmpScb->SCB_Status = SCB_RENT; ++ if ((host->first_avail = scb->next) == NULL) ++ host->last_avail = NULL; ++ scb->next = NULL; ++ scb->status = SCB_RENT; + } +- spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags); +- return (pTmpScb); ++ spin_unlock_irqrestore(&host->avail_lock, flags); ++ return scb; + } + +-/***************************************************************************/ +-static void tul_release_scb(HCS * hcsp, SCB * scbp) ++/** ++ * initio_release_scb - Release an SCB ++ * @host: InitIO host that owns the SCB ++ * @cmnd: SCB command block being returned ++ * ++ * Return an allocated SCB to the host free list ++ */ ++ ++static void initio_release_scb(struct initio_host * host, struct scsi_ctrl_blk * cmnd) + { +- ULONG flags; ++ unsigned long flags; + + #if DEBUG_QUEUE +- printk("Release SCB %lx; ", (ULONG) scbp); ++ printk("Release SCB %p; ", cmnd); + #endif +- spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags); +- scbp->SCB_Srb = NULL; +- scbp->SCB_Status = 0; +- scbp->SCB_NxtScb = NULL; +- if (hcsp->HCS_LastAvail != NULL) { +- hcsp->HCS_LastAvail->SCB_NxtScb = scbp; +- hcsp->HCS_LastAvail = scbp; ++ spin_lock_irqsave(&(host->avail_lock), flags); ++ cmnd->srb = NULL; ++ cmnd->status = 0; ++ cmnd->next = NULL; ++ if (host->last_avail != NULL) { ++ host->last_avail->next = cmnd; ++ host->last_avail = cmnd; + } else { +- hcsp->HCS_FirstAvail = scbp; +- hcsp->HCS_LastAvail = scbp; ++ host->first_avail = cmnd; ++ host->last_avail = cmnd; + } +- spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags); ++ spin_unlock_irqrestore(&(host->avail_lock), flags); + } + + /***************************************************************************/ +-static void tul_append_pend_scb(HCS * pCurHcb, SCB * scbp) ++static void initio_append_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) + { + + #if DEBUG_QUEUE +- printk("Append pend SCB %lx; ", (ULONG) scbp); ++ printk("Append pend SCB %p; ", scbp); + #endif +- scbp->SCB_Status = SCB_PEND; +- 
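initio_alloc_scb() and initio_release_scb() above treat the SCB pool as a spinlock-guarded singly linked FIFO: allocation pops first_avail, release appends at last_avail. The same discipline in self-contained form, with a pthread mutex standing in for the kernel spinlock:

#include <stdio.h>
#include <pthread.h>

struct scb {
	struct scb *next;
	int id;
};

static struct scb pool[4];
static struct scb *first_avail, *last_avail;
static pthread_mutex_t avail_lock = PTHREAD_MUTEX_INITIALIZER;

static struct scb *alloc_scb(void)
{
	struct scb *scb;

	pthread_mutex_lock(&avail_lock);
	if ((scb = first_avail) != NULL) {
		if ((first_avail = scb->next) == NULL)
			last_avail = NULL;	/* list is now empty */
		scb->next = NULL;
	}
	pthread_mutex_unlock(&avail_lock);
	return scb;
}

static void release_scb(struct scb *scb)
{
	pthread_mutex_lock(&avail_lock);
	scb->next = NULL;
	if (last_avail != NULL) {
		last_avail->next = scb;
		last_avail = scb;
	} else {
		first_avail = last_avail = scb;
	}
	pthread_mutex_unlock(&avail_lock);
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {	/* build the initial free list */
		pool[i].id = i;
		release_scb(&pool[i]);
	}
	printf("got scb %d\n", alloc_scb()->id);	/* 0: strict FIFO */
	printf("got scb %d\n", alloc_scb()->id);	/* 1 */
	return 0;
}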
scbp->SCB_NxtScb = NULL; +- if (pCurHcb->HCS_LastPend != NULL) { +- pCurHcb->HCS_LastPend->SCB_NxtScb = scbp; +- pCurHcb->HCS_LastPend = scbp; ++ scbp->status = SCB_PEND; ++ scbp->next = NULL; ++ if (host->last_pending != NULL) { ++ host->last_pending->next = scbp; ++ host->last_pending = scbp; + } else { +- pCurHcb->HCS_FirstPend = scbp; +- pCurHcb->HCS_LastPend = scbp; ++ host->first_pending = scbp; ++ host->last_pending = scbp; + } + } + + /***************************************************************************/ +-static void tul_push_pend_scb(HCS * pCurHcb, SCB * scbp) ++static void initio_push_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) + { + + #if DEBUG_QUEUE +- printk("Push pend SCB %lx; ", (ULONG) scbp); ++ printk("Push pend SCB %p; ", scbp); + #endif +- scbp->SCB_Status = SCB_PEND; +- if ((scbp->SCB_NxtScb = pCurHcb->HCS_FirstPend) != NULL) { +- pCurHcb->HCS_FirstPend = scbp; ++ scbp->status = SCB_PEND; ++ if ((scbp->next = host->first_pending) != NULL) { ++ host->first_pending = scbp; + } else { +- pCurHcb->HCS_FirstPend = scbp; +- pCurHcb->HCS_LastPend = scbp; ++ host->first_pending = scbp; ++ host->last_pending = scbp; + } + } + +-/***************************************************************************/ +-static SCB *tul_find_first_pend_scb(HCS * pCurHcb) ++static struct scsi_ctrl_blk *initio_find_first_pend_scb(struct initio_host * host) + { +- SCB *pFirstPend; ++ struct scsi_ctrl_blk *first; + + +- pFirstPend = pCurHcb->HCS_FirstPend; +- while (pFirstPend != NULL) { +- if (pFirstPend->SCB_Opcode != ExecSCSI) { +- return (pFirstPend); +- } +- if (pFirstPend->SCB_TagMsg == 0) { +- if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] == 0) && +- !(pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) { +- return (pFirstPend); +- } ++ first = host->first_pending; ++ while (first != NULL) { ++ if (first->opcode != ExecSCSI) ++ return first; ++ if (first->tagmsg == 0) { ++ if ((host->act_tags[first->target] == 0) && ++ !(host->targets[first->target].flags & TCF_BUSY)) ++ return first; + } else { +- if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] >= +- pCurHcb->HCS_MaxTags[pFirstPend->SCB_Target]) | +- (pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) { +- pFirstPend = pFirstPend->SCB_NxtScb; ++ if ((host->act_tags[first->target] >= ++ host->max_tags[first->target]) | ++ (host->targets[first->target].flags & TCF_BUSY)) { ++ first = first->next; + continue; + } +- return (pFirstPend); ++ return first; + } +- pFirstPend = pFirstPend->SCB_NxtScb; ++ first = first->next; + } +- +- +- return (pFirstPend); ++ return first; + } +-/***************************************************************************/ +-static void tul_unlink_pend_scb(HCS * pCurHcb, SCB * pCurScb) ++ ++static void initio_unlink_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scb) + { +- SCB *pTmpScb, *pPrevScb; ++ struct scsi_ctrl_blk *tmp, *prev; + + #if DEBUG_QUEUE +- printk("unlink pend SCB %lx; ", (ULONG) pCurScb); ++ printk("unlink pend SCB %p; ", scb); + #endif + +- pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend; +- while (pTmpScb != NULL) { +- if (pCurScb == pTmpScb) { /* Unlink this SCB */ +- if (pTmpScb == pCurHcb->HCS_FirstPend) { +- if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL) +- pCurHcb->HCS_LastPend = NULL; ++ prev = tmp = host->first_pending; ++ while (tmp != NULL) { ++ if (scb == tmp) { /* Unlink this SCB */ ++ if (tmp == host->first_pending) { ++ if ((host->first_pending = tmp->next) == NULL) ++ host->last_pending = NULL; + } 
else {
+-			pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-			if (pTmpScb == pCurHcb->HCS_LastPend)
+-				pCurHcb->HCS_LastPend = pPrevScb;
++				prev->next = tmp->next;
++				if (tmp == host->last_pending)
++					host->last_pending = prev;
+			}
+-			pTmpScb->SCB_NxtScb = NULL;
++			tmp->next = NULL;
+			break;
+		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+	}
+-	return;
+ }
+-/***************************************************************************/
+-static void tul_append_busy_scb(HCS * pCurHcb, SCB * scbp)
++
++static void initio_append_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+
+ #if DEBUG_QUEUE
+-	printk("append busy SCB %lx; ", (ULONG) scbp);
++	printk("append busy SCB %p; ", scbp);
+ #endif
+-	if (scbp->SCB_TagMsg)
+-		pCurHcb->HCS_ActTags[scbp->SCB_Target]++;
++	if (scbp->tagmsg)
++		host->act_tags[scbp->target]++;
+	else
+-		pCurHcb->HCS_Tcs[scbp->SCB_Target].TCS_Flags |= TCF_BUSY;
+-	scbp->SCB_Status = SCB_BUSY;
+-	scbp->SCB_NxtScb = NULL;
+-	if (pCurHcb->HCS_LastBusy != NULL) {
+-		pCurHcb->HCS_LastBusy->SCB_NxtScb = scbp;
+-		pCurHcb->HCS_LastBusy = scbp;
++		host->targets[scbp->target].flags |= TCF_BUSY;
++	scbp->status = SCB_BUSY;
++	scbp->next = NULL;
++	if (host->last_busy != NULL) {
++		host->last_busy->next = scbp;
++		host->last_busy = scbp;
+	} else {
+-		pCurHcb->HCS_FirstBusy = scbp;
+-		pCurHcb->HCS_LastBusy = scbp;
++		host->first_busy = scbp;
++		host->last_busy = scbp;
+	}
+ }
+
+ /***************************************************************************/
+-static SCB *tul_pop_busy_scb(HCS * pCurHcb)
++static struct scsi_ctrl_blk *initio_pop_busy_scb(struct initio_host * host)
+ {
+-	SCB *pTmpScb;
++	struct scsi_ctrl_blk *tmp;
+
+
+-	if ((pTmpScb = pCurHcb->HCS_FirstBusy) != NULL) {
+-		if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-			pCurHcb->HCS_LastBusy = NULL;
+-		pTmpScb->SCB_NxtScb = NULL;
+-		if (pTmpScb->SCB_TagMsg)
+-			pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++	if ((tmp = host->first_busy) != NULL) {
++		if ((host->first_busy = tmp->next) == NULL)
++			host->last_busy = NULL;
++		tmp->next = NULL;
++		if (tmp->tagmsg)
++			host->act_tags[tmp->target]--;
+		else
+-			pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY;
++			host->targets[tmp->target].flags &= ~TCF_BUSY;
+	}
+ #if DEBUG_QUEUE
+-	printk("Pop busy SCB %lx; ", (ULONG) pTmpScb);
++	printk("Pop busy SCB %p; ", tmp);
+ #endif
+-	return (pTmpScb);
++	return tmp;
+ }
+
+ /***************************************************************************/
+-static void tul_unlink_busy_scb(HCS * pCurHcb, SCB * pCurScb)
++static void initio_unlink_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+-	SCB *pTmpScb, *pPrevScb;
++	struct scsi_ctrl_blk *tmp, *prev;
+
+ #if DEBUG_QUEUE
+-	printk("unlink busy SCB %lx; ", (ULONG) pCurScb);
++	printk("unlink busy SCB %p; ", scb);
+ #endif
+
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;
+-	while (pTmpScb != NULL) {
+-		if (pCurScb == pTmpScb) {	/* Unlink this SCB */
+-			if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+-				if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-					pCurHcb->HCS_LastBusy = NULL;
+-			} else {
+-				pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-				if (pTmpScb == pCurHcb->HCS_LastBusy)
+-					pCurHcb->HCS_LastBusy = pPrevScb;
+-			}
+-			pTmpScb->SCB_NxtScb = NULL;
+-			if (pTmpScb->SCB_TagMsg)
+-				pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++	prev = tmp = host->first_busy;
++	while (tmp != NULL) {
++		if (scb == tmp) {	/* Unlink this SCB */
++			if (tmp == host->first_busy) {
++				if
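The busy-queue helpers keep two pieces of per-target accounting in sync: act_tags[] counts outstanding tagged commands, and TCF_BUSY marks an untagged command in flight. initio_find_first_pend_scb() earlier uses them as an admission test (spelled with a bitwise `|`, which happens to work on those boolean operands). The predicate reduces to:

#include <stdio.h>

#define TCF_BUSY 0x01

struct target {
	unsigned flags;
	int act_tags;
	int max_tags;
};

/* May this command start on the target right now? */
static int may_start(const struct target *t, int tagged)
{
	if (t->flags & TCF_BUSY)
		return 0;			/* untagged cmd in flight */
	if (!tagged)
		return t->act_tags == 0;	/* untagged needs an idle target */
	return t->act_tags < t->max_tags;	/* tagged: stay below the cap */
}

int main(void)
{
	struct target t = { .flags = 0, .act_tags = 2, .max_tags = 3 };

	printf("tagged ok: %d\n", may_start(&t, 1));	/* 1 */
	printf("untagged ok: %d\n", may_start(&t, 0));	/* 0 */
	return 0;
}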
((host->first_busy = tmp->next) == NULL) ++ host->last_busy = NULL; ++ } else { ++ prev->next = tmp->next; ++ if (tmp == host->last_busy) ++ host->last_busy = prev; ++ } ++ tmp->next = NULL; ++ if (tmp->tagmsg) ++ host->act_tags[tmp->target]--; + else +- pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY; ++ host->targets[tmp->target].flags &= ~TCF_BUSY; + break; + } +- pPrevScb = pTmpScb; +- pTmpScb = pTmpScb->SCB_NxtScb; ++ prev = tmp; ++ tmp = tmp->next; + } + return; + } + +-/***************************************************************************/ +-SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun) ++struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun) + { +- SCB *pTmpScb, *pPrevScb; +- WORD scbp_tarlun; ++ struct scsi_ctrl_blk *tmp, *prev; ++ u16 scbp_tarlun; + + +- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; +- while (pTmpScb != NULL) { +- scbp_tarlun = (pTmpScb->SCB_Lun << 8) | (pTmpScb->SCB_Target); ++ prev = tmp = host->first_busy; ++ while (tmp != NULL) { ++ scbp_tarlun = (tmp->lun << 8) | (tmp->target); + if (scbp_tarlun == tarlun) { /* Unlink this SCB */ + break; + } +- pPrevScb = pTmpScb; +- pTmpScb = pTmpScb->SCB_NxtScb; ++ prev = tmp; ++ tmp = tmp->next; + } + #if DEBUG_QUEUE +- printk("find busy SCB %lx; ", (ULONG) pTmpScb); ++ printk("find busy SCB %p; ", tmp); + #endif +- return (pTmpScb); ++ return tmp; + } + +-/***************************************************************************/ +-static void tul_append_done_scb(HCS * pCurHcb, SCB * scbp) ++static void initio_append_done_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp) + { +- + #if DEBUG_QUEUE +- printk("append done SCB %lx; ", (ULONG) scbp); ++ printk("append done SCB %p; ", scbp); + #endif + +- scbp->SCB_Status = SCB_DONE; +- scbp->SCB_NxtScb = NULL; +- if (pCurHcb->HCS_LastDone != NULL) { +- pCurHcb->HCS_LastDone->SCB_NxtScb = scbp; +- pCurHcb->HCS_LastDone = scbp; ++ scbp->status = SCB_DONE; ++ scbp->next = NULL; ++ if (host->last_done != NULL) { ++ host->last_done->next = scbp; ++ host->last_done = scbp; + } else { +- pCurHcb->HCS_FirstDone = scbp; +- pCurHcb->HCS_LastDone = scbp; ++ host->first_done = scbp; ++ host->last_done = scbp; + } + } + +-/***************************************************************************/ +-SCB *tul_find_done_scb(HCS * pCurHcb) ++struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host) + { +- SCB *pTmpScb; +- ++ struct scsi_ctrl_blk *tmp; + +- if ((pTmpScb = pCurHcb->HCS_FirstDone) != NULL) { +- if ((pCurHcb->HCS_FirstDone = pTmpScb->SCB_NxtScb) == NULL) +- pCurHcb->HCS_LastDone = NULL; +- pTmpScb->SCB_NxtScb = NULL; ++ if ((tmp = host->first_done) != NULL) { ++ if ((host->first_done = tmp->next) == NULL) ++ host->last_done = NULL; ++ tmp->next = NULL; + } + #if DEBUG_QUEUE +- printk("find done SCB %lx; ", (ULONG) pTmpScb); ++ printk("find done SCB %p; ",tmp); + #endif +- return (pTmpScb); ++ return tmp; + } + +-/***************************************************************************/ +-static int tul_abort_srb(HCS * pCurHcb, struct scsi_cmnd *srbp) ++static int initio_abort_srb(struct initio_host * host, struct scsi_cmnd *srbp) + { +- ULONG flags; +- SCB *pTmpScb, *pPrevScb; ++ unsigned long flags; ++ struct scsi_ctrl_blk *tmp, *prev; + +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); ++ spin_lock_irqsave(&host->semaph_lock, flags); + +- if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) { +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); ++ if ((host->semaph == 0) && (host->active 
== NULL)) { + /* disable Jasmin SCSI Int */ +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tulip_main(pCurHcb); +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- ++ outb(0x1F, host->addr + TUL_Mask); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); ++ /* FIXME: synchronize_irq needed ? */ ++ tulip_main(host); ++ spin_lock_irqsave(&host->semaph_lock, flags); ++ host->semaph = 1; ++ outb(0x0F, host->addr + TUL_Mask); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_SNOOZE; + } +- pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend; /* Check Pend queue */ +- while (pTmpScb != NULL) { ++ prev = tmp = host->first_pending; /* Check Pend queue */ ++ while (tmp != NULL) { + /* 07/27/98 */ +- if (pTmpScb->SCB_Srb == srbp) { +- if (pTmpScb == pCurHcb->HCS_ActScb) { +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ if (tmp->srb == srbp) { ++ if (tmp == host->active) { ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_BUSY; +- } else if (pTmpScb == pCurHcb->HCS_FirstPend) { +- if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL) +- pCurHcb->HCS_LastPend = NULL; +- } else { +- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; +- if (pTmpScb == pCurHcb->HCS_LastPend) +- pCurHcb->HCS_LastPend = pPrevScb; +- } +- pTmpScb->SCB_HaStat = HOST_ABORTED; +- pTmpScb->SCB_Flags |= SCF_DONE; +- if (pTmpScb->SCB_Flags & SCF_POST) +- (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb); +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ } else if (tmp == host->first_pending) { ++ if ((host->first_pending = tmp->next) == NULL) ++ host->last_pending = NULL; ++ } else { ++ prev->next = tmp->next; ++ if (tmp == host->last_pending) ++ host->last_pending = prev; ++ } ++ tmp->hastat = HOST_ABORTED; ++ tmp->flags |= SCF_DONE; ++ if (tmp->flags & SCF_POST) ++ (*tmp->post) ((u8 *) host, (u8 *) tmp); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_SUCCESS; + } +- pPrevScb = pTmpScb; +- pTmpScb = pTmpScb->SCB_NxtScb; ++ prev = tmp; ++ tmp = tmp->next; + } + +- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ +- while (pTmpScb != NULL) { +- +- if (pTmpScb->SCB_Srb == srbp) { +- +- if (pTmpScb == pCurHcb->HCS_ActScb) { +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ prev = tmp = host->first_busy; /* Check Busy queue */ ++ while (tmp != NULL) { ++ if (tmp->srb == srbp) { ++ if (tmp == host->active) { ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_BUSY; +- } else if (pTmpScb->SCB_TagMsg == 0) { +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ } else if (tmp->tagmsg == 0) { ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_BUSY; + } else { +- pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--; +- if (pTmpScb == pCurHcb->HCS_FirstBusy) { +- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) +- pCurHcb->HCS_LastBusy = NULL; +- } else { +- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; +- if (pTmpScb == pCurHcb->HCS_LastBusy) +- pCurHcb->HCS_LastBusy = pPrevScb; +- } +- pTmpScb->SCB_NxtScb = NULL; +- +- +- pTmpScb->SCB_HaStat = HOST_ABORTED; +- pTmpScb->SCB_Flags |= SCF_DONE; +- if (pTmpScb->SCB_Flags & SCF_POST) +- (*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb); +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ 
host->act_tags[tmp->target]--; ++ if (tmp == host->first_busy) { ++ if ((host->first_busy = tmp->next) == NULL) ++ host->last_busy = NULL; ++ } else { ++ prev->next = tmp->next; ++ if (tmp == host->last_busy) ++ host->last_busy = prev; ++ } ++ tmp->next = NULL; ++ ++ ++ tmp->hastat = HOST_ABORTED; ++ tmp->flags |= SCF_DONE; ++ if (tmp->flags & SCF_POST) ++ (*tmp->post) ((u8 *) host, (u8 *) tmp); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return SCSI_ABORT_SUCCESS; + } + } +- pPrevScb = pTmpScb; +- pTmpScb = pTmpScb->SCB_NxtScb; ++ prev = tmp; ++ tmp = tmp->next; + } +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- return (SCSI_ABORT_NOT_RUNNING); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); ++ return SCSI_ABORT_NOT_RUNNING; + } + + /***************************************************************************/ +-static int tul_bad_seq(HCS * pCurHcb) +-{ +- SCB *pCurScb; +- +- printk("tul_bad_seg c=%d\n", pCurHcb->HCS_Index); +- +- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) { +- tul_unlink_busy_scb(pCurHcb, pCurScb); +- pCurScb->SCB_HaStat = HOST_BAD_PHAS; +- pCurScb->SCB_TaStat = 0; +- tul_append_done_scb(pCurHcb, pCurScb); +- } +- tul_stop_bm(pCurHcb); +- +- tul_reset_scsi(pCurHcb, 8); /* 7/29/98 */ +- +- return (tul_post_scsi_rst(pCurHcb)); +-} +- +-#if 0 +- +-/************************************************************************/ +-static int tul_device_reset(HCS * pCurHcb, struct scsi_cmnd *pSrb, +- unsigned int target, unsigned int ResetFlags) ++static int initio_bad_seq(struct initio_host * host) + { +- ULONG flags; +- SCB *pScb; +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- +- if (ResetFlags & SCSI_RESET_ASYNCHRONOUS) { +- +- if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) { +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); +- /* disable Jasmin SCSI Int */ +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tulip_main(pCurHcb); +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- return SCSI_RESET_SNOOZE; +- } +- pScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ +- while (pScb != NULL) { +- if (pScb->SCB_Srb == pSrb) +- break; +- pScb = pScb->SCB_NxtScb; +- } +- if (pScb == NULL) { +- printk("Unable to Reset - No SCB Found\n"); +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- return SCSI_RESET_NOT_RUNNING; +- } +- } +- if ((pScb = tul_alloc_scb(pCurHcb)) == NULL) { +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- return SCSI_RESET_NOT_RUNNING; +- } +- pScb->SCB_Opcode = BusDevRst; +- pScb->SCB_Flags = SCF_POST; +- pScb->SCB_Target = target; +- pScb->SCB_Mode = 0; +- +- pScb->SCB_Srb = NULL; +- if (ResetFlags & SCSI_RESET_SYNCHRONOUS) { +- pScb->SCB_Srb = pSrb; +- } +- tul_push_pend_scb(pCurHcb, pScb); /* push this SCB to Pending queue */ ++ struct scsi_ctrl_blk *scb; + +- if (pCurHcb->HCS_Semaph == 1) { +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); +- /* disable Jasmin SCSI Int */ +- pCurHcb->HCS_Semaph = 0; +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tulip_main(pCurHcb); +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); ++ printk("initio_bad_seg c=%d\n", host->index); + +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); ++ if ((scb = host->active) != NULL) { ++ initio_unlink_busy_scb(host, scb); ++ scb->hastat = HOST_BAD_PHAS; ++ scb->tastat = 
0; ++ initio_append_done_scb(host, scb); + } +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- return SCSI_RESET_PENDING; +-} +- +-static int tul_reset_scsi_bus(HCS * pCurHcb) +-{ +- ULONG flags; +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); +- pCurHcb->HCS_Semaph = 0; +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tul_stop_bm(pCurHcb); +- +- tul_reset_scsi(pCurHcb, 2); /* 7/29/98 */ +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- tul_post_scsi_rst(pCurHcb); +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tulip_main(pCurHcb); +- +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); +- +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- return (SCSI_RESET_SUCCESS | SCSI_RESET_HOST_RESET); ++ initio_stop_bm(host); ++ initio_reset_scsi(host, 8); /* 7/29/98 */ ++ return initio_post_scsi_rst(host); + } + +-#endif /* 0 */ + + /************************************************************************/ +-static void tul_exec_scb(HCS * pCurHcb, SCB * pCurScb) ++static void initio_exec_scb(struct initio_host * host, struct scsi_ctrl_blk * scb) + { +- ULONG flags; ++ unsigned long flags; + +- pCurScb->SCB_Mode = 0; ++ scb->mode = 0; + +- pCurScb->SCB_SGIdx = 0; +- pCurScb->SCB_SGMax = pCurScb->SCB_SGLen; ++ scb->sgidx = 0; ++ scb->sgmax = scb->sglen; + +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); ++ spin_lock_irqsave(&host->semaph_lock, flags); + +- tul_append_pend_scb(pCurHcb, pCurScb); /* Append this SCB to Pending queue */ ++ initio_append_pend_scb(host, scb); /* Append this SCB to Pending queue */ + + /* VVVVV 07/21/98 */ +- if (pCurHcb->HCS_Semaph == 1) { +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); +- /* disable Jasmin SCSI Int */ +- pCurHcb->HCS_Semaph = 0; +- +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); +- +- tulip_main(pCurHcb); ++ if (host->semaph == 1) { ++ /* Disable Jasmin SCSI Int */ ++ outb(0x1F, host->addr + TUL_Mask); ++ host->semaph = 0; ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + +- spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags); ++ tulip_main(host); + +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); ++ spin_lock_irqsave(&host->semaph_lock, flags); ++ host->semaph = 1; ++ outb(0x0F, host->addr + TUL_Mask); + } +- spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags); ++ spin_unlock_irqrestore(&host->semaph_lock, flags); + return; + } + + /***************************************************************************/ +-static int tul_isr(HCS * pCurHcb) ++static int initio_isr(struct initio_host * host) + { +- /* Enter critical section */ +- +- if (TUL_RD(pCurHcb->HCS_Base, TUL_Int) & TSS_INT_PENDING) { +- if (pCurHcb->HCS_Semaph == 1) { +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F); ++ if (inb(host->addr + TUL_Int) & TSS_INT_PENDING) { ++ if (host->semaph == 1) { ++ outb(0x1F, host->addr + TUL_Mask); + /* Disable Tulip SCSI Int */ +- pCurHcb->HCS_Semaph = 0; ++ host->semaph = 0; + +- tulip_main(pCurHcb); ++ tulip_main(host); + +- pCurHcb->HCS_Semaph = 1; +- TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F); +- return (1); ++ host->semaph = 1; ++ outb(0x0F, host->addr + TUL_Mask); ++ return 1; + } + } +- return (0); ++ return 0; + } + +-/***************************************************************************/ +-int tulip_main(HCS * pCurHcb) ++static int tulip_main(struct initio_host * 
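initio_exec_scb(), initio_isr() and initio_abort_srb() all follow one convention: `semaph` acts as a hand-rolled mutex around tulip_main(), and writing 0x1F/0x0F to TUL_Mask disables and re-enables the chip's interrupt sources for the duration (the real code additionally takes semaph_lock around the flag updates and drops it across tulip_main()). The shape of that protocol, with the hardware stubbed out:

#include <stdio.h>

static int semaph = 1;	/* 1 = state machine free to run */

static void mask_chip_irqs(void)   { printf("TUL_Mask <- 0x1F\n"); }
static void unmask_chip_irqs(void) { printf("TUL_Mask <- 0x0F\n"); }
static void state_machine(void)    { printf("tulip_main()\n"); }

/* Run the state machine only if nobody else owns it; locking around
 * the semaph updates is elided here for brevity. */
static void run_if_free(void)
{
	if (semaph == 1) {
		mask_chip_irqs();	/* chip can't re-enter us via IRQ */
		semaph = 0;
		state_machine();
		semaph = 1;
		unmask_chip_irqs();
	}
}

int main(void)
{
	run_if_free();
	return 0;
}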
host)
+ {
+-	SCB *pCurScb;
++	struct scsi_ctrl_blk *scb;
+
+	for (;;) {
++		tulip_scsi(host);	/* Call tulip_scsi */
+
+-		tulip_scsi(pCurHcb);	/* Call tulip_scsi */
+-
+-		while ((pCurScb = tul_find_done_scb(pCurHcb)) != NULL) {	/* find done entry */
+-			if (pCurScb->SCB_TaStat == INI_QUEUE_FULL) {
+-				pCurHcb->HCS_MaxTags[pCurScb->SCB_Target] =
+-				    pCurHcb->HCS_ActTags[pCurScb->SCB_Target] - 1;
+-				pCurScb->SCB_TaStat = 0;
+-				tul_append_pend_scb(pCurHcb, pCurScb);
++		/* Walk the list of completed SCBs */
++		while ((scb = initio_find_done_scb(host)) != NULL) {	/* find done entry */
++			if (scb->tastat == INI_QUEUE_FULL) {
++				host->max_tags[scb->target] =
++				    host->act_tags[scb->target] - 1;
++				scb->tastat = 0;
++				initio_append_pend_scb(host, scb);
+				continue;
+			}
+-			if (!(pCurScb->SCB_Mode & SCM_RSENS)) {	/* not in auto req. sense mode */
+-				if (pCurScb->SCB_TaStat == 2) {
++			if (!(scb->mode & SCM_RSENS)) {	/* not in auto req. sense mode */
++				if (scb->tastat == 2) {
+
+					/* clr sync. nego flag */
+
+-					if (pCurScb->SCB_Flags & SCF_SENSE) {
+-						BYTE len;
+-						len = pCurScb->SCB_SenseLen;
++					if (scb->flags & SCF_SENSE) {
++						u8 len;
++						len = scb->senselen;
+						if (len == 0)
+							len = 1;
+-						pCurScb->SCB_BufLen = pCurScb->SCB_SenseLen;
+-						pCurScb->SCB_BufPtr = pCurScb->SCB_SensePtr;
+-						pCurScb->SCB_Flags &= ~(SCF_SG | SCF_DIR);	/* for xfer_data_in */
+-/*						pCurScb->SCB_Flags |= SCF_NO_DCHK;	*/
+-						/* so, we won't report worng direction in xfer_data_in,
++						scb->buflen = scb->senselen;
++						scb->bufptr = scb->senseptr;
++						scb->flags &= ~(SCF_SG | SCF_DIR);	/* for xfer_data_in */
++						/* so, we won't report wrong direction in xfer_data_in,
+						   and won't report HOST_DO_DU in state_6 */
+-						pCurScb->SCB_Mode = SCM_RSENS;
+-						pCurScb->SCB_Ident &= 0xBF;	/* Disable Disconnect */
+-						pCurScb->SCB_TagMsg = 0;
+-						pCurScb->SCB_TaStat = 0;
+-						pCurScb->SCB_CDBLen = 6;
+-						pCurScb->SCB_CDB[0] = SCSICMD_RequestSense;
+-						pCurScb->SCB_CDB[1] = 0;
+-						pCurScb->SCB_CDB[2] = 0;
+-						pCurScb->SCB_CDB[3] = 0;
+-						pCurScb->SCB_CDB[4] = len;
+-						pCurScb->SCB_CDB[5] = 0;
+-						tul_push_pend_scb(pCurHcb, pCurScb);
++						scb->mode = SCM_RSENS;
++						scb->ident &= 0xBF;	/* Disable Disconnect */
++						scb->tagmsg = 0;
++						scb->tastat = 0;
++						scb->cdblen = 6;
++						scb->cdb[0] = SCSICMD_RequestSense;
++						scb->cdb[1] = 0;
++						scb->cdb[2] = 0;
++						scb->cdb[3] = 0;
++						scb->cdb[4] = len;
++						scb->cdb[5] = 0;
++						initio_push_pend_scb(host, scb);
+						break;
+					}
+				}
+			} else {	/* in request sense mode */
+
+-				if (pCurScb->SCB_TaStat == 2) {	/* check condition status again after sending
++				if (scb->tastat == 2) {	/* check condition status again after sending
+								   request sense cmd 0x3 */
+-					pCurScb->SCB_HaStat = HOST_BAD_PHAS;
++					scb->hastat = HOST_BAD_PHAS;
+				}
+-				pCurScb->SCB_TaStat = 2;
++				scb->tastat = 2;
+			}
+-			pCurScb->SCB_Flags |= SCF_DONE;
+-			if (pCurScb->SCB_Flags & SCF_POST) {
+-				(*pCurScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pCurScb);
++			scb->flags |= SCF_DONE;
++			if (scb->flags & SCF_POST) {
++				/* FIXME: only one post method and lose casts */
++				(*scb->post) ((u8 *) host, (u8 *) scb);
+			}
+		}		/* while */
+-
+		/* find_active: */
+-		if (TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0) & TSS_INT_PENDING)
++		if (inb(host->addr + TUL_SStatus0) & TSS_INT_PENDING)
+			continue;
+-
+-		if (pCurHcb->HCS_ActScb) {	/* return to OS and wait for xfer_done_ISR/Selected_ISR */
++		if (host->active)	/* return to OS and wait for xfer_done_ISR/Selected_ISR */
+			return 1;	/* return to OS, enable interrupt */
+-		}
+		/* Check pending SCB */
+-		if (tul_find_first_pend_scb(pCurHcb) == NULL) {
++		if
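When a command finishes with CHECK CONDITION (tastat == 2) and a sense buffer was supplied, the loop above rewrites the SCB in place into a 6-byte REQUEST SENSE (standard SCSI opcode 0x03) and pushes it back to the head of the pending queue. Building that CDB is simple enough to show standalone:

#include <stdio.h>

#define SCSICMD_REQUEST_SENSE 0x03

/* Fill a 6-byte REQUEST SENSE CDB asking for `len` bytes of sense data. */
static void build_request_sense(unsigned char cdb[6], unsigned char len)
{
	cdb[0] = SCSICMD_REQUEST_SENSE;
	cdb[1] = 0;	/* LUN bits left zero; the identify msg carries the LUN */
	cdb[2] = 0;
	cdb[3] = 0;
	cdb[4] = len ? len : 1;	/* the driver clamps a zero length to 1 */
	cdb[5] = 0;	/* control byte */
}

int main(void)
{
	unsigned char cdb[6];
	int i;

	build_request_sense(cdb, 18);	/* 18 bytes of fixed-format sense */
	for (i = 0; i < 6; i++)
		printf("%02x ", cdb[i]);
	printf("\n");	/* 03 00 00 00 12 00 */
	return 0;
}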
(initio_find_first_pend_scb(host) == NULL) + return 1; /* return to OS, enable interrupt */ +- } + } /* End of for loop */ + /* statement won't reach here */ + } + +- +- +- +-/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */ +-/***************************************************************************/ +-/***************************************************************************/ +-/***************************************************************************/ +-/***************************************************************************/ +- +-/***************************************************************************/ +-void tulip_scsi(HCS * pCurHcb) ++static void tulip_scsi(struct initio_host * host) + { +- SCB *pCurScb; +- TCS *pCurTcb; ++ struct scsi_ctrl_blk *scb; ++ struct target_control *active_tc; + + /* make sure to service interrupt asap */ +- +- if ((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) & TSS_INT_PENDING) { +- +- pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK; +- pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1); +- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); +- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* SCSI bus reset detected */ +- int_tul_scsi_rst(pCurHcb); ++ if ((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING) { ++ host->phase = host->jsstatus0 & TSS_PH_MASK; ++ host->jsstatus1 = inb(host->addr + TUL_SStatus1); ++ host->jsint = inb(host->addr + TUL_SInt); ++ if (host->jsint & TSS_SCSIRST_INT) { /* SCSI bus reset detected */ ++ int_initio_scsi_rst(host); + return; + } +- if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if selected/reselected interrupt */ +- if (int_tul_resel(pCurHcb) == 0) +- tul_next_state(pCurHcb); ++ if (host->jsint & TSS_RESEL_INT) { /* if selected/reselected interrupt */ ++ if (int_initio_resel(host) == 0) ++ initio_next_state(host); + return; + } +- if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) { +- int_tul_busfree(pCurHcb); ++ if (host->jsint & TSS_SEL_TIMEOUT) { ++ int_initio_busfree(host); + return; + } +- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ +- int_tul_busfree(pCurHcb); /* unexpected bus free or sel timeout */ ++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ ++ int_initio_busfree(host); /* unexpected bus free or sel timeout */ + return; + } +- if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */ +- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) +- tul_next_state(pCurHcb); ++ if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) { /* func complete or Bus service */ ++ if ((scb = host->active) != NULL) ++ initio_next_state(host); + return; + } + } +- if (pCurHcb->HCS_ActScb != NULL) ++ if (host->active != NULL) + return; + +- if ((pCurScb = tul_find_first_pend_scb(pCurHcb)) == NULL) ++ if ((scb = initio_find_first_pend_scb(host)) == NULL) + return; + + /* program HBA's SCSI ID & target SCSI ID */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, +- (pCurHcb->HCS_SCSI_ID << 4) | (pCurScb->SCB_Target & 0x0F)); +- if (pCurScb->SCB_Opcode == ExecSCSI) { +- pCurTcb = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; ++ outb((host->scsi_id << 4) | (scb->target & 0x0F), ++ host->addr + TUL_SScsiId); ++ if (scb->opcode == ExecSCSI) { ++ active_tc = &host->targets[scb->target]; + +- if (pCurScb->SCB_TagMsg) +- pCurTcb->TCS_DrvFlags |= TCF_DRV_EN_TAG; ++ if (scb->tagmsg) ++ active_tc->drv_flags |= TCF_DRV_EN_TAG; + else +- pCurTcb->TCS_DrvFlags &= ~TCF_DRV_EN_TAG; ++ active_tc->drv_flags 
&= ~TCF_DRV_EN_TAG; + +- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period); +- if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */ +- tul_select_atn_stop(pCurHcb, pCurScb); ++ outb(active_tc->js_period, host->addr + TUL_SPeriod); ++ if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { /* do wdtr negotiation */ ++ initio_select_atn_stop(host, scb); + } else { +- if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */ +- tul_select_atn_stop(pCurHcb, pCurScb); ++ if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync negotiation */ ++ initio_select_atn_stop(host, scb); + } else { +- if (pCurScb->SCB_TagMsg) +- tul_select_atn3(pCurHcb, pCurScb); ++ if (scb->tagmsg) ++ initio_select_atn3(host, scb); + else +- tul_select_atn(pCurHcb, pCurScb); ++ initio_select_atn(host, scb); + } + } +- if (pCurScb->SCB_Flags & SCF_POLL) { +- while (wait_tulip(pCurHcb) != -1) { +- if (tul_next_state(pCurHcb) == -1) ++ if (scb->flags & SCF_POLL) { ++ while (wait_tulip(host) != -1) { ++ if (initio_next_state(host) == -1) + break; + } + } +- } else if (pCurScb->SCB_Opcode == BusDevRst) { +- tul_select_atn_stop(pCurHcb, pCurScb); +- pCurScb->SCB_NxtStat = 8; +- if (pCurScb->SCB_Flags & SCF_POLL) { +- while (wait_tulip(pCurHcb) != -1) { +- if (tul_next_state(pCurHcb) == -1) ++ } else if (scb->opcode == BusDevRst) { ++ initio_select_atn_stop(host, scb); ++ scb->next_state = 8; ++ if (scb->flags & SCF_POLL) { ++ while (wait_tulip(host) != -1) { ++ if (initio_next_state(host) == -1) + break; + } + } +- } else if (pCurScb->SCB_Opcode == AbortCmd) { +- if (tul_abort_srb(pCurHcb, pCurScb->SCB_Srb) != 0) { +- +- +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- +- tul_release_scb(pCurHcb, pCurScb); ++ } else if (scb->opcode == AbortCmd) { ++ if (initio_abort_srb(host, scb->srb) != 0) { ++ initio_unlink_pend_scb(host, scb); ++ initio_release_scb(host, scb); + } else { +- pCurScb->SCB_Opcode = BusDevRst; +- tul_select_atn_stop(pCurHcb, pCurScb); +- pCurScb->SCB_NxtStat = 8; ++ scb->opcode = BusDevRst; ++ initio_select_atn_stop(host, scb); ++ scb->next_state = 8; + } +- +-/* 08/03/98 */ + } else { +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- pCurScb->SCB_HaStat = 0x16; /* bad command */ +- tul_append_done_scb(pCurHcb, pCurScb); ++ initio_unlink_pend_scb(host, scb); ++ scb->hastat = 0x16; /* bad command */ ++ initio_append_done_scb(host, scb); + } + return; + } + ++/** ++ * initio_next_state - Next SCSI state ++ * @host: InitIO host we are processing ++ * ++ * Progress the active command block along the state machine ++ * until we hit a state which we must wait for activity to occur. ++ * ++ * Returns zero or a negative code. 
++ */ + +-/***************************************************************************/ +-int tul_next_state(HCS * pCurHcb) ++static int initio_next_state(struct initio_host * host) + { + int next; + +- next = pCurHcb->HCS_ActScb->SCB_NxtStat; ++ next = host->active->next_state; + for (;;) { + switch (next) { + case 1: +- next = tul_state_1(pCurHcb); ++ next = initio_state_1(host); + break; + case 2: +- next = tul_state_2(pCurHcb); ++ next = initio_state_2(host); + break; + case 3: +- next = tul_state_3(pCurHcb); ++ next = initio_state_3(host); + break; + case 4: +- next = tul_state_4(pCurHcb); ++ next = initio_state_4(host); + break; + case 5: +- next = tul_state_5(pCurHcb); ++ next = initio_state_5(host); + break; + case 6: +- next = tul_state_6(pCurHcb); ++ next = initio_state_6(host); + break; + case 7: +- next = tul_state_7(pCurHcb); ++ next = initio_state_7(host); + break; + case 8: +- return (tul_bus_device_reset(pCurHcb)); ++ return initio_bus_device_reset(host); + default: +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + if (next <= 0) + return next; +@@ -1554,338 +1314,363 @@ + } + + +-/***************************************************************************/ +-/* sTate after selection with attention & stop */ +-int tul_state_1(HCS * pCurHcb) ++/** ++ * initio_state_1 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * Perform SCSI state processing for Select/Attention/Stop ++ */ ++ ++static int initio_state_1(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; + #if DEBUG_STATE + printk("-s1-"); + #endif + +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- tul_append_busy_scb(pCurHcb, pCurScb); ++ /* Move the SCB from pending to busy */ ++ initio_unlink_pend_scb(host, scb); ++ initio_append_busy_scb(host, scb); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); ++ outb(active_tc->sconfig0, host->addr + TUL_SConfig ); + /* ATN on */ +- if (pCurHcb->HCS_Phase == MSG_OUT) { +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, (TSC_EN_BUS_IN | TSC_HW_RESELECT)); +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); +- +- if (pCurScb->SCB_TagMsg) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId); +- } +- if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { +- +- pCurTcb->TCS_Flags |= TCF_WDTR_DONE; +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2); /* Extended msg length */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* Sync request */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* Start from 16 bits */ +- } else if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { +- +- pCurTcb->TCS_Flags |= TCF_SYNC_DONE; +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* extended msg length */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */ ++ if (host->phase == MSG_OUT) { ++ outb(TSC_EN_BUS_IN | TSC_HW_RESELECT, host->addr + TUL_SCtrl1); ++ outb(scb->ident, host->addr + TUL_SFifo); ++ ++ if (scb->tagmsg) { ++ outb(scb->tagmsg, host->addr + TUL_SFifo); ++ outb(scb->tagid, host->addr + 
TUL_SFifo); ++ } ++ if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) { ++ active_tc->flags |= TCF_WDTR_DONE; ++ outb(MSG_EXTEND, host->addr + TUL_SFifo); ++ outb(2, host->addr + TUL_SFifo); /* Extended msg length */ ++ outb(3, host->addr + TUL_SFifo); /* Sync request */ ++ outb(1, host->addr + TUL_SFifo); /* Start from 16 bits */ ++ } else if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { ++ active_tc->flags |= TCF_SYNC_DONE; ++ outb(MSG_EXTEND, host->addr + TUL_SFifo); ++ outb(3, host->addr + TUL_SFifo); /* extended msg length */ ++ outb(1, host->addr + TUL_SFifo); /* sync request */ ++ outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo); ++ outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */ + } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); +- return (3); ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal); ++ /* Into before CDB xfer */ ++ return 3; + } + + +-/***************************************************************************/ +-/* state after selection with attention */ +-/* state after selection with attention3 */ +-int tul_state_2(HCS * pCurHcb) ++/** ++ * initio_state_2 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * state after selection with attention ++ * state after selection with attention3 ++ */ ++ ++static int initio_state_2(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; + #if DEBUG_STATE + printk("-s2-"); + #endif + +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- tul_append_busy_scb(pCurHcb, pCurScb); ++ initio_unlink_pend_scb(host, scb); ++ initio_append_busy_scb(host, scb); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); ++ outb(active_tc->sconfig0, host->addr + TUL_SConfig); + +- if (pCurHcb->HCS_JSStatus1 & TSS_CMD_PH_CMP) { +- return (4); +- } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); +- return (3); ++ if (host->jsstatus1 & TSS_CMD_PH_CMP) ++ return 4; ++ ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal); ++ /* Into before CDB xfer */ ++ return 3; + } + +-/***************************************************************************/ +-/* state before CDB xfer is done */ +-int tul_state_3(HCS * pCurHcb) ++/** ++ * initio_state_3 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * state before CDB xfer is done ++ */ ++ ++static int initio_state_3(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; + int i; + + #if DEBUG_STATE + printk("-s3-"); + #endif + for (;;) { +- switch (pCurHcb->HCS_Phase) { ++ switch (host->phase) { + case CMD_OUT: /* Command out phase */ +- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) 
+- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); +- if (pCurHcb->HCS_Phase == CMD_OUT) { +- return (tul_bad_seq(pCurHcb)); +- } +- return (4); ++ for (i = 0; i < (int) scb->cdblen; i++) ++ outb(scb->cdb[i], host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ if (host->phase == CMD_OUT) ++ return initio_bad_seq(host); ++ return 4; + + case MSG_IN: /* Message in phase */ +- pCurScb->SCB_NxtStat = 3; +- if (tul_msgin(pCurHcb) == -1) +- return (-1); ++ scb->next_state = 3; ++ if (initio_msgin(host) == -1) ++ return -1; + break; + + case STATUS_IN: /* Status phase */ +- if (tul_status_msg(pCurHcb) == -1) +- return (-1); ++ if (initio_status_msg(host) == -1) ++ return -1; + break; + + case MSG_OUT: /* Message out phase */ +- if (pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) { ++ if (active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) { ++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ } else { ++ active_tc->flags |= TCF_SYNC_DONE; + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); +- +- } else { +- pCurTcb->TCS_Flags |= TCF_SYNC_DONE; +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); /* ext. msg len */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); /* sync request */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET); /* REQ/ACK offset */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)); ++ outb(MSG_EXTEND, host->addr + TUL_SFifo); ++ outb(3, host->addr + TUL_SFifo); /* ext. msg len */ ++ outb(1, host->addr + TUL_SFifo); /* sync request */ ++ outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo); ++ outb(MAX_OFFSET, host->addr + TUL_SFifo); /* REQ/ACK offset */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ outb(inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7), host->addr + TUL_SSignal); + + } + break; +- + default: +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + } + } + +- +-/***************************************************************************/ +-int tul_state_4(HCS * pCurHcb) ++/** ++ * initio_state_4 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * SCSI state machine. 
State 4 ++ */ ++ ++static int initio_state_4(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + + #if DEBUG_STATE + printk("-s4-"); + #endif +- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_NO_XF) { +- return (6); /* Go to state 6 */ ++ if ((scb->flags & SCF_DIR) == SCF_NO_XF) { ++ return 6; /* Go to state 6 (After data) */ + } + for (;;) { +- if (pCurScb->SCB_BufLen == 0) +- return (6); /* Go to state 6 */ ++ if (scb->buflen == 0) ++ return 6; + +- switch (pCurHcb->HCS_Phase) { ++ switch (host->phase) { + + case STATUS_IN: /* Status phase */ +- if ((pCurScb->SCB_Flags & SCF_DIR) != 0) { /* if direction bit set then report data underrun */ +- pCurScb->SCB_HaStat = HOST_DO_DU; +- } +- if ((tul_status_msg(pCurHcb)) == -1) +- return (-1); ++ if ((scb->flags & SCF_DIR) != 0) /* if direction bit set then report data underrun */ ++ scb->hastat = HOST_DO_DU; ++ if ((initio_status_msg(host)) == -1) ++ return -1; + break; + + case MSG_IN: /* Message in phase */ +- pCurScb->SCB_NxtStat = 0x4; +- if (tul_msgin(pCurHcb) == -1) +- return (-1); ++ scb->next_state = 0x4; ++ if (initio_msgin(host) == -1) ++ return -1; + break; + + case MSG_OUT: /* Message out phase */ +- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { +- pCurScb->SCB_BufLen = 0; +- pCurScb->SCB_HaStat = HOST_DO_DU; +- if (tul_msgout_ide(pCurHcb) == -1) +- return (-1); +- return (6); /* Go to state 6 */ +- } else { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); ++ if (host->jsstatus0 & TSS_PAR_ERROR) { ++ scb->buflen = 0; ++ scb->hastat = HOST_DO_DU; ++ if (initio_msgout_ide(host) == -1) ++ return -1; ++ return 6; ++ } else { ++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + } + break; + + case DATA_IN: /* Data in phase */ +- return (tul_xfer_data_in(pCurHcb)); ++ return initio_xfer_data_in(host); + + case DATA_OUT: /* Data out phase */ +- return (tul_xfer_data_out(pCurHcb)); ++ return initio_xfer_data_out(host); + + default: +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + } + } + + +-/***************************************************************************/ +-/* state after dma xfer done or phase change before xfer done */ +-int tul_state_5(HCS * pCurHcb) ++/** ++ * initio_state_5 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * State after dma xfer done or phase change before xfer done ++ */ ++ ++static int initio_state_5(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + long cnt, xcnt; /* cannot use unsigned !! 
code: if (xcnt < 0) */ + + #if DEBUG_STATE + printk("-s5-"); + #endif +-/*------ get remaining count -------*/ ++ /*------ get remaining count -------*/ ++ cnt = inl(host->addr + TUL_SCnt0) & 0x0FFFFFF; + +- cnt = TUL_RDLONG(pCurHcb->HCS_Base, TUL_SCnt0) & 0x0FFFFFF; +- +- if (TUL_RD(pCurHcb->HCS_Base, TUL_XCmd) & 0x20) { ++ if (inb(host->addr + TUL_XCmd) & 0x20) { + /* ----------------------- DATA_IN ----------------------------- */ + /* check scsi parity error */ +- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { +- pCurScb->SCB_HaStat = HOST_DO_DU; +- } +- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */ ++ if (host->jsstatus0 & TSS_PAR_ERROR) ++ scb->hastat = HOST_DO_DU; ++ if (inb(host->addr + TUL_XStatus) & XPEND) { /* DMA xfer pending, Send STOP */ + /* tell Hardware scsi xfer has been terminated */ +- TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, TUL_RD(pCurHcb->HCS_Base, TUL_XCtrl) | 0x80); ++ outb(inb(host->addr + TUL_XCtrl) | 0x80, host->addr + TUL_XCtrl); + /* wait until DMA xfer not pending */ +- while (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND); ++ while (inb(host->addr + TUL_XStatus) & XPEND) ++ cpu_relax(); + } + } else { +-/*-------- DATA OUT -----------*/ +- if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) { +- if (pCurHcb->HCS_ActTcs->TCS_JS_Period & TSC_WIDE_SCSI) +- cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F) << 1; ++ /*-------- DATA OUT -----------*/ ++ if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) { ++ if (host->active_tc->js_period & TSC_WIDE_SCSI) ++ cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F) << 1; + else +- cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F); ++ cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F); + } +- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT); ++ if (inb(host->addr + TUL_XStatus) & XPEND) { /* if DMA xfer is pending, abort DMA xfer */ ++ outb(TAX_X_ABT, host->addr + TUL_XCmd); + /* wait Abort DMA xfer done */ +- while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0); +- } +- if ((cnt == 1) && (pCurHcb->HCS_Phase == DATA_OUT)) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) { +- return (-1); ++ while ((inb(host->addr + TUL_Int) & XABT) == 0) ++ cpu_relax(); + } ++ if ((cnt == 1) && (host->phase == DATA_OUT)) { ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + cnt = 0; + } else { +- if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); ++ if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + } + } +- + if (cnt == 0) { +- pCurScb->SCB_BufLen = 0; +- return (6); /* Go to state 6 */ ++ scb->buflen = 0; ++ return 6; /* After Data */ + } + /* Update active data pointer */ +- xcnt = (long) pCurScb->SCB_BufLen - cnt; /* xcnt== bytes already xferred */ +- pCurScb->SCB_BufLen = (U32) cnt; /* cnt == bytes left to be xferred */ +- if (pCurScb->SCB_Flags & SCF_SG) { +- register SG *sgp; +- ULONG i; +- +- sgp = &pCurScb->SCB_SGList[pCurScb->SCB_SGIdx]; +- for (i = pCurScb->SCB_SGIdx; i < pCurScb->SCB_SGMax; sgp++, i++) { +- xcnt -= (long) sgp->SG_Len; ++ xcnt = (long) scb->buflen - cnt; /* xcnt== bytes already xferred */ ++ scb->buflen = (u32) cnt; /* cnt == bytes left to be xferred */ ++ if (scb->flags & SCF_SG) { ++ struct sg_entry *sgp; ++ unsigned long 
i; ++ ++ sgp = &scb->sglist[scb->sgidx]; ++ for (i = scb->sgidx; i < scb->sgmax; sgp++, i++) { ++ xcnt -= (long) sgp->len; + if (xcnt < 0) { /* this sgp xfer half done */ +- xcnt += (long) sgp->SG_Len; /* xcnt == bytes xferred in this sgp */ +- sgp->SG_Ptr += (U32) xcnt; /* new ptr to be xfer */ +- sgp->SG_Len -= (U32) xcnt; /* new len to be xfer */ +- pCurScb->SCB_BufPtr += ((U32) (i - pCurScb->SCB_SGIdx) << 3); ++ xcnt += (long) sgp->len; /* xcnt == bytes xferred in this sgp */ ++ sgp->data += (u32) xcnt; /* new ptr to be xfer */ ++ sgp->len -= (u32) xcnt; /* new len to be xfer */ ++ scb->bufptr += ((u32) (i - scb->sgidx) << 3); + /* new SG table ptr */ +- pCurScb->SCB_SGLen = (BYTE) (pCurScb->SCB_SGMax - i); ++ scb->sglen = (u8) (scb->sgmax - i); + /* new SG table len */ +- pCurScb->SCB_SGIdx = (WORD) i; ++ scb->sgidx = (u16) i; + /* for next disc and come in this loop */ +- return (4); /* Go to state 4 */ ++ return 4; /* Go to state 4 */ + } + /* else (xcnt >= 0 , i.e. this sgp already xferred */ + } /* for */ +- return (6); /* Go to state 6 */ ++ return 6; /* Go to state 6 */ + } else { +- pCurScb->SCB_BufPtr += (U32) xcnt; ++ scb->bufptr += (u32) xcnt; + } +- return (4); /* Go to state 4 */ ++ return 4; /* Go to state 4 */ + } + +-/***************************************************************************/ +-/* state after Data phase */ +-int tul_state_6(HCS * pCurHcb) ++/** ++ * initio_state_6 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ * State after Data phase ++ */ ++ ++static int initio_state_6(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + + #if DEBUG_STATE + printk("-s6-"); + #endif + for (;;) { +- switch (pCurHcb->HCS_Phase) { ++ switch (host->phase) { + case STATUS_IN: /* Status phase */ +- if ((tul_status_msg(pCurHcb)) == -1) +- return (-1); ++ if ((initio_status_msg(host)) == -1) ++ return -1; + break; + + case MSG_IN: /* Message in phase */ +- pCurScb->SCB_NxtStat = 6; +- if ((tul_msgin(pCurHcb)) == -1) +- return (-1); ++ scb->next_state = 6; ++ if ((initio_msgin(host)) == -1) ++ return -1; + break; + + case MSG_OUT: /* Message out phase */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); /* msg nop */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); ++ outb(MSG_NOP, host->addr + TUL_SFifo); /* msg nop */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + break; + + case DATA_IN: /* Data in phase */ +- return (tul_xpad_in(pCurHcb)); ++ return initio_xpad_in(host); + + case DATA_OUT: /* Data out phase */ +- return (tul_xpad_out(pCurHcb)); ++ return initio_xpad_out(host); + + default: +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + } + } + +-/***************************************************************************/ +-int tul_state_7(HCS * pCurHcb) ++/** ++ * initio_state_7 - SCSI state machine ++ * @host: InitIO host we are controlling ++ * ++ */ ++ ++int initio_state_7(struct initio_host * host) + { + int cnt, i; + +@@ -1893,1139 +1678,1029 @@ + printk("-s7-"); + #endif + /* flush SCSI FIFO */ +- cnt = TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F; ++ cnt = inb(host->addr + TUL_SFifoCnt) & 0x1F; + if (cnt) { + for (i = 0; i < cnt; i++) +- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); ++ inb(host->addr + TUL_SFifo); + } +- switch (pCurHcb->HCS_Phase) { ++ switch (host->phase) { + case DATA_IN: /* Data in phase */ + case DATA_OUT: /* Data out phase */ +- 
return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + default: +- return (6); /* Go to state 6 */ ++ return 6; /* Go to state 6 */ + } + } + +-/***************************************************************************/ +-int tul_xfer_data_in(HCS * pCurHcb) ++/** ++ * initio_xfer_data_in - Commence data input ++ * @host: InitIO host in use ++ * ++ * Commence a block of data transfer. The transfer itself will ++ * be managed by the controller and we will get a completion (or ++ * failure) interrupt. ++ */ ++static int initio_xfer_data_in(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + +- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DOUT) { +- return (6); /* wrong direction */ +- } +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen); ++ if ((scb->flags & SCF_DIR) == SCF_DOUT) ++ return 6; /* wrong direction */ + +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_IN); /* 7/25/95 */ ++ outl(scb->buflen, host->addr + TUL_SCnt0); ++ outb(TSC_XF_DMA_IN, host->addr + TUL_SCmd); /* 7/25/95 */ + +- if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */ +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3); +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_IN); ++ if (scb->flags & SCF_SG) { /* S/G xfer */ ++ outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH); ++ outl(scb->bufptr, host->addr + TUL_XAddH); ++ outb(TAX_SG_IN, host->addr + TUL_XCmd); + } else { +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen); +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_IN); ++ outl(scb->buflen, host->addr + TUL_XCntH); ++ outl(scb->bufptr, host->addr + TUL_XAddH); ++ outb(TAX_X_IN, host->addr + TUL_XCmd); + } +- pCurScb->SCB_NxtStat = 0x5; +- return (0); /* return to OS, wait xfer done , let jas_isr come in */ ++ scb->next_state = 0x5; ++ return 0; /* return to OS, wait xfer done , let jas_isr come in */ + } + ++/** ++ * initio_xfer_data_out - Commence data output ++ * @host: InitIO host in use ++ * ++ * Commence a block of data transfer. The transfer itself will ++ * be managed by the controller and we will get a completion (or ++ * failure) interrupt. 
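++ *
++ * A rough sketch of the register sequence the non scatter/gather
++ * path below performs (all names are from this driver):
++ *
++ *	outl(scb->buflen, host->addr + TUL_SCnt0);   - SCSI transfer count
++ *	outb(TSC_XF_DMA_OUT, host->addr + TUL_SCmd); - start SCSI DMA out
++ *	outl(scb->buflen, host->addr + TUL_XCntH);   - bus master byte count
++ *	outl(scb->bufptr, host->addr + TUL_XAddH);   - bus master address
++ *	outb(TAX_X_OUT, host->addr + TUL_XCmd);      - kick the transfer off
++ *
++ * The routine then parks the state machine (scb->next_state = 0x5)
++ * and returns to the OS until the completion interrupt arrives.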
++ */ + +-/***************************************************************************/ +-int tul_xfer_data_out(HCS * pCurHcb) ++static int initio_xfer_data_out(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + +- if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DIN) { +- return (6); /* wrong direction */ +- } +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_OUT); ++ if ((scb->flags & SCF_DIR) == SCF_DIN) ++ return 6; /* wrong direction */ + +- if (pCurScb->SCB_Flags & SCF_SG) { /* S/G xfer */ +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3); +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_OUT); ++ outl(scb->buflen, host->addr + TUL_SCnt0); ++ outb(TSC_XF_DMA_OUT, host->addr + TUL_SCmd); ++ ++ if (scb->flags & SCF_SG) { /* S/G xfer */ ++ outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH); ++ outl(scb->bufptr, host->addr + TUL_XAddH); ++ outb(TAX_SG_OUT, host->addr + TUL_XCmd); + } else { +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen); +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr); +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_OUT); ++ outl(scb->buflen, host->addr + TUL_XCntH); ++ outl(scb->bufptr, host->addr + TUL_XAddH); ++ outb(TAX_X_OUT, host->addr + TUL_XCmd); + } + +- pCurScb->SCB_NxtStat = 0x5; +- return (0); /* return to OS, wait xfer done , let jas_isr come in */ ++ scb->next_state = 0x5; ++ return 0; /* return to OS, wait xfer done , let jas_isr come in */ + } + +- +-/***************************************************************************/ +-int tul_xpad_in(HCS * pCurHcb) ++int initio_xpad_in(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; + +- if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) { +- pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */ +- } ++ if ((scb->flags & SCF_DIR) != SCF_NO_DCHK) ++ scb->hastat = HOST_DO_DU; /* over run */ + for (;;) { +- if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI) +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2); ++ if (active_tc->js_period & TSC_WIDE_SCSI) ++ outl(2, host->addr + TUL_SCnt0); + else +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); ++ outl(1, host->addr + TUL_SCnt0); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if ((wait_tulip(pCurHcb)) == -1) { +- return (-1); +- } +- if (pCurHcb->HCS_Phase != DATA_IN) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- return (6); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ if (host->phase != DATA_IN) { ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ return 6; + } +- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); ++ inb(host->addr + TUL_SFifo); + } + } + +-int tul_xpad_out(HCS * pCurHcb) ++int initio_xpad_out(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; + +- if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) { +- pCurScb->SCB_HaStat = HOST_DO_DU; /* over run */ +- } ++ if ((scb->flags & SCF_DIR) != SCF_NO_DCHK) ++ scb->hastat = HOST_DO_DU; /* over run */ + for (;;) { +- if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI) +- 
TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2); ++ if (active_tc->js_period & TSC_WIDE_SCSI) ++ outl(2, host->addr + TUL_SCnt0); + else +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); ++ outl(1, host->addr + TUL_SCnt0); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- if ((wait_tulip(pCurHcb)) == -1) { +- return (-1); +- } +- if (pCurHcb->HCS_Phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- return (6); ++ outb(0, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ if ((wait_tulip(host)) == -1) ++ return -1; ++ if (host->phase != DATA_OUT) { /* Disable wide CPU to allow read 16 bits */ ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ return 6; + } + } + } + +- +-/***************************************************************************/ +-int tul_status_msg(HCS * pCurHcb) ++int initio_status_msg(struct initio_host * host) + { /* status & MSG_IN */ +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- BYTE msg; ++ struct scsi_ctrl_blk *scb = host->active; ++ u8 msg; ++ ++ outb(TSC_CMD_COMP, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_CMD_COMP); +- if ((wait_tulip(pCurHcb)) == -1) { +- return (-1); +- } + /* get status */ +- pCurScb->SCB_TaStat = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); ++ scb->tastat = inb(host->addr + TUL_SFifo); + +- if (pCurHcb->HCS_Phase == MSG_OUT) { +- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY); +- } else { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP); +- } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); +- } +- if (pCurHcb->HCS_Phase == MSG_IN) { +- msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); +- if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) { /* Parity error */ +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); +- if (pCurHcb->HCS_Phase != MSG_OUT) +- return (tul_bad_seq(pCurHcb)); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); ++ if (host->phase == MSG_OUT) { ++ if (host->jsstatus0 & TSS_PAR_ERROR) ++ outb(MSG_PARITY, host->addr + TUL_SFifo); ++ else ++ outb(MSG_NOP, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); ++ } ++ if (host->phase == MSG_IN) { ++ msg = inb(host->addr + TUL_SFifo); ++ if (host->jsstatus0 & TSS_PAR_ERROR) { /* Parity error */ ++ if ((initio_msgin_accept(host)) == -1) ++ return -1; ++ if (host->phase != MSG_OUT) ++ return initio_bad_seq(host); ++ outb(MSG_PARITY, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } + if (msg == 0) { /* Command complete */ + +- if ((pCurScb->SCB_TaStat & 0x18) == 0x10) { /* No link support */ +- return (tul_bad_seq(pCurHcb)); +- } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); +- return tul_wait_done_disc(pCurHcb); ++ if ((scb->tastat & 0x18) == 0x10) /* No link support */ ++ return initio_bad_seq(host); ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); ++ return initio_wait_done_disc(host); + + } +- if ((msg == MSG_LINK_COMP) || (msg == MSG_LINK_FLAG)) { +- if 
((pCurScb->SCB_TaStat & 0x18) == 0x10) +- return (tul_msgin_accept(pCurHcb)); ++ if (msg == MSG_LINK_COMP || msg == MSG_LINK_FLAG) { ++ if ((scb->tastat & 0x18) == 0x10) ++ return initio_msgin_accept(host); + } + } +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + + +-/***************************************************************************/ + /* scsi bus free */ +-int int_tul_busfree(HCS * pCurHcb) ++int int_initio_busfree(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; ++ struct scsi_ctrl_blk *scb = host->active; + +- if (pCurScb != NULL) { +- if (pCurScb->SCB_Status & SCB_SELECT) { /* selection timeout */ +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- pCurScb->SCB_HaStat = HOST_SEL_TOUT; +- tul_append_done_scb(pCurHcb, pCurScb); ++ if (scb != NULL) { ++ if (scb->status & SCB_SELECT) { /* selection timeout */ ++ initio_unlink_pend_scb(host, scb); ++ scb->hastat = HOST_SEL_TOUT; ++ initio_append_done_scb(host, scb); + } else { /* Unexpected bus free */ +- tul_unlink_busy_scb(pCurHcb, pCurScb); +- pCurScb->SCB_HaStat = HOST_BUS_FREE; +- tul_append_done_scb(pCurHcb, pCurScb); +- } +- pCurHcb->HCS_ActScb = NULL; +- pCurHcb->HCS_ActTcs = NULL; +- } +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ +- return (-1); ++ initio_unlink_busy_scb(host, scb); ++ scb->hastat = HOST_BUS_FREE; ++ initio_append_done_scb(host, scb); ++ } ++ host->active = NULL; ++ host->active_tc = NULL; ++ } ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ ++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ ++ return -1; + } + + +-/***************************************************************************/ +-/* scsi bus reset */ +-static int int_tul_scsi_rst(HCS * pCurHcb) ++/** ++ * int_initio_scsi_rst - SCSI reset occurred ++ * @host: Host seeing the reset ++ * ++ * A SCSI bus reset has occurred. Clean up any pending transfer ++ * the hardware is doing by DMA and then abort all active and ++ * disconnected commands. The mid layer should sort the rest out ++ * for us ++ */ ++ ++static int int_initio_scsi_rst(struct initio_host * host) + { +- SCB *pCurScb; ++ struct scsi_ctrl_blk *scb; + int i; + + /* if DMA xfer is pending, abort DMA xfer */ +- if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & 0x01) { +- TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO); ++ if (inb(host->addr + TUL_XStatus) & 0x01) { ++ outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd); + /* wait Abort DMA xfer done */ +- while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & 0x04) == 0); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); ++ while ((inb(host->addr + TUL_Int) & 0x04) == 0) ++ cpu_relax(); ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + } + /* Abort all active & disconnected scb */ +- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { +- pCurScb->SCB_HaStat = HOST_BAD_PHAS; +- tul_append_done_scb(pCurHcb, pCurScb); ++ while ((scb = initio_pop_busy_scb(host)) != NULL) { ++ scb->hastat = HOST_BAD_PHAS; ++ initio_append_done_scb(host, scb); + } +- pCurHcb->HCS_ActScb = NULL; +- pCurHcb->HCS_ActTcs = NULL; ++ host->active = NULL; ++ host->active_tc = NULL; + + /* clr sync nego. 
done flag */ +- for (i = 0; i < pCurHcb->HCS_MaxTar; i++) { +- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); +- } +- return (-1); ++ for (i = 0; i < host->max_tar; i++) ++ host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); ++ return -1; + } + ++/** ++ * int_initio_resel - Reselection occurred ++ * @host: InitIO host adapter ++ * ++ * A SCSI reselection event has been signalled and the interrupt ++ * is now being processed. Work out which command block needs attention ++ * and continue processing that command. ++ */ + +-/***************************************************************************/ +-/* scsi reselection */ +-int int_tul_resel(HCS * pCurHcb) ++int int_initio_resel(struct initio_host * host) + { +- SCB *pCurScb; +- TCS *pCurTcb; +- BYTE tag, msg = 0; +- BYTE tar, lun; +- +- if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) { +- if (pCurScb->SCB_Status & SCB_SELECT) { /* if waiting for selection complete */ +- pCurScb->SCB_Status &= ~SCB_SELECT; +- } +- pCurHcb->HCS_ActScb = NULL; ++ struct scsi_ctrl_blk *scb; ++ struct target_control *active_tc; ++ u8 tag, msg = 0; ++ u8 tar, lun; ++ ++ if ((scb = host->active) != NULL) { ++ /* FIXME: Why check and not just clear ? */ ++ if (scb->status & SCB_SELECT) /* if waiting for selection complete */ ++ scb->status &= ~SCB_SELECT; ++ host->active = NULL; + } + /* --------- get target id---------------------- */ +- tar = TUL_RD(pCurHcb->HCS_Base, TUL_SBusId); ++ tar = inb(host->addr + TUL_SBusId); + /* ------ get LUN from Identify message----------- */ +- lun = TUL_RD(pCurHcb->HCS_Base, TUL_SIdent) & 0x0F; ++ lun = inb(host->addr + TUL_SIdent) & 0x0F; + /* 07/22/98 from 0x1F -> 0x0F */ +- pCurTcb = &pCurHcb->HCS_Tcs[tar]; +- pCurHcb->HCS_ActTcs = pCurTcb; +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0); +- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period); +- ++ active_tc = &host->targets[tar]; ++ host->active_tc = active_tc; ++ outb(active_tc->sconfig0, host->addr + TUL_SConfig); ++ outb(active_tc->js_period, host->addr + TUL_SPeriod); + + /* ------------- tag queueing ?
------------------- */ +- if (pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG) { +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); +- if (pCurHcb->HCS_Phase != MSG_IN) ++ if (active_tc->drv_flags & TCF_DRV_EN_TAG) { ++ if ((initio_msgin_accept(host)) == -1) ++ return -1; ++ if (host->phase != MSG_IN) + goto no_tag; +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if ((wait_tulip(pCurHcb)) == -1) +- return (-1); +- msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag Message */ ++ outl(1, host->addr + TUL_SCnt0); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ msg = inb(host->addr + TUL_SFifo); /* Read Tag Message */ + +- if ((msg < MSG_STAG) || (msg > MSG_OTAG)) /* Is simple Tag */ ++ if (msg < MSG_STAG || msg > MSG_OTAG) /* Is simple Tag */ + goto no_tag; + +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); ++ if (initio_msgin_accept(host) == -1) ++ return -1; + +- if (pCurHcb->HCS_Phase != MSG_IN) ++ if (host->phase != MSG_IN) + goto no_tag; + +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if ((wait_tulip(pCurHcb)) == -1) +- return (-1); +- tag = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* Read Tag ID */ +- pCurScb = pCurHcb->HCS_Scb + tag; +- if ((pCurScb->SCB_Target != tar) || (pCurScb->SCB_Lun != lun)) { +- return tul_msgout_abort_tag(pCurHcb); +- } +- if (pCurScb->SCB_Status != SCB_BUSY) { /* 03/24/95 */ +- return tul_msgout_abort_tag(pCurHcb); +- } +- pCurHcb->HCS_ActScb = pCurScb; +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); ++ outl(1, host->addr + TUL_SCnt0); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ tag = inb(host->addr + TUL_SFifo); /* Read Tag ID */ ++ scb = host->scb + tag; ++ if (scb->target != tar || scb->lun != lun) { ++ return initio_msgout_abort_tag(host); ++ } ++ if (scb->status != SCB_BUSY) { /* 03/24/95 */ ++ return initio_msgout_abort_tag(host); ++ } ++ host->active = scb; ++ if ((initio_msgin_accept(host)) == -1) ++ return -1; + } else { /* No tag */ + no_tag: +- if ((pCurScb = tul_find_busy_scb(pCurHcb, tar | (lun << 8))) == NULL) { +- return tul_msgout_abort_targ(pCurHcb); ++ if ((scb = initio_find_busy_scb(host, tar | (lun << 8))) == NULL) { ++ return initio_msgout_abort_targ(host); + } +- pCurHcb->HCS_ActScb = pCurScb; +- if (!(pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG)) { +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); ++ host->active = scb; ++ if (!(active_tc->drv_flags & TCF_DRV_EN_TAG)) { ++ if ((initio_msgin_accept(host)) == -1) ++ return -1; + } + } + return 0; + } + ++/** ++ * int_initio_bad_seq - out of phase ++ * @host: InitIO host flagging event ++ * ++ * We have ended up out of phase somehow. Reset the host controller ++ * and throw all our toys out of the pram. 
Let the midlayer clean up ++ */ + +-/***************************************************************************/ +-static int int_tul_bad_seq(HCS * pCurHcb) ++static int int_initio_bad_seq(struct initio_host * host) + { /* target wrong phase */ +- SCB *pCurScb; ++ struct scsi_ctrl_blk *scb; + int i; + +- tul_reset_scsi(pCurHcb, 10); ++ initio_reset_scsi(host, 10); + +- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { +- pCurScb->SCB_HaStat = HOST_BAD_PHAS; +- tul_append_done_scb(pCurHcb, pCurScb); ++ while ((scb = initio_pop_busy_scb(host)) != NULL) { ++ scb->hastat = HOST_BAD_PHAS; ++ initio_append_done_scb(host, scb); + } +- for (i = 0; i < pCurHcb->HCS_MaxTar; i++) { +- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); +- } +- return (-1); ++ for (i = 0; i < host->max_tar; i++) ++ host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); ++ return -1; + } + + +-/***************************************************************************/ +-int tul_msgout_abort_targ(HCS * pCurHcb) ++/** ++ * initio_msgout_abort_targ - abort a target ++ * @host: InitIO host ++ * ++ * Abort when the target/lun does not match or when our SCB is not ++ * busy. Used by untagged commands. ++ */ ++ ++static int initio_msgout_abort_targ(struct initio_host * host) + { + +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- if (tul_msgin_accept(pCurHcb) == -1) +- return (-1); +- if (pCurHcb->HCS_Phase != MSG_OUT) +- return (tul_bad_seq(pCurHcb)); ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); ++ if (initio_msgin_accept(host) == -1) ++ return -1; ++ if (host->phase != MSG_OUT) ++ return initio_bad_seq(host); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); ++ outb(MSG_ABORT, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + +- return tul_wait_disc(pCurHcb); ++ return initio_wait_disc(host); + } + +-/***************************************************************************/ +-int tul_msgout_abort_tag(HCS * pCurHcb) ++/** ++ * initio_msgout_abort_tag - abort a tag ++ * @host: InitIO host ++ * ++ * Abort when the target/lun does not match or when our SCB is not ++ * busy. Used for tagged commands.
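++ *
++ * In outline (a sketch of the sequence coded below, using only the
++ * calls and registers the function itself uses):
++ *
++ *	outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7))
++ *		| TSC_SET_ATN, host->addr + TUL_SSignal);  - raise ATN
++ *	initio_msgin_accept(host);                         - ack message in
++ *	  ... target must now switch to MSG_OUT phase ...
++ *	outb(MSG_ABORT_TAG, host->addr + TUL_SFifo);       - queue abort msg
++ *	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);      - send it
++ *	return initio_wait_disc(host);                     - expect bus free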
++ */ ++ ++static int initio_msgout_abort_tag(struct initio_host * host) + { + +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- if (tul_msgin_accept(pCurHcb) == -1) +- return (-1); +- if (pCurHcb->HCS_Phase != MSG_OUT) +- return (tul_bad_seq(pCurHcb)); ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); ++ if (initio_msgin_accept(host) == -1) ++ return -1; ++ if (host->phase != MSG_OUT) ++ return initio_bad_seq(host); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT_TAG); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); ++ outb(MSG_ABORT_TAG, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); + +- return tul_wait_disc(pCurHcb); ++ return initio_wait_disc(host); + + } + +-/***************************************************************************/ +-int tul_msgin(HCS * pCurHcb) ++/** ++ * initio_msgin - Message in ++ * @host: InitIO Host ++ * ++ * Process incoming message ++ */ ++static int initio_msgin(struct initio_host * host) + { +- TCS *pCurTcb; ++ struct target_control *active_tc; + + for (;;) { ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if ((wait_tulip(pCurHcb)) == -1) +- return (-1); ++ outl(1, host->addr + TUL_SCnt0); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + +- switch (TUL_RD(pCurHcb->HCS_Base, TUL_SFifo)) { ++ switch (inb(host->addr + TUL_SFifo)) { + case MSG_DISC: /* Disconnect msg */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); +- +- return tul_wait_disc(pCurHcb); +- ++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); ++ return initio_wait_disc(host); + case MSG_SDP: + case MSG_RESTORE: + case MSG_NOP: +- tul_msgin_accept(pCurHcb); ++ initio_msgin_accept(host); + break; +- + case MSG_REJ: /* Clear ATN first */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, +- (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7))); +- pCurTcb = pCurHcb->HCS_ActTcs; +- if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) { /* do sync nego */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- } +- tul_msgin_accept(pCurHcb); ++ outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), ++ host->addr + TUL_SSignal); ++ active_tc = host->active_tc; ++ if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) /* do sync nego */ ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), ++ host->addr + TUL_SSignal); ++ initio_msgin_accept(host); + break; +- + case MSG_EXTEND: /* extended msg */ +- tul_msgin_extend(pCurHcb); ++ initio_msgin_extend(host); + break; +- + case MSG_IGNOREWIDE: +- tul_msgin_accept(pCurHcb); +- break; +- +- /* get */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if (wait_tulip(pCurHcb) == -1) +- return -1; +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0); /* put pad */ +- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get IGNORE field */ +- TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); /* get pad */ +- +- tul_msgin_accept(pCurHcb); ++ initio_msgin_accept(host); + break; +- + case MSG_COMP: +- { +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); +- return tul_wait_done_disc(pCurHcb); +- } ++ 
outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); ++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); ++ return initio_wait_done_disc(host); + default: +- tul_msgout_reject(pCurHcb); ++ initio_msgout_reject(host); + break; + } +- if (pCurHcb->HCS_Phase != MSG_IN) +- return (pCurHcb->HCS_Phase); ++ if (host->phase != MSG_IN) ++ return host->phase; + } + /* statement won't reach here */ + } + +- +- +- +-/***************************************************************************/ +-int tul_msgout_reject(HCS * pCurHcb) ++static int initio_msgout_reject(struct initio_host * host) + { ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- +- if ((tul_msgin_accept(pCurHcb)) == -1) +- return (-1); ++ if (initio_msgin_accept(host) == -1) ++ return -1; + +- if (pCurHcb->HCS_Phase == MSG_OUT) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_REJ); /* Msg reject */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); ++ if (host->phase == MSG_OUT) { ++ outb(MSG_REJ, host->addr + TUL_SFifo); /* Msg reject */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } +- return (pCurHcb->HCS_Phase); ++ return host->phase; + } + +- +- +-/***************************************************************************/ +-int tul_msgout_ide(HCS * pCurHcb) ++static int initio_msgout_ide(struct initio_host * host) + { +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_IDE); /* Initiator Detected Error */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); ++ outb(MSG_IDE, host->addr + TUL_SFifo); /* Initiator Detected Error */ ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } + +- +-/***************************************************************************/ +-int tul_msgin_extend(HCS * pCurHcb) ++static int initio_msgin_extend(struct initio_host * host) + { +- BYTE len, idx; ++ u8 len, idx; + +- if (tul_msgin_accept(pCurHcb) != MSG_IN) +- return (pCurHcb->HCS_Phase); ++ if (initio_msgin_accept(host) != MSG_IN) ++ return host->phase; + + /* Get extended msg length */ +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); ++ outl(1, host->addr + TUL_SCnt0); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; + +- len = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); +- pCurHcb->HCS_Msg[0] = len; ++ len = inb(host->addr + TUL_SFifo); ++ host->msg[0] = len; + for (idx = 1; len != 0; len--) { + +- if ((tul_msgin_accept(pCurHcb)) != MSG_IN) +- return (pCurHcb->HCS_Phase); +- TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN); +- if (wait_tulip(pCurHcb) == -1) +- return (-1); +- pCurHcb->HCS_Msg[idx++] = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo); +- } +- if (pCurHcb->HCS_Msg[1] == 1) { /* if it's synchronous data transfer request */ +- if (pCurHcb->HCS_Msg[0] != 3) /* if length is not right */ +- return (tul_msgout_reject(pCurHcb)); +- if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */ +- pCurHcb->HCS_Msg[3] = 0; +- } else { +- if ((tul_msgin_sync(pCurHcb) == 0) && +- (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SYNC_DONE)) { +- tul_sync_done(pCurHcb); +- return (tul_msgin_accept(pCurHcb)); ++ if 
((initio_msgin_accept(host)) != MSG_IN) ++ return host->phase; ++ outl(1, host->addr + TUL_SCnt0); ++ outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd); ++ if (wait_tulip(host) == -1) ++ return -1; ++ host->msg[idx++] = inb(host->addr + TUL_SFifo); ++ } ++ if (host->msg[1] == 1) { /* if it's synchronous data transfer request */ ++ u8 r; ++ if (host->msg[0] != 3) /* if length is not right */ ++ return initio_msgout_reject(host); ++ if (host->active_tc->flags & TCF_NO_SYNC_NEGO) { /* Set OFFSET=0 to do async, nego back */ ++ host->msg[3] = 0; ++ } else { ++ if (initio_msgin_sync(host) == 0 && ++ (host->active_tc->flags & TCF_SYNC_DONE)) { ++ initio_sync_done(host); ++ return initio_msgin_accept(host); + } + } + +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- if ((tul_msgin_accept(pCurHcb)) != MSG_OUT) +- return (pCurHcb->HCS_Phase); ++ r = inb(host->addr + TUL_SSignal); ++ outb((r & (TSC_SET_ACK | 7)) | TSC_SET_ATN, ++ host->addr + TUL_SSignal); ++ if (initio_msgin_accept(host) != MSG_OUT) ++ return host->phase; + /* sync msg out */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); +- +- tul_sync_done(pCurHcb); ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[3]); ++ initio_sync_done(host); + +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); ++ outb(MSG_EXTEND, host->addr + TUL_SFifo); ++ outb(3, host->addr + TUL_SFifo); ++ outb(1, host->addr + TUL_SFifo); ++ outb(host->msg[2], host->addr + TUL_SFifo); ++ outb(host->msg[3], host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } +- if ((pCurHcb->HCS_Msg[0] != 2) || (pCurHcb->HCS_Msg[1] != 3)) +- return (tul_msgout_reject(pCurHcb)); ++ if (host->msg[0] != 2 || host->msg[1] != 3) ++ return initio_msgout_reject(host); + /* if it's WIDE DATA XFER REQ */ +- if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) { +- pCurHcb->HCS_Msg[2] = 0; ++ if (host->active_tc->flags & TCF_NO_WDTR) { ++ host->msg[2] = 0; + } else { +- if (pCurHcb->HCS_Msg[2] > 2) /* > 32 bits */ +- return (tul_msgout_reject(pCurHcb)); +- if (pCurHcb->HCS_Msg[2] == 2) { /* == 32 */ +- pCurHcb->HCS_Msg[2] = 1; ++ if (host->msg[2] > 2) /* > 32 bits */ ++ return initio_msgout_reject(host); ++ if (host->msg[2] == 2) { /* == 32 */ ++ host->msg[2] = 1; + } else { +- if ((pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) == 0) { +- wdtr_done(pCurHcb); +- if ((pCurHcb->HCS_ActTcs->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); +- return (tul_msgin_accept(pCurHcb)); ++ if ((host->active_tc->flags & TCF_NO_WDTR) == 0) { ++ wdtr_done(host); ++ if ((host->active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); ++ return initio_msgin_accept(host); + } + } + } +- TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN)); ++ outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal); + +- if (tul_msgin_accept(pCurHcb) != MSG_OUT) +- 
return (pCurHcb->HCS_Phase); ++ if (initio_msgin_accept(host) != MSG_OUT) ++ return host->phase; + /* WDTR msg out */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- return (wait_tulip(pCurHcb)); ++ outb(MSG_EXTEND, host->addr + TUL_SFifo); ++ outb(2, host->addr + TUL_SFifo); ++ outb(3, host->addr + TUL_SFifo); ++ outb(host->msg[2], host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } + +-/***************************************************************************/ +-int tul_msgin_sync(HCS * pCurHcb) ++static int initio_msgin_sync(struct initio_host * host) + { + char default_period; + +- default_period = tul_rate_tbl[pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SCSI_RATE]; +- if (pCurHcb->HCS_Msg[3] > MAX_OFFSET) { +- pCurHcb->HCS_Msg[3] = MAX_OFFSET; +- if (pCurHcb->HCS_Msg[2] < default_period) { +- pCurHcb->HCS_Msg[2] = default_period; ++ default_period = initio_rate_tbl[host->active_tc->flags & TCF_SCSI_RATE]; ++ if (host->msg[3] > MAX_OFFSET) { ++ host->msg[3] = MAX_OFFSET; ++ if (host->msg[2] < default_period) { ++ host->msg[2] = default_period; + return 1; + } +- if (pCurHcb->HCS_Msg[2] >= 59) { /* Change to async */ +- pCurHcb->HCS_Msg[3] = 0; +- } ++ if (host->msg[2] >= 59) /* Change to async */ ++ host->msg[3] = 0; + return 1; + } + /* offset requests asynchronous transfers ? */ +- if (pCurHcb->HCS_Msg[3] == 0) { ++ if (host->msg[3] == 0) { + return 0; + } +- if (pCurHcb->HCS_Msg[2] < default_period) { +- pCurHcb->HCS_Msg[2] = default_period; ++ if (host->msg[2] < default_period) { ++ host->msg[2] = default_period; + return 1; + } +- if (pCurHcb->HCS_Msg[2] >= 59) { +- pCurHcb->HCS_Msg[3] = 0; ++ if (host->msg[2] >= 59) { ++ host->msg[3] = 0; + return 1; + } + return 0; + } + +- +-/***************************************************************************/ +-int wdtr_done(HCS * pCurHcb) ++static int wdtr_done(struct initio_host * host) + { +- pCurHcb->HCS_ActTcs->TCS_Flags &= ~TCF_SYNC_DONE; +- pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_WDTR_DONE; ++ host->active_tc->flags &= ~TCF_SYNC_DONE; ++ host->active_tc->flags |= TCF_WDTR_DONE; + +- pCurHcb->HCS_ActTcs->TCS_JS_Period = 0; +- if (pCurHcb->HCS_Msg[2]) { /* if 16 bit */ +- pCurHcb->HCS_ActTcs->TCS_JS_Period |= TSC_WIDE_SCSI; +- } +- pCurHcb->HCS_ActTcs->TCS_SConfig0 &= ~TSC_ALT_PERIOD; +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0); +- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period); ++ host->active_tc->js_period = 0; ++ if (host->msg[2]) /* if 16 bit */ ++ host->active_tc->js_period |= TSC_WIDE_SCSI; ++ host->active_tc->sconfig0 &= ~TSC_ALT_PERIOD; ++ outb(host->active_tc->sconfig0, host->addr + TUL_SConfig); ++ outb(host->active_tc->js_period, host->addr + TUL_SPeriod); + + return 1; + } + +-/***************************************************************************/ +-int tul_sync_done(HCS * pCurHcb) ++static int initio_sync_done(struct initio_host * host) + { + int i; + +- pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_SYNC_DONE; ++ host->active_tc->flags |= TCF_SYNC_DONE; + +- if (pCurHcb->HCS_Msg[3]) { +- pCurHcb->HCS_ActTcs->TCS_JS_Period |= pCurHcb->HCS_Msg[3]; ++ if (host->msg[3]) { ++ host->active_tc->js_period |= host->msg[3]; + for (i = 0; i < 8; i++) { +- if (tul_rate_tbl[i] >= pCurHcb->HCS_Msg[2]) /* pick 
the big one */ ++ if (initio_rate_tbl[i] >= host->msg[2]) /* pick the big one */ + break; + } +- pCurHcb->HCS_ActTcs->TCS_JS_Period |= (i << 4); +- pCurHcb->HCS_ActTcs->TCS_SConfig0 |= TSC_ALT_PERIOD; ++ host->active_tc->js_period |= (i << 4); ++ host->active_tc->sconfig0 |= TSC_ALT_PERIOD; + } +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0); +- TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period); ++ outb(host->active_tc->sconfig0, host->addr + TUL_SConfig); ++ outb(host->active_tc->js_period, host->addr + TUL_SPeriod); + +- return (-1); ++ return -1; + } + + +-int tul_post_scsi_rst(HCS * pCurHcb) ++static int initio_post_scsi_rst(struct initio_host * host) + { +- SCB *pCurScb; +- TCS *pCurTcb; ++ struct scsi_ctrl_blk *scb; ++ struct target_control *active_tc; + int i; + +- pCurHcb->HCS_ActScb = NULL; +- pCurHcb->HCS_ActTcs = NULL; +- pCurHcb->HCS_Flags = 0; +- +- while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) { +- pCurScb->SCB_HaStat = HOST_BAD_PHAS; +- tul_append_done_scb(pCurHcb, pCurScb); ++ host->active = NULL; ++ host->active_tc = NULL; ++ host->flags = 0; ++ ++ while ((scb = initio_pop_busy_scb(host)) != NULL) { ++ scb->hastat = HOST_BAD_PHAS; ++ initio_append_done_scb(host, scb); + } + /* clear sync done flag */ +- pCurTcb = &pCurHcb->HCS_Tcs[0]; +- for (i = 0; i < pCurHcb->HCS_MaxTar; pCurTcb++, i++) { +- pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); ++ active_tc = &host->targets[0]; ++ for (i = 0; i < host->max_tar; active_tc++, i++) { ++ active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE); + /* Initialize the sync. xfer register values to an asyn xfer */ +- pCurTcb->TCS_JS_Period = 0; +- pCurTcb->TCS_SConfig0 = pCurHcb->HCS_SConf1; +- pCurHcb->HCS_ActTags[0] = 0; /* 07/22/98 */ +- pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY; /* 07/22/98 */ ++ active_tc->js_period = 0; ++ active_tc->sconfig0 = host->sconf1; ++ host->act_tags[0] = 0; /* 07/22/98 */ ++ host->targets[i].flags &= ~TCF_BUSY; /* 07/22/98 */ + } /* for */ + +- return (-1); ++ return -1; + } + +-/***************************************************************************/ +-void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb) ++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb) + { +- pCurScb->SCB_Status |= SCB_SELECT; +- pCurScb->SCB_NxtStat = 0x1; +- pCurHcb->HCS_ActScb = pCurScb; +- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SELATNSTOP); +- return; ++ scb->status |= SCB_SELECT; ++ scb->next_state = 0x1; ++ host->active = scb; ++ host->active_tc = &host->targets[scb->target]; ++ outb(TSC_SELATNSTOP, host->addr + TUL_SCmd); + } + + +-/***************************************************************************/ +-void tul_select_atn(HCS * pCurHcb, SCB * pCurScb) ++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb) + { + int i; + +- pCurScb->SCB_Status |= SCB_SELECT; +- pCurScb->SCB_NxtStat = 0x2; ++ scb->status |= SCB_SELECT; ++ scb->next_state = 0x2; + +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); +- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); +- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; +- pCurHcb->HCS_ActScb = pCurScb; +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN); +- return; ++ outb(scb->ident, host->addr + TUL_SFifo); ++ for (i = 0; i < (int) scb->cdblen; i++) ++ outb(scb->cdb[i], host->addr + 
TUL_SFifo); ++ host->active_tc = &host->targets[scb->target]; ++ host->active = scb; ++ outb(TSC_SEL_ATN, host->addr + TUL_SCmd); + } + +-/***************************************************************************/ +-void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb) ++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb) + { + int i; + +- pCurScb->SCB_Status |= SCB_SELECT; +- pCurScb->SCB_NxtStat = 0x2; +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg); +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId); +- for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++) +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]); +- pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target]; +- pCurHcb->HCS_ActScb = pCurScb; +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN3); +- return; +-} ++ scb->status |= SCB_SELECT; ++ scb->next_state = 0x2; + +-/***************************************************************************/ +-/* SCSI Bus Device Reset */ +-int tul_bus_device_reset(HCS * pCurHcb) ++ outb(scb->ident, host->addr + TUL_SFifo); ++ outb(scb->tagmsg, host->addr + TUL_SFifo); ++ outb(scb->tagid, host->addr + TUL_SFifo); ++ for (i = 0; i < scb->cdblen; i++) ++ outb(scb->cdb[i], host->addr + TUL_SFifo); ++ host->active_tc = &host->targets[scb->target]; ++ host->active = scb; ++ outb(TSC_SEL_ATN3, host->addr + TUL_SCmd); ++} ++ ++/** ++ * initio_bus_device_reset - SCSI Bus Device Reset ++ * @host: InitIO host to reset ++ * ++ * Perform a device reset and abort all pending SCBs for the ++ * victim device ++ */ ++int initio_bus_device_reset(struct initio_host * host) + { +- SCB *pCurScb = pCurHcb->HCS_ActScb; +- TCS *pCurTcb = pCurHcb->HCS_ActTcs; +- SCB *pTmpScb, *pPrevScb; +- BYTE tar; ++ struct scsi_ctrl_blk *scb = host->active; ++ struct target_control *active_tc = host->active_tc; ++ struct scsi_ctrl_blk *tmp, *prev; ++ u8 tar; + +- if (pCurHcb->HCS_Phase != MSG_OUT) { +- return (int_tul_bad_seq(pCurHcb)); /* Unexpected phase */ +- } +- tul_unlink_pend_scb(pCurHcb, pCurScb); +- tul_release_scb(pCurHcb, pCurScb); ++ if (host->phase != MSG_OUT) ++ return int_initio_bad_seq(host); /* Unexpected phase */ + ++ initio_unlink_pend_scb(host, scb); ++ initio_release_scb(host, scb); + +- tar = pCurScb->SCB_Target; /* target */ +- pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY); ++ ++ tar = scb->target; /* target */ ++ active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY); + /* clr sync. 
nego & WDTR flags 07/22/98 */ + + /* abort all SCB with same target */ +- pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy; /* Check Busy queue */ +- while (pTmpScb != NULL) { +- +- if (pTmpScb->SCB_Target == tar) { ++ prev = tmp = host->first_busy; /* Check Busy queue */ ++ while (tmp != NULL) { ++ if (tmp->target == tar) { + /* unlink it */ +- if (pTmpScb == pCurHcb->HCS_FirstBusy) { +- if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL) +- pCurHcb->HCS_LastBusy = NULL; +- } else { +- pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb; +- if (pTmpScb == pCurHcb->HCS_LastBusy) +- pCurHcb->HCS_LastBusy = pPrevScb; ++ if (tmp == host->first_busy) { ++ if ((host->first_busy = tmp->next) == NULL) ++ host->last_busy = NULL; ++ } else { ++ prev->next = tmp->next; ++ if (tmp == host->last_busy) ++ host->last_busy = prev; + } +- pTmpScb->SCB_HaStat = HOST_ABORTED; +- tul_append_done_scb(pCurHcb, pTmpScb); ++ tmp->hastat = HOST_ABORTED; ++ initio_append_done_scb(host, tmp); + } + /* Previous haven't change */ + else { +- pPrevScb = pTmpScb; ++ prev = tmp; + } +- pTmpScb = pTmpScb->SCB_NxtScb; ++ tmp = tmp->next; + } +- +- TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_DEVRST); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT); +- +- return tul_wait_disc(pCurHcb); ++ outb(MSG_DEVRST, host->addr + TUL_SFifo); ++ outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd); ++ return initio_wait_disc(host); + + } + +-/***************************************************************************/ +-int tul_msgin_accept(HCS * pCurHcb) ++static int initio_msgin_accept(struct initio_host * host) + { +- TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT); +- return (wait_tulip(pCurHcb)); ++ outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd); ++ return wait_tulip(host); + } + +-/***************************************************************************/ +-int wait_tulip(HCS * pCurHcb) ++static int wait_tulip(struct initio_host * host) + { + +- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) +- & TSS_INT_PENDING)); ++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) ++ & TSS_INT_PENDING)) ++ cpu_relax(); ++ ++ host->jsint = inb(host->addr + TUL_SInt); ++ host->phase = host->jsstatus0 & TSS_PH_MASK; ++ host->jsstatus1 = inb(host->addr + TUL_SStatus1); + +- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); +- pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK; +- pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1); +- +- if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) { /* if SCSI bus reset detected */ +- return (int_tul_resel(pCurHcb)); +- } +- if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) { /* if selected/reselected timeout interrupt */ +- return (int_tul_busfree(pCurHcb)); +- } +- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ +- return (int_tul_scsi_rst(pCurHcb)); +- } +- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ +- if (pCurHcb->HCS_Flags & HCF_EXPECT_DONE_DISC) { +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ +- tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb); +- pCurHcb->HCS_ActScb->SCB_HaStat = 0; +- tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb); +- pCurHcb->HCS_ActScb = NULL; +- pCurHcb->HCS_ActTcs = NULL; +- pCurHcb->HCS_Flags &= ~HCF_EXPECT_DONE_DISC; +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ +- return (-1); +- } +- if (pCurHcb->HCS_Flags & HCF_EXPECT_DISC) { +- 
TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ +- pCurHcb->HCS_ActScb = NULL; +- pCurHcb->HCS_ActTcs = NULL; +- pCurHcb->HCS_Flags &= ~HCF_EXPECT_DISC; +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ +- return (-1); ++ if (host->jsint & TSS_RESEL_INT) /* if SCSI bus reset detected */ ++ return int_initio_resel(host); ++ if (host->jsint & TSS_SEL_TIMEOUT) /* if selected/reselected timeout interrupt */ ++ return int_initio_busfree(host); ++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ ++ return int_initio_scsi_rst(host); ++ ++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ ++ if (host->flags & HCF_EXPECT_DONE_DISC) { ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ ++ initio_unlink_busy_scb(host, host->active); ++ host->active->hastat = 0; ++ initio_append_done_scb(host, host->active); ++ host->active = NULL; ++ host->active_tc = NULL; ++ host->flags &= ~HCF_EXPECT_DONE_DISC; ++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ ++ return -1; + } +- return (int_tul_busfree(pCurHcb)); ++ if (host->flags & HCF_EXPECT_DISC) { ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ ++ host->active = NULL; ++ host->active_tc = NULL; ++ host->flags &= ~HCF_EXPECT_DISC; ++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ ++ return -1; + } +- if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) { +- return (pCurHcb->HCS_Phase); ++ return int_initio_busfree(host); + } +- return (pCurHcb->HCS_Phase); ++ /* The old code really does the below. 
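Both the TSS_FUNC_COMP test and the fall-through path return host->phase, so the test changes nothing.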
Can probably be removed */ ++ if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) ++ return host->phase; ++ return host->phase; + } +-/***************************************************************************/ +-int tul_wait_disc(HCS * pCurHcb) +-{ +- +- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) +- & TSS_INT_PENDING)); + ++static int initio_wait_disc(struct initio_host * host) ++{ ++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING)) ++ cpu_relax(); + +- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); ++ host->jsint = inb(host->addr + TUL_SInt); + +- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ +- return (int_tul_scsi_rst(pCurHcb)); +- } +- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ +- pCurHcb->HCS_ActScb = NULL; +- return (-1); ++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ ++ return int_initio_scsi_rst(host); ++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ ++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ ++ host->active = NULL; ++ return -1; + } +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + +-/***************************************************************************/ +-int tul_wait_done_disc(HCS * pCurHcb) ++static int initio_wait_done_disc(struct initio_host * host) + { ++ while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) ++ & TSS_INT_PENDING)) ++ cpu_relax(); + ++ host->jsint = inb(host->addr + TUL_SInt); + +- while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) +- & TSS_INT_PENDING)); +- +- pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt); ++ if (host->jsint & TSS_SCSIRST_INT) /* if SCSI bus reset detected */ ++ return int_initio_scsi_rst(host); ++ if (host->jsint & TSS_DISC_INT) { /* BUS disconnection */ ++ outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */ ++ outb(TSC_INITDEFAULT, host->addr + TUL_SConfig); ++ outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1); /* Enable HW reselect */ ++ initio_unlink_busy_scb(host, host->active); + +- +- if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) { /* if SCSI bus reset detected */ +- return (int_tul_scsi_rst(pCurHcb)); +- } +- if (pCurHcb->HCS_JSInt & TSS_DISC_INT) { /* BUS disconnection */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO); /* Flush SCSI FIFO */ +- TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT); +- TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT); /* Enable HW reselect */ +- tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb); +- +- tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb); +- pCurHcb->HCS_ActScb = NULL; +- return (-1); ++ initio_append_done_scb(host, host->active); ++ host->active = NULL; ++ return -1; + } +- return (tul_bad_seq(pCurHcb)); ++ return initio_bad_seq(host); + } + ++/** ++ * i91u_intr - IRQ handler ++ * @irqno: IRQ number ++ * @dev_id: IRQ identifier ++ * ++ * Take the relevant locks and then invoke the actual isr processing ++ * code under the lock. 
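++ *
++ *	Returns IRQ_HANDLED when initio_isr() reports that the adapter
++ *	raised the interrupt, and IRQ_NONE otherwise so that a shared
++ *	interrupt line is handled correctly.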
++ */ ++ + static irqreturn_t i91u_intr(int irqno, void *dev_id) + { + struct Scsi_Host *dev = dev_id; + unsigned long flags; ++ int r; + + spin_lock_irqsave(dev->host_lock, flags); +- tul_isr((HCS *)dev->base); ++ r = initio_isr((struct initio_host *)dev->hostdata); + spin_unlock_irqrestore(dev->host_lock, flags); ++ if (r) + return IRQ_HANDLED; ++ else ++ return IRQ_NONE; + } + +-static int tul_NewReturnNumberOfAdapters(void) +-{ +- struct pci_dev *pDev = NULL; /* Start from none */ +- int iAdapters = 0; +- long dRegValue; +- WORD wBIOS; +- int i = 0; +- +- init_i91uAdapter_table(); +- +- for (i = 0; i < ARRAY_SIZE(i91u_pci_devices); i++) +- { +- while ((pDev = pci_find_device(i91u_pci_devices[i].vendor, i91u_pci_devices[i].device, pDev)) != NULL) { +- if (pci_enable_device(pDev)) +- continue; +- pci_read_config_dword(pDev, 0x44, (u32 *) & dRegValue); +- wBIOS = (UWORD) (dRegValue & 0xFF); +- if (((dRegValue & 0xFF00) >> 8) == 0xFF) +- dRegValue = 0; +- wBIOS = (wBIOS << 8) + ((UWORD) ((dRegValue & 0xFF00) >> 8)); +- if (pci_set_dma_mask(pDev, DMA_32BIT_MASK)) { +- printk(KERN_WARNING +- "i91u: Could not set 32 bit DMA mask\n"); +- continue; +- } +- +- if (Addi91u_into_Adapter_table(wBIOS, +- (pDev->resource[0].start), +- pDev->irq, +- pDev->bus->number, +- (pDev->devfn >> 3) +- ) == 0) +- iAdapters++; +- } +- } +- +- return (iAdapters); +-} +- +-static int i91u_detect(struct scsi_host_template * tpnt) +-{ +- HCS *pHCB; +- struct Scsi_Host *hreg; +- unsigned long i; /* 01/14/98 */ +- int ok = 0, iAdapters; +- ULONG dBiosAdr; +- BYTE *pbBiosAdr; +- +- /* Get total number of adapters in the motherboard */ +- iAdapters = tul_NewReturnNumberOfAdapters(); +- if (iAdapters == 0) /* If no tulip founded, return */ +- return (0); +- +- tul_num_ch = (iAdapters > tul_num_ch) ? 
tul_num_ch : iAdapters; +- /* Update actually channel number */ +- if (tul_tag_enable) { /* 1.01i */ +- tul_num_scb = MAX_TARGETS * i91u_MAXQUEUE; +- } else { +- tul_num_scb = MAX_TARGETS + 3; /* 1-tape, 1-CD_ROM, 1- extra */ +- } /* Update actually SCBs per adapter */ +- +- /* Get total memory needed for HCS */ +- i = tul_num_ch * sizeof(HCS); +- memset((unsigned char *) &tul_hcs[0], 0, i); /* Initialize tul_hcs 0 */ +- /* Get total memory needed for SCB */ +- +- for (; tul_num_scb >= MAX_TARGETS + 3; tul_num_scb--) { +- i = tul_num_ch * tul_num_scb * sizeof(SCB); +- if ((tul_scb = kmalloc(i, GFP_ATOMIC | GFP_DMA)) != NULL) +- break; +- } +- if (tul_scb == NULL) { +- printk("i91u: SCB memory allocation error\n"); +- return (0); +- } +- memset((unsigned char *) tul_scb, 0, i); +- +- for (i = 0, pHCB = &tul_hcs[0]; /* Get pointer for control block */ +- i < tul_num_ch; +- i++, pHCB++) { +- get_tulipPCIConfig(pHCB, i); +- +- dBiosAdr = pHCB->HCS_BIOS; +- dBiosAdr = (dBiosAdr << 4); + +- pbBiosAdr = phys_to_virt(dBiosAdr); +- +- init_tulip(pHCB, tul_scb + (i * tul_num_scb), tul_num_scb, pbBiosAdr, 10); +- request_region(pHCB->HCS_Base, 256, "i91u"); /* Register */ +- +- pHCB->HCS_Index = i; /* 7/29/98 */ +- hreg = scsi_register(tpnt, sizeof(HCS)); +- if(hreg == NULL) { +- release_region(pHCB->HCS_Base, 256); +- return 0; +- } +- hreg->io_port = pHCB->HCS_Base; +- hreg->n_io_port = 0xff; +- hreg->can_queue = tul_num_scb; /* 03/05/98 */ +- hreg->unique_id = pHCB->HCS_Base; +- hreg->max_id = pHCB->HCS_MaxTar; +- hreg->max_lun = 32; /* 10/21/97 */ +- hreg->irq = pHCB->HCS_Intr; +- hreg->this_id = pHCB->HCS_SCSI_ID; /* Assign HCS index */ +- hreg->base = (unsigned long)pHCB; +- hreg->sg_tablesize = TOTAL_SG_ENTRY; /* Maximun support is 32 */ +- +- /* Initial tulip chip */ +- ok = request_irq(pHCB->HCS_Intr, i91u_intr, IRQF_DISABLED | IRQF_SHARED, "i91u", hreg); +- if (ok < 0) { +- printk(KERN_WARNING "i91u: unable to request IRQ %d\n\n", pHCB->HCS_Intr); +- return 0; +- } +- } +- +- tpnt->this_id = -1; +- tpnt->can_queue = 1; +- +- return 1; +-} ++/** ++ * initio_build_scb - Build the mappings and SCB ++ * @host: InitIO host taking the command ++ * @cblk: Firmware command block ++ * @cmnd: SCSI midlayer command block ++ * ++ * Translate the abstract SCSI command into a firmware command block ++ * suitable for feeding to the InitIO host controller. This also requires ++ * we build the scatter gather lists and ensure they are mapped properly. 
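++ *
++ *	The sense buffer is mapped unconditionally so the controller can
++ *	perform an automatic request sense, and the SCB's own scatter
++ *	gather table is mapped so the firmware can fetch it by bus address.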
++ */ + +-static void i91uBuildSCB(HCS * pHCB, SCB * pSCB, struct scsi_cmnd * SCpnt) ++static void initio_build_scb(struct initio_host * host, struct scsi_ctrl_blk * cblk, struct scsi_cmnd * cmnd) + { /* Create corresponding SCB */ +- struct scatterlist *pSrbSG; +- SG *pSG; /* Pointer to SG list */ +- int i; +- long TotalLen; ++ struct scatterlist *sglist; ++ struct sg_entry *sg; /* Pointer to SG list */ ++ int i, nseg; ++ long total_len; + dma_addr_t dma_addr; + +- pSCB->SCB_Post = i91uSCBPost; /* i91u's callback routine */ +- pSCB->SCB_Srb = SCpnt; +- pSCB->SCB_Opcode = ExecSCSI; +- pSCB->SCB_Flags = SCF_POST; /* After SCSI done, call post routine */ +- pSCB->SCB_Target = SCpnt->device->id; +- pSCB->SCB_Lun = SCpnt->device->lun; +- pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW; ++ /* Fill in the command headers */ ++ cblk->post = i91uSCBPost; /* i91u's callback routine */ ++ cblk->srb = cmnd; ++ cblk->opcode = ExecSCSI; ++ cblk->flags = SCF_POST; /* After SCSI done, call post routine */ ++ cblk->target = cmnd->device->id; ++ cblk->lun = cmnd->device->lun; ++ cblk->ident = cmnd->device->lun | DISC_ALLOW; + +- pSCB->SCB_Flags |= SCF_SENSE; /* Turn on auto request sense */ +- dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->sense_buffer, +- SENSE_SIZE, DMA_FROM_DEVICE); +- pSCB->SCB_SensePtr = cpu_to_le32((u32)dma_addr); +- pSCB->SCB_SenseLen = cpu_to_le32(SENSE_SIZE); +- SCpnt->SCp.ptr = (char *)(unsigned long)dma_addr; +- +- pSCB->SCB_CDBLen = SCpnt->cmd_len; +- pSCB->SCB_HaStat = 0; +- pSCB->SCB_TaStat = 0; +- memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, SCpnt->cmd_len); ++ cblk->flags |= SCF_SENSE; /* Turn on auto request sense */ + +- if (SCpnt->device->tagged_supported) { /* Tag Support */ +- pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ ++ /* Map the sense buffer into bus memory */ ++ dma_addr = dma_map_single(&host->pci_dev->dev, cmnd->sense_buffer, ++ SENSE_SIZE, DMA_FROM_DEVICE); ++ cblk->senseptr = cpu_to_le32((u32)dma_addr); ++ cblk->senselen = cpu_to_le32(SENSE_SIZE); ++ cmnd->SCp.ptr = (char *)(unsigned long)dma_addr; ++ cblk->cdblen = cmnd->cmd_len; ++ ++ /* Clear the returned status */ ++ cblk->hastat = 0; ++ cblk->tastat = 0; ++ /* Command the command */ ++ memcpy(&cblk->cdb[0], &cmnd->cmnd, cmnd->cmd_len); ++ ++ /* Set up tags */ ++ if (cmnd->device->tagged_supported) { /* Tag Support */ ++ cblk->tagmsg = SIMPLE_QUEUE_TAG; /* Do simple tag only */ + } else { +- pSCB->SCB_TagMsg = 0; /* No tag support */ ++ cblk->tagmsg = 0; /* No tag support */ + } ++ + /* todo handle map_sg error */ +- if (SCpnt->use_sg) { +- dma_addr = dma_map_single(&pHCB->pci_dev->dev, &pSCB->SCB_SGList[0], +- sizeof(struct SG_Struc) * TOTAL_SG_ENTRY, ++ nseg = scsi_dma_map(cmnd); ++ BUG_ON(nseg < 0); ++ if (nseg) { ++ dma_addr = dma_map_single(&host->pci_dev->dev, &cblk->sglist[0], ++ sizeof(struct sg_entry) * TOTAL_SG_ENTRY, + DMA_BIDIRECTIONAL); +- pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr); +- SCpnt->SCp.dma_handle = dma_addr; ++ cblk->bufptr = cpu_to_le32((u32)dma_addr); ++ cmnd->SCp.dma_handle = dma_addr; + +- pSrbSG = (struct scatterlist *) SCpnt->request_buffer; +- pSCB->SCB_SGLen = dma_map_sg(&pHCB->pci_dev->dev, pSrbSG, +- SCpnt->use_sg, SCpnt->sc_data_direction); +- +- pSCB->SCB_Flags |= SCF_SG; /* Turn on SG list flag */ +- for (i = 0, TotalLen = 0, pSG = &pSCB->SCB_SGList[0]; /* 1.01g */ +- i < pSCB->SCB_SGLen; i++, pSG++, pSrbSG++) { +- pSG->SG_Ptr = cpu_to_le32((u32)sg_dma_address(pSrbSG)); +- TotalLen += pSG->SG_Len = cpu_to_le32((u32)sg_dma_len(pSrbSG)); +- } +- 
+-	pSCB->SCB_BufLen = (SCpnt->request_bufflen > TotalLen) ?
+-	    TotalLen : SCpnt->request_bufflen;
+-	} else if (SCpnt->request_bufflen) {	/* Non SG */
+-		dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->request_buffer,
+-					  SCpnt->request_bufflen,
+-					  SCpnt->sc_data_direction);
+-		SCpnt->SCp.dma_handle = dma_addr;
+-		pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr);
+-		pSCB->SCB_BufLen = cpu_to_le32((u32)SCpnt->request_bufflen);
+-		pSCB->SCB_SGLen = 0;
+-	} else {
+-		pSCB->SCB_BufLen = 0;
+-		pSCB->SCB_SGLen = 0;
++
++		cblk->sglen = nseg;	/* must be set before it is used as the loop bound below */
++
++		cblk->flags |= SCF_SG;	/* Turn on SG list flag */
++		total_len = 0;
++		sg = &cblk->sglist[0];
++		scsi_for_each_sg(cmnd, sglist, cblk->sglen, i) {
++			sg->data = cpu_to_le32((u32)sg_dma_address(sglist));
++			total_len += sg->len = cpu_to_le32((u32)sg_dma_len(sglist));
++			sg++;	/* advance the firmware SG entry alongside the scatterlist */
++		}
++
++		cblk->buflen = (scsi_bufflen(cmnd) > total_len) ?
++			total_len : scsi_bufflen(cmnd);
++	} else {	/* No data transfer required */
++		cblk->buflen = 0;
++		cblk->sglen = 0;
+ 	}
+ }
+ 
++/**
++ *	i91u_queuecommand	-	Queue a new command if possible
++ *	@cmd: SCSI command block from the mid layer
++ *	@done: Completion handler
++ *
++ *	Attempts to queue a new command with the host adapter. Will return
++ *	zero if successful or indicate a host busy condition if not (which
++ *	will cause the mid layer to call us again later with the command)
++ */
++
+ static int i91u_queuecommand(struct scsi_cmnd *cmd,
+ 		void (*done)(struct scsi_cmnd *))
+ {
+-	HCS *pHCB = (HCS *) cmd->device->host->base;
+-	register SCB *pSCB;
++	struct initio_host *host = (struct initio_host *) cmd->device->host->hostdata;
++	struct scsi_ctrl_blk *cmnd;
+ 
+ 	cmd->scsi_done = done;
+ 
+-	pSCB = tul_alloc_scb(pHCB);
+-	if (!pSCB)
++	cmnd = initio_alloc_scb(host);
++	if (!cmnd)
+ 		return SCSI_MLQUEUE_HOST_BUSY;
+ 
+-	i91uBuildSCB(pHCB, pSCB, cmd);
+-	tul_exec_scb(pHCB, pSCB);
++	initio_build_scb(host, cmnd, cmd);
++	initio_exec_scb(host, cmnd);
+ 	return 0;
+ }
+ 
+-#if 0 /* no new EH yet */
+-/*
+- * Abort a queued command
+- * (commands that are on the bus can't be aborted easily)
+- */
+-static int i91u_abort(struct scsi_cmnd * SCpnt)
+-{
+-	HCS *pHCB;
+-
+-	pHCB = (HCS *) SCpnt->device->host->base;
+-	return tul_abort_srb(pHCB, SCpnt);
+-}
+-
+-/*
+- * Reset registers, reset a hanging bus and
+- * kill active and disconnected commands for target w/o soft reset
++/**
++ *	i91u_bus_reset		-	reset the SCSI bus
++ *	@cmnd: Command block we want to trigger the reset for
++ *
++ *	Initiate a SCSI bus reset sequence
+  */
+-static int i91u_reset(struct scsi_cmnd * SCpnt, unsigned int reset_flags)
+-{				/* I need Host Control Block Information */
+-	HCS *pHCB;
+-
+-	pHCB = (HCS *) SCpnt->device->host->base;
+-
+-	if (reset_flags & (SCSI_RESET_SUGGEST_BUS_RESET | SCSI_RESET_SUGGEST_HOST_RESET))
+-		return tul_reset_scsi_bus(pHCB);
+-	else
+-		return tul_device_reset(pHCB, SCpnt, SCpnt->device->id, reset_flags);
+-}
+-#endif
+ 
+-static int i91u_bus_reset(struct scsi_cmnd * SCpnt)
++static int i91u_bus_reset(struct scsi_cmnd * cmnd)
+ {
+-	HCS *pHCB;
++	struct initio_host *host;
+ 
+-	pHCB = (HCS *) SCpnt->device->host->base;
++	host = (struct initio_host *) cmnd->device->host->hostdata;
+ 
+-	spin_lock_irq(SCpnt->device->host->host_lock);
+-	tul_reset_scsi(pHCB, 0);
+-	spin_unlock_irq(SCpnt->device->host->host_lock);
++	spin_lock_irq(cmnd->device->host->host_lock);
++	initio_reset_scsi(host, 0);
++	spin_unlock_irq(cmnd->device->host->host_lock);
+ 
+ 	return SUCCESS;
+ }
+ 
+-/*
+- *	Return the "logical geometry"
++/**
++ *	i91u_biosparam		-	return the "logical geometry"
++ *	@sdev: SCSI device
++ *	@dev: Matching block device
++ *	@capacity: Size of the drive in sectors
++ *	@info_array: Return space for BIOS geometry
++ *
++ *	Map the device geometry in a manner compatible with the host
++ *	controller BIOS behaviour.
++ *
++ *	FIXME: limited to 2^32 sector devices.
+  */
++
+ static int i91u_biosparam(struct scsi_device *sdev, struct block_device *dev,
+ 		sector_t capacity, int *info_array)
+ {
+-	HCS *pHcb;		/* Point to Host adapter control block */
+-	TCS *pTcb;
++	struct initio_host *host;		/* Point to Host adapter control block */
++	struct target_control *tc;
+ 
+-	pHcb = (HCS *) sdev->host->base;
+-	pTcb = &pHcb->HCS_Tcs[sdev->id];
++	host = (struct initio_host *) sdev->host->hostdata;
++	tc = &host->targets[sdev->id];
+ 
+-	if (pTcb->TCS_DrvHead) {
+-		info_array[0] = pTcb->TCS_DrvHead;
+-		info_array[1] = pTcb->TCS_DrvSector;
+-		info_array[2] = (unsigned long)capacity / pTcb->TCS_DrvHead / pTcb->TCS_DrvSector;
++	if (tc->heads) {
++		info_array[0] = tc->heads;
++		info_array[1] = tc->sectors;
++		info_array[2] = (unsigned long)capacity / tc->heads / tc->sectors;
+ 	} else {
+-		if (pTcb->TCS_DrvFlags & TCF_DRV_255_63) {
++		if (tc->drv_flags & TCF_DRV_255_63) {
+ 			info_array[0] = 255;
+ 			info_array[1] = 63;
+ 			info_array[2] = (unsigned long)capacity / 255 / 63;
+@@ -3047,7 +2722,16 @@
+ 	return 0;
+ }
+ 
+-static void i91u_unmap_cmnd(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
++/**
++ *	i91u_unmap_scb		-	Unmap a command
++ *	@pci_dev: PCI device the command is for
++ *	@cmnd: The command itself
++ *
++ *	Unmap any PCI mapping/IOMMU resources allocated when the command
++ *	was mapped originally as part of initio_build_scb
++ */
++
++static void i91u_unmap_scb(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
+ {
+ 	/* auto sense buffer */
+ 	if (cmnd->SCp.ptr) {
+@@ -3058,65 +2742,63 @@
+ 	}
+ 
+ 	/* request buffer */
+-	if (cmnd->use_sg) {
++	if (scsi_sg_count(cmnd)) {
+ 		dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+-				sizeof(struct SG_Struc) * TOTAL_SG_ENTRY,
++				sizeof(struct sg_entry) * TOTAL_SG_ENTRY,
+ 				DMA_BIDIRECTIONAL);
+ 
+-		dma_unmap_sg(&pci_dev->dev, cmnd->request_buffer,
+-			     cmnd->use_sg,
+-			     cmnd->sc_data_direction);
+-	} else if (cmnd->request_bufflen) {
+-		dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+-				cmnd->request_bufflen,
+-				cmnd->sc_data_direction);
++		scsi_dma_unmap(cmnd);
+ 	}
+ }
+ 
+-/*****************************************************************************
+- Function name  : i91uSCBPost
+- Description    : This is callback routine be called when tulip finish one
+-		  SCSI command.
+- Input          : pHCB  -       Pointer to host adapter control block.
+-		  pSCB  -       Pointer to SCSI control block.
+- Output         : None.
+- Return         : None.
+-*****************************************************************************/
+-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb)
+-{
+-	struct scsi_cmnd *pSRB;	/* Pointer to SCSI request block */
+-	HCS *pHCB;
+-	SCB *pSCB;
+-
+-	pHCB = (HCS *) pHcb;
+-	pSCB = (SCB *) pScb;
+-	if ((pSRB = pSCB->SCB_Srb) == 0) {
+-		printk("i91uSCBPost: SRB pointer is empty\n");
++/**
++ *	i91uSCBPost		-	SCSI callback
++ *	@host_mem: Pointer to host adapter control block.
++ *	@cblk_mem: Pointer to SCSI control block.
++ *
++ *	This is the callback routine, called when the tulip has finished
++ *	one SCSI command.
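++ *
++ *	The firmware completion status is translated into the matching
++ *	mid layer result code before the command is completed and the
++ *	SCB is returned to the free pool.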
++ */ ++ ++static void i91uSCBPost(u8 * host_mem, u8 * cblk_mem) ++{ ++ struct scsi_cmnd *cmnd; /* Pointer to SCSI request block */ ++ struct initio_host *host; ++ struct scsi_ctrl_blk *cblk; + +- tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */ ++ host = (struct initio_host *) host_mem; ++ cblk = (struct scsi_ctrl_blk *) cblk_mem; ++ if ((cmnd = cblk->srb) == NULL) { ++ printk(KERN_ERR "i91uSCBPost: SRB pointer is empty\n"); ++ WARN_ON(1); ++ initio_release_scb(host, cblk); /* Release SCB for current channel */ + return; + } +- switch (pSCB->SCB_HaStat) { ++ ++ /* ++ * Remap the firmware error status into a mid layer one ++ */ ++ switch (cblk->hastat) { + case 0x0: + case 0xa: /* Linked command complete without error and linked normally */ + case 0xb: /* Linked command complete without error interrupt generated */ +- pSCB->SCB_HaStat = 0; ++ cblk->hastat = 0; + break; + + case 0x11: /* Selection time out-The initiator selection or target + reselection was not complete within the SCSI Time out period */ +- pSCB->SCB_HaStat = DID_TIME_OUT; ++ cblk->hastat = DID_TIME_OUT; + break; + + case 0x14: /* Target bus phase sequence failure-An invalid bus phase or bus + phase sequence was requested by the target. The host adapter + will generate a SCSI Reset Condition, notifying the host with + a SCRD interrupt */ +- pSCB->SCB_HaStat = DID_RESET; ++ cblk->hastat = DID_RESET; + break; + + case 0x1a: /* SCB Aborted. 07/21/98 */ +- pSCB->SCB_HaStat = DID_ABORT; ++ cblk->hastat = DID_ABORT; + break; + + case 0x12: /* Data overrun/underrun-The target attempted to transfer more data +@@ -3126,49 +2808,196 @@ + case 0x16: /* Invalid SCB Operation Code. */ + + default: +- printk("ini9100u: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat); +- pSCB->SCB_HaStat = DID_ERROR; /* Couldn't find any better */ ++ printk("ini9100u: %x %x\n", cblk->hastat, cblk->tastat); ++ cblk->hastat = DID_ERROR; /* Couldn't find any better */ + break; + } + +- pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16); +- +- if (pSRB == NULL) { +- printk("pSRB is NULL\n"); +- } +- +- i91u_unmap_cmnd(pHCB->pci_dev, pSRB); +- pSRB->scsi_done(pSRB); /* Notify system DONE */ +- +- tul_release_scb(pHCB, pSCB); /* Release SCB for current channel */ ++ cmnd->result = cblk->tastat | (cblk->hastat << 16); ++ WARN_ON(cmnd == NULL); ++ i91u_unmap_scb(host->pci_dev, cmnd); ++ cmnd->scsi_done(cmnd); /* Notify system DONE */ ++ initio_release_scb(host, cblk); /* Release SCB for current channel */ + } + +-/* +- * Release ressources +- */ +-static int i91u_release(struct Scsi_Host *hreg) +-{ +- free_irq(hreg->irq, hreg); +- release_region(hreg->io_port, 256); +- return 0; +-} +-MODULE_LICENSE("Dual BSD/GPL"); +- +-static struct scsi_host_template driver_template = { ++static struct scsi_host_template initio_template = { + .proc_name = "INI9100U", +- .name = i91u_REVID, +- .detect = i91u_detect, +- .release = i91u_release, ++ .name = "Initio INI-9X00U/UW SCSI device driver", + .queuecommand = i91u_queuecommand, +-// .abort = i91u_abort, +-// .reset = i91u_reset, + .eh_bus_reset_handler = i91u_bus_reset, + .bios_param = i91u_biosparam, +- .can_queue = 1, ++ .can_queue = MAX_TARGETS * i91u_MAXQUEUE, + .this_id = 1, + .sg_tablesize = SG_ALL, + .cmd_per_lun = 1, + .use_clustering = ENABLE_CLUSTERING, + }; +-#include "scsi_module.c" + ++static int initio_probe_one(struct pci_dev *pdev, ++ const struct pci_device_id *id) ++{ ++ struct Scsi_Host *shost; ++ struct initio_host *host; ++ u32 reg; ++ u16 bios_seg; ++ struct scsi_ctrl_blk *scb, 
*tmp, *prev = NULL /* silence gcc */;
++	int num_scb, i, error;
++
++	error = pci_enable_device(pdev);
++	if (error)
++		return error;
++
++	pci_read_config_dword(pdev, 0x44, &reg);
++	bios_seg = (u16) (reg & 0xFF);
++	if (((reg & 0xFF00) >> 8) == 0xFF)
++		reg = 0;
++	bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8));
++
++	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
++		printk(KERN_WARNING  "i91u: Could not set 32 bit DMA mask\n");
++		error = -ENODEV;
++		goto out_disable_device;
++	}
++	shost = scsi_host_alloc(&initio_template, sizeof(struct initio_host));
++	if (!shost) {
++		printk(KERN_WARNING "initio: Could not allocate host structure.\n");
++		error = -ENOMEM;
++		goto out_disable_device;
++	}
++	host = (struct initio_host *)shost->hostdata;
++	memset(host, 0, sizeof(struct initio_host));
++	/* The I/O base must be filled in before it is reserved and used */
++	host->addr = pci_resource_start(pdev, 0);
++
++	if (!request_region(host->addr, 256, "i91u")) {
++		printk(KERN_WARNING "initio: I/O port range 0x%x is busy.\n", host->addr);
++		error = -ENODEV;
++		goto out_host_put;
++	}
++
++	if (initio_tag_enable)	/* 1.01i */
++		num_scb = MAX_TARGETS * i91u_MAXQUEUE;
++	else
++		num_scb = MAX_TARGETS + 3;	/* 1-tape, 1-CD_ROM, 1- extra */
++
++	for (; num_scb >= MAX_TARGETS + 3; num_scb--) {
++		i = num_scb * sizeof(struct scsi_ctrl_blk);
++		if ((scb = kzalloc(i, GFP_DMA)) != NULL)
++			break;
++	}
++
++	if (!scb) {
++		printk(KERN_WARNING "initio: Cannot allocate SCB array.\n");
++		error = -ENOMEM;
++		goto out_release_region;
++	}
++
++	host->num_scbs = num_scb;
++	host->scb = scb;
++	host->next_pending = scb;
++	host->next_avail = scb;
++	for (i = 0, tmp = scb; i < num_scb; i++, tmp++) {
++		tmp->tagid = i;
++		if (i != 0)
++			prev->next = tmp;
++		prev = tmp;
++	}
++	prev->next = NULL;
++	host->scb_end = tmp;
++	host->first_avail = scb;
++	host->last_avail = prev;
++
++	initio_init(host, phys_to_virt(bios_seg << 4));
++
++	host->jsstatus0 = 0;
++
++	shost->io_port = host->addr;
++	shost->n_io_port = 0xff;
++	shost->can_queue = num_scb;		/* 03/05/98 */
++	shost->unique_id = host->addr;
++	shost->max_id = host->max_tar;
++	shost->max_lun = 32;	/* 10/21/97 */
++	shost->irq = pdev->irq;
++	shost->this_id = host->scsi_id;	/* Assign HCS index */
++	shost->base = host->addr;
++	shost->sg_tablesize = TOTAL_SG_ENTRY;
++
++	error = request_irq(pdev->irq, i91u_intr, IRQF_DISABLED|IRQF_SHARED, "i91u", shost);
++	if (error < 0) {
++		printk(KERN_WARNING "initio: Unable to request IRQ %d\n", pdev->irq);
++		goto out_free_scbs;
++	}
++
++	pci_set_drvdata(pdev, shost);
++	host->pci_dev = pdev;
++
++	error = scsi_add_host(shost, &pdev->dev);
++	if (error)
++		goto out_free_irq;
++	scsi_scan_host(shost);
++	return 0;
++out_free_irq:
++	free_irq(pdev->irq, shost);
++out_free_scbs:
++	kfree(host->scb);
++out_release_region:
++	release_region(host->addr, 256);
++out_host_put:
++	scsi_host_put(shost);
++out_disable_device:
++	pci_disable_device(pdev);
++	return error;
++}
++
++/**
++ *	initio_remove_one	-	control shutdown
++ *	@pdev:	PCI device being released
++ *
++ *	Release the resources assigned to this adapter after it has
++ *	finished being used.
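++ *
++ *	The host is removed from the mid layer first so that no new
++ *	commands can arrive while the IRQ, the I/O region and the host
++ *	structure itself are released.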
++ */ ++ ++static void initio_remove_one(struct pci_dev *pdev) ++{ ++ struct Scsi_Host *host = pci_get_drvdata(pdev); ++ struct initio_host *s = (struct initio_host *)host->hostdata; ++ scsi_remove_host(host); ++ free_irq(pdev->irq, host); ++ release_region(s->addr, 256); ++ scsi_host_put(host); ++ pci_disable_device(pdev); ++} ++ ++MODULE_LICENSE("GPL"); ++ ++static struct pci_device_id initio_pci_tbl[] = { ++ {PCI_VENDOR_ID_INIT, 0x9500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {PCI_VENDOR_ID_INIT, 0x9400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {PCI_VENDOR_ID_INIT, 0x9401, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {PCI_VENDOR_ID_INIT, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {PCI_VENDOR_ID_DOMEX, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {0,} ++}; ++MODULE_DEVICE_TABLE(pci, initio_pci_tbl); ++ ++static struct pci_driver initio_pci_driver = { ++ .name = "initio", ++ .id_table = initio_pci_tbl, ++ .probe = initio_probe_one, ++ .remove = __devexit_p(initio_remove_one), ++}; ++ ++static int __init initio_init_driver(void) ++{ ++ return pci_register_driver(&initio_pci_driver); ++} ++ ++static void __exit initio_exit_driver(void) ++{ ++ pci_unregister_driver(&initio_pci_driver); ++} ++ ++MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver"); ++MODULE_AUTHOR("Initio Corporation"); ++MODULE_LICENSE("GPL"); ++ ++module_init(initio_init_driver); ++module_exit(initio_exit_driver); +diff -Nurb linux-2.6.22-570/drivers/scsi/initio.h linux-2.6.22-591/drivers/scsi/initio.h +--- linux-2.6.22-570/drivers/scsi/initio.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/initio.h 2007-12-21 15:36:12.000000000 -0500 +@@ -4,6 +4,8 @@ + * Copyright (c) 1994-1998 Initio Corporation + * All rights reserved. + * ++ * Cleanups (c) Copyright 2007 Red Hat ++ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) +@@ -18,27 +20,6 @@ + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * +- * -------------------------------------------------------------------------- +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the above copyright +- * notice, this list of conditions, and the following disclaimer, +- * without modification, immediately at the beginning of the file. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. The name of the author may not be used to endorse or promote products +- * derived from this software without specific prior written permission. +- * +- * Where this Software is combined with software released under the terms of +- * the GNU General Public License ("GPL") and the terms of the GPL would require the +- * combined work to also be released under the terms of the GPL, the terms +- * and conditions of this License will apply in addition to those of the +- * GPL with the exception of any terms or conditions of this License that +- * conflict with, or are expressly prohibited by, the GPL. 
+- * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +@@ -56,17 +37,6 @@ + + #include + +-#define ULONG unsigned long +-#define USHORT unsigned short +-#define UCHAR unsigned char +-#define BYTE unsigned char +-#define WORD unsigned short +-#define DWORD unsigned long +-#define UBYTE unsigned char +-#define UWORD unsigned short +-#define UDWORD unsigned long +-#define U32 u32 +- + #define TOTAL_SG_ENTRY 32 + #define MAX_SUPPORTED_ADAPTERS 8 + #define MAX_OFFSET 15 +@@ -368,55 +338,55 @@ + /************************************************************************/ + /* Scatter-Gather Element Structure */ + /************************************************************************/ +-typedef struct SG_Struc { +- U32 SG_Ptr; /* Data Pointer */ +- U32 SG_Len; /* Data Length */ +-} SG; ++struct sg_entry { ++ u32 data; /* Data Pointer */ ++ u32 len; /* Data Length */ ++}; + + /*********************************************************************** + SCSI Control Block + ************************************************************************/ +-typedef struct Scsi_Ctrl_Blk { +- struct Scsi_Ctrl_Blk *SCB_NxtScb; +- UBYTE SCB_Status; /*4 */ +- UBYTE SCB_NxtStat; /*5 */ +- UBYTE SCB_Mode; /*6 */ +- UBYTE SCB_Msgin; /*7 SCB_Res0 */ +- UWORD SCB_SGIdx; /*8 */ +- UWORD SCB_SGMax; /*A */ ++struct scsi_ctrl_blk { ++ struct scsi_ctrl_blk *next; ++ u8 status; /*4 */ ++ u8 next_state; /*5 */ ++ u8 mode; /*6 */ ++ u8 msgin; /*7 SCB_Res0 */ ++ u16 sgidx; /*8 */ ++ u16 sgmax; /*A */ + #ifdef ALPHA +- U32 SCB_Reserved[2]; /*C */ ++ u32 reserved[2]; /*C */ + #else +- U32 SCB_Reserved[3]; /*C */ ++ u32 reserved[3]; /*C */ + #endif + +- U32 SCB_XferLen; /*18 Current xfer len */ +- U32 SCB_TotXLen; /*1C Total xfer len */ +- U32 SCB_PAddr; /*20 SCB phy. Addr. */ +- +- UBYTE SCB_Opcode; /*24 SCB command code */ +- UBYTE SCB_Flags; /*25 SCB Flags */ +- UBYTE SCB_Target; /*26 Target Id */ +- UBYTE SCB_Lun; /*27 Lun */ +- U32 SCB_BufPtr; /*28 Data Buffer Pointer */ +- U32 SCB_BufLen; /*2C Data Allocation Length */ +- UBYTE SCB_SGLen; /*30 SG list # */ +- UBYTE SCB_SenseLen; /*31 Sense Allocation Length */ +- UBYTE SCB_HaStat; /*32 */ +- UBYTE SCB_TaStat; /*33 */ +- UBYTE SCB_CDBLen; /*34 CDB Length */ +- UBYTE SCB_Ident; /*35 Identify */ +- UBYTE SCB_TagMsg; /*36 Tag Message */ +- UBYTE SCB_TagId; /*37 Queue Tag */ +- UBYTE SCB_CDB[12]; /*38 */ +- U32 SCB_SGPAddr; /*44 SG List/Sense Buf phy. Addr. */ +- U32 SCB_SensePtr; /*48 Sense data pointer */ +- void (*SCB_Post) (BYTE *, BYTE *); /*4C POST routine */ +- struct scsi_cmnd *SCB_Srb; /*50 SRB Pointer */ +- SG SCB_SGList[TOTAL_SG_ENTRY]; /*54 Start of SG list */ +-} SCB; ++ u32 xferlen; /*18 Current xfer len */ ++ u32 totxlen; /*1C Total xfer len */ ++ u32 paddr; /*20 SCB phy. Addr. */ ++ ++ u8 opcode; /*24 SCB command code */ ++ u8 flags; /*25 SCB Flags */ ++ u8 target; /*26 Target Id */ ++ u8 lun; /*27 Lun */ ++ u32 bufptr; /*28 Data Buffer Pointer */ ++ u32 buflen; /*2C Data Allocation Length */ ++ u8 sglen; /*30 SG list # */ ++ u8 senselen; /*31 Sense Allocation Length */ ++ u8 hastat; /*32 */ ++ u8 tastat; /*33 */ ++ u8 cdblen; /*34 CDB Length */ ++ u8 ident; /*35 Identify */ ++ u8 tagmsg; /*36 Tag Message */ ++ u8 tagid; /*37 Queue Tag */ ++ u8 cdb[12]; /*38 */ ++ u32 sgpaddr; /*44 SG List/Sense Buf phy. Addr. 
*/ ++ u32 senseptr; /*48 Sense data pointer */ ++ void (*post) (u8 *, u8 *); /*4C POST routine */ ++ struct scsi_cmnd *srb; /*50 SRB Pointer */ ++ struct sg_entry sglist[TOTAL_SG_ENTRY]; /*54 Start of SG list */ ++}; + +-/* Bit Definition for SCB_Status */ ++/* Bit Definition for status */ + #define SCB_RENT 0x01 + #define SCB_PEND 0x02 + #define SCB_CONTIG 0x04 /* Contigent Allegiance */ +@@ -425,17 +395,17 @@ + #define SCB_DONE 0x20 + + +-/* Opcodes of SCB_Opcode */ ++/* Opcodes for opcode */ + #define ExecSCSI 0x1 + #define BusDevRst 0x2 + #define AbortCmd 0x3 + + +-/* Bit Definition for SCB_Mode */ ++/* Bit Definition for mode */ + #define SCM_RSENS 0x01 /* request sense mode */ + + +-/* Bit Definition for SCB_Flags */ ++/* Bit Definition for flags */ + #define SCF_DONE 0x01 + #define SCF_POST 0x02 + #define SCF_SENSE 0x04 +@@ -492,15 +462,14 @@ + Target Device Control Structure + **********************************************************************/ + +-typedef struct Tar_Ctrl_Struc { +- UWORD TCS_Flags; /* 0 */ +- UBYTE TCS_JS_Period; /* 2 */ +- UBYTE TCS_SConfig0; /* 3 */ +- +- UWORD TCS_DrvFlags; /* 4 */ +- UBYTE TCS_DrvHead; /* 6 */ +- UBYTE TCS_DrvSector; /* 7 */ +-} TCS; ++struct target_control { ++ u16 flags; ++ u8 js_period; ++ u8 sconfig0; ++ u16 drv_flags; ++ u8 heads; ++ u8 sectors; ++}; + + /*********************************************************************** + Target Device Control Structure +@@ -523,62 +492,53 @@ + #define TCF_DRV_EN_TAG 0x0800 + #define TCF_DRV_255_63 0x0400 + +-typedef struct I91u_Adpt_Struc { +- UWORD ADPT_BIOS; /* 0 */ +- UWORD ADPT_BASE; /* 1 */ +- UBYTE ADPT_Bus; /* 2 */ +- UBYTE ADPT_Device; /* 3 */ +- UBYTE ADPT_INTR; /* 4 */ +-} INI_ADPT_STRUCT; +- +- + /*********************************************************************** + Host Adapter Control Structure + ************************************************************************/ +-typedef struct Ha_Ctrl_Struc { +- UWORD HCS_Base; /* 00 */ +- UWORD HCS_BIOS; /* 02 */ +- UBYTE HCS_Intr; /* 04 */ +- UBYTE HCS_SCSI_ID; /* 05 */ +- UBYTE HCS_MaxTar; /* 06 */ +- UBYTE HCS_NumScbs; /* 07 */ +- +- UBYTE HCS_Flags; /* 08 */ +- UBYTE HCS_Index; /* 09 */ +- UBYTE HCS_HaId; /* 0A */ +- UBYTE HCS_Config; /* 0B */ +- UWORD HCS_IdMask; /* 0C */ +- UBYTE HCS_Semaph; /* 0E */ +- UBYTE HCS_Phase; /* 0F */ +- UBYTE HCS_JSStatus0; /* 10 */ +- UBYTE HCS_JSInt; /* 11 */ +- UBYTE HCS_JSStatus1; /* 12 */ +- UBYTE HCS_SConf1; /* 13 */ +- +- UBYTE HCS_Msg[8]; /* 14 */ +- SCB *HCS_NxtAvail; /* 1C */ +- SCB *HCS_Scb; /* 20 */ +- SCB *HCS_ScbEnd; /* 24 */ +- SCB *HCS_NxtPend; /* 28 */ +- SCB *HCS_NxtContig; /* 2C */ +- SCB *HCS_ActScb; /* 30 */ +- TCS *HCS_ActTcs; /* 34 */ +- +- SCB *HCS_FirstAvail; /* 38 */ +- SCB *HCS_LastAvail; /* 3C */ +- SCB *HCS_FirstPend; /* 40 */ +- SCB *HCS_LastPend; /* 44 */ +- SCB *HCS_FirstBusy; /* 48 */ +- SCB *HCS_LastBusy; /* 4C */ +- SCB *HCS_FirstDone; /* 50 */ +- SCB *HCS_LastDone; /* 54 */ +- UBYTE HCS_MaxTags[16]; /* 58 */ +- UBYTE HCS_ActTags[16]; /* 68 */ +- TCS HCS_Tcs[MAX_TARGETS]; /* 78 */ +- spinlock_t HCS_AvailLock; +- spinlock_t HCS_SemaphLock; ++struct initio_host { ++ u16 addr; /* 00 */ ++ u16 bios_addr; /* 02 */ ++ u8 irq; /* 04 */ ++ u8 scsi_id; /* 05 */ ++ u8 max_tar; /* 06 */ ++ u8 num_scbs; /* 07 */ ++ ++ u8 flags; /* 08 */ ++ u8 index; /* 09 */ ++ u8 ha_id; /* 0A */ ++ u8 config; /* 0B */ ++ u16 idmask; /* 0C */ ++ u8 semaph; /* 0E */ ++ u8 phase; /* 0F */ ++ u8 jsstatus0; /* 10 */ ++ u8 jsint; /* 11 */ ++ u8 jsstatus1; /* 12 */ ++ u8 sconf1; /* 13 */ ++ ++ u8 
msg[8]; /* 14 */ ++ struct scsi_ctrl_blk *next_avail; /* 1C */ ++ struct scsi_ctrl_blk *scb; /* 20 */ ++ struct scsi_ctrl_blk *scb_end; /* 24 */ /*UNUSED*/ ++ struct scsi_ctrl_blk *next_pending; /* 28 */ ++ struct scsi_ctrl_blk *next_contig; /* 2C */ /*UNUSED*/ ++ struct scsi_ctrl_blk *active; /* 30 */ ++ struct target_control *active_tc; /* 34 */ ++ ++ struct scsi_ctrl_blk *first_avail; /* 38 */ ++ struct scsi_ctrl_blk *last_avail; /* 3C */ ++ struct scsi_ctrl_blk *first_pending; /* 40 */ ++ struct scsi_ctrl_blk *last_pending; /* 44 */ ++ struct scsi_ctrl_blk *first_busy; /* 48 */ ++ struct scsi_ctrl_blk *last_busy; /* 4C */ ++ struct scsi_ctrl_blk *first_done; /* 50 */ ++ struct scsi_ctrl_blk *last_done; /* 54 */ ++ u8 max_tags[16]; /* 58 */ ++ u8 act_tags[16]; /* 68 */ ++ struct target_control targets[MAX_TARGETS]; /* 78 */ ++ spinlock_t avail_lock; ++ spinlock_t semaph_lock; + struct pci_dev *pci_dev; +-} HCS; ++}; + + /* Bit Definition for HCB_Config */ + #define HCC_SCSI_RESET 0x01 +@@ -599,47 +559,47 @@ + *******************************************************************/ + + typedef struct _NVRAM_SCSI { /* SCSI channel configuration */ +- UCHAR NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */ +- UCHAR NVM_ChConfig1; /* 0Dh -> Channel config 1 */ +- UCHAR NVM_ChConfig2; /* 0Eh -> Channel config 2 */ +- UCHAR NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */ ++ u8 NVM_ChSCSIID; /* 0Ch -> Channel SCSI ID */ ++ u8 NVM_ChConfig1; /* 0Dh -> Channel config 1 */ ++ u8 NVM_ChConfig2; /* 0Eh -> Channel config 2 */ ++ u8 NVM_NumOfTarg; /* 0Fh -> Number of SCSI target */ + /* SCSI target configuration */ +- UCHAR NVM_Targ0Config; /* 10h -> Target 0 configuration */ +- UCHAR NVM_Targ1Config; /* 11h -> Target 1 configuration */ +- UCHAR NVM_Targ2Config; /* 12h -> Target 2 configuration */ +- UCHAR NVM_Targ3Config; /* 13h -> Target 3 configuration */ +- UCHAR NVM_Targ4Config; /* 14h -> Target 4 configuration */ +- UCHAR NVM_Targ5Config; /* 15h -> Target 5 configuration */ +- UCHAR NVM_Targ6Config; /* 16h -> Target 6 configuration */ +- UCHAR NVM_Targ7Config; /* 17h -> Target 7 configuration */ +- UCHAR NVM_Targ8Config; /* 18h -> Target 8 configuration */ +- UCHAR NVM_Targ9Config; /* 19h -> Target 9 configuration */ +- UCHAR NVM_TargAConfig; /* 1Ah -> Target A configuration */ +- UCHAR NVM_TargBConfig; /* 1Bh -> Target B configuration */ +- UCHAR NVM_TargCConfig; /* 1Ch -> Target C configuration */ +- UCHAR NVM_TargDConfig; /* 1Dh -> Target D configuration */ +- UCHAR NVM_TargEConfig; /* 1Eh -> Target E configuration */ +- UCHAR NVM_TargFConfig; /* 1Fh -> Target F configuration */ ++ u8 NVM_Targ0Config; /* 10h -> Target 0 configuration */ ++ u8 NVM_Targ1Config; /* 11h -> Target 1 configuration */ ++ u8 NVM_Targ2Config; /* 12h -> Target 2 configuration */ ++ u8 NVM_Targ3Config; /* 13h -> Target 3 configuration */ ++ u8 NVM_Targ4Config; /* 14h -> Target 4 configuration */ ++ u8 NVM_Targ5Config; /* 15h -> Target 5 configuration */ ++ u8 NVM_Targ6Config; /* 16h -> Target 6 configuration */ ++ u8 NVM_Targ7Config; /* 17h -> Target 7 configuration */ ++ u8 NVM_Targ8Config; /* 18h -> Target 8 configuration */ ++ u8 NVM_Targ9Config; /* 19h -> Target 9 configuration */ ++ u8 NVM_TargAConfig; /* 1Ah -> Target A configuration */ ++ u8 NVM_TargBConfig; /* 1Bh -> Target B configuration */ ++ u8 NVM_TargCConfig; /* 1Ch -> Target C configuration */ ++ u8 NVM_TargDConfig; /* 1Dh -> Target D configuration */ ++ u8 NVM_TargEConfig; /* 1Eh -> Target E configuration */ ++ u8 NVM_TargFConfig; /* 1Fh -> Target F 
configuration */ + } NVRAM_SCSI; + + typedef struct _NVRAM { + /*----------header ---------------*/ +- USHORT NVM_Signature; /* 0,1: Signature */ +- UCHAR NVM_Size; /* 2: Size of data structure */ +- UCHAR NVM_Revision; /* 3: Revision of data structure */ ++ u16 NVM_Signature; /* 0,1: Signature */ ++ u8 NVM_Size; /* 2: Size of data structure */ ++ u8 NVM_Revision; /* 3: Revision of data structure */ + /* ----Host Adapter Structure ---- */ +- UCHAR NVM_ModelByte0; /* 4: Model number (byte 0) */ +- UCHAR NVM_ModelByte1; /* 5: Model number (byte 1) */ +- UCHAR NVM_ModelInfo; /* 6: Model information */ +- UCHAR NVM_NumOfCh; /* 7: Number of SCSI channel */ +- UCHAR NVM_BIOSConfig1; /* 8: BIOS configuration 1 */ +- UCHAR NVM_BIOSConfig2; /* 9: BIOS configuration 2 */ +- UCHAR NVM_HAConfig1; /* A: Hoat adapter configuration 1 */ +- UCHAR NVM_HAConfig2; /* B: Hoat adapter configuration 2 */ ++ u8 NVM_ModelByte0; /* 4: Model number (byte 0) */ ++ u8 NVM_ModelByte1; /* 5: Model number (byte 1) */ ++ u8 NVM_ModelInfo; /* 6: Model information */ ++ u8 NVM_NumOfCh; /* 7: Number of SCSI channel */ ++ u8 NVM_BIOSConfig1; /* 8: BIOS configuration 1 */ ++ u8 NVM_BIOSConfig2; /* 9: BIOS configuration 2 */ ++ u8 NVM_HAConfig1; /* A: Hoat adapter configuration 1 */ ++ u8 NVM_HAConfig2; /* B: Hoat adapter configuration 2 */ + NVRAM_SCSI NVM_SCSIInfo[2]; +- UCHAR NVM_reserved[10]; ++ u8 NVM_reserved[10]; + /* ---------- CheckSum ---------- */ +- USHORT NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */ ++ u16 NVM_CheckSum; /* 0x3E, 0x3F: Checksum of NVRam */ + } NVRAM, *PNVRAM; + + /* Bios Configuration for nvram->BIOSConfig1 */ +@@ -681,19 +641,6 @@ + #define DISC_ALLOW 0xC0 /* Disconnect is allowed */ + #define SCSICMD_RequestSense 0x03 + +-typedef struct _HCSinfo { +- ULONG base; +- UCHAR vec; +- UCHAR bios; /* High byte of BIOS address */ +- USHORT BaseAndBios; /* high byte: pHcsInfo->bios,low byte:pHcsInfo->base */ +-} HCSINFO; +- +-#define TUL_RD(x,y) (UCHAR)(inb( (int)((ULONG)(x+y)) )) +-#define TUL_RDLONG(x,y) (ULONG)(inl((int)((ULONG)(x+y)) )) +-#define TUL_WR( adr,data) outb( (UCHAR)(data), (int)(adr)) +-#define TUL_WRSHORT(adr,data) outw( (UWORD)(data), (int)(adr)) +-#define TUL_WRLONG( adr,data) outl( (ULONG)(data), (int)(adr)) +- + #define SCSI_ABORT_SNOOZE 0 + #define SCSI_ABORT_SUCCESS 1 + #define SCSI_ABORT_PENDING 2 +diff -Nurb linux-2.6.22-570/drivers/scsi/ipr.c linux-2.6.22-591/drivers/scsi/ipr.c +--- linux-2.6.22-570/drivers/scsi/ipr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ipr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -540,32 +540,6 @@ + } + + /** +- * ipr_unmap_sglist - Unmap scatterlist if mapped +- * @ioa_cfg: ioa config struct +- * @ipr_cmd: ipr command struct +- * +- * Return value: +- * nothing +- **/ +-static void ipr_unmap_sglist(struct ipr_ioa_cfg *ioa_cfg, +- struct ipr_cmnd *ipr_cmd) +-{ +- struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; +- +- if (ipr_cmd->dma_use_sg) { +- if (scsi_cmd->use_sg > 0) { +- pci_unmap_sg(ioa_cfg->pdev, scsi_cmd->request_buffer, +- scsi_cmd->use_sg, +- scsi_cmd->sc_data_direction); +- } else { +- pci_unmap_single(ioa_cfg->pdev, ipr_cmd->dma_handle, +- scsi_cmd->request_bufflen, +- scsi_cmd->sc_data_direction); +- } +- } +-} +- +-/** + * ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts + * @ioa_cfg: ioa config struct + * @clr_ints: interrupts to clear +@@ -677,7 +651,7 @@ + + scsi_cmd->result |= (DID_ERROR << 16); + +- ipr_unmap_sglist(ioa_cfg, ipr_cmd); ++ scsi_dma_unmap(ipr_cmd->scsi_cmd); + 
scsi_cmd->scsi_done(scsi_cmd); + list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + } +@@ -2465,6 +2439,7 @@ + /** + * ipr_read_trace - Dump the adapter trace + * @kobj: kobject struct ++ * @bin_attr: bin_attribute struct + * @buf: buffer + * @off: offset + * @count: buffer size +@@ -2472,8 +2447,9 @@ + * Return value: + * number of bytes printed to buffer + **/ +-static ssize_t ipr_read_trace(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t ipr_read_trace(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *shost = class_to_shost(cdev); +@@ -3166,6 +3142,7 @@ + /** + * ipr_read_dump - Dump the adapter + * @kobj: kobject struct ++ * @bin_attr: bin_attribute struct + * @buf: buffer + * @off: offset + * @count: buffer size +@@ -3173,8 +3150,9 @@ + * Return value: + * number of bytes printed to buffer + **/ +-static ssize_t ipr_read_dump(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t ipr_read_dump(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *shost = class_to_shost(cdev); +@@ -3327,6 +3305,7 @@ + /** + * ipr_write_dump - Setup dump state of adapter + * @kobj: kobject struct ++ * @bin_attr: bin_attribute struct + * @buf: buffer + * @off: offset + * @count: buffer size +@@ -3334,8 +3313,9 @@ + * Return value: + * number of bytes printed to buffer + **/ +-static ssize_t ipr_write_dump(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t ipr_write_dump(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct class_device *cdev = container_of(kobj,struct class_device,kobj); + struct Scsi_Host *shost = class_to_shost(cdev); +@@ -4292,24 +4272,25 @@ + static int ipr_build_ioadl(struct ipr_ioa_cfg *ioa_cfg, + struct ipr_cmnd *ipr_cmd) + { +- int i; +- struct scatterlist *sglist; ++ int i, nseg; ++ struct scatterlist *sg; + u32 length; + u32 ioadl_flags = 0; + struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; + struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_ioadl_desc *ioadl = ipr_cmd->ioadl; + +- length = scsi_cmd->request_bufflen; +- +- if (length == 0) ++ length = scsi_bufflen(scsi_cmd); ++ if (!length) + return 0; + +- if (scsi_cmd->use_sg) { +- ipr_cmd->dma_use_sg = pci_map_sg(ioa_cfg->pdev, +- scsi_cmd->request_buffer, +- scsi_cmd->use_sg, +- scsi_cmd->sc_data_direction); ++ nseg = scsi_dma_map(scsi_cmd); ++ if (nseg < 0) { ++ dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n"); ++ return -1; ++ } ++ ++ ipr_cmd->dma_use_sg = nseg; + + if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) { + ioadl_flags = IPR_IOADL_FLAGS_WRITE; +@@ -4324,8 +4305,6 @@ + cpu_to_be32(sizeof(struct ipr_ioadl_desc) * ipr_cmd->dma_use_sg); + } + +- sglist = scsi_cmd->request_buffer; +- + if (ipr_cmd->dma_use_sg <= ARRAY_SIZE(ioarcb->add_data.u.ioadl)) { + ioadl = ioarcb->add_data.u.ioadl; + ioarcb->write_ioadl_addr = +@@ -4334,51 +4313,14 @@ + ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr; + } + +- for (i = 0; i < ipr_cmd->dma_use_sg; i++) { ++ scsi_for_each_sg(scsi_cmd, sg, ipr_cmd->dma_use_sg, i) { + ioadl[i].flags_and_data_len = +- cpu_to_be32(ioadl_flags | sg_dma_len(&sglist[i])); +- ioadl[i].address = +- cpu_to_be32(sg_dma_address(&sglist[i])); ++ cpu_to_be32(ioadl_flags | 
sg_dma_len(sg)); ++ ioadl[i].address = cpu_to_be32(sg_dma_address(sg)); + } + +- if (likely(ipr_cmd->dma_use_sg)) { +- ioadl[i-1].flags_and_data_len |= +- cpu_to_be32(IPR_IOADL_FLAGS_LAST); +- return 0; +- } else +- dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n"); +- } else { +- if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) { +- ioadl_flags = IPR_IOADL_FLAGS_WRITE; +- ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_WRITE_NOT_READ; +- ioarcb->write_data_transfer_length = cpu_to_be32(length); +- ioarcb->write_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc)); +- } else if (scsi_cmd->sc_data_direction == DMA_FROM_DEVICE) { +- ioadl_flags = IPR_IOADL_FLAGS_READ; +- ioarcb->read_data_transfer_length = cpu_to_be32(length); +- ioarcb->read_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc)); +- } +- +- ipr_cmd->dma_handle = pci_map_single(ioa_cfg->pdev, +- scsi_cmd->request_buffer, length, +- scsi_cmd->sc_data_direction); +- +- if (likely(!pci_dma_mapping_error(ipr_cmd->dma_handle))) { +- ioadl = ioarcb->add_data.u.ioadl; +- ioarcb->write_ioadl_addr = +- cpu_to_be32(be32_to_cpu(ioarcb->ioarcb_host_pci_addr) + +- offsetof(struct ipr_ioarcb, add_data)); +- ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr; +- ipr_cmd->dma_use_sg = 1; +- ioadl[0].flags_and_data_len = +- cpu_to_be32(ioadl_flags | length | IPR_IOADL_FLAGS_LAST); +- ioadl[0].address = cpu_to_be32(ipr_cmd->dma_handle); ++ ioadl[i-1].flags_and_data_len |= cpu_to_be32(IPR_IOADL_FLAGS_LAST); + return 0; +- } else +- dev_err(&ioa_cfg->pdev->dev, "pci_map_single failed!\n"); +- } +- +- return -1; + } + + /** +@@ -4441,7 +4383,7 @@ + res->needs_sync_complete = 1; + res->in_erp = 0; + } +- ipr_unmap_sglist(ioa_cfg, ipr_cmd); ++ scsi_dma_unmap(ipr_cmd->scsi_cmd); + list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + scsi_cmd->scsi_done(scsi_cmd); + } +@@ -4819,7 +4761,7 @@ + break; + } + +- ipr_unmap_sglist(ioa_cfg, ipr_cmd); ++ scsi_dma_unmap(ipr_cmd->scsi_cmd); + list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + scsi_cmd->scsi_done(scsi_cmd); + } +@@ -4840,10 +4782,10 @@ + struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; + u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc); + +- scsi_cmd->resid = be32_to_cpu(ipr_cmd->ioasa.residual_data_len); ++ scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->ioasa.residual_data_len)); + + if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { +- ipr_unmap_sglist(ioa_cfg, ipr_cmd); ++ scsi_dma_unmap(ipr_cmd->scsi_cmd); + list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + scsi_cmd->scsi_done(scsi_cmd); + } else +diff -Nurb linux-2.6.22-570/drivers/scsi/ips.c linux-2.6.22-591/drivers/scsi/ips.c +--- linux-2.6.22-570/drivers/scsi/ips.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ips.c 2007-12-21 15:36:12.000000000 -0500 +@@ -211,19 +211,6 @@ + #warning "This driver has only been tested on the x86/ia64/x86_64 platforms" + #endif + +-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0) +-#include +-#include "sd.h" +-#define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags) +-#define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags) +-#ifndef __devexit_p +-#define __devexit_p(x) x +-#endif +-#else +-#define IPS_LOCK_SAVE(lock,flags) do{spin_lock(lock);(void)flags;}while(0) +-#define IPS_UNLOCK_RESTORE(lock,flags) do{spin_unlock(lock);(void)flags;}while(0) +-#endif +- + #define IPS_DMA_DIR(scb) ((!scb->scsi_cmd || ips_is_passthru(scb->scsi_cmd) || \ + DMA_NONE == scb->scsi_cmd->sc_data_direction) ? 
\ + PCI_DMA_BIDIRECTIONAL : \ +@@ -381,24 +368,13 @@ + .eh_abort_handler = ips_eh_abort, + .eh_host_reset_handler = ips_eh_reset, + .proc_name = "ips", +-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0) + .proc_info = ips_proc_info, + .slave_configure = ips_slave_configure, +-#else +- .proc_info = ips_proc24_info, +- .select_queue_depths = ips_select_queue_depth, +-#endif + .bios_param = ips_biosparam, + .this_id = -1, + .sg_tablesize = IPS_MAX_SG, + .cmd_per_lun = 3, + .use_clustering = ENABLE_CLUSTERING, +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +- .use_new_eh_code = 1, +-#endif +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) && LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +- .highmem_io = 1, +-#endif + }; + + +@@ -731,7 +707,7 @@ + /* free IRQ */ + free_irq(ha->irq, ha); + +- IPS_REMOVE_HOST(sh); ++ scsi_remove_host(sh); + scsi_host_put(sh); + + ips_released_controllers++; +@@ -813,7 +789,6 @@ + ips_ha_t *ha; + ips_copp_wait_item_t *item; + int ret; +- unsigned long cpu_flags; + struct Scsi_Host *host; + + METHOD_TRACE("ips_eh_abort", 1); +@@ -830,7 +805,7 @@ + if (!ha->active) + return (FAILED); + +- IPS_LOCK_SAVE(host->host_lock, cpu_flags); ++ spin_lock(host->host_lock); + + /* See if the command is on the copp queue */ + item = ha->copp_waitlist.head; +@@ -851,7 +826,7 @@ + ret = (FAILED); + } + +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); ++ spin_unlock(host->host_lock); + return ret; + } + +@@ -1129,7 +1104,7 @@ + /* A Reset IOCTL is only sent by the boot CD in extreme cases. */ + /* There can never be any system activity ( network or disk ), but check */ + /* anyway just as a good practice. */ +- pt = (ips_passthru_t *) SC->request_buffer; ++ pt = (ips_passthru_t *) scsi_sglist(SC); + if ((pt->CoppCP.cmd.reset.op_code == IPS_CMD_RESET_CHANNEL) && + (pt->CoppCP.cmd.reset.adapter_flag == 1)) { + if (ha->scb_activelist.count != 0) { +@@ -1176,18 +1151,10 @@ + /* Set bios geometry for the controller */ + /* */ + /****************************************************************************/ +-static int +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +-ips_biosparam(Disk * disk, kdev_t dev, int geom[]) +-{ +- ips_ha_t *ha = (ips_ha_t *) disk->device->host->hostdata; +- unsigned long capacity = disk->capacity; +-#else +-ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, ++static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, + sector_t capacity, int geom[]) + { + ips_ha_t *ha = (ips_ha_t *) sdev->host->hostdata; +-#endif + int heads; + int sectors; + int cylinders; +@@ -1225,70 +1192,6 @@ + return (0); + } + +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +- +-/* ips_proc24_info is a wrapper around ips_proc_info * +- * for compatibility with the 2.4 scsi parameters */ +-static int +-ips_proc24_info(char *buffer, char **start, off_t offset, int length, +- int hostno, int func) +-{ +- int i; +- +- for (i = 0; i < ips_next_controller; i++) { +- if (ips_sh[i] && ips_sh[i]->host_no == hostno) { +- return ips_proc_info(ips_sh[i], buffer, start, +- offset, length, func); +- } +- } +- return -EINVAL; +-} +- +-/****************************************************************************/ +-/* */ +-/* Routine Name: ips_select_queue_depth */ +-/* */ +-/* Routine Description: */ +-/* */ +-/* Select queue depths for the devices on the contoller */ +-/* */ +-/****************************************************************************/ +-static void +-ips_select_queue_depth(struct Scsi_Host *host, struct scsi_device * scsi_devs) +-{ +- struct 
scsi_device *device; +- ips_ha_t *ha; +- int count = 0; +- int min; +- +- ha = IPS_HA(host); +- min = ha->max_cmds / 4; +- +- for (device = scsi_devs; device; device = device->next) { +- if (device->host == host) { +- if ((device->channel == 0) && (device->type == 0)) +- count++; +- } +- } +- +- for (device = scsi_devs; device; device = device->next) { +- if (device->host == host) { +- if ((device->channel == 0) && (device->type == 0)) { +- device->queue_depth = +- (ha->max_cmds - 1) / count; +- if (device->queue_depth < min) +- device->queue_depth = min; +- } else { +- device->queue_depth = 2; +- } +- +- if (device->queue_depth < 2) +- device->queue_depth = 2; +- } +- } +-} +- +-#else + /****************************************************************************/ + /* */ + /* Routine Name: ips_slave_configure */ +@@ -1316,7 +1219,6 @@ + SDptr->skip_ms_page_3f = 1; + return 0; + } +-#endif + + /****************************************************************************/ + /* */ +@@ -1331,7 +1233,6 @@ + do_ipsintr(int irq, void *dev_id) + { + ips_ha_t *ha; +- unsigned long cpu_flags; + struct Scsi_Host *host; + int irqstatus; + +@@ -1347,16 +1248,16 @@ + return IRQ_HANDLED; + } + +- IPS_LOCK_SAVE(host->host_lock, cpu_flags); ++ spin_lock(host->host_lock); + + if (!ha->active) { +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); ++ spin_unlock(host->host_lock); + return IRQ_HANDLED; + } + + irqstatus = (*ha->func.intr) (ha); + +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); ++ spin_unlock(host->host_lock); + + /* start the next command */ + ips_next(ha, IPS_INTR_ON); +@@ -1606,15 +1507,8 @@ + if ((SC->cmnd[0] == IPS_IOCTL_COMMAND) && + (SC->device->channel == 0) && + (SC->device->id == IPS_ADAPTER_ID) && +- (SC->device->lun == 0) && SC->request_buffer) { +- if ((!SC->use_sg) && SC->request_bufflen && +- (((char *) SC->request_buffer)[0] == 'C') && +- (((char *) SC->request_buffer)[1] == 'O') && +- (((char *) SC->request_buffer)[2] == 'P') && +- (((char *) SC->request_buffer)[3] == 'P')) +- return 1; +- else if (SC->use_sg) { +- struct scatterlist *sg = SC->request_buffer; ++ (SC->device->lun == 0) && scsi_sglist(SC)) { ++ struct scatterlist *sg = scsi_sglist(SC); + char *buffer; + + /* kmap_atomic() ensures addressability of the user buffer.*/ +@@ -1630,7 +1524,6 @@ + kunmap_atomic(buffer - sg->offset, KM_IRQ0); + local_irq_restore(flags); + } +- } + return 0; + } + +@@ -1680,18 +1573,14 @@ + { + ips_passthru_t *pt; + int length = 0; +- int ret; ++ int i, ret; ++ struct scatterlist *sg = scsi_sglist(SC); + + METHOD_TRACE("ips_make_passthru", 1); + +- if (!SC->use_sg) { +- length = SC->request_bufflen; +- } else { +- struct scatterlist *sg = SC->request_buffer; +- int i; +- for (i = 0; i < SC->use_sg; i++) ++ scsi_for_each_sg(SC, sg, scsi_sg_count(SC), i) + length += sg[i].length; +- } ++ + if (length < sizeof (ips_passthru_t)) { + /* wrong size */ + DEBUG_VAR(1, "(%s%d) Passthru structure wrong size", +@@ -2115,7 +2004,7 @@ + + METHOD_TRACE("ips_cleanup_passthru", 1); + +- if ((!scb) || (!scb->scsi_cmd) || (!scb->scsi_cmd->request_buffer)) { ++ if ((!scb) || (!scb->scsi_cmd) || (!scsi_sglist(scb->scsi_cmd))) { + DEBUG_VAR(1, "(%s%d) couldn't cleanup after passthru", + ips_name, ha->host_num); + +@@ -2730,7 +2619,6 @@ + struct scsi_cmnd *q; + ips_copp_wait_item_t *item; + int ret; +- unsigned long cpu_flags = 0; + struct Scsi_Host *host; + METHOD_TRACE("ips_next", 1); + +@@ -2742,7 +2630,7 @@ + * this command won't time out + */ + if (intr == IPS_INTR_ON) +- 
IPS_LOCK_SAVE(host->host_lock, cpu_flags); ++ spin_lock(host->host_lock); + + if ((ha->subsys->param[3] & 0x300000) + && (ha->scb_activelist.count == 0)) { +@@ -2769,14 +2657,14 @@ + item = ips_removeq_copp_head(&ha->copp_waitlist); + ha->num_ioctl++; + if (intr == IPS_INTR_ON) +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); ++ spin_unlock(host->host_lock); + scb->scsi_cmd = item->scsi_cmd; + kfree(item); + + ret = ips_make_passthru(ha, scb->scsi_cmd, scb, intr); + + if (intr == IPS_INTR_ON) +- IPS_LOCK_SAVE(host->host_lock, cpu_flags); ++ spin_lock(host->host_lock); + switch (ret) { + case IPS_FAILURE: + if (scb->scsi_cmd) { +@@ -2846,7 +2734,7 @@ + SC = ips_removeq_wait(&ha->scb_waitlist, q); + + if (intr == IPS_INTR_ON) +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); /* Unlock HA after command is taken off queue */ ++ spin_unlock(host->host_lock); /* Unlock HA after command is taken off queue */ + + SC->result = DID_OK; + SC->host_scribble = NULL; +@@ -2866,43 +2754,28 @@ + /* copy in the CDB */ + memcpy(scb->cdb, SC->cmnd, SC->cmd_len); + +- /* Now handle the data buffer */ +- if (SC->use_sg) { ++ scb->sg_count = scsi_dma_map(SC); ++ BUG_ON(scb->sg_count < 0); ++ if (scb->sg_count) { + struct scatterlist *sg; + int i; + +- sg = SC->request_buffer; +- scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg, +- SC->sc_data_direction); + scb->flags |= IPS_SCB_MAP_SG; +- for (i = 0; i < scb->sg_count; i++) { ++ ++ scsi_for_each_sg(SC, sg, scb->sg_count, i) { + if (ips_fill_scb_sg_single +- (ha, sg_dma_address(&sg[i]), scb, i, +- sg_dma_len(&sg[i])) < 0) ++ (ha, sg_dma_address(sg), scb, i, ++ sg_dma_len(sg)) < 0) + break; + } + scb->dcdb.transfer_length = scb->data_len; + } else { +- if (SC->request_bufflen) { +- scb->data_busaddr = +- pci_map_single(ha->pcidev, +- SC->request_buffer, +- SC->request_bufflen, +- SC->sc_data_direction); +- scb->flags |= IPS_SCB_MAP_SINGLE; +- ips_fill_scb_sg_single(ha, scb->data_busaddr, +- scb, 0, +- SC->request_bufflen); +- scb->dcdb.transfer_length = scb->data_len; +- } else { + scb->data_busaddr = 0L; + scb->sg_len = 0; + scb->data_len = 0; + scb->dcdb.transfer_length = 0; + } + +- } +- + scb->dcdb.cmd_attribute = + ips_command_direction[scb->scsi_cmd->cmnd[0]]; + +@@ -2919,7 +2792,7 @@ + scb->dcdb.transfer_length = 0; + } + if (intr == IPS_INTR_ON) +- IPS_LOCK_SAVE(host->host_lock, cpu_flags); ++ spin_lock(host->host_lock); + + ret = ips_send_cmd(ha, scb); + +@@ -2958,7 +2831,7 @@ + } /* end while */ + + if (intr == IPS_INTR_ON) +- IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags); ++ spin_unlock(host->host_lock); + } + + /****************************************************************************/ +@@ -3377,29 +3250,24 @@ + * the rest of the data and continue. 
+ */ + if ((scb->breakup) || (scb->sg_break)) { ++ struct scatterlist *sg; ++ int sg_dma_index, ips_sg_index = 0; ++ + /* we had a data breakup */ + scb->data_len = 0; + +- if (scb->sg_count) { +- /* S/G request */ +- struct scatterlist *sg; +- int ips_sg_index = 0; +- int sg_dma_index; +- +- sg = scb->scsi_cmd->request_buffer; ++ sg = scsi_sglist(scb->scsi_cmd); + + /* Spin forward to last dma chunk */ + sg_dma_index = scb->breakup; + + /* Take care of possible partial on last chunk */ + ips_fill_scb_sg_single(ha, +- sg_dma_address(&sg +- [sg_dma_index]), ++ sg_dma_address(&sg[sg_dma_index]), + scb, ips_sg_index++, +- sg_dma_len(&sg +- [sg_dma_index])); ++ sg_dma_len(&sg[sg_dma_index])); + +- for (; sg_dma_index < scb->sg_count; ++ for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd); + sg_dma_index++) { + if (ips_fill_scb_sg_single + (ha, +@@ -3407,21 +3275,6 @@ + scb, ips_sg_index++, + sg_dma_len(&sg[sg_dma_index])) < 0) + break; +- +- } +- +- } else { +- /* Non S/G Request */ +- (void) ips_fill_scb_sg_single(ha, +- scb-> +- data_busaddr + +- (scb->sg_break * +- ha->max_xfer), +- scb, 0, +- scb->scsi_cmd-> +- request_bufflen - +- (scb->sg_break * +- ha->max_xfer)); + } + + scb->dcdb.transfer_length = scb->data_len; +@@ -3653,15 +3506,15 @@ + static void + ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count) + { +- if (scmd->use_sg) { + int i; + unsigned int min_cnt, xfer_cnt; + char *cdata = (char *) data; + unsigned char *buffer; + unsigned long flags; +- struct scatterlist *sg = scmd->request_buffer; ++ struct scatterlist *sg = scsi_sglist(scmd); ++ + for (i = 0, xfer_cnt = 0; +- (i < scmd->use_sg) && (xfer_cnt < count); i++) { ++ (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) { + min_cnt = min(count - xfer_cnt, sg[i].length); + + /* kmap_atomic() ensures addressability of the data buffer.*/ +@@ -3674,11 +3527,6 @@ + + xfer_cnt += min_cnt; + } +- +- } else { +- unsigned int min_cnt = min(count, scmd->request_bufflen); +- memcpy(scmd->request_buffer, data, min_cnt); +- } + } + + /****************************************************************************/ +@@ -3691,15 +3539,15 @@ + static void + ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count) + { +- if (scmd->use_sg) { + int i; + unsigned int min_cnt, xfer_cnt; + char *cdata = (char *) data; + unsigned char *buffer; + unsigned long flags; +- struct scatterlist *sg = scmd->request_buffer; ++ struct scatterlist *sg = scsi_sglist(scmd); ++ + for (i = 0, xfer_cnt = 0; +- (i < scmd->use_sg) && (xfer_cnt < count); i++) { ++ (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) { + min_cnt = min(count - xfer_cnt, sg[i].length); + + /* kmap_atomic() ensures addressability of the data buffer.*/ +@@ -3712,11 +3560,6 @@ + + xfer_cnt += min_cnt; + } +- +- } else { +- unsigned int min_cnt = min(count, scmd->request_bufflen); +- memcpy(data, scmd->request_buffer, min_cnt); +- } + } + + /****************************************************************************/ +@@ -4350,7 +4193,7 @@ + + METHOD_TRACE("ips_rdcap", 1); + +- if (scb->scsi_cmd->request_bufflen < 8) ++ if (scsi_bufflen(scb->scsi_cmd) < 8) + return (0); + + cap.lba = +@@ -4735,8 +4578,7 @@ + + METHOD_TRACE("ips_freescb", 1); + if (scb->flags & IPS_SCB_MAP_SG) +- pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer, +- scb->scsi_cmd->use_sg, IPS_DMA_DIR(scb)); ++ scsi_dma_unmap(scb->scsi_cmd); + else if (scb->flags & IPS_SCB_MAP_SINGLE) + pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len, + IPS_DMA_DIR(scb)); +@@ -7004,7 
+6846,6 @@ + kfree(oldha); + ips_sh[index] = sh; + ips_ha[index] = ha; +- IPS_SCSI_SET_DEVICE(sh, ha); + + /* Store away needed values for later use */ + sh->io_port = ha->io_addr; +@@ -7016,17 +6857,16 @@ + sh->cmd_per_lun = sh->hostt->cmd_per_lun; + sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma; + sh->use_clustering = sh->hostt->use_clustering; +- +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7) + sh->max_sectors = 128; +-#endif + + sh->max_id = ha->ntargets; + sh->max_lun = ha->nlun; + sh->max_channel = ha->nbus - 1; + sh->can_queue = ha->max_cmds - 1; + +- IPS_ADD_HOST(sh, NULL); ++ scsi_add_host(sh, NULL); ++ scsi_scan_host(sh); ++ + return 0; + } + +@@ -7069,7 +6909,7 @@ + return -ENODEV; + ips_driver_template.module = THIS_MODULE; + ips_order_controllers(); +- if (IPS_REGISTER_HOSTS(&ips_driver_template)) { ++ if (!ips_detect(&ips_driver_template)) { + pci_unregister_driver(&ips_pci_driver); + return -ENODEV; + } +@@ -7087,7 +6927,6 @@ + static void __exit + ips_module_exit(void) + { +- IPS_UNREGISTER_HOSTS(&ips_driver_template); + pci_unregister_driver(&ips_pci_driver); + unregister_reboot_notifier(&ips_notifier); + } +@@ -7443,15 +7282,9 @@ + return SUCCESS; + } + +-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9) + MODULE_LICENSE("GPL"); +-#endif +- + MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING); +- +-#ifdef MODULE_VERSION + MODULE_VERSION(IPS_VER_STRING); +-#endif + + + /* +diff -Nurb linux-2.6.22-570/drivers/scsi/ips.h linux-2.6.22-591/drivers/scsi/ips.h +--- linux-2.6.22-570/drivers/scsi/ips.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ips.h 2007-12-21 15:36:12.000000000 -0500 +@@ -58,10 +58,6 @@ + /* + * Some handy macros + */ +- #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined CONFIG_HIGHIO +- #define IPS_HIGHIO +- #endif +- + #define IPS_HA(x) ((ips_ha_t *) x->hostdata) + #define IPS_COMMAND_ID(ha, scb) (int) (scb - ha->scbs) + #define IPS_IS_TROMBONE(ha) (((ha->device_id == IPS_DEVICEID_COPPERHEAD) && \ +@@ -84,38 +80,8 @@ + #define IPS_SGLIST_SIZE(ha) (IPS_USE_ENH_SGLIST(ha) ? \ + sizeof(IPS_ENH_SG_LIST) : sizeof(IPS_STD_SG_LIST)) + +- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4) +- #define pci_set_dma_mask(dev,mask) ( mask > 0xffffffff ? 1:0 ) +- #define scsi_set_pci_device(sh,dev) (0) +- #endif +- +- #ifndef IRQ_NONE +- typedef void irqreturn_t; +- #define IRQ_NONE +- #define IRQ_HANDLED +- #define IRQ_RETVAL(x) +- #endif +- +- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +- #define IPS_REGISTER_HOSTS(SHT) scsi_register_module(MODULE_SCSI_HA,SHT) +- #define IPS_UNREGISTER_HOSTS(SHT) scsi_unregister_module(MODULE_SCSI_HA,SHT) +- #define IPS_ADD_HOST(shost,device) +- #define IPS_REMOVE_HOST(shost) +- #define IPS_SCSI_SET_DEVICE(sh,ha) scsi_set_pci_device(sh, (ha)->pcidev) +- #define IPS_PRINTK(level, pcidev, format, arg...) \ +- printk(level "%s %s:" format , "ips" , \ +- (pcidev)->slot_name , ## arg) +- #define scsi_host_alloc(sh,size) scsi_register(sh,size) +- #define scsi_host_put(sh) scsi_unregister(sh) +- #else +- #define IPS_REGISTER_HOSTS(SHT) (!ips_detect(SHT)) +- #define IPS_UNREGISTER_HOSTS(SHT) +- #define IPS_ADD_HOST(shost,device) do { scsi_add_host(shost,device); scsi_scan_host(shost); } while (0) +- #define IPS_REMOVE_HOST(shost) scsi_remove_host(shost) +- #define IPS_SCSI_SET_DEVICE(sh,ha) do { } while (0) + #define IPS_PRINTK(level, pcidev, format, arg...) 
\ + dev_printk(level , &((pcidev)->dev) , format , ## arg) +- #endif + + #define MDELAY(n) \ + do { \ +@@ -134,7 +100,7 @@ + #define pci_dma_hi32(a) ((a >> 16) >> 16) + #define pci_dma_lo32(a) (a & 0xffffffff) + +- #if (BITS_PER_LONG > 32) || (defined CONFIG_HIGHMEM64G && defined IPS_HIGHIO) ++ #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G) + #define IPS_ENABLE_DMA64 (1) + #else + #define IPS_ENABLE_DMA64 (0) +@@ -451,16 +417,10 @@ + /* + * Scsi_Host Template + */ +-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) +- static int ips_proc24_info(char *, char **, off_t, int, int, int); +- static void ips_select_queue_depth(struct Scsi_Host *, struct scsi_device *); +- static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]); +-#else + static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int); + static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev, + sector_t capacity, int geom[]); + static int ips_slave_configure(struct scsi_device *SDptr); +-#endif + + /* + * Raid Command Formats +diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.c linux-2.6.22-591/drivers/scsi/iscsi_tcp.c +--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -29,14 +29,15 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + #include +-#include + #include + #include ++#include + #include + #include + #include +@@ -109,7 +110,7 @@ + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + + crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc); +- buf->sg.length = tcp_conn->hdr_size; ++ buf->sg.length += sizeof(u32); + } + + static inline int +@@ -211,16 +212,14 @@ + static int + iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) + { +- int rc; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr; + struct iscsi_session *session = conn->session; ++ struct scsi_cmnd *sc = ctask->sc; + int datasn = be32_to_cpu(rhdr->datasn); + +- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); +- if (rc) +- return rc; ++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); + /* + * setup Data-In byte counter (gets decremented..) 
+ */ +@@ -229,31 +228,36 @@ + if (tcp_conn->in.datalen == 0) + return 0; + +- if (ctask->datasn != datasn) ++ if (tcp_ctask->exp_datasn != datasn) { ++ debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n", ++ __FUNCTION__, tcp_ctask->exp_datasn, datasn); + return ISCSI_ERR_DATASN; ++ } + +- ctask->datasn++; ++ tcp_ctask->exp_datasn++; + + tcp_ctask->data_offset = be32_to_cpu(rhdr->offset); +- if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length) ++ if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) { ++ debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n", ++ __FUNCTION__, tcp_ctask->data_offset, ++ tcp_conn->in.datalen, scsi_bufflen(sc)); + return ISCSI_ERR_DATA_OFFSET; ++ } + + if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) { +- struct scsi_cmnd *sc = ctask->sc; +- + conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1; + if (rhdr->flags & ISCSI_FLAG_DATA_UNDERFLOW) { + int res_count = be32_to_cpu(rhdr->residual_count); + + if (res_count > 0 && +- res_count <= sc->request_bufflen) { +- sc->resid = res_count; ++ res_count <= scsi_bufflen(sc)) { ++ scsi_set_resid(sc, res_count); + sc->result = (DID_OK << 16) | rhdr->cmd_status; + } else + sc->result = (DID_BAD_TARGET << 16) | + rhdr->cmd_status; + } else if (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW) { +- sc->resid = be32_to_cpu(rhdr->residual_count); ++ scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count)); + sc->result = (DID_OK << 16) | rhdr->cmd_status; + } else + sc->result = (DID_OK << 16) | rhdr->cmd_status; +@@ -281,6 +285,8 @@ + { + struct iscsi_data *hdr; + struct scsi_cmnd *sc = ctask->sc; ++ int i, sg_count = 0; ++ struct scatterlist *sg; + + hdr = &r2t->dtask.hdr; + memset(hdr, 0, sizeof(struct iscsi_data)); +@@ -308,12 +314,9 @@ + iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr, + sizeof(struct iscsi_hdr)); + +- if (sc->use_sg) { +- int i, sg_count = 0; +- struct scatterlist *sg = sc->request_buffer; +- ++ sg = scsi_sglist(sc); + r2t->sg = NULL; +- for (i = 0; i < sc->use_sg; i++, sg += 1) { ++ for (i = 0; i < scsi_sg_count(sc); i++, sg += 1) { + /* FIXME: prefetch ? 
*/ + if (sg_count + sg->length > r2t->data_offset) { + int page_offset; +@@ -335,12 +338,6 @@ + sg_count += sg->length; + } + BUG_ON(r2t->sg == NULL); +- } else { +- iscsi_buf_init_iov(&r2t->sendbuf, +- (char*)sc->request_buffer + r2t->data_offset, +- r2t->data_count); +- r2t->sg = NULL; +- } + } + + /** +@@ -365,17 +362,16 @@ + return ISCSI_ERR_DATALEN; + } + +- if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn) ++ if (tcp_ctask->exp_datasn != r2tsn){ ++ debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n", ++ __FUNCTION__, tcp_ctask->exp_datasn, r2tsn); + return ISCSI_ERR_R2TSN; +- +- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); +- if (rc) +- return rc; +- +- /* FIXME: use R2TSN to detect missing R2T */ ++ } + + /* fill-in new R2T associated with the task */ + spin_lock(&session->lock); ++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); ++ + if (!ctask->sc || ctask->mtask || + session->state != ISCSI_STATE_LOGGED_IN) { + printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in " +@@ -401,11 +397,11 @@ + r2t->data_length, session->max_burst); + + r2t->data_offset = be32_to_cpu(rhdr->data_offset); +- if (r2t->data_offset + r2t->data_length > ctask->total_length) { ++ if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) { + spin_unlock(&session->lock); + printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at " + "offset %u and total length %d\n", r2t->data_length, +- r2t->data_offset, ctask->total_length); ++ r2t->data_offset, scsi_bufflen(ctask->sc)); + return ISCSI_ERR_DATALEN; + } + +@@ -414,9 +410,9 @@ + + iscsi_solicit_data_init(conn, ctask, r2t); + +- tcp_ctask->exp_r2tsn = r2tsn + 1; ++ tcp_ctask->exp_datasn = r2tsn + 1; + __kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*)); +- tcp_ctask->xmstate |= XMSTATE_SOL_HDR; ++ tcp_ctask->xmstate |= XMSTATE_SOL_HDR_INIT; + list_move_tail(&ctask->running, &conn->xmitqueue); + + scsi_queue_work(session->host, &conn->xmitwork); +@@ -600,7 +596,7 @@ + { + struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + int buf_left = buf_size - (tcp_conn->data_copied + offset); +- int size = min(tcp_conn->in.copy, buf_left); ++ unsigned size = min(tcp_conn->in.copy, buf_left); + int rc; + + size = min(size, ctask->data_count); +@@ -609,7 +605,7 @@ + size, tcp_conn->in.offset, tcp_conn->in.copied); + + BUG_ON(size <= 0); +- BUG_ON(tcp_ctask->sent + size > ctask->total_length); ++ BUG_ON(tcp_ctask->sent + size > scsi_bufflen(ctask->sc)); + + rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset, + (char*)buf + (offset + tcp_conn->data_copied), size); +@@ -707,25 +703,8 @@ + + BUG_ON((void*)ctask != sc->SCp.ptr); + +- /* +- * copying Data-In into the Scsi_Cmnd +- */ +- if (!sc->use_sg) { +- i = ctask->data_count; +- rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer, +- sc->request_bufflen, +- tcp_ctask->data_offset); +- if (rc == -EAGAIN) +- return rc; +- if (conn->datadgst_en) +- iscsi_recv_digest_update(tcp_conn, sc->request_buffer, +- i); +- rc = 0; +- goto done; +- } +- + offset = tcp_ctask->data_offset; +- sg = sc->request_buffer; ++ sg = scsi_sglist(sc); + + if (tcp_ctask->data_offset) + for (i = 0; i < tcp_ctask->sg_count; i++) +@@ -734,7 +713,7 @@ + if (offset < 0) + offset = 0; + +- for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) { ++ for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) { + char *dest; + + dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0); +@@ -779,7 +758,6 @@ + } + BUG_ON(ctask->data_count); + +-done: + /* check for non-exceptional status */ + if 
(tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) { + debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n", +@@ -895,11 +873,27 @@ + } + } + +- if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) { ++ if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV && ++ tcp_conn->in.copy) { + uint32_t recv_digest; + + debug_tcp("extra data_recv offset %d copy %d\n", + tcp_conn->in.offset, tcp_conn->in.copy); ++ ++ if (!tcp_conn->data_copied) { ++ if (tcp_conn->in.padding) { ++ debug_tcp("padding -> %d\n", ++ tcp_conn->in.padding); ++ memset(pad, 0, tcp_conn->in.padding); ++ sg_init_one(&sg, pad, tcp_conn->in.padding); ++ crypto_hash_update(&tcp_conn->rx_hash, ++ &sg, sg.length); ++ } ++ crypto_hash_final(&tcp_conn->rx_hash, ++ (u8 *) &tcp_conn->in.datadgst); ++ debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); ++ } ++ + rc = iscsi_tcp_copy(conn, sizeof(uint32_t)); + if (rc) { + if (rc == -EAGAIN) +@@ -925,7 +919,6 @@ + + if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV && + tcp_conn->in.copy) { +- + debug_tcp("data_recv offset %d copy %d\n", + tcp_conn->in.offset, tcp_conn->in.copy); + +@@ -936,24 +929,32 @@ + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); + return 0; + } +- tcp_conn->in.copy -= tcp_conn->in.padding; +- tcp_conn->in.offset += tcp_conn->in.padding; +- if (conn->datadgst_en) { +- if (tcp_conn->in.padding) { +- debug_tcp("padding -> %d\n", +- tcp_conn->in.padding); +- memset(pad, 0, tcp_conn->in.padding); +- sg_init_one(&sg, pad, tcp_conn->in.padding); +- crypto_hash_update(&tcp_conn->rx_hash, +- &sg, sg.length); +- } +- crypto_hash_final(&tcp_conn->rx_hash, +- (u8 *) &tcp_conn->in.datadgst); +- debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst); ++ ++ if (tcp_conn->in.padding) ++ tcp_conn->in_progress = IN_PROGRESS_PAD_RECV; ++ else if (conn->datadgst_en) + tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; ++ else ++ tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; + tcp_conn->data_copied = 0; +- } else ++ } ++ ++ if (tcp_conn->in_progress == IN_PROGRESS_PAD_RECV && ++ tcp_conn->in.copy) { ++ int copylen = min(tcp_conn->in.padding - tcp_conn->data_copied, ++ tcp_conn->in.copy); ++ ++ tcp_conn->in.copy -= copylen; ++ tcp_conn->in.offset += copylen; ++ tcp_conn->data_copied += copylen; ++ ++ if (tcp_conn->data_copied != tcp_conn->in.padding) ++ tcp_conn->in_progress = IN_PROGRESS_PAD_RECV; ++ else if (conn->datadgst_en) ++ tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV; ++ else + tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; ++ tcp_conn->data_copied = 0; + } + + debug_tcp("f, processed %d from out of %d padding %d\n", +@@ -1215,7 +1216,6 @@ + struct iscsi_r2t_info *r2t, int left) + { + struct iscsi_data *hdr; +- struct scsi_cmnd *sc = ctask->sc; + int new_offset; + + hdr = &r2t->dtask.hdr; +@@ -1245,15 +1245,8 @@ + if (iscsi_buf_left(&r2t->sendbuf)) + return; + +- if (sc->use_sg) { + iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg); + r2t->sg += 1; +- } else { +- iscsi_buf_init_iov(&r2t->sendbuf, +- (char*)sc->request_buffer + new_offset, +- r2t->data_count); +- r2t->sg = NULL; +- } + } + + static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask, +@@ -1277,41 +1270,10 @@ + static void + iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask) + { +- struct scsi_cmnd *sc = ctask->sc; + struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; + + BUG_ON(__kfifo_len(tcp_ctask->r2tqueue)); +- +- tcp_ctask->sent = 0; +- tcp_ctask->sg_count = 0; +- +- if (sc->sc_data_direction == DMA_TO_DEVICE) { +- tcp_ctask->xmstate = XMSTATE_W_HDR; +- tcp_ctask->exp_r2tsn = 0; +- 
BUG_ON(ctask->total_length == 0); +- +- if (sc->use_sg) { +- struct scatterlist *sg = sc->request_buffer; +- +- iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg); +- tcp_ctask->sg = sg + 1; +- tcp_ctask->bad_sg = sg + sc->use_sg; +- } else { +- iscsi_buf_init_iov(&tcp_ctask->sendbuf, +- sc->request_buffer, +- sc->request_bufflen); +- tcp_ctask->sg = NULL; +- tcp_ctask->bad_sg = NULL; +- } +- debug_scsi("cmd [itt 0x%x total %d imm_data %d " +- "unsol count %d, unsol offset %d]\n", +- ctask->itt, ctask->total_length, ctask->imm_count, +- ctask->unsol_count, ctask->unsol_offset); +- } else +- tcp_ctask->xmstate = XMSTATE_R_HDR; +- +- iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr, +- sizeof(struct iscsi_hdr)); ++ tcp_ctask->xmstate = XMSTATE_CMD_HDR_INIT; + } + + /** +@@ -1324,9 +1286,11 @@ + * call it again later, or recover. '0' return code means successful + * xmit. + * +- * Management xmit state machine consists of two states: +- * IN_PROGRESS_IMM_HEAD - PDU Header xmit in progress +- * IN_PROGRESS_IMM_DATA - PDU Data xmit in progress ++ * Management xmit state machine consists of these states: ++ * XMSTATE_IMM_HDR_INIT - calculate digest of PDU Header ++ * XMSTATE_IMM_HDR - PDU Header xmit in progress ++ * XMSTATE_IMM_DATA - PDU Data xmit in progress ++ * XMSTATE_IDLE - management PDU is done + **/ + static int + iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) +@@ -1337,23 +1301,34 @@ + debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n", + conn->id, tcp_mtask->xmstate, mtask->itt); + +- if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) { +- tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR; +- if (mtask->data_count) ++ if (tcp_mtask->xmstate & XMSTATE_IMM_HDR_INIT) { ++ iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr, ++ sizeof(struct iscsi_hdr)); ++ ++ if (mtask->data_count) { + tcp_mtask->xmstate |= XMSTATE_IMM_DATA; ++ iscsi_buf_init_iov(&tcp_mtask->sendbuf, ++ (char*)mtask->data, ++ mtask->data_count); ++ } ++ + if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && + conn->stop_stage != STOP_CONN_RECOVER && + conn->hdrdgst_en) + iscsi_hdr_digest(conn, &tcp_mtask->headbuf, + (u8*)tcp_mtask->hdrext); ++ ++ tcp_mtask->sent = 0; ++ tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR_INIT; ++ tcp_mtask->xmstate |= XMSTATE_IMM_HDR; ++ } ++ ++ if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) { + rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf, + mtask->data_count); +- if (rc) { +- tcp_mtask->xmstate |= XMSTATE_IMM_HDR; +- if (mtask->data_count) +- tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA; ++ if (rc) + return rc; +- } ++ tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR; + } + + if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) { +@@ -1387,55 +1362,67 @@ + return 0; + } + +-static inline int +-iscsi_send_read_hdr(struct iscsi_conn *conn, +- struct iscsi_tcp_cmd_task *tcp_ctask) ++static int ++iscsi_send_cmd_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) + { +- int rc; ++ struct scsi_cmnd *sc = ctask->sc; ++ struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; ++ int rc = 0; + +- tcp_ctask->xmstate &= ~XMSTATE_R_HDR; +- if (conn->hdrdgst_en) +- iscsi_hdr_digest(conn, &tcp_ctask->headbuf, +- (u8*)tcp_ctask->hdrext); +- rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0); +- if (!rc) { +- BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE); +- return 0; /* wait for Data-In */ ++ if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_INIT) { ++ tcp_ctask->sent = 0; ++ tcp_ctask->sg_count = 0; ++ tcp_ctask->exp_datasn = 0; ++ ++ if (sc->sc_data_direction == DMA_TO_DEVICE) { ++ struct scatterlist *sg = scsi_sglist(sc); ++ ++ 
iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg); ++ tcp_ctask->sg = sg + 1; ++ tcp_ctask->bad_sg = sg + scsi_sg_count(sc); ++ ++ debug_scsi("cmd [itt 0x%x total %d imm_data %d " ++ "unsol count %d, unsol offset %d]\n", ++ ctask->itt, scsi_bufflen(sc), ++ ctask->imm_count, ctask->unsol_count, ++ ctask->unsol_offset); + } +- tcp_ctask->xmstate |= XMSTATE_R_HDR; +- return rc; +-} + +-static inline int +-iscsi_send_write_hdr(struct iscsi_conn *conn, +- struct iscsi_cmd_task *ctask) +-{ +- struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data; +- int rc; ++ iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr, ++ sizeof(struct iscsi_hdr)); + +- tcp_ctask->xmstate &= ~XMSTATE_W_HDR; + if (conn->hdrdgst_en) + iscsi_hdr_digest(conn, &tcp_ctask->headbuf, + (u8*)tcp_ctask->hdrext); ++ tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_INIT; ++ tcp_ctask->xmstate |= XMSTATE_CMD_HDR_XMIT; ++ } ++ ++ if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_XMIT) { + rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count); +- if (rc) { +- tcp_ctask->xmstate |= XMSTATE_W_HDR; ++ if (rc) + return rc; +- } ++ tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_XMIT; ++ ++ if (sc->sc_data_direction != DMA_TO_DEVICE) ++ return 0; + + if (ctask->imm_count) { + tcp_ctask->xmstate |= XMSTATE_IMM_DATA; + iscsi_set_padding(tcp_ctask, ctask->imm_count); + + if (ctask->conn->datadgst_en) { +- iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask); ++ iscsi_data_digest_init(ctask->conn->dd_data, ++ tcp_ctask); + tcp_ctask->immdigest = 0; + } + } + + if (ctask->unsol_count) +- tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; +- return 0; ++ tcp_ctask->xmstate |= ++ XMSTATE_UNS_HDR | XMSTATE_UNS_INIT; ++ } ++ return rc; + } + + static int +@@ -1624,9 +1611,7 @@ + struct iscsi_data_task *dtask; + int left, rc; + +- if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { +- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; +- tcp_ctask->xmstate |= XMSTATE_SOL_DATA; ++ if (tcp_ctask->xmstate & XMSTATE_SOL_HDR_INIT) { + if (!tcp_ctask->r2t) { + spin_lock_bh(&session->lock); + __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t, +@@ -1640,13 +1625,20 @@ + if (conn->hdrdgst_en) + iscsi_hdr_digest(conn, &r2t->headbuf, + (u8*)dtask->hdrext); +- rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); +- if (rc) { +- tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA; ++ tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR_INIT; + tcp_ctask->xmstate |= XMSTATE_SOL_HDR; +- return rc; + } + ++ if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) { ++ r2t = tcp_ctask->r2t; ++ dtask = &r2t->dtask; ++ ++ rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count); ++ if (rc) ++ return rc; ++ tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; ++ tcp_ctask->xmstate |= XMSTATE_SOL_DATA; ++ + if (conn->datadgst_en) { + iscsi_data_digest_init(conn->dd_data, tcp_ctask); + dtask->digest = 0; +@@ -1677,8 +1669,6 @@ + left = r2t->data_length - r2t->sent; + if (left) { + iscsi_solicit_data_cont(conn, ctask, r2t, left); +- tcp_ctask->xmstate |= XMSTATE_SOL_DATA; +- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; + goto send_hdr; + } + +@@ -1693,8 +1683,6 @@ + if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, + sizeof(void*))) { + tcp_ctask->r2t = r2t; +- tcp_ctask->xmstate |= XMSTATE_SOL_DATA; +- tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR; + spin_unlock_bh(&session->lock); + goto send_hdr; + } +@@ -1703,6 +1691,46 @@ + return 0; + } + ++/** ++ * iscsi_tcp_ctask_xmit - xmit normal PDU task ++ * @conn: iscsi connection ++ * @ctask: iscsi command task ++ * ++ * Notes: ++ * The function can return -EAGAIN in which case caller must ++ * call it 
again later, or recover. '0' return code means successful ++ * xmit. ++ * The function is divided into logical helpers (above) for the different ++ * xmit stages. ++ * ++ *iscsi_send_cmd_hdr() ++ * XMSTATE_CMD_HDR_INIT - prepare Header and Data buffers; calculate ++ * Header Digest ++ * XMSTATE_CMD_HDR_XMIT - Transmit header in progress ++ * ++ *iscsi_send_padding ++ * XMSTATE_W_PAD - Prepare and send padding ++ * XMSTATE_W_RESEND_PAD - retry send padding ++ * ++ *iscsi_send_digest ++ * XMSTATE_W_RESEND_DATA_DIGEST - Finalize and send Data Digest ++ * XMSTATE_W_RESEND_DATA_DIGEST - retry sending digest ++ * ++ *iscsi_send_unsol_hdr ++ * XMSTATE_UNS_INIT - prepare unsolicited data header and digest ++ * XMSTATE_UNS_HDR - send unsolicited header ++ * ++ *iscsi_send_unsol_pdu ++ * XMSTATE_UNS_DATA - send unsolicited data in progress ++ * ++ *iscsi_send_sol_pdu ++ * XMSTATE_SOL_HDR_INIT - solicit data header and digest initialize ++ * XMSTATE_SOL_HDR - send solicit header ++ * XMSTATE_SOL_DATA - send solicit data ++ * ++ *iscsi_tcp_ctask_xmit ++ * XMSTATE_IMM_DATA - xmit management data (??) ++ **/ + static int + iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) + { +@@ -1712,20 +1740,11 @@ + debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n", + conn->id, tcp_ctask->xmstate, ctask->itt); + +- /* +- * serialize with TMF AbortTask +- */ +- if (ctask->mtask) +- return rc; +- +- if (tcp_ctask->xmstate & XMSTATE_R_HDR) +- return iscsi_send_read_hdr(conn, tcp_ctask); +- +- if (tcp_ctask->xmstate & XMSTATE_W_HDR) { +- rc = iscsi_send_write_hdr(conn, ctask); ++ rc = iscsi_send_cmd_hdr(conn, ctask); + if (rc) + return rc; +- } ++ if (ctask->sc->sc_data_direction != DMA_TO_DEVICE) ++ return 0; + + if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) { + rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg, +@@ -1810,18 +1829,22 @@ + static void + iscsi_tcp_release_conn(struct iscsi_conn *conn) + { ++ struct iscsi_session *session = conn->session; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; ++ struct socket *sock = tcp_conn->sock; + +- if (!tcp_conn->sock) ++ if (!sock) + return; + +- sock_hold(tcp_conn->sock->sk); ++ sock_hold(sock->sk); + iscsi_conn_restore_callbacks(tcp_conn); +- sock_put(tcp_conn->sock->sk); ++ sock_put(sock->sk); + +- sock_release(tcp_conn->sock); ++ spin_lock_bh(&session->lock); + tcp_conn->sock = NULL; + conn->recv_lock = NULL; ++ spin_unlock_bh(&session->lock); ++ sockfd_put(sock); + } + + static void +@@ -1852,6 +1875,46 @@ + tcp_conn->hdr_size = sizeof(struct iscsi_hdr); + } + ++static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock, ++ char *buf, int *port, ++ int (*getname)(struct socket *, struct sockaddr *, ++ int *addrlen)) ++{ ++ struct sockaddr_storage *addr; ++ struct sockaddr_in6 *sin6; ++ struct sockaddr_in *sin; ++ int rc = 0, len; ++ ++ addr = kmalloc(sizeof(*addr), GFP_KERNEL); ++ if (!addr) ++ return -ENOMEM; ++ ++ if (getname(sock, (struct sockaddr *) addr, &len)) { ++ rc = -ENODEV; ++ goto free_addr; ++ } ++ ++ switch (addr->ss_family) { ++ case AF_INET: ++ sin = (struct sockaddr_in *)addr; ++ spin_lock_bh(&conn->session->lock); ++ sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); ++ *port = be16_to_cpu(sin->sin_port); ++ spin_unlock_bh(&conn->session->lock); ++ break; ++ case AF_INET6: ++ sin6 = (struct sockaddr_in6 *)addr; ++ spin_lock_bh(&conn->session->lock); ++ sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr)); ++ *port = be16_to_cpu(sin6->sin6_port); ++ spin_unlock_bh(&conn->session->lock); ++ break; ++ } 
++free_addr: ++ kfree(addr); ++ return rc; ++} ++ + static int + iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session, + struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, +@@ -1869,10 +1932,24 @@ + printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err); + return -EEXIST; + } ++ /* ++ * copy these values now because if we drop the session ++ * userspace may still want to query the values since we will ++ * be using them for the reconnect ++ */ ++ err = iscsi_tcp_get_addr(conn, sock, conn->portal_address, ++ &conn->portal_port, kernel_getpeername); ++ if (err) ++ goto free_socket; ++ ++ err = iscsi_tcp_get_addr(conn, sock, conn->local_address, ++ &conn->local_port, kernel_getsockname); ++ if (err) ++ goto free_socket; + + err = iscsi_conn_bind(cls_session, cls_conn, is_leading); + if (err) +- return err; ++ goto free_socket; + + /* bind iSCSI connection and socket */ + tcp_conn->sock = sock; +@@ -1896,25 +1973,19 @@ + * set receive state machine into initial state + */ + tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER; +- + return 0; ++ ++free_socket: ++ sockfd_put(sock); ++ return err; + } + + /* called with host lock */ + static void +-iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask, +- char *data, uint32_t data_size) ++iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask) + { + struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data; +- +- iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr, +- sizeof(struct iscsi_hdr)); +- tcp_mtask->xmstate = XMSTATE_IMM_HDR; +- tcp_mtask->sent = 0; +- +- if (mtask->data_count) +- iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data, +- mtask->data_count); ++ tcp_mtask->xmstate = XMSTATE_IMM_HDR_INIT; + } + + static int +@@ -2026,41 +2097,18 @@ + enum iscsi_param param, char *buf) + { + struct iscsi_conn *conn = cls_conn->dd_data; +- struct iscsi_tcp_conn *tcp_conn = conn->dd_data; +- struct inet_sock *inet; +- struct ipv6_pinfo *np; +- struct sock *sk; + int len; + + switch(param) { + case ISCSI_PARAM_CONN_PORT: +- mutex_lock(&conn->xmitmutex); +- if (!tcp_conn->sock) { +- mutex_unlock(&conn->xmitmutex); +- return -EINVAL; +- } +- +- inet = inet_sk(tcp_conn->sock->sk); +- len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport)); +- mutex_unlock(&conn->xmitmutex); ++ spin_lock_bh(&conn->session->lock); ++ len = sprintf(buf, "%hu\n", conn->portal_port); ++ spin_unlock_bh(&conn->session->lock); + break; + case ISCSI_PARAM_CONN_ADDRESS: +- mutex_lock(&conn->xmitmutex); +- if (!tcp_conn->sock) { +- mutex_unlock(&conn->xmitmutex); +- return -EINVAL; +- } +- +- sk = tcp_conn->sock->sk; +- if (sk->sk_family == PF_INET) { +- inet = inet_sk(sk); +- len = sprintf(buf, NIPQUAD_FMT "\n", +- NIPQUAD(inet->daddr)); +- } else { +- np = inet6_sk(sk); +- len = sprintf(buf, NIP6_FMT "\n", NIP6(np->daddr)); +- } +- mutex_unlock(&conn->xmitmutex); ++ spin_lock_bh(&conn->session->lock); ++ len = sprintf(buf, "%s\n", conn->portal_address); ++ spin_unlock_bh(&conn->session->lock); + break; + default: + return iscsi_conn_get_param(cls_conn, param, buf); +@@ -2069,6 +2117,29 @@ + return len; + } + ++static int ++iscsi_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, ++ char *buf) ++{ ++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata); ++ int len; ++ ++ switch (param) { ++ case ISCSI_HOST_PARAM_IPADDRESS: ++ spin_lock_bh(&session->lock); ++ if (!session->leadconn) ++ len = -ENODEV; ++ else ++ len = sprintf(buf, "%s\n", ++ session->leadconn->local_address); ++ 
spin_unlock_bh(&session->lock); ++ break; ++ default: ++ return iscsi_host_get_param(shost, param, buf); ++ } ++ return len; ++} ++ + static void + iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) + { +@@ -2096,6 +2167,7 @@ + static struct iscsi_cls_session * + iscsi_tcp_session_create(struct iscsi_transport *iscsit, + struct scsi_transport_template *scsit, ++ uint16_t cmds_max, uint16_t qdepth, + uint32_t initial_cmdsn, uint32_t *hostno) + { + struct iscsi_cls_session *cls_session; +@@ -2103,7 +2175,7 @@ + uint32_t hn; + int cmd_i; + +- cls_session = iscsi_session_setup(iscsit, scsit, ++ cls_session = iscsi_session_setup(iscsit, scsit, cmds_max, qdepth, + sizeof(struct iscsi_tcp_cmd_task), + sizeof(struct iscsi_tcp_mgmt_task), + initial_cmdsn, &hn); +@@ -2142,17 +2214,24 @@ + iscsi_session_teardown(cls_session); + } + ++static int iscsi_tcp_slave_configure(struct scsi_device *sdev) ++{ ++ blk_queue_dma_alignment(sdev->request_queue, 0); ++ return 0; ++} ++ + static struct scsi_host_template iscsi_sht = { + .name = "iSCSI Initiator over TCP/IP", + .queuecommand = iscsi_queuecommand, + .change_queue_depth = iscsi_change_queue_depth, +- .can_queue = ISCSI_XMIT_CMDS_MAX - 1, ++ .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1, + .sg_tablesize = ISCSI_SG_TABLESIZE, + .max_sectors = 0xFFFF, + .cmd_per_lun = ISCSI_DEF_CMD_PER_LUN, + .eh_abort_handler = iscsi_eh_abort, + .eh_host_reset_handler = iscsi_eh_host_reset, + .use_clustering = DISABLE_CLUSTERING, ++ .slave_configure = iscsi_tcp_slave_configure, + .proc_name = "iscsi_tcp", + .this_id = -1, + }; +@@ -2179,8 +2258,12 @@ + ISCSI_EXP_STATSN | + ISCSI_PERSISTENT_PORT | + ISCSI_PERSISTENT_ADDRESS | +- ISCSI_TARGET_NAME | +- ISCSI_TPGT, ++ ISCSI_TARGET_NAME | ISCSI_TPGT | ++ ISCSI_USERNAME | ISCSI_PASSWORD | ++ ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN, ++ .host_param_mask = ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS | ++ ISCSI_HOST_INITIATOR_NAME | ++ ISCSI_HOST_NETDEV_NAME, + .host_template = &iscsi_sht, + .conndata_size = sizeof(struct iscsi_conn), + .max_conn = 1, +@@ -2197,6 +2280,9 @@ + .get_session_param = iscsi_session_get_param, + .start_conn = iscsi_conn_start, + .stop_conn = iscsi_tcp_conn_stop, ++ /* iscsi host params */ ++ .get_host_param = iscsi_tcp_host_get_param, ++ .set_host_param = iscsi_host_set_param, + /* IO */ + .send_pdu = iscsi_conn_send_pdu, + .get_stats = iscsi_conn_get_stats, +diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.h linux-2.6.22-591/drivers/scsi/iscsi_tcp.h +--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/iscsi_tcp.h 2007-12-21 15:36:12.000000000 -0500 +@@ -29,11 +29,12 @@ + #define IN_PROGRESS_HEADER_GATHER 0x1 + #define IN_PROGRESS_DATA_RECV 0x2 + #define IN_PROGRESS_DDIGEST_RECV 0x3 ++#define IN_PROGRESS_PAD_RECV 0x4 + + /* xmit state machine */ + #define XMSTATE_IDLE 0x0 +-#define XMSTATE_R_HDR 0x1 +-#define XMSTATE_W_HDR 0x2 ++#define XMSTATE_CMD_HDR_INIT 0x1 ++#define XMSTATE_CMD_HDR_XMIT 0x2 + #define XMSTATE_IMM_HDR 0x4 + #define XMSTATE_IMM_DATA 0x8 + #define XMSTATE_UNS_INIT 0x10 +@@ -44,6 +45,8 @@ + #define XMSTATE_W_PAD 0x200 + #define XMSTATE_W_RESEND_PAD 0x400 + #define XMSTATE_W_RESEND_DATA_DIGEST 0x800 ++#define XMSTATE_IMM_HDR_INIT 0x1000 ++#define XMSTATE_SOL_HDR_INIT 0x2000 + + #define ISCSI_PAD_LEN 4 + #define ISCSI_SG_TABLESIZE SG_ALL +@@ -152,7 +155,7 @@ + struct scatterlist *sg; /* per-cmd SG list */ + struct scatterlist *bad_sg; /* assert statement */ + int sg_count; /* SG's to process */ +- 
uint32_t exp_r2tsn; ++ uint32_t exp_datasn; /* expected target's R2TSN/DataSN */ + int data_offset; + struct iscsi_r2t_info *r2t; /* in progress R2T */ + struct iscsi_queue r2tpool; +diff -Nurb linux-2.6.22-570/drivers/scsi/jazz_esp.c linux-2.6.22-591/drivers/scsi/jazz_esp.c +--- linux-2.6.22-570/drivers/scsi/jazz_esp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/jazz_esp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,6 +1,6 @@ + /* jazz_esp.c: ESP front-end for MIPS JAZZ systems. + * +- * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.franken.de) ++ * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.franken.de) + */ + + #include +@@ -143,7 +143,7 @@ + goto fail; + + host->max_id = 8; +- esp = host_to_esp(host); ++ esp = shost_priv(host); + + esp->host = host; + esp->dev = dev; +diff -Nurb linux-2.6.22-570/drivers/scsi/libiscsi.c linux-2.6.22-591/drivers/scsi/libiscsi.c +--- linux-2.6.22-570/drivers/scsi/libiscsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/libiscsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -22,7 +22,6 @@ + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + #include +-#include + #include + #include + #include +@@ -46,27 +45,53 @@ + } + EXPORT_SYMBOL_GPL(class_to_transport_session); + +-#define INVALID_SN_DELTA 0xffff ++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ ++#define SNA32_CHECK 2147483648UL + +-int +-iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) ++static int iscsi_sna_lt(u32 n1, u32 n2) ++{ ++ return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || ++ (n1 > n2 && (n2 - n1 < SNA32_CHECK))); ++} ++ ++/* Serial Number Arithmetic, 32 bits, less than or equal, RFC1982 */ ++static int iscsi_sna_lte(u32 n1, u32 n2) ++{ ++ return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || ++ (n1 > n2 && (n2 - n1 < SNA32_CHECK))); ++} ++ ++void ++iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) + { + uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn); + uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn); + +- if (max_cmdsn < exp_cmdsn -1 && +- max_cmdsn > exp_cmdsn - INVALID_SN_DELTA) +- return ISCSI_ERR_MAX_CMDSN; +- if (max_cmdsn > session->max_cmdsn || +- max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA) +- session->max_cmdsn = max_cmdsn; +- if (exp_cmdsn > session->exp_cmdsn || +- exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA) ++ /* ++ * standard specifies this check for when to update expected and ++ * max sequence numbers ++ */ ++ if (iscsi_sna_lt(max_cmdsn, exp_cmdsn - 1)) ++ return; ++ ++ if (exp_cmdsn != session->exp_cmdsn && ++ !iscsi_sna_lt(exp_cmdsn, session->exp_cmdsn)) + session->exp_cmdsn = exp_cmdsn; + +- return 0; ++ if (max_cmdsn != session->max_cmdsn && ++ !iscsi_sna_lt(max_cmdsn, session->max_cmdsn)) { ++ session->max_cmdsn = max_cmdsn; ++ /* ++ * if the window closed with IO queued, then kick the ++ * xmit thread ++ */ ++ if (!list_empty(&session->leadconn->xmitqueue) || ++ __kfifo_len(session->leadconn->mgmtqueue)) ++ scsi_queue_work(session->host, ++ &session->leadconn->xmitwork); ++ } + } +-EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn); ++EXPORT_SYMBOL_GPL(iscsi_update_cmdsn); + + void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask, + struct iscsi_data *hdr) +@@ -115,14 +140,17 @@ + hdr->flags = ISCSI_ATTR_SIMPLE; + int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun); + hdr->itt = build_itt(ctask->itt, conn->id, session->age); +- hdr->data_length = 
cpu_to_be32(sc->request_bufflen); ++ hdr->data_length = cpu_to_be32(scsi_bufflen(sc)); + hdr->cmdsn = cpu_to_be32(session->cmdsn); + session->cmdsn++; + hdr->exp_statsn = cpu_to_be32(conn->exp_statsn); + memcpy(hdr->cdb, sc->cmnd, sc->cmd_len); +- memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len); ++ if (sc->cmd_len < MAX_COMMAND_SIZE) ++ memset(&hdr->cdb[sc->cmd_len], 0, ++ MAX_COMMAND_SIZE - sc->cmd_len); + + ctask->data_count = 0; ++ ctask->imm_count = 0; + if (sc->sc_data_direction == DMA_TO_DEVICE) { + hdr->flags |= ISCSI_FLAG_CMD_WRITE; + /* +@@ -139,25 +167,24 @@ + * + * pad_count bytes to be sent as zero-padding + */ +- ctask->imm_count = 0; + ctask->unsol_count = 0; + ctask->unsol_offset = 0; + ctask->unsol_datasn = 0; + + if (session->imm_data_en) { +- if (ctask->total_length >= session->first_burst) ++ if (scsi_bufflen(sc) >= session->first_burst) + ctask->imm_count = min(session->first_burst, + conn->max_xmit_dlength); + else +- ctask->imm_count = min(ctask->total_length, ++ ctask->imm_count = min(scsi_bufflen(sc), + conn->max_xmit_dlength); + hton24(ctask->hdr->dlength, ctask->imm_count); + } else + zero_data(ctask->hdr->dlength); + + if (!session->initial_r2t_en) { +- ctask->unsol_count = min(session->first_burst, +- ctask->total_length) - ctask->imm_count; ++ ctask->unsol_count = min((session->first_burst), ++ (scsi_bufflen(sc))) - ctask->imm_count; + ctask->unsol_offset = ctask->imm_count; + } + +@@ -165,7 +192,6 @@ + /* No unsolicit Data-Out's */ + ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL; + } else { +- ctask->datasn = 0; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + zero_data(hdr->dlength); + +@@ -174,8 +200,13 @@ + } + + conn->scsicmd_pdus_cnt++; ++ ++ debug_scsi("iscsi prep [%s cid %d sc %p cdb 0x%x itt 0x%x len %d " ++ "cmdsn %d win %d]\n", ++ sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", ++ conn->id, sc, sc->cmnd[0], ctask->itt, scsi_bufflen(sc), ++ session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); + } +-EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu); + + /** + * iscsi_complete_command - return command back to scsi-ml +@@ -204,26 +235,12 @@ + atomic_inc(&ctask->refcount); + } + +-static void iscsi_get_ctask(struct iscsi_cmd_task *ctask) +-{ +- spin_lock_bh(&ctask->conn->session->lock); +- __iscsi_get_ctask(ctask); +- spin_unlock_bh(&ctask->conn->session->lock); +-} +- + static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask) + { + if (atomic_dec_and_test(&ctask->refcount)) + iscsi_complete_command(ctask); + } + +-static void iscsi_put_ctask(struct iscsi_cmd_task *ctask) +-{ +- spin_lock_bh(&ctask->conn->session->lock); +- __iscsi_put_ctask(ctask); +- spin_unlock_bh(&ctask->conn->session->lock); +-} +- + /** + * iscsi_cmd_rsp - SCSI Command Response processing + * @conn: iscsi connection +@@ -235,21 +252,15 @@ + * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and + * then completes the command and task. 
+ **/ +-static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, ++static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + struct iscsi_cmd_task *ctask, char *data, + int datalen) + { +- int rc; + struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr; + struct iscsi_session *session = conn->session; + struct scsi_cmnd *sc = ctask->sc; + +- rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr); +- if (rc) { +- sc->result = DID_ERROR << 16; +- goto out; +- } +- ++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); + conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1; + + sc->result = (DID_OK << 16) | rhdr->cmd_status; +@@ -286,14 +297,14 @@ + if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) { + int res_count = be32_to_cpu(rhdr->residual_count); + +- if (res_count > 0 && res_count <= sc->request_bufflen) +- sc->resid = res_count; ++ if (res_count > 0 && res_count <= scsi_bufflen(sc)) ++ scsi_set_resid(sc, res_count); + else + sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; + } else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW) + sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status; + else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW) +- sc->resid = be32_to_cpu(rhdr->residual_count); ++ scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count)); + + out: + debug_scsi("done [sc %lx res %d itt 0x%x]\n", +@@ -301,7 +312,6 @@ + conn->scsirsp_pdus_cnt++; + + __iscsi_put_ctask(ctask); +- return rc; + } + + static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) +@@ -381,7 +391,7 @@ + switch(opcode) { + case ISCSI_OP_SCSI_CMD_RSP: + BUG_ON((void*)ctask != ctask->sc->SCp.ptr); +- rc = iscsi_scsi_cmd_rsp(conn, hdr, ctask, data, ++ iscsi_scsi_cmd_rsp(conn, hdr, ctask, data, + datalen); + break; + case ISCSI_OP_SCSI_DATA_IN: +@@ -405,11 +415,7 @@ + debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n", + opcode, conn->id, mtask->itt, datalen); + +- rc = iscsi_check_assign_cmdsn(session, +- (struct iscsi_nopin*)hdr); +- if (rc) +- goto done; +- ++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); + switch(opcode) { + case ISCSI_OP_LOGOUT_RSP: + if (datalen) { +@@ -458,10 +464,7 @@ + break; + } + } else if (itt == ~0U) { +- rc = iscsi_check_assign_cmdsn(session, +- (struct iscsi_nopin*)hdr); +- if (rc) +- goto done; ++ iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr); + + switch(opcode) { + case ISCSI_OP_NOOP_IN: +@@ -491,7 +494,6 @@ + } else + rc = ISCSI_ERR_BAD_ITT; + +-done: + return rc; + } + EXPORT_SYMBOL_GPL(__iscsi_complete_pdu); +@@ -578,17 +580,47 @@ + } + EXPORT_SYMBOL_GPL(iscsi_conn_failure); + ++static void iscsi_prep_mtask(struct iscsi_conn *conn, ++ struct iscsi_mgmt_task *mtask) ++{ ++ struct iscsi_session *session = conn->session; ++ struct iscsi_hdr *hdr = mtask->hdr; ++ struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr; ++ ++ if (hdr->opcode != (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) && ++ hdr->opcode != (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE)) ++ nop->exp_statsn = cpu_to_be32(conn->exp_statsn); ++ /* ++ * pre-format CmdSN for outgoing PDU. 
++ */ ++ nop->cmdsn = cpu_to_be32(session->cmdsn); ++ if (hdr->itt != RESERVED_ITT) { ++ hdr->itt = build_itt(mtask->itt, conn->id, session->age); ++ if (conn->c_stage == ISCSI_CONN_STARTED && ++ !(hdr->opcode & ISCSI_OP_IMMEDIATE)) ++ session->cmdsn++; ++ } ++ ++ if (session->tt->init_mgmt_task) ++ session->tt->init_mgmt_task(conn, mtask); ++ ++ debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n", ++ hdr->opcode, hdr->itt, mtask->data_count); ++} ++ + static int iscsi_xmit_mtask(struct iscsi_conn *conn) + { + struct iscsi_hdr *hdr = conn->mtask->hdr; + int rc, was_logout = 0; + ++ spin_unlock_bh(&conn->session->lock); + if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT) { + conn->session->state = ISCSI_STATE_IN_RECOVERY; + iscsi_block_session(session_to_cls(conn->session)); + was_logout = 1; + } + rc = conn->session->tt->xmit_mgmt_task(conn, conn->mtask); ++ spin_lock_bh(&conn->session->lock); + if (rc) + return rc; + +@@ -602,6 +634,45 @@ + return 0; + } + ++static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn) ++{ ++ struct iscsi_session *session = conn->session; ++ ++ /* ++ * Check for iSCSI window and take care of CmdSN wrap-around ++ */ ++ if (!iscsi_sna_lte(session->cmdsn, session->max_cmdsn)) { ++ debug_scsi("iSCSI CmdSN closed. MaxCmdSN %u CmdSN %u\n", ++ session->max_cmdsn, session->cmdsn); ++ return -ENOSPC; ++ } ++ return 0; ++} ++ ++static int iscsi_xmit_ctask(struct iscsi_conn *conn) ++{ ++ struct iscsi_cmd_task *ctask = conn->ctask; ++ int rc = 0; ++ ++ /* ++ * serialize with TMF AbortTask ++ */ ++ if (ctask->state == ISCSI_TASK_ABORTING) ++ goto done; ++ ++ __iscsi_get_ctask(ctask); ++ spin_unlock_bh(&conn->session->lock); ++ rc = conn->session->tt->xmit_cmd_task(conn, ctask); ++ spin_lock_bh(&conn->session->lock); ++ __iscsi_put_ctask(ctask); ++ ++done: ++ if (!rc) ++ /* done with this ctask */ ++ conn->ctask = NULL; ++ return rc; ++} ++ + /** + * iscsi_data_xmit - xmit any command into the scheduled connection + * @conn: iscsi connection +@@ -613,106 +684,79 @@ + **/ + static int iscsi_data_xmit(struct iscsi_conn *conn) + { +- struct iscsi_transport *tt; + int rc = 0; + ++ spin_lock_bh(&conn->session->lock); + if (unlikely(conn->suspend_tx)) { + debug_scsi("conn %d Tx suspended!\n", conn->id); ++ spin_unlock_bh(&conn->session->lock); + return -ENODATA; + } +- tt = conn->session->tt; +- +- /* +- * Transmit in the following order: +- * +- * 1) un-finished xmit (ctask or mtask) +- * 2) immediate control PDUs +- * 3) write data +- * 4) SCSI commands +- * 5) non-immediate control PDUs +- * +- * No need to lock around __kfifo_get as long as +- * there's one producer and one consumer. 
+- */ +- +- BUG_ON(conn->ctask && conn->mtask); + + if (conn->ctask) { +- iscsi_get_ctask(conn->ctask); +- rc = tt->xmit_cmd_task(conn, conn->ctask); +- iscsi_put_ctask(conn->ctask); ++ rc = iscsi_xmit_ctask(conn); + if (rc) + goto again; +- /* done with this in-progress ctask */ +- conn->ctask = NULL; + } ++ + if (conn->mtask) { + rc = iscsi_xmit_mtask(conn); + if (rc) + goto again; + } + +- /* process immediate first */ +- if (unlikely(__kfifo_len(conn->immqueue))) { +- while (__kfifo_get(conn->immqueue, (void*)&conn->mtask, ++ /* ++ * process mgmt pdus like nops before commands since we should ++ * only have one nop-out as a ping from us and targets should not ++ * overflow us with nop-ins ++ */ ++check_mgmt: ++ while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask, + sizeof(void*))) { +- spin_lock_bh(&conn->session->lock); +- list_add_tail(&conn->mtask->running, +- &conn->mgmt_run_list); +- spin_unlock_bh(&conn->session->lock); ++ iscsi_prep_mtask(conn, conn->mtask); ++ list_add_tail(&conn->mtask->running, &conn->mgmt_run_list); + rc = iscsi_xmit_mtask(conn); + if (rc) + goto again; + } +- } + + /* process command queue */ +- spin_lock_bh(&conn->session->lock); + while (!list_empty(&conn->xmitqueue)) { ++ rc = iscsi_check_cmdsn_window_closed(conn); ++ if (rc) { ++ spin_unlock_bh(&conn->session->lock); ++ return rc; ++ } + /* + * iscsi tcp may readd the task to the xmitqueue to send + * write data + */ + conn->ctask = list_entry(conn->xmitqueue.next, + struct iscsi_cmd_task, running); ++ if (conn->ctask->state == ISCSI_TASK_PENDING) { ++ iscsi_prep_scsi_cmd_pdu(conn->ctask); ++ conn->session->tt->init_cmd_task(conn->ctask); ++ } + conn->ctask->state = ISCSI_TASK_RUNNING; + list_move_tail(conn->xmitqueue.next, &conn->run_list); +- __iscsi_get_ctask(conn->ctask); +- spin_unlock_bh(&conn->session->lock); +- +- rc = tt->xmit_cmd_task(conn, conn->ctask); +- +- spin_lock_bh(&conn->session->lock); +- __iscsi_put_ctask(conn->ctask); +- if (rc) { +- spin_unlock_bh(&conn->session->lock); +- goto again; +- } +- } +- spin_unlock_bh(&conn->session->lock); +- /* done with this ctask */ +- conn->ctask = NULL; +- +- /* process the rest control plane PDUs, if any */ +- if (unlikely(__kfifo_len(conn->mgmtqueue))) { +- while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask, +- sizeof(void*))) { +- spin_lock_bh(&conn->session->lock); +- list_add_tail(&conn->mtask->running, +- &conn->mgmt_run_list); +- spin_unlock_bh(&conn->session->lock); +- rc = iscsi_xmit_mtask(conn); ++ rc = iscsi_xmit_ctask(conn); + if (rc) + goto again; ++ /* ++ * we could continuously get new ctask requests so ++ * we need to check the mgmt queue for nops that need to ++ * be sent to avoid starvation ++ */ ++ if (__kfifo_len(conn->mgmtqueue)) ++ goto check_mgmt; + } +- } +- ++ spin_unlock_bh(&conn->session->lock); + return -ENODATA; + + again: + if (unlikely(conn->suspend_tx)) +- return -ENODATA; +- ++ rc = -ENODATA; ++ spin_unlock_bh(&conn->session->lock); + return rc; + } + +@@ -724,11 +768,9 @@ + /* + * serialize Xmit worker on a per-connection basis. 
+ */ +- mutex_lock(&conn->xmitmutex); + do { + rc = iscsi_data_xmit(conn); + } while (rc >= 0 || rc == -EAGAIN); +- mutex_unlock(&conn->xmitmutex); + } + + enum { +@@ -786,20 +828,23 @@ + goto fault; + } + +- /* +- * Check for iSCSI window and take care of CmdSN wrap-around +- */ +- if ((int)(session->max_cmdsn - session->cmdsn) < 0) { +- reason = FAILURE_WINDOW_CLOSED; +- goto reject; +- } +- + conn = session->leadconn; + if (!conn) { + reason = FAILURE_SESSION_FREED; + goto fault; + } + ++ /* ++ * We check this here and in data xmit, because if we get to the point ++ * that this check is hitting the window then we have enough IO in ++ * flight and enough IO waiting to be transmitted it is better ++ * to let the scsi/block layer queue up. ++ */ ++ if (iscsi_check_cmdsn_window_closed(conn)) { ++ reason = FAILURE_WINDOW_CLOSED; ++ goto reject; ++ } ++ + if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask, + sizeof(void*))) { + reason = FAILURE_OOM; +@@ -814,18 +859,8 @@ + ctask->conn = conn; + ctask->sc = sc; + INIT_LIST_HEAD(&ctask->running); +- ctask->total_length = sc->request_bufflen; +- iscsi_prep_scsi_cmd_pdu(ctask); +- +- session->tt->init_cmd_task(ctask); + + list_add_tail(&ctask->running, &conn->xmitqueue); +- debug_scsi( +- "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d " +- "win %d]\n", +- sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", +- conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen, +- session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1); + spin_unlock(&session->lock); + + scsi_queue_work(host, &conn->xmitwork); +@@ -841,7 +876,7 @@ + printk(KERN_ERR "iscsi: cmd 0x%x is not queued (%d)\n", + sc->cmnd[0], reason); + sc->result = (DID_NO_CONNECT << 16); +- sc->resid = sc->request_bufflen; ++ scsi_set_resid(sc, scsi_bufflen(sc)); + sc->scsi_done(sc); + return 0; + } +@@ -856,19 +891,16 @@ + } + EXPORT_SYMBOL_GPL(iscsi_change_queue_depth); + +-static int +-iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr, ++static struct iscsi_mgmt_task * ++__iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, + char *data, uint32_t data_size) + { + struct iscsi_session *session = conn->session; +- struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr; + struct iscsi_mgmt_task *mtask; + +- spin_lock_bh(&session->lock); +- if (session->state == ISCSI_STATE_TERMINATE) { +- spin_unlock_bh(&session->lock); +- return -EPERM; +- } ++ if (session->state == ISCSI_STATE_TERMINATE) ++ return NULL; ++ + if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) || + hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE)) + /* +@@ -882,27 +914,11 @@ + BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE); + BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED); + +- nop->exp_statsn = cpu_to_be32(conn->exp_statsn); + if (!__kfifo_get(session->mgmtpool.queue, +- (void*)&mtask, sizeof(void*))) { +- spin_unlock_bh(&session->lock); +- return -ENOSPC; +- } ++ (void*)&mtask, sizeof(void*))) ++ return NULL; + } + +- /* +- * pre-format CmdSN for outgoing PDU. 
+- */ +- if (hdr->itt != RESERVED_ITT) { +- hdr->itt = build_itt(mtask->itt, conn->id, session->age); +- nop->cmdsn = cpu_to_be32(session->cmdsn); +- if (conn->c_stage == ISCSI_CONN_STARTED && +- !(hdr->opcode & ISCSI_OP_IMMEDIATE)) +- session->cmdsn++; +- } else +- /* do not advance CmdSN */ +- nop->cmdsn = cpu_to_be32(session->cmdsn); +- + if (data_size) { + memcpy(mtask->data, data, data_size); + mtask->data_count = data_size; +@@ -911,38 +927,23 @@ + + INIT_LIST_HEAD(&mtask->running); + memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr)); +- if (session->tt->init_mgmt_task) +- session->tt->init_mgmt_task(conn, mtask, data, data_size); +- spin_unlock_bh(&session->lock); +- +- debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n", +- hdr->opcode, hdr->itt, data_size); +- +- /* +- * since send_pdu() could be called at least from two contexts, +- * we need to serialize __kfifo_put, so we don't have to take +- * additional lock on fast data-path +- */ +- if (hdr->opcode & ISCSI_OP_IMMEDIATE) +- __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*)); +- else + __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*)); +- +- scsi_queue_work(session->host, &conn->xmitwork); +- return 0; ++ return mtask; + } + + int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr, + char *data, uint32_t data_size) + { + struct iscsi_conn *conn = cls_conn->dd_data; +- int rc; +- +- mutex_lock(&conn->xmitmutex); +- rc = iscsi_conn_send_generic(conn, hdr, data, data_size); +- mutex_unlock(&conn->xmitmutex); ++ struct iscsi_session *session = conn->session; ++ int err = 0; + +- return rc; ++ spin_lock_bh(&session->lock); ++ if (!__iscsi_conn_send_pdu(conn, hdr, data, data_size)) ++ err = -EPERM; ++ spin_unlock_bh(&session->lock); ++ scsi_queue_work(session->host, &conn->xmitwork); ++ return err; + } + EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu); + +@@ -1027,14 +1028,12 @@ + spin_unlock(&session->lock); + } + +-/* must be called with the mutex lock */ + static int iscsi_exec_abort_task(struct scsi_cmnd *sc, + struct iscsi_cmd_task *ctask) + { + struct iscsi_conn *conn = ctask->conn; + struct iscsi_session *session = conn->session; + struct iscsi_tm *hdr = &conn->tmhdr; +- int rc; + + /* + * ctask timed out but session is OK requests must be serialized. 
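
A note for readers following the two hunks above: the old iscsi_conn_send_generic() took the session lock and the xmitmutex internally, while the reworked __iscsi_conn_send_pdu() is called with the session lock already held and only reserves and queues the mtask; iscsi_conn_send_pdu() is now the locking wrapper that also kicks the xmit worker. Below is a minimal caller sketch of the resulting interface. It is illustrative only and not part of the patch; example_send_nop() is a hypothetical helper, and the immediate NOP-OUT ping is just one plausible use.

	/* Illustrative sketch: send an immediate NOP-OUT through the
	 * reworked single-queue path. iscsi_conn_send_pdu() takes the
	 * session lock and schedules conn->xmitwork itself. */
	static int example_send_nop(struct iscsi_cls_conn *cls_conn)
	{
		struct iscsi_nopout hdr;

		memset(&hdr, 0, sizeof(hdr));
		hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE;
		hdr.flags = ISCSI_FLAG_CMD_FINAL;
		hdr.itt = RESERVED_ITT;	/* per RFC 3720: no response requested */

		/* queues an mtask on conn->mgmtqueue, then wakes the worker */
		return iscsi_conn_send_pdu(cls_conn, (struct iscsi_hdr *)&hdr,
					   NULL, 0);
	}
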
+@@ -1047,32 +1046,27 @@ + hdr->rtt = ctask->hdr->itt; + hdr->refcmdsn = ctask->hdr->cmdsn; + +- rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr, ++ ctask->mtask = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)hdr, + NULL, 0); +- if (rc) { ++ if (!ctask->mtask) { + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); +- debug_scsi("abort sent failure [itt 0x%x] %d\n", ctask->itt, +- rc); +- return rc; ++ debug_scsi("abort sent failure [itt 0x%x]\n", ctask->itt); ++ return -EPERM; + } ++ ctask->state = ISCSI_TASK_ABORTING; + + debug_scsi("abort sent [itt 0x%x]\n", ctask->itt); + +- spin_lock_bh(&session->lock); +- ctask->mtask = (struct iscsi_mgmt_task *) +- session->mgmt_cmds[get_itt(hdr->itt) - +- ISCSI_MGMT_ITT_OFFSET]; +- + if (conn->tmabort_state == TMABORT_INITIAL) { + conn->tmfcmd_pdus_cnt++; +- conn->tmabort_timer.expires = 10*HZ + jiffies; ++ conn->tmabort_timer.expires = 20*HZ + jiffies; + conn->tmabort_timer.function = iscsi_tmabort_timedout; + conn->tmabort_timer.data = (unsigned long)ctask; + add_timer(&conn->tmabort_timer); + debug_scsi("abort set timeout [itt 0x%x]\n", ctask->itt); + } + spin_unlock_bh(&session->lock); +- mutex_unlock(&conn->xmitmutex); ++ scsi_queue_work(session->host, &conn->xmitwork); + + /* + * block eh thread until: +@@ -1089,13 +1083,12 @@ + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&conn->tmabort_timer); +- +- mutex_lock(&conn->xmitmutex); ++ spin_lock_bh(&session->lock); + return 0; + } + + /* +- * xmit mutex and session lock must be held ++ * session lock must be held + */ + static struct iscsi_mgmt_task * + iscsi_remove_mgmt_task(struct kfifo *fifo, uint32_t itt) +@@ -1127,7 +1120,7 @@ + if (!ctask->mtask) + return -EINVAL; + +- if (!iscsi_remove_mgmt_task(conn->immqueue, ctask->mtask->itt)) ++ if (!iscsi_remove_mgmt_task(conn->mgmtqueue, ctask->mtask->itt)) + list_del(&ctask->mtask->running); + __kfifo_put(session->mgmtpool.queue, (void*)&ctask->mtask, + sizeof(void*)); +@@ -1136,7 +1129,7 @@ + } + + /* +- * session lock and xmitmutex must be held ++ * session lock must be held + */ + static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask, + int err) +@@ -1147,11 +1140,14 @@ + if (!sc) + return; + ++ if (ctask->state != ISCSI_TASK_PENDING) + conn->session->tt->cleanup_cmd_task(conn, ctask); + iscsi_ctask_mtask_cleanup(ctask); + + sc->result = err; +- sc->resid = sc->request_bufflen; ++ scsi_set_resid(sc, scsi_bufflen(sc)); ++ if (conn->ctask == ctask) ++ conn->ctask = NULL; + /* release ref from queuecommand */ + __iscsi_put_ctask(ctask); + } +@@ -1179,7 +1175,6 @@ + conn->eh_abort_cnt++; + debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt); + +- mutex_lock(&conn->xmitmutex); + spin_lock_bh(&session->lock); + + /* +@@ -1192,9 +1187,8 @@ + + /* ctask completed before time out */ + if (!ctask->sc) { +- spin_unlock_bh(&session->lock); + debug_scsi("sc completed while abort in progress\n"); +- goto success_rel_mutex; ++ goto success; + } + + /* what should we do here ? 
*/ +@@ -1204,15 +1198,13 @@ + goto failed; + } + +- if (ctask->state == ISCSI_TASK_PENDING) +- goto success_cleanup; ++ if (ctask->state == ISCSI_TASK_PENDING) { ++ fail_command(conn, ctask, DID_ABORT << 16); ++ goto success; ++ } + + conn->tmabort_state = TMABORT_INITIAL; +- +- spin_unlock_bh(&session->lock); + rc = iscsi_exec_abort_task(sc, ctask); +- spin_lock_bh(&session->lock); +- + if (rc || sc->SCp.phase != session->age || + session->state != ISCSI_STATE_LOGGED_IN) + goto failed; +@@ -1220,45 +1212,44 @@ + + switch (conn->tmabort_state) { + case TMABORT_SUCCESS: +- goto success_cleanup; ++ spin_unlock_bh(&session->lock); ++ /* ++ * clean up task if aborted. grab the recv lock as a writer ++ */ ++ write_lock_bh(conn->recv_lock); ++ spin_lock(&session->lock); ++ fail_command(conn, ctask, DID_ABORT << 16); ++ spin_unlock(&session->lock); ++ write_unlock_bh(conn->recv_lock); ++ /* ++ * make sure xmit thread is not still touching the ++ * ctask/scsi_cmnd ++ */ ++ scsi_flush_work(session->host); ++ goto success_unlocked; + case TMABORT_NOT_FOUND: + if (!ctask->sc) { + /* ctask completed before tmf abort response */ +- spin_unlock_bh(&session->lock); + debug_scsi("sc completed while abort in progress\n"); +- goto success_rel_mutex; ++ goto success; + } + /* fall through */ + default: + /* timedout or failed */ + spin_unlock_bh(&session->lock); + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); +- spin_lock_bh(&session->lock); +- goto failed; ++ goto failed_unlocked; + } + +-success_cleanup: +- debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); ++success: + spin_unlock_bh(&session->lock); +- +- /* +- * clean up task if aborted. we have the xmitmutex so grab +- * the recv lock as a writer +- */ +- write_lock_bh(conn->recv_lock); +- spin_lock(&session->lock); +- fail_command(conn, ctask, DID_ABORT << 16); +- spin_unlock(&session->lock); +- write_unlock_bh(conn->recv_lock); +- +-success_rel_mutex: +- mutex_unlock(&conn->xmitmutex); ++success_unlocked: ++ debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); + return SUCCESS; + + failed: + spin_unlock_bh(&session->lock); +- mutex_unlock(&conn->xmitmutex); +- ++failed_unlocked: + debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt); + return FAILED; + } +@@ -1339,6 +1330,10 @@ + * iscsi_session_setup - create iscsi cls session and host and session + * @scsit: scsi transport template + * @iscsit: iscsi transport template ++ * @cmds_max: scsi host can queue ++ * @qdepth: scsi host cmds per lun ++ * @cmd_task_size: LLD ctask private data size ++ * @mgmt_task_size: LLD mtask private data size + * @initial_cmdsn: initial CmdSN + * @hostno: host no allocated + * +@@ -1348,6 +1343,7 @@ + struct iscsi_cls_session * + iscsi_session_setup(struct iscsi_transport *iscsit, + struct scsi_transport_template *scsit, ++ uint16_t cmds_max, uint16_t qdepth, + int cmd_task_size, int mgmt_task_size, + uint32_t initial_cmdsn, uint32_t *hostno) + { +@@ -1356,11 +1352,32 @@ + struct iscsi_cls_session *cls_session; + int cmd_i; + ++ if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) { ++ if (qdepth != 0) ++ printk(KERN_ERR "iscsi: invalid queue depth of %d. " ++ "Queue depth must be between 1 and %d.\n", ++ qdepth, ISCSI_MAX_CMD_PER_LUN); ++ qdepth = ISCSI_DEF_CMD_PER_LUN; ++ } ++ ++ if (cmds_max < 2 || (cmds_max & (cmds_max - 1)) || ++ cmds_max >= ISCSI_MGMT_ITT_OFFSET) { ++ if (cmds_max != 0) ++ printk(KERN_ERR "iscsi: invalid can_queue of %d. 
" ++ "can_queue must be a power of 2 and between " ++ "2 and %d - setting to %d.\n", cmds_max, ++ ISCSI_MGMT_ITT_OFFSET, ISCSI_DEF_XMIT_CMDS_MAX); ++ cmds_max = ISCSI_DEF_XMIT_CMDS_MAX; ++ } ++ + shost = scsi_host_alloc(iscsit->host_template, + hostdata_privsize(sizeof(*session))); + if (!shost) + return NULL; + ++ /* the iscsi layer takes one task for reserve */ ++ shost->can_queue = cmds_max - 1; ++ shost->cmd_per_lun = qdepth; + shost->max_id = 1; + shost->max_channel = 0; + shost->max_lun = iscsit->max_lun; +@@ -1374,7 +1391,7 @@ + session->host = shost; + session->state = ISCSI_STATE_FREE; + session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX; +- session->cmds_max = ISCSI_XMIT_CMDS_MAX; ++ session->cmds_max = cmds_max; + session->cmdsn = initial_cmdsn; + session->exp_cmdsn = initial_cmdsn + 1; + session->max_cmdsn = initial_cmdsn + 1; +@@ -1461,7 +1478,14 @@ + iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds); + iscsi_pool_free(&session->cmdpool, (void**)session->cmds); + ++ kfree(session->password); ++ kfree(session->password_in); ++ kfree(session->username); ++ kfree(session->username_in); + kfree(session->targetname); ++ kfree(session->netdev); ++ kfree(session->hwaddress); ++ kfree(session->initiatorname); + + iscsi_destroy_session(cls_session); + scsi_host_put(shost); +@@ -1499,11 +1523,6 @@ + INIT_LIST_HEAD(&conn->xmitqueue); + + /* initialize general immediate & non-immediate PDU commands queue */ +- conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*), +- GFP_KERNEL, NULL); +- if (conn->immqueue == ERR_PTR(-ENOMEM)) +- goto immqueue_alloc_fail; +- + conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*), + GFP_KERNEL, NULL); + if (conn->mgmtqueue == ERR_PTR(-ENOMEM)) +@@ -1527,7 +1546,6 @@ + conn->login_mtask->data = conn->data = data; + + init_timer(&conn->tmabort_timer); +- mutex_init(&conn->xmitmutex); + init_waitqueue_head(&conn->ehwait); + + return cls_conn; +@@ -1538,8 +1556,6 @@ + login_mtask_alloc_fail: + kfifo_free(conn->mgmtqueue); + mgmtqueue_alloc_fail: +- kfifo_free(conn->immqueue); +-immqueue_alloc_fail: + iscsi_destroy_conn(cls_conn); + return NULL; + } +@@ -1558,10 +1574,8 @@ + struct iscsi_session *session = conn->session; + unsigned long flags; + +- set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); +- mutex_lock(&conn->xmitmutex); +- + spin_lock_bh(&session->lock); ++ set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + conn->c_stage = ISCSI_CONN_CLEANUP_WAIT; + if (session->leadconn == conn) { + /* +@@ -1572,8 +1586,6 @@ + } + spin_unlock_bh(&session->lock); + +- mutex_unlock(&conn->xmitmutex); +- + /* + * Block until all in-progress commands for this connection + * time out or fail. 
+@@ -1610,7 +1622,6 @@ + } + spin_unlock_bh(&session->lock); + +- kfifo_free(conn->immqueue); + kfifo_free(conn->mgmtqueue); + + iscsi_destroy_conn(cls_conn); +@@ -1671,8 +1682,7 @@ + struct iscsi_mgmt_task *mtask, *tmp; + + /* handle pending */ +- while (__kfifo_get(conn->immqueue, (void*)&mtask, sizeof(void*)) || +- __kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) { ++ while (__kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) { + if (mtask == conn->login_mtask) + continue; + debug_scsi("flushing pending mgmt task itt 0x%x\n", mtask->itt); +@@ -1742,12 +1752,12 @@ + conn->c_stage = ISCSI_CONN_STOPPED; + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + spin_unlock_bh(&session->lock); ++ scsi_flush_work(session->host); + + write_lock_bh(conn->recv_lock); + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); + write_unlock_bh(conn->recv_lock); + +- mutex_lock(&conn->xmitmutex); + /* + * for connection level recovery we should not calculate + * header digest. conn->hdr_size used for optimization +@@ -1771,8 +1781,6 @@ + fail_all_commands(conn); + flush_control_queues(session, conn); + spin_unlock_bh(&session->lock); +- +- mutex_unlock(&conn->xmitmutex); + } + + void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) +@@ -1867,6 +1875,30 @@ + case ISCSI_PARAM_EXP_STATSN: + sscanf(buf, "%u", &conn->exp_statsn); + break; ++ case ISCSI_PARAM_USERNAME: ++ kfree(session->username); ++ session->username = kstrdup(buf, GFP_KERNEL); ++ if (!session->username) ++ return -ENOMEM; ++ break; ++ case ISCSI_PARAM_USERNAME_IN: ++ kfree(session->username_in); ++ session->username_in = kstrdup(buf, GFP_KERNEL); ++ if (!session->username_in) ++ return -ENOMEM; ++ break; ++ case ISCSI_PARAM_PASSWORD: ++ kfree(session->password); ++ session->password = kstrdup(buf, GFP_KERNEL); ++ if (!session->password) ++ return -ENOMEM; ++ break; ++ case ISCSI_PARAM_PASSWORD_IN: ++ kfree(session->password_in); ++ session->password_in = kstrdup(buf, GFP_KERNEL); ++ if (!session->password_in) ++ return -ENOMEM; ++ break; + case ISCSI_PARAM_TARGET_NAME: + /* this should not change between logins */ + if (session->targetname) +@@ -1940,6 +1972,18 @@ + case ISCSI_PARAM_TPGT: + len = sprintf(buf, "%d\n", session->tpgt); + break; ++ case ISCSI_PARAM_USERNAME: ++ len = sprintf(buf, "%s\n", session->username); ++ break; ++ case ISCSI_PARAM_USERNAME_IN: ++ len = sprintf(buf, "%s\n", session->username_in); ++ break; ++ case ISCSI_PARAM_PASSWORD: ++ len = sprintf(buf, "%s\n", session->password); ++ break; ++ case ISCSI_PARAM_PASSWORD_IN: ++ len = sprintf(buf, "%s\n", session->password_in); ++ break; + default: + return -ENOSYS; + } +@@ -1990,6 +2034,66 @@ + } + EXPORT_SYMBOL_GPL(iscsi_conn_get_param); + ++int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, ++ char *buf) ++{ ++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata); ++ int len; ++ ++ switch (param) { ++ case ISCSI_HOST_PARAM_NETDEV_NAME: ++ if (!session->netdev) ++ len = sprintf(buf, "%s\n", "default"); ++ else ++ len = sprintf(buf, "%s\n", session->netdev); ++ break; ++ case ISCSI_HOST_PARAM_HWADDRESS: ++ if (!session->hwaddress) ++ len = sprintf(buf, "%s\n", "default"); ++ else ++ len = sprintf(buf, "%s\n", session->hwaddress); ++ break; ++ case ISCSI_HOST_PARAM_INITIATOR_NAME: ++ if (!session->initiatorname) ++ len = sprintf(buf, "%s\n", "unknown"); ++ else ++ len = sprintf(buf, "%s\n", session->initiatorname); ++ break; ++ ++ default: ++ return -ENOSYS; ++ } ++ ++ return len; ++} 
++EXPORT_SYMBOL_GPL(iscsi_host_get_param); ++ ++int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param, ++ char *buf, int buflen) ++{ ++ struct iscsi_session *session = iscsi_hostdata(shost->hostdata); ++ ++ switch (param) { ++ case ISCSI_HOST_PARAM_NETDEV_NAME: ++ if (!session->netdev) ++ session->netdev = kstrdup(buf, GFP_KERNEL); ++ break; ++ case ISCSI_HOST_PARAM_HWADDRESS: ++ if (!session->hwaddress) ++ session->hwaddress = kstrdup(buf, GFP_KERNEL); ++ break; ++ case ISCSI_HOST_PARAM_INITIATOR_NAME: ++ if (!session->initiatorname) ++ session->initiatorname = kstrdup(buf, GFP_KERNEL); ++ break; ++ default: ++ return -ENOSYS; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(iscsi_host_set_param); ++ + MODULE_AUTHOR("Mike Christie"); + MODULE_DESCRIPTION("iSCSI library functions"); + MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c +--- linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/libsas/sas_expander.c 2007-12-21 15:36:12.000000000 -0500 +@@ -38,8 +38,10 @@ + + #if 0 + /* FIXME: smp needs to migrate into the sas class */ +-static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t); +-static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t); ++static ssize_t smp_portal_read(struct kobject *, struct bin_attribute *, ++ char *, loff_t, size_t); ++static ssize_t smp_portal_write(struct kobject *, struct bin_attribute *, ++ char *, loff_t, size_t); + #endif + + /* ---------- SMP task management ---------- */ +@@ -1368,7 +1370,6 @@ + memset(bin_attr, 0, sizeof(*bin_attr)); + + bin_attr->attr.name = SMP_BIN_ATTR_NAME; +- bin_attr->attr.owner = THIS_MODULE; + bin_attr->attr.mode = 0600; + + bin_attr->size = 0; +@@ -1846,8 +1847,9 @@ + #if 0 + /* ---------- SMP portal ---------- */ + +-static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs, +- size_t size) ++static ssize_t smp_portal_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t offs, size_t size) + { + struct domain_device *dev = to_dom_device(kobj); + struct expander_device *ex = &dev->ex_dev; +@@ -1873,8 +1875,9 @@ + return size; + } + +-static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs, +- size_t size) ++static ssize_t smp_portal_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t offs, size_t size) + { + struct domain_device *dev = to_dom_device(kobj); + struct expander_device *ex = &dev->ex_dev; +diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c +--- linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/libsas/sas_scsi_host.c 2007-12-21 15:36:12.000000000 -0500 +@@ -40,6 +40,7 @@ + + #include + #include ++#include + #include + + /* ---------- SCSI Host glue ---------- */ +@@ -76,8 +77,8 @@ + hs = DID_NO_CONNECT; + break; + case SAS_DATA_UNDERRUN: +- sc->resid = ts->residual; +- if (sc->request_bufflen - sc->resid < sc->underflow) ++ scsi_set_resid(sc, ts->residual); ++ if (scsi_bufflen(sc) - scsi_get_resid(sc) < sc->underflow) + hs = DID_ERROR; + break; + case SAS_DATA_OVERRUN: +@@ -161,9 +162,9 @@ + task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd); + memcpy(task->ssp_task.cdb, cmd->cmnd, 16); + +- task->scatter = cmd->request_buffer; +- task->num_scatter = cmd->use_sg; 
+-	task->total_xfer_len = cmd->request_bufflen;
++	task->scatter = scsi_sglist(cmd);
++	task->num_scatter = scsi_sg_count(cmd);
++	task->total_xfer_len = scsi_bufflen(cmd);
+ 	task->data_dir = cmd->sc_data_direction;
+ 
+ 	task->task_done = sas_scsi_task_done;
+@@ -868,8 +869,6 @@
+ {
+ 	struct sas_ha_struct *sas_ha = _sas_ha;
+ 
+-	current->flags |= PF_NOFREEZE;
+-
+ 	while (1) {
+ 		set_current_state(TASK_INTERRUPTIBLE);
+ 		schedule();
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/Makefile linux-2.6.22-591/drivers/scsi/lpfc/Makefile
+--- linux-2.6.22-570/drivers/scsi/lpfc/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/Makefile	2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,7 @@
+ #/*******************************************************************
+ # * This file is part of the Emulex Linux Device Driver for         *
+ # * Fibre Channel Host Bus Adapters.                                *
+-# * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
++# * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
+ # * EMULEX and SLI are trademarks of Emulex.                        *
+ # * www.emulex.com                                                  *
+ # *                                                                 *
+@@ -27,4 +27,5 @@
+ obj-$(CONFIG_SCSI_LPFC) := lpfc.o
+ 
+ lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o \
+-	lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o
++	lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \
++	lpfc_vport.o lpfc_debugfs.o
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc.h	2007-12-21 15:36:12.000000000 -0500
+@@ -19,8 +19,9 @@
+  * included with this package.                                     *
+  *******************************************************************/
+ 
+-struct lpfc_sli2_slim;
++#include <scsi/scsi_host.h>
+ 
++struct lpfc_sli2_slim;
+ 
+ #define LPFC_MAX_TARGET		256	/* max number of targets supported */
+ #define LPFC_MAX_DISC_THREADS	64	/* max outstanding discovery els
+@@ -32,6 +33,20 @@
+ #define LPFC_IOCB_LIST_CNT	2250	/* list of IOCBs for fast-path usage. */
+ #define LPFC_Q_RAMP_UP_INTERVAL 120     /* lun q_depth ramp up interval */
+ 
++/*
++ * Following time intervals are used for adjusting SCSI device
++ * queue depths when there is a driver resource error or Firmware
++ * resource error.
++ */
++#define QUEUE_RAMP_DOWN_INTERVAL	(1 * HZ)   /* 1 Second */
++#define QUEUE_RAMP_UP_INTERVAL		(300 * HZ) /* 5 minutes */
++
++/* Number of exchanges reserved for discovery to complete */
++#define LPFC_DISC_IOCB_BUFF_COUNT 20
++
++#define LPFC_HB_MBOX_INTERVAL   5	/* Heart beat interval in seconds. */
++#define LPFC_HB_MBOX_TIMEOUT    30	/* Heart beat timeout in seconds. */
++
+ /* Define macros for 64 bit support */
+ #define putPaddrLow(addr)    ((uint32_t) (0xffffffff & (u64)(addr)))
+ #define putPaddrHigh(addr)   ((uint32_t) (0xffffffff & (((u64)(addr))>>32)))
+@@ -61,6 +76,11 @@
+ 	uint32_t current_count;
+ };
+ 
++struct hbq_dmabuf {
++	struct lpfc_dmabuf dbuf;
++	uint32_t tag;
++};
++
+ /* Priority bit. Set value to exceed low water mark in lpfc_mem. 
 */
+ #define MEM_PRI		0x100
+ 
+@@ -90,6 +110,29 @@
+ 		uint32_t sli2FwRev;
+ 		uint8_t  sli2FwName[16];
+ 	} rev;
++	struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++		uint32_t rsvd2  :24;  /* Reserved                             */
++		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
++		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
++		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
++		uint32_t chbs   : 1;  /* Configure Host Backing store         */
++		uint32_t cinb   : 1;  /* Enable Interrupt Notification Block  */
++		uint32_t cerbm	: 1;  /* Configure Enhanced Receive Buf Mgmt  */
++		uint32_t cmx	: 1;  /* Configure Max XRIs                   */
++		uint32_t cmr	: 1;  /* Configure Max RPIs                   */
++#else	/*  __LITTLE_ENDIAN */
++		uint32_t cmr	: 1;  /* Configure Max RPIs                   */
++		uint32_t cmx	: 1;  /* Configure Max XRIs                   */
++		uint32_t cerbm	: 1;  /* Configure Enhanced Receive Buf Mgmt  */
++		uint32_t cinb   : 1;  /* Enable Interrupt Notification Block  */
++		uint32_t chbs   : 1;  /* Configure Host Backing store         */
++		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
++		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
++		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
++		uint32_t rsvd2  :24;  /* Reserved                             */
++#endif
++	} sli3Feat;
+ } lpfc_vpd_t;
+ 
+ struct lpfc_scsi_buf;
+@@ -122,6 +165,7 @@
+ 	uint32_t elsRcvRPS;
+ 	uint32_t elsRcvRPL;
+ 	uint32_t elsXmitFLOGI;
++	uint32_t elsXmitFDISC;
+ 	uint32_t elsXmitPLOGI;
+ 	uint32_t elsXmitPRLI;
+ 	uint32_t elsXmitADISC;
+@@ -165,70 +209,53 @@
+ 	struct lpfcMboxq * mbox;
+ };
+ 
+-struct lpfc_hba {
+-	struct lpfc_sli sli;
+-	struct lpfc_sli2_slim *slim2p;
+-	dma_addr_t slim2p_mapping;
+-	uint16_t pci_cfg_value;
++struct lpfc_hba;
+ 
+-	int32_t hba_state;
+ 
+-#define LPFC_STATE_UNKNOWN        0    /* HBA state is unknown */
+-#define LPFC_WARM_START           1    /* HBA state after selective reset */
+-#define LPFC_INIT_START           2    /* Initial state after board reset */
+-#define LPFC_INIT_MBX_CMDS        3    /* Initialize HBA with mbox commands */
+-#define LPFC_LINK_DOWN            4    /* HBA initialized, link is down */
+-#define LPFC_LINK_UP              5    /* Link is up - issue READ_LA */
+-#define LPFC_LOCAL_CFG_LINK       6    /* local NPORT Id configured */
+-#define LPFC_FLOGI                7    /* FLOGI sent to Fabric */
+-#define LPFC_FABRIC_CFG_LINK      8    /* Fabric assigned NPORT Id
+-					  configured */
+-#define LPFC_NS_REG               9    /* Register with NameServer */
+-#define LPFC_NS_QRY               10   /* Query NameServer for NPort ID list */
+-#define LPFC_BUILD_DISC_LIST      11   /* Build ADISC and PLOGI lists for
++enum discovery_state {
++	LPFC_VPORT_UNKNOWN     =  0,    /* vport state is unknown */
++	LPFC_VPORT_FAILED      =  1,    /* vport has failed */
++	LPFC_LOCAL_CFG_LINK    =  6,    /* local NPORT Id configured */
++	LPFC_FLOGI             =  7,    /* FLOGI sent to Fabric */
++	LPFC_FDISC             =  8,    /* FDISC sent for vport */
++	LPFC_FABRIC_CFG_LINK   =  9,    /* Fabric assigned NPORT Id
++					 * configured */
++	LPFC_NS_REG            =  10,   /* Register with NameServer */
++	LPFC_NS_QRY            =  11,   /* Query NameServer for NPort ID list */
++	LPFC_BUILD_DISC_LIST   =  12,   /* Build ADISC and PLOGI lists for
+ 					 * device authentication / discovery */
+-#define LPFC_DISC_AUTH            12   /* Processing ADISC list */
+-#define LPFC_CLEAR_LA             13   /* authentication cmplt - issue
+-					  CLEAR_LA */
+-#define LPFC_HBA_READY            32
+-#define LPFC_HBA_ERROR            -1
++	LPFC_DISC_AUTH         =  13,   /* Processing ADISC list */
++	LPFC_VPORT_READY       =  32,
++};
+ 
+-	int32_t stopped;   /* HBA has not been restarted since last ERATT */
+-	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++enum hba_state {
++	LPFC_LINK_UNKNOWN    =   0,   /* HBA state is unknown */
++	LPFC_WARM_START      =   1,   /* HBA state after selective reset */
++	LPFC_INIT_START      =   2,   /* Initial 
state after board reset */ ++ LPFC_INIT_MBX_CMDS = 3, /* Initialize HBA with mbox commands */ ++ LPFC_LINK_DOWN = 4, /* HBA initialized, link is down */ ++ LPFC_LINK_UP = 5, /* Link is up - issue READ_LA */ ++ LPFC_CLEAR_LA = 6, /* authentication cmplt - issue ++ * CLEAR_LA */ ++ LPFC_HBA_READY = 32, ++ LPFC_HBA_ERROR = -1 ++}; + +- uint32_t fc_eventTag; /* event tag for link attention */ +- uint32_t fc_prli_sent; /* cntr for outstanding PRLIs */ ++struct lpfc_vport { ++ struct list_head listentry; ++ struct lpfc_hba *phba; ++ uint8_t port_type; ++#define LPFC_PHYSICAL_PORT 1 ++#define LPFC_NPIV_PORT 2 ++#define LPFC_FABRIC_PORT 3 ++ enum discovery_state port_state; + +- uint32_t num_disc_nodes; /*in addition to hba_state */ ++ uint16_t vpi; + +- struct timer_list fc_estabtmo; /* link establishment timer */ +- struct timer_list fc_disctmo; /* Discovery rescue timer */ +- struct timer_list fc_fdmitmo; /* fdmi timer */ +- /* These fields used to be binfo */ +- struct lpfc_name fc_nodename; /* fc nodename */ +- struct lpfc_name fc_portname; /* fc portname */ +- uint32_t fc_pref_DID; /* preferred D_ID */ +- uint8_t fc_pref_ALPA; /* preferred AL_PA */ +- uint32_t fc_edtov; /* E_D_TOV timer value */ +- uint32_t fc_arbtov; /* ARB_TOV timer value */ +- uint32_t fc_ratov; /* R_A_TOV timer value */ +- uint32_t fc_rttov; /* R_T_TOV timer value */ +- uint32_t fc_altov; /* AL_TOV timer value */ +- uint32_t fc_crtov; /* C_R_TOV timer value */ +- uint32_t fc_citov; /* C_I_TOV timer value */ +- uint32_t fc_myDID; /* fibre channel S_ID */ +- uint32_t fc_prevDID; /* previous fibre channel S_ID */ +- +- struct serv_parm fc_sparam; /* buffer for our service parameters */ +- struct serv_parm fc_fabparam; /* fabric service parameters buffer */ +- uint8_t alpa_map[128]; /* AL_PA map from READ_LA */ +- +- uint8_t fc_ns_retry; /* retries for fabric nameserver */ +- uint32_t fc_nlp_cnt; /* outstanding NODELIST requests */ +- uint32_t fc_rscn_id_cnt; /* count of RSCNs payloads in list */ +- struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN]; +- uint32_t lmt; + uint32_t fc_flag; /* FC flags */ ++/* Several of these flags are HBA centric and should be moved to ++ * phba->link_flag (e.g. 
FC_PTP, FC_PUBLIC_LOOP)
++ */
+ #define FC_PT2PT		0x1	 /* pt2pt with no fabric */
+ #define FC_PT2PT_PLOGI		0x2	 /* pt2pt initiate PLOGI */
+ #define FC_DISC_TMO		0x4	 /* Discovery timer running */
+@@ -239,22 +266,14 @@
+ #define FC_OFFLINE_MODE		0x80	 /* Interface is offline for diag */
+ #define FC_FABRIC		0x100	 /* We are fabric attached */
+ #define FC_ESTABLISH_LINK	0x200	 /* Reestablish Link */
+-#define FC_RSCN_DISCOVERY	0x400	 /* Authenticate all devices after RSCN*/
+-#define FC_BLOCK_MGMT_IO	0x800	 /* Don't allow mgmt mbx or iocb cmds */
+-#define FC_LOADING		0x1000	 /* HBA in process of loading drvr */
+-#define FC_UNLOADING		0x2000	 /* HBA in process of unloading drvr */
++#define FC_RSCN_DISCOVERY	0x400	 /* Auth all devices after RSCN */
+ #define FC_SCSI_SCAN_TMO        0x4000	 /* scsi scan timer running */
+ #define FC_ABORT_DISCOVERY	0x8000	 /* we want to abort discovery */
+ #define FC_NDISC_ACTIVE		0x10000	 /* NPort discovery active */
+ #define FC_BYPASSED_MODE	0x20000	 /* NPort is in bypassed mode */
+-#define FC_LOOPBACK_MODE	0x40000	 /* NPort is in Loopback mode */
+-					 /* This flag is set while issuing */
+-					 /* INIT_LINK mailbox command */
+-#define FC_IGNORE_ERATT	0x80000	 /* intr handler should ignore ERATT */
+-
+-	uint32_t fc_topology;		/* link topology, from LINK INIT */
+-
+-	struct lpfc_stats fc_stat;
++#define FC_RFF_NOT_SUPPORTED	0x40000	 /* RFF_ID was rejected by switch */
++#define FC_VPORT_NEEDS_REG_VPI	0x80000  /* Needs to have its vpi registered */
++#define FC_RSCN_DEFERRED	0x100000 /* A deferred RSCN being processed */
+ 
+ 	struct list_head fc_nodes;
+ 
+@@ -267,10 +286,131 @@
+ 	uint16_t fc_map_cnt;
+ 	uint16_t fc_npr_cnt;
+ 	uint16_t fc_unused_cnt;
++	struct serv_parm fc_sparam;	/* buffer for our service parameters */
++
++	uint32_t fc_myDID;		/* fibre channel S_ID */
++	uint32_t fc_prevDID;		/* previous fibre channel S_ID */
++
++	int32_t stopped;   /* HBA has not been restarted since last ERATT */
++	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++
++	uint32_t num_disc_nodes;	/*in addition to hba_state */
++
++	uint32_t fc_nlp_cnt;	/* outstanding NODELIST requests */
++	uint32_t fc_rscn_id_cnt;	/* count of RSCNs payloads in list */
++	struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
++	struct lpfc_name fc_nodename;	/* fc nodename */
++	struct lpfc_name fc_portname;	/* fc portname */
++
++	struct lpfc_work_evt disc_timeout_evt;
++
++	struct timer_list fc_disctmo;	/* Discovery rescue timer */
++	uint8_t fc_ns_retry;	/* retries for fabric nameserver */
++	uint32_t fc_prli_sent;	/* cntr for outstanding PRLIs */
++
++	spinlock_t work_port_lock;
++	uint32_t work_port_events; /* Timeout to be handled  */
++#define WORKER_DISC_TMO                0x1	/* vport: Discovery timeout */
++#define WORKER_ELS_TMO                 0x2	/* vport: ELS timeout */
++#define WORKER_FDMI_TMO                0x4	/* vport: FDMI timeout */
++
++#define WORKER_MBOX_TMO                0x100	/* hba: MBOX timeout */
++#define WORKER_HB_TMO                  0x200	/* hba: Heart beat timeout */
++#define WORKER_FABRIC_BLOCK_TMO        0x400	/* hba: fabric block timeout */
++#define WORKER_RAMP_DOWN_QUEUE         0x800	/* hba: Decrease Q depth */
++#define WORKER_RAMP_UP_QUEUE           0x1000	/* hba: Increase Q depth */
++
++	struct timer_list fc_fdmitmo;
++	struct timer_list els_tmofunc;
++
++	int unreg_vpi_cmpl;
++
++	uint8_t load_flag;
++#define FC_LOADING		0x1	/* HBA in process of loading drvr */
++#define FC_UNLOADING		0x2	/* HBA in process of unloading drvr */
++	char  *vname;		        /* Application assigned name */
++	struct fc_vport *fc_vport;
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct dentry *debug_disc_trc;
++	
struct dentry *debug_nodelist;
++	struct dentry *vport_debugfs_root;
++	struct lpfc_disc_trc *disc_trc;
++	atomic_t disc_trc_cnt;
++#endif
++};
++
++struct hbq_s {
++	uint16_t entry_count;	  /* Current number of HBQ slots */
++	uint32_t next_hbqPutIdx;  /* Index to next HBQ slot to use */
++	uint32_t hbqPutIdx;	  /* HBQ slot to use */
++	uint32_t local_hbqGetIdx; /* Local copy of Get index from Port */
++};
++
++#define LPFC_MAX_HBQS	16
++/* this matches the position in the lpfc_hbq_defs array */
++#define LPFC_ELS_HBQ	0
++
++struct lpfc_hba {
++	struct lpfc_sli sli;
++	uint32_t sli_rev;		/* SLI2 or SLI3 */
++	uint32_t sli3_options;		/* Mask of enabled SLI3 options */
++#define LPFC_SLI3_ENABLED	 0x01
++#define LPFC_SLI3_HBQ_ENABLED	 0x02
++#define LPFC_SLI3_NPIV_ENABLED	 0x04
++#define LPFC_SLI3_VPORT_TEARDOWN 0x08
++	uint32_t iocb_cmd_size;
++	uint32_t iocb_rsp_size;
++
++	enum hba_state link_state;
++	uint32_t link_flag;	/* link state flags */
++#define LS_LOOPBACK_MODE      0x1	/* NPort is in Loopback mode */
++					/* This flag is set while issuing */
++					/* INIT_LINK mailbox command */
++#define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
++#define LS_IGNORE_ERATT       0x3	/* intr handler should ignore ERATT */
++
++	struct lpfc_sli2_slim *slim2p;
++	struct lpfc_dmabuf hbqslimp;
++
++	dma_addr_t slim2p_mapping;
++
++	uint16_t pci_cfg_value;
++
++	uint8_t work_found;
++#define LPFC_MAX_WORKER_ITERATION  4
++
++	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++
++	uint32_t fc_eventTag;	/* event tag for link attention */
++
++
++	struct timer_list fc_estabtmo;	/* link establishment timer */
++	/* These fields used to be binfo */
++	uint32_t fc_pref_DID;	/* preferred D_ID */
++	uint8_t  fc_pref_ALPA;	/* preferred AL_PA */
++	uint32_t fc_edtov;	/* E_D_TOV timer value */
++	uint32_t fc_arbtov;	/* ARB_TOV timer value */
++	uint32_t fc_ratov;	/* R_A_TOV timer value */
++	uint32_t fc_rttov;	/* R_T_TOV timer value */
++	uint32_t fc_altov;	/* AL_TOV timer value */
++	uint32_t fc_crtov;	/* C_R_TOV timer value */
++	uint32_t fc_citov;	/* C_I_TOV timer value */
++
++	struct serv_parm fc_fabparam;	/* fabric service parameters buffer */
++	uint8_t alpa_map[128];	/* AL_PA map from READ_LA */
++
++	uint32_t lmt;
++
++	uint32_t fc_topology;	/* link topology, from LINK INIT */
++
++	struct lpfc_stats fc_stat;
++
+ 	struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */
+ 	uint32_t nport_event_cnt;	/* timestamp for nlplist entry */
+ 
+-	uint32_t wwnn[2];
++	uint8_t  wwnn[8];
++	uint8_t  wwpn[8];
+ 	uint32_t RandomData[7];
+ 
+ 	uint32_t cfg_log_verbose;
+@@ -278,6 +418,9 @@
+ 	uint32_t cfg_nodev_tmo;
+ 	uint32_t cfg_devloss_tmo;
+ 	uint32_t cfg_hba_queue_depth;
++	uint32_t cfg_peer_port_login;
++	uint32_t cfg_vport_restrict_login;
++	uint32_t cfg_npiv_enable;
+ 	uint32_t cfg_fcp_class;
+ 	uint32_t cfg_use_adisc;
+ 	uint32_t cfg_ack0;
+@@ -304,22 +447,20 @@
+ 
+ 	lpfc_vpd_t vpd;		/* vital product data */
+ 
+-	struct Scsi_Host *host;
+ 	struct pci_dev *pcidev;
+ 	struct list_head      work_list;
+ 	uint32_t              work_ha;      /* Host Attention Bits for WT */
+ 	uint32_t              work_ha_mask; /* HA Bits owned by WT        */
+ 	uint32_t              work_hs;      /* HS stored in case of ERRAT */
+ 	uint32_t              work_status[2]; /* Extra status from SLIM */
+-	uint32_t              work_hba_events; /* Timeout to be handled  */
+-#define WORKER_DISC_TMO                0x1	/* Discovery timeout */
+-#define WORKER_ELS_TMO                 0x2	/* ELS timeout */
+-#define WORKER_MBOX_TMO                0x4	/* MBOX timeout */
+-#define WORKER_FDMI_TMO                0x8	/* FDMI timeout */
+ 
+ 	wait_queue_head_t     *work_wait;
+ 	struct task_struct 
*worker_thread;
+ 
++	struct list_head hbq_buffer_list;
++	uint32_t hbq_count;	        /* Count of configured HBQs */
++	struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indices  */
++
+ 	unsigned long pci_bar0_map;     /* Physical address for PCI BAR0 */
+ 	unsigned long pci_bar2_map;     /* Physical address for PCI BAR2 */
+ 	void __iomem *slim_memmap_p;	/* Kernel memory mapped address for
+@@ -334,6 +475,10 @@
+ 					   reg */
+ 	void __iomem *HCregaddr;	/* virtual address for host ctl reg */
+ 
++	struct lpfc_hgp __iomem *host_gp; /* Host side get/put pointers */
++	uint32_t __iomem  *hbq_put;     /* Address in SLIM to HBQ put ptrs */
++	uint32_t          *hbq_get;     /* Host mem address of HBQ get ptrs */
++
+ 	int brd_no;			/* FC board number */
+ 
+ 	char SerialNumber[32];		/* adapter Serial Number */
+@@ -353,7 +498,6 @@
+ 	uint8_t soft_wwn_enable;
+ 
+ 	struct timer_list fcp_poll_timer;
+-	struct timer_list els_tmofunc;
+ 
+ 	/*
+ 	 * stat counters
+@@ -370,31 +514,69 @@
+ 	uint32_t total_scsi_bufs;
+ 	struct list_head lpfc_iocb_list;
+ 	uint32_t total_iocbq_bufs;
++	spinlock_t hbalock;
+ 
+ 	/* pci_mem_pools */
+ 	struct pci_pool *lpfc_scsi_dma_buf_pool;
+ 	struct pci_pool *lpfc_mbuf_pool;
++	struct pci_pool *lpfc_hbq_pool;
+ 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
+ 
+ 	mempool_t *mbox_mem_pool;
+ 	mempool_t *nlp_mem_pool;
+ 
+ 	struct fc_host_statistics link_stats;
++
++	struct list_head port_list;
++	struct lpfc_vport *pport;	/* physical lpfc_vport pointer */
++	uint16_t max_vpi;		/* Maximum virtual nports */
++#define LPFC_MAX_VPI 100		/* Max number of VPorts supported */
++	unsigned long *vpi_bmask;	/* vpi allocation table */
++
++	/* Data structure used by fabric iocb scheduler */
++	struct list_head fabric_iocb_list;
++	atomic_t fabric_iocb_count;
++	struct timer_list fabric_block_timer;
++	unsigned long bit_flags;
++#define	FABRIC_COMANDS_BLOCKED	0
++	atomic_t num_rsrc_err;
++	atomic_t num_cmd_success;
++	unsigned long last_rsrc_error_time;
++	unsigned long last_ramp_down_time;
++	unsigned long last_ramp_up_time;
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct dentry *hba_debugfs_root;
++	atomic_t debugfs_vport_count;
++#endif
++
++	/* Fields used for heart beat. 
*/ ++ unsigned long last_completion_time; ++ struct timer_list hb_tmofunc; ++ uint8_t hb_outstanding; + }; + ++static inline struct Scsi_Host * ++lpfc_shost_from_vport(struct lpfc_vport *vport) ++{ ++ return container_of((void *) vport, struct Scsi_Host, hostdata[0]); ++} ++ + static inline void +-lpfc_set_loopback_flag(struct lpfc_hba *phba) { ++lpfc_set_loopback_flag(struct lpfc_hba *phba) ++{ + if (phba->cfg_topology == FLAGS_LOCAL_LB) +- phba->fc_flag |= FC_LOOPBACK_MODE; ++ phba->link_flag |= LS_LOOPBACK_MODE; + else +- phba->fc_flag &= ~FC_LOOPBACK_MODE; ++ phba->link_flag &= ~LS_LOOPBACK_MODE; + } + +-struct rnidrsp { +- void *buf; +- uint32_t uniqueid; +- struct list_head list; +- uint32_t data; +-}; ++static inline int ++lpfc_is_link_up(struct lpfc_hba *phba) ++{ ++ return phba->link_state == LPFC_LINK_UP || ++ phba->link_state == LPFC_CLEAR_LA || ++ phba->link_state == LPFC_HBA_READY; ++} + + #define FC_REG_DUMP_EVENT 0x10 /* Register for Dump events */ ++ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_attr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -39,6 +39,7 @@ + #include "lpfc_version.h" + #include "lpfc_compat.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" + + #define LPFC_DEF_DEVLOSS_TMO 30 + #define LPFC_MIN_DEVLOSS_TMO 1 +@@ -76,116 +77,156 @@ + lpfc_info_show(struct class_device *cdev, char *buf) + { + struct Scsi_Host *host = class_to_shost(cdev); ++ + return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host)); + } + + static ssize_t + lpfc_serialnum_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber); + } + + static ssize_t + lpfc_modeldesc_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc); + } + + static ssize_t + lpfc_modelname_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName); + } + + static ssize_t + lpfc_programtype_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType); + } + + static ssize_t +-lpfc_portnum_show(struct class_device *cdev, char *buf) ++lpfc_vportnum_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba 
*phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port); + } + + static ssize_t + lpfc_fwrev_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + char fwrev[32]; ++ + lpfc_decode_firmware_rev(phba, fwrev, 1); +- return snprintf(buf, PAGE_SIZE, "%s\n",fwrev); ++ return snprintf(buf, PAGE_SIZE, "%s, sli-%d\n", fwrev, phba->sli_rev); + } + + static ssize_t + lpfc_hdw_show(struct class_device *cdev, char *buf) + { + char hdw[9]; +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + lpfc_vpd_t *vp = &phba->vpd; ++ + lpfc_jedec_to_ascii(vp->rev.biuRev, hdw); + return snprintf(buf, PAGE_SIZE, "%s\n", hdw); + } + static ssize_t + lpfc_option_rom_version_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion); + } + static ssize_t + lpfc_state_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + int len = 0; +- switch (phba->hba_state) { +- case LPFC_STATE_UNKNOWN: ++ ++ switch (phba->link_state) { ++ case LPFC_LINK_UNKNOWN: + case LPFC_WARM_START: + case LPFC_INIT_START: + case LPFC_INIT_MBX_CMDS: + case LPFC_LINK_DOWN: ++ case LPFC_HBA_ERROR: + len += snprintf(buf + len, PAGE_SIZE-len, "Link Down\n"); + break; + case LPFC_LINK_UP: ++ case LPFC_CLEAR_LA: ++ case LPFC_HBA_READY: ++ len += snprintf(buf + len, PAGE_SIZE-len, "Link Up - \n"); ++ ++ switch (vport->port_state) { ++ len += snprintf(buf + len, PAGE_SIZE-len, ++ "initializing\n"); ++ break; + case LPFC_LOCAL_CFG_LINK: +- len += snprintf(buf + len, PAGE_SIZE-len, "Link Up\n"); ++ len += snprintf(buf + len, PAGE_SIZE-len, ++ "Configuring Link\n"); + break; ++ case LPFC_FDISC: + case LPFC_FLOGI: + case LPFC_FABRIC_CFG_LINK: + case LPFC_NS_REG: + case LPFC_NS_QRY: + case LPFC_BUILD_DISC_LIST: + case LPFC_DISC_AUTH: +- case LPFC_CLEAR_LA: +- len += snprintf(buf + len, PAGE_SIZE-len, +- "Link Up - Discovery\n"); ++ len += snprintf(buf + len, PAGE_SIZE - len, ++ "Discovery\n"); + break; +- case LPFC_HBA_READY: +- len += snprintf(buf + len, PAGE_SIZE-len, +- "Link Up - Ready:\n"); ++ case LPFC_VPORT_READY: ++ len += snprintf(buf + len, PAGE_SIZE - len, "Ready\n"); ++ break; ++ ++ case LPFC_VPORT_FAILED: ++ len += snprintf(buf + len, PAGE_SIZE - len, "Failed\n"); ++ break; ++ ++ case LPFC_VPORT_UNKNOWN: ++ len += snprintf(buf + len, PAGE_SIZE - len, ++ "Unknown\n"); ++ break; ++ } ++ + 
if (phba->fc_topology == TOPOLOGY_LOOP) { +- if (phba->fc_flag & FC_PUBLIC_LOOP) ++ if (vport->fc_flag & FC_PUBLIC_LOOP) + len += snprintf(buf + len, PAGE_SIZE-len, + " Public Loop\n"); + else + len += snprintf(buf + len, PAGE_SIZE-len, + " Private Loop\n"); + } else { +- if (phba->fc_flag & FC_FABRIC) ++ if (vport->fc_flag & FC_FABRIC) + len += snprintf(buf + len, PAGE_SIZE-len, + " Fabric\n"); + else +@@ -193,29 +234,32 @@ + " Point-2-Point\n"); + } + } ++ + return len; + } + + static ssize_t + lpfc_num_discovered_ports_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; +- return snprintf(buf, PAGE_SIZE, "%d\n", phba->fc_map_cnt + +- phba->fc_unmap_cnt); ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ ++ return snprintf(buf, PAGE_SIZE, "%d\n", ++ vport->fc_map_cnt + vport->fc_unmap_cnt); + } + + + static int +-lpfc_issue_lip(struct Scsi_Host *host) ++lpfc_issue_lip(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *pmboxq; + int mbxstatus = MBXERR_ERROR; + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || +- (phba->fc_flag & FC_BLOCK_MGMT_IO) || +- (phba->hba_state != LPFC_HBA_READY)) ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || ++ (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) || ++ (vport->port_state != LPFC_VPORT_READY)) + return -EPERM; + + pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL); +@@ -238,9 +282,7 @@ + } + + lpfc_set_loopback_flag(phba); +- if (mbxstatus == MBX_TIMEOUT) +- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- else ++ if (mbxstatus != MBX_TIMEOUT) + mempool_free(pmboxq, phba->mbox_mem_pool); + + if (mbxstatus == MBXERR_ERROR) +@@ -320,8 +362,10 @@ + static ssize_t + lpfc_issue_reset(struct class_device *cdev, const char *buf, size_t count) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + int status = -EINVAL; + + if (strncmp(buf, "selective", sizeof("selective") - 1) == 0) +@@ -336,23 +380,26 @@ + static ssize_t + lpfc_nport_evt_cnt_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt); + } + + static ssize_t + lpfc_board_mode_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + char * state; + +- if (phba->hba_state == LPFC_HBA_ERROR) ++ if (phba->link_state == LPFC_HBA_ERROR) + state = "error"; +- else if (phba->hba_state == LPFC_WARM_START) ++ else if (phba->link_state == LPFC_WARM_START) + state = "warm start"; +- else if (phba->hba_state == LPFC_INIT_START) ++ else if (phba->link_state == LPFC_INIT_START) + 
state = "offline"; + else + state = "online"; +@@ -363,8 +410,9 @@ + static ssize_t + lpfc_board_mode_store(struct class_device *cdev, const char *buf, size_t count) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct completion online_compl; + int status=0; + +@@ -389,11 +437,166 @@ + return -EIO; + } + ++int ++lpfc_get_hba_info(struct lpfc_hba *phba, ++ uint32_t *mxri, uint32_t *axri, ++ uint32_t *mrpi, uint32_t *arpi, ++ uint32_t *mvpi, uint32_t *avpi) ++{ ++ struct lpfc_sli *psli = &phba->sli; ++ LPFC_MBOXQ_t *pmboxq; ++ MAILBOX_t *pmb; ++ int rc = 0; ++ ++ /* ++ * prevent udev from issuing mailbox commands until the port is ++ * configured. ++ */ ++ if (phba->link_state < LPFC_LINK_DOWN || ++ !phba->mbox_mem_pool || ++ (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0) ++ return 0; ++ ++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) ++ return 0; ++ ++ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!pmboxq) ++ return 0; ++ memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t)); ++ ++ pmb = &pmboxq->mb; ++ pmb->mbxCommand = MBX_READ_CONFIG; ++ pmb->mbxOwner = OWN_HOST; ++ pmboxq->context1 = NULL; ++ ++ if ((phba->pport->fc_flag & FC_OFFLINE_MODE) || ++ (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) ++ rc = MBX_NOT_FINISHED; ++ else ++ rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); ++ ++ if (rc != MBX_SUCCESS) { ++ if (rc != MBX_TIMEOUT) ++ mempool_free(pmboxq, phba->mbox_mem_pool); ++ return 0; ++ } ++ ++ if (mrpi) ++ *mrpi = pmb->un.varRdConfig.max_rpi; ++ if (arpi) ++ *arpi = pmb->un.varRdConfig.avail_rpi; ++ if (mxri) ++ *mxri = pmb->un.varRdConfig.max_xri; ++ if (axri) ++ *axri = pmb->un.varRdConfig.avail_xri; ++ if (mvpi) ++ *mvpi = pmb->un.varRdConfig.max_vpi; ++ if (avpi) ++ *avpi = pmb->un.varRdConfig.avail_vpi; ++ ++ mempool_free(pmboxq, phba->mbox_mem_pool); ++ return 1; ++} ++ ++static ssize_t ++lpfc_max_rpi_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt; ++ ++ if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, NULL, NULL, NULL)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_used_rpi_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt, acnt; ++ ++ if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, &acnt, NULL, NULL)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_max_xri_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt; ++ ++ if (lpfc_get_hba_info(phba, &cnt, NULL, NULL, NULL, NULL, NULL)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_used_xri_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport 
*) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt, acnt; ++ ++ if (lpfc_get_hba_info(phba, &cnt, &acnt, NULL, NULL, NULL, NULL)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_max_vpi_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt; ++ ++ if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, NULL)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", cnt); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_used_vpi_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ uint32_t cnt, acnt; ++ ++ if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, &acnt)) ++ return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt)); ++ return snprintf(buf, PAGE_SIZE, "Unknown\n"); ++} ++ ++static ssize_t ++lpfc_npiv_info_show(struct class_device *cdev, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ ++ if (!(phba->max_vpi)) ++ return snprintf(buf, PAGE_SIZE, "NPIV Not Supported\n"); ++ if (vport->port_type == LPFC_PHYSICAL_PORT) ++ return snprintf(buf, PAGE_SIZE, "NPIV Physical\n"); ++ return snprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi); ++} ++ + static ssize_t + lpfc_poll_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + + return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll); + } +@@ -402,8 +605,9 @@ + lpfc_poll_store(struct class_device *cdev, const char *buf, + size_t count) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + uint32_t creg_val; + uint32_t old_val; + int val=0; +@@ -417,7 +621,7 @@ + if ((val & 0x3) != val) + return -EINVAL; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + old_val = phba->cfg_poll; + +@@ -432,16 +636,16 @@ + lpfc_poll_start_timer(phba); + } + } else if (val != 0x0) { +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EINVAL; + } + + if (!(val & DISABLE_FCP_RING_INT) && + (old_val & DISABLE_FCP_RING_INT)) + { +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + del_timer(&phba->fcp_poll_timer); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + creg_val = readl(phba->HCregaddr); + creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING); + writel(creg_val, phba->HCregaddr); +@@ -450,7 +654,7 @@ + + phba->cfg_poll = val; + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return strlen(buf); + } +@@ -459,8 +663,9 @@ + static ssize_t \ + lpfc_##attr##_show(struct class_device *cdev, char *buf) \ + { \ +- struct Scsi_Host *host = 
class_to_shost(cdev);\ +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ ++ struct Scsi_Host *shost = class_to_shost(cdev);\ ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ ++ struct lpfc_hba *phba = vport->phba;\ + int val = 0;\ + val = phba->cfg_##attr;\ + return snprintf(buf, PAGE_SIZE, "%d\n",\ +@@ -471,8 +676,9 @@ + static ssize_t \ + lpfc_##attr##_show(struct class_device *cdev, char *buf) \ + { \ +- struct Scsi_Host *host = class_to_shost(cdev);\ +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ ++ struct Scsi_Host *shost = class_to_shost(cdev);\ ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ ++ struct lpfc_hba *phba = vport->phba;\ + int val = 0;\ + val = phba->cfg_##attr;\ + return snprintf(buf, PAGE_SIZE, "%#x\n",\ +@@ -514,8 +720,9 @@ + static ssize_t \ + lpfc_##attr##_store(struct class_device *cdev, const char *buf, size_t count) \ + { \ +- struct Scsi_Host *host = class_to_shost(cdev);\ +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\ ++ struct Scsi_Host *shost = class_to_shost(cdev);\ ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\ ++ struct lpfc_hba *phba = vport->phba;\ + int val=0;\ + if (!isdigit(buf[0]))\ + return -EINVAL;\ +@@ -576,7 +783,7 @@ + static CLASS_DEVICE_ATTR(modeldesc, S_IRUGO, lpfc_modeldesc_show, NULL); + static CLASS_DEVICE_ATTR(modelname, S_IRUGO, lpfc_modelname_show, NULL); + static CLASS_DEVICE_ATTR(programtype, S_IRUGO, lpfc_programtype_show, NULL); +-static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_portnum_show, NULL); ++static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_vportnum_show, NULL); + static CLASS_DEVICE_ATTR(fwrev, S_IRUGO, lpfc_fwrev_show, NULL); + static CLASS_DEVICE_ATTR(hdw, S_IRUGO, lpfc_hdw_show, NULL); + static CLASS_DEVICE_ATTR(state, S_IRUGO, lpfc_state_show, NULL); +@@ -592,6 +799,13 @@ + static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR, + lpfc_board_mode_show, lpfc_board_mode_store); + static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset); ++static CLASS_DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL); ++static CLASS_DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL); ++static CLASS_DEVICE_ATTR(max_rpi, S_IRUGO, lpfc_max_rpi_show, NULL); ++static CLASS_DEVICE_ATTR(used_rpi, S_IRUGO, lpfc_used_rpi_show, NULL); ++static CLASS_DEVICE_ATTR(max_xri, S_IRUGO, lpfc_max_xri_show, NULL); ++static CLASS_DEVICE_ATTR(used_xri, S_IRUGO, lpfc_used_xri_show, NULL); ++static CLASS_DEVICE_ATTR(npiv_info, S_IRUGO, lpfc_npiv_info_show, NULL); + + + static char *lpfc_soft_wwn_key = "C99G71SL8032A"; +@@ -600,8 +814,9 @@ + lpfc_soft_wwn_enable_store(struct class_device *cdev, const char *buf, + size_t count) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + unsigned int cnt = count; + + /* +@@ -634,8 +849,10 @@ + static ssize_t + lpfc_soft_wwpn_show(struct class_device *cdev, char *buf) + { +- struct Scsi_Host *host = class_to_shost(cdev); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ + return snprintf(buf, PAGE_SIZE, "0x%llx\n", + (unsigned long long)phba->cfg_soft_wwpn); + } +@@ -644,8 +861,9 @@ + static ssize_t + 
lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ struct completion online_compl;
+ int stat1=0, stat2=0;
+ unsigned int i, j, cnt=count;
+@@ -680,9 +898,9 @@
+ }
+ }
+ phba->cfg_soft_wwpn = wwn_to_u64(wwpn);
+- fc_host_port_name(host) = phba->cfg_soft_wwpn;
++ fc_host_port_name(shost) = phba->cfg_soft_wwpn;
+ if (phba->cfg_soft_wwnn)
+- fc_host_node_name(host) = phba->cfg_soft_wwnn;
++ fc_host_node_name(shost) = phba->cfg_soft_wwnn;
+
+ dev_printk(KERN_NOTICE, &phba->pcidev->dev,
+ "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
+@@ -777,6 +995,15 @@
+ static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
+ lpfc_poll_show, lpfc_poll_store);
+
++int lpfc_sli_mode = 0;
++module_param(lpfc_sli_mode, int, 0);
++MODULE_PARM_DESC(lpfc_sli_mode, "SLI mode selector:"
++ " 0 - auto (SLI-3 if supported),"
++ " 2 - select SLI-2 even on SLI-3 capable HBAs,"
++ " 3 - select SLI-3");
++
++LPFC_ATTR_R(npiv_enable, 0, 0, 1, "Enable NPIV functionality");
++
+ /*
+ # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
+ # until the timer expires. Value range is [0,255]. Default value is 30.
+@@ -790,8 +1017,9 @@
+ static ssize_t
+ lpfc_nodev_tmo_show(struct class_device *cdev, char *buf)
+ {
+- struct Scsi_Host *host = class_to_shost(cdev);
+- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++ struct Scsi_Host *shost = class_to_shost(cdev);
++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++ struct lpfc_hba *phba = vport->phba;
+ int val = 0;
+ val = phba->cfg_devloss_tmo;
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+@@ -832,13 +1060,19 @@
+ static void
+ lpfc_update_rport_devloss_tmo(struct lpfc_hba *phba)
+ {
++ struct lpfc_vport *vport;
++ struct Scsi_Host *shost;
+ struct lpfc_nodelist *ndlp;
+
+- spin_lock_irq(phba->host->host_lock);
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp)
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ shost = lpfc_shost_from_vport(vport);
++ spin_lock_irq(shost->host_lock);
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp)
+ if (ndlp->rport)
+- ndlp->rport->dev_loss_tmo = phba->cfg_devloss_tmo;
+- spin_unlock_irq(phba->host->host_lock);
++ ndlp->rport->dev_loss_tmo =
++ phba->cfg_devloss_tmo;
++ spin_unlock_irq(shost->host_lock);
++ }
+ }
+
+ static int
+@@ -946,6 +1180,33 @@
+ "Max number of FCP commands we can queue to a lpfc HBA");
+
+ /*
++# peer_port_login: This parameter allows/prevents logins
++# between peer ports hosted on the same physical port.
++# When this parameter is set 0 peer ports of same physical port
++# are not allowed to login to each other.
++# When this parameter is set 1 peer ports of same physical port
++# are allowed to login to each other.
++# Default value of this parameter is 0.
++*/
++LPFC_ATTR_R(peer_port_login, 0, 0, 1,
++ "Allow peer ports on the same physical port to login to each "
++ "other.");
++
++/*
++# vport_restrict_login: This parameter allows/prevents logins
++# between Virtual Ports and remote initiators.
++# When this parameter is not set (0) Virtual Ports will accept PLOGIs from
++# other initiators and will attempt to PLOGI all remote ports.
++# When this parameter is set (1) Virtual Ports will reject PLOGIs from ++# remote ports and will not attempt to PLOGI to other initiators. ++# This parameter does not restrict to the physical port. ++# This parameter does not restrict logins to Fabric resident remote ports. ++# Default value of this parameter is 1. ++*/ ++LPFC_ATTR_RW(vport_restrict_login, 1, 0, 1, ++ "Restrict virtual ports login to remote initiators."); ++ ++/* + # Some disk devices have a "select ID" or "select Target" capability. + # From a protocol standpoint "select ID" usually means select the + # Fibre channel "ALPA". In the FC-AL Profile there is an "informative +@@ -1088,7 +1349,8 @@ + LPFC_ATTR_R(use_msi, 0, 0, 1, "Use Message Signaled Interrupts, if possible"); + + +-struct class_device_attribute *lpfc_host_attrs[] = { ++ ++struct class_device_attribute *lpfc_hba_attrs[] = { + &class_device_attr_info, + &class_device_attr_serialnum, + &class_device_attr_modeldesc, +@@ -1104,6 +1366,8 @@ + &class_device_attr_lpfc_log_verbose, + &class_device_attr_lpfc_lun_queue_depth, + &class_device_attr_lpfc_hba_queue_depth, ++ &class_device_attr_lpfc_peer_port_login, ++ &class_device_attr_lpfc_vport_restrict_login, + &class_device_attr_lpfc_nodev_tmo, + &class_device_attr_lpfc_devloss_tmo, + &class_device_attr_lpfc_fcp_class, +@@ -1119,9 +1383,17 @@ + &class_device_attr_lpfc_multi_ring_type, + &class_device_attr_lpfc_fdmi_on, + &class_device_attr_lpfc_max_luns, ++ &class_device_attr_lpfc_npiv_enable, + &class_device_attr_nport_evt_cnt, + &class_device_attr_management_version, + &class_device_attr_board_mode, ++ &class_device_attr_max_vpi, ++ &class_device_attr_used_vpi, ++ &class_device_attr_max_rpi, ++ &class_device_attr_used_rpi, ++ &class_device_attr_max_xri, ++ &class_device_attr_used_xri, ++ &class_device_attr_npiv_info, + &class_device_attr_issue_reset, + &class_device_attr_lpfc_poll, + &class_device_attr_lpfc_poll_tmo, +@@ -1136,9 +1408,11 @@ + sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count) + { + size_t buf_off; +- struct Scsi_Host *host = class_to_shost(container_of(kobj, +- struct class_device, kobj)); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct class_device *cdev = container_of(kobj, struct class_device, ++ kobj); ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + + if ((off + count) > FF_REG_AREA_SIZE) + return -ERANGE; +@@ -1148,18 +1422,16 @@ + if (off % 4 || count % 4 || (unsigned long)buf % 4) + return -EINVAL; + +- spin_lock_irq(phba->host->host_lock); +- +- if (!(phba->fc_flag & FC_OFFLINE_MODE)) { +- spin_unlock_irq(phba->host->host_lock); ++ if (!(vport->fc_flag & FC_OFFLINE_MODE)) { + return -EPERM; + } + ++ spin_lock_irq(&phba->hbalock); + for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) + writel(*((uint32_t *)(buf + buf_off)), + phba->ctrl_regs_memmap_p + off + buf_off); + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return count; + } +@@ -1169,9 +1441,11 @@ + { + size_t buf_off; + uint32_t * tmp_ptr; +- struct Scsi_Host *host = class_to_shost(container_of(kobj, +- struct class_device, kobj)); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct class_device *cdev = container_of(kobj, struct class_device, ++ kobj); ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = 
vport->phba; + + if (off > FF_REG_AREA_SIZE) + return -ERANGE; +@@ -1184,14 +1458,14 @@ + if (off % 4 || count % 4 || (unsigned long)buf % 4) + return -EINVAL; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) { + tmp_ptr = (uint32_t *)(buf + buf_off); + *tmp_ptr = readl(phba->ctrl_regs_memmap_p + off + buf_off); + } + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return count; + } +@@ -1200,7 +1474,6 @@ + .attr = { + .name = "ctlreg", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 256, + .read = sysfs_ctlreg_read, +@@ -1209,7 +1482,7 @@ + + + static void +-sysfs_mbox_idle (struct lpfc_hba * phba) ++sysfs_mbox_idle(struct lpfc_hba *phba) + { + phba->sysfs_mbox.state = SMBOX_IDLE; + phba->sysfs_mbox.offset = 0; +@@ -1224,10 +1497,12 @@ + static ssize_t + sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count) + { +- struct Scsi_Host * host = +- class_to_shost(container_of(kobj, struct class_device, kobj)); +- struct lpfc_hba * phba = (struct lpfc_hba*)host->hostdata; +- struct lpfcMboxq * mbox = NULL; ++ struct class_device *cdev = container_of(kobj, struct class_device, ++ kobj); ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfcMboxq *mbox = NULL; + + if ((count + off) > MAILBOX_CMD_SIZE) + return -ERANGE; +@@ -1245,7 +1520,7 @@ + memset(mbox, 0, sizeof (LPFC_MBOXQ_t)); + } + +- spin_lock_irq(host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + if (off == 0) { + if (phba->sysfs_mbox.mbox) +@@ -1256,9 +1531,9 @@ + } else { + if (phba->sysfs_mbox.state != SMBOX_WRITING || + phba->sysfs_mbox.offset != off || +- phba->sysfs_mbox.mbox == NULL ) { ++ phba->sysfs_mbox.mbox == NULL) { + sysfs_mbox_idle(phba); +- spin_unlock_irq(host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EAGAIN; + } + } +@@ -1268,7 +1543,7 @@ + + phba->sysfs_mbox.offset = off + count; + +- spin_unlock_irq(host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return count; + } +@@ -1276,10 +1551,11 @@ + static ssize_t + sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count) + { +- struct Scsi_Host *host = +- class_to_shost(container_of(kobj, struct class_device, +- kobj)); +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct class_device *cdev = container_of(kobj, struct class_device, ++ kobj); ++ struct Scsi_Host *shost = class_to_shost(cdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + int rc; + + if (off > MAILBOX_CMD_SIZE) +@@ -1294,7 +1570,7 @@ + if (off && count == 0) + return 0; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + if (off == 0 && + phba->sysfs_mbox.state == SMBOX_WRITING && +@@ -1317,12 +1593,12 @@ + case MBX_SET_MASK: + case MBX_SET_SLIM: + case MBX_SET_DEBUG: +- if (!(phba->fc_flag & FC_OFFLINE_MODE)) { ++ if (!(vport->fc_flag & FC_OFFLINE_MODE)) { + printk(KERN_WARNING "mbox_read:Command 0x%x " + "is illegal in on-line state\n", + phba->sysfs_mbox.mbox->mb.mbxCommand); + sysfs_mbox_idle(phba); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EPERM; + } + case MBX_LOAD_SM: +@@ -1352,48 +1628,48 @@ + printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n", + phba->sysfs_mbox.mbox->mb.mbxCommand); + 
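/*
 * [Illustrative sketch, not part of trellis.patch] Every sysfs handler
 * converted in the hunks above repeats the same navigation: the
 * class_device resolves to a Scsi_Host, shost->hostdata now holds the
 * lpfc_vport, and the physical adapter is reached through vport->phba.
 * Locking follows the same split: HBA-wide state (the sysfs mailbox,
 * the control-register window) is now serialized by &phba->hbalock
 * rather than the old phba->host->host_lock.  A minimal helper showing
 * the chain, assuming the 2.6.22-era class_device interface and the
 * lpfc types introduced by this patch; lpfc_cdev_to_phba is a
 * hypothetical name, not a symbol defined anywhere in the patch:
 */
static struct lpfc_hba *
lpfc_cdev_to_phba(struct class_device *cdev)
{
	struct Scsi_Host *shost = class_to_shost(cdev);
	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;

	/* per-port discovery state (fc_flag, fc_myDID, fc_nodes) stays
	 * on the vport; adapter-wide state hangs off the returned phba. */
	return vport->phba;
}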
sysfs_mbox_idle(phba); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EPERM; + default: + printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n", + phba->sysfs_mbox.mbox->mb.mbxCommand); + sysfs_mbox_idle(phba); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EPERM; + } + +- if (phba->fc_flag & FC_BLOCK_MGMT_IO) { ++ phba->sysfs_mbox.mbox->vport = vport; ++ ++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) { + sysfs_mbox_idle(phba); +- spin_unlock_irq(host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EAGAIN; + } + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || + (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){ + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + rc = lpfc_sli_issue_mbox (phba, + phba->sysfs_mbox.mbox, + MBX_POLL); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + } else { +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + rc = lpfc_sli_issue_mbox_wait (phba, + phba->sysfs_mbox.mbox, + lpfc_mbox_tmo_val(phba, + phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + } + + if (rc != MBX_SUCCESS) { + if (rc == MBX_TIMEOUT) { +- phba->sysfs_mbox.mbox->mbox_cmpl = +- lpfc_sli_def_mbox_cmpl; + phba->sysfs_mbox.mbox = NULL; + } + sysfs_mbox_idle(phba); +- spin_unlock_irq(host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return (rc == MBX_TIMEOUT) ? -ETIME : -ENODEV; + } + phba->sysfs_mbox.state = SMBOX_READING; +@@ -1402,7 +1678,7 @@ + phba->sysfs_mbox.state != SMBOX_READING) { + printk(KERN_WARNING "mbox_read: Bad State\n"); + sysfs_mbox_idle(phba); +- spin_unlock_irq(host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return -EAGAIN; + } + +@@ -1413,7 +1689,7 @@ + if (phba->sysfs_mbox.offset == MAILBOX_CMD_SIZE) + sysfs_mbox_idle(phba); + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return count; + } +@@ -1422,7 +1698,6 @@ + .attr = { + .name = "mbox", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = MAILBOX_CMD_SIZE, + .read = sysfs_mbox_read, +@@ -1430,35 +1705,35 @@ + }; + + int +-lpfc_alloc_sysfs_attr(struct lpfc_hba *phba) ++lpfc_alloc_sysfs_attr(struct lpfc_vport *vport) + { +- struct Scsi_Host *host = phba->host; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + int error; + +- error = sysfs_create_bin_file(&host->shost_classdev.kobj, ++ error = sysfs_create_bin_file(&shost->shost_classdev.kobj, + &sysfs_ctlreg_attr); + if (error) + goto out; + +- error = sysfs_create_bin_file(&host->shost_classdev.kobj, ++ error = sysfs_create_bin_file(&shost->shost_classdev.kobj, + &sysfs_mbox_attr); + if (error) + goto out_remove_ctlreg_attr; + + return 0; + out_remove_ctlreg_attr: +- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr); ++ sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr); + out: + return error; + } + + void +-lpfc_free_sysfs_attr(struct lpfc_hba *phba) ++lpfc_free_sysfs_attr(struct lpfc_vport *vport) + { +- struct Scsi_Host *host = phba->host; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + +- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr); +- sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr); ++ sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_mbox_attr); ++ 
sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr); + } + + +@@ -1469,26 +1744,30 @@ + static void + lpfc_get_host_port_id(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ + /* note: fc_myDID already in cpu endianness */ +- fc_host_port_id(shost) = phba->fc_myDID; ++ fc_host_port_id(shost) = vport->fc_myDID; + } + + static void + lpfc_get_host_port_type(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + + spin_lock_irq(shost->host_lock); + +- if (phba->hba_state == LPFC_HBA_READY) { ++ if (vport->port_type == LPFC_NPIV_PORT) { ++ fc_host_port_type(shost) = FC_PORTTYPE_NPIV; ++ } else if (lpfc_is_link_up(phba)) { + if (phba->fc_topology == TOPOLOGY_LOOP) { +- if (phba->fc_flag & FC_PUBLIC_LOOP) ++ if (vport->fc_flag & FC_PUBLIC_LOOP) + fc_host_port_type(shost) = FC_PORTTYPE_NLPORT; + else + fc_host_port_type(shost) = FC_PORTTYPE_LPORT; + } else { +- if (phba->fc_flag & FC_FABRIC) ++ if (vport->fc_flag & FC_FABRIC) + fc_host_port_type(shost) = FC_PORTTYPE_NPORT; + else + fc_host_port_type(shost) = FC_PORTTYPE_PTP; +@@ -1502,29 +1781,20 @@ + static void + lpfc_get_host_port_state(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + + spin_lock_irq(shost->host_lock); + +- if (phba->fc_flag & FC_OFFLINE_MODE) ++ if (vport->fc_flag & FC_OFFLINE_MODE) + fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE; + else { +- switch (phba->hba_state) { +- case LPFC_STATE_UNKNOWN: +- case LPFC_WARM_START: +- case LPFC_INIT_START: +- case LPFC_INIT_MBX_CMDS: ++ switch (phba->link_state) { ++ case LPFC_LINK_UNKNOWN: + case LPFC_LINK_DOWN: + fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN; + break; + case LPFC_LINK_UP: +- case LPFC_LOCAL_CFG_LINK: +- case LPFC_FLOGI: +- case LPFC_FABRIC_CFG_LINK: +- case LPFC_NS_REG: +- case LPFC_NS_QRY: +- case LPFC_BUILD_DISC_LIST: +- case LPFC_DISC_AUTH: + case LPFC_CLEAR_LA: + case LPFC_HBA_READY: + /* Links up, beyond this port_type reports state */ +@@ -1545,11 +1815,12 @@ + static void + lpfc_get_host_speed(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + + spin_lock_irq(shost->host_lock); + +- if (phba->hba_state == LPFC_HBA_READY) { ++ if (lpfc_is_link_up(phba)) { + switch(phba->fc_linkspeed) { + case LA_1GHZ_LINK: + fc_host_speed(shost) = FC_PORTSPEED_1GBIT; +@@ -1575,38 +1846,30 @@ + static void + lpfc_get_host_fabric_name (struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + u64 node_name; + + spin_lock_irq(shost->host_lock); + +- if ((phba->fc_flag & FC_FABRIC) || ++ if ((vport->fc_flag & FC_FABRIC) || + ((phba->fc_topology == TOPOLOGY_LOOP) && +- (phba->fc_flag & FC_PUBLIC_LOOP))) ++ (vport->fc_flag & FC_PUBLIC_LOOP))) + node_name = wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn); + else + /* fabric is local port if there is no F/FL_Port */ +- node_name = wwn_to_u64(phba->fc_nodename.u.wwn); ++ node_name = wwn_to_u64(vport->fc_nodename.u.wwn); 
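/*
 * [Illustrative sketch, not part of trellis.patch] The transport
 * attribute getters in the surrounding hunks show how the NPIV rework
 * divides state: link-level data (fc_topology, fc_fabparam, link_state)
 * remains on the physical lpfc_hba, while per-port login state (fc_flag
 * bits such as FC_FABRIC and FC_PUBLIC_LOOP, plus fc_nodename and
 * fc_myDID) moves to each lpfc_vport.  A condensed version of the
 * fabric-name selection under those assumptions; lpfc_sketch_fabric_wwn
 * is a hypothetical name used only for this example:
 */
static u64
lpfc_sketch_fabric_wwn(struct lpfc_vport *vport)
{
	struct lpfc_hba *phba = vport->phba;

	if ((vport->fc_flag & FC_FABRIC) ||
	    ((phba->fc_topology == TOPOLOGY_LOOP) &&
	     (vport->fc_flag & FC_PUBLIC_LOOP)))
		/* an F/FL_Port is present: report the fabric's node name */
		return wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn);

	/* no fabric attached: the local port stands in for the fabric */
	return wwn_to_u64(vport->fc_nodename.u.wwn);
}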
+ + spin_unlock_irq(shost->host_lock); + + fc_host_fabric_name(shost) = node_name; + } + +-static void +-lpfc_get_host_symbolic_name (struct Scsi_Host *shost) +-{ +- struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata; +- +- spin_lock_irq(shost->host_lock); +- lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost)); +- spin_unlock_irq(shost->host_lock); +-} +- + static struct fc_host_statistics * + lpfc_get_stats(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; + struct fc_host_statistics *hs = &phba->link_stats; + struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets; +@@ -1615,7 +1878,16 @@ + unsigned long seconds; + int rc = 0; + +- if (phba->fc_flag & FC_BLOCK_MGMT_IO) ++ /* ++ * prevent udev from issuing mailbox commands until the port is ++ * configured. ++ */ ++ if (phba->link_state < LPFC_LINK_DOWN || ++ !phba->mbox_mem_pool || ++ (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0) ++ return NULL; ++ ++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) + return NULL; + + pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +@@ -1627,17 +1899,16 @@ + pmb->mbxCommand = MBX_READ_STATUS; + pmb->mbxOwner = OWN_HOST; + pmboxq->context1 = NULL; ++ pmboxq->vport = vport; + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || + (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) + rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); + else + rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); + + if (rc != MBX_SUCCESS) { +- if (rc == MBX_TIMEOUT) +- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- else ++ if (rc != MBX_TIMEOUT) + mempool_free(pmboxq, phba->mbox_mem_pool); + return NULL; + } +@@ -1653,18 +1924,17 @@ + pmb->mbxCommand = MBX_READ_LNK_STAT; + pmb->mbxOwner = OWN_HOST; + pmboxq->context1 = NULL; ++ pmboxq->vport = vport; + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || + (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) + rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); + else + rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); + + if (rc != MBX_SUCCESS) { +- if (rc == MBX_TIMEOUT) +- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- else +- mempool_free( pmboxq, phba->mbox_mem_pool); ++ if (rc != MBX_TIMEOUT) ++ mempool_free(pmboxq, phba->mbox_mem_pool); + return NULL; + } + +@@ -1711,14 +1981,15 @@ + static void + lpfc_reset_stats(struct Scsi_Host *shost) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; +- struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets; ++ struct lpfc_lnk_stat *lso = &psli->lnk_stat_offsets; + LPFC_MBOXQ_t *pmboxq; + MAILBOX_t *pmb; + int rc = 0; + +- if (phba->fc_flag & FC_BLOCK_MGMT_IO) ++ if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) + return; + + pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +@@ -1731,17 +2002,16 @@ + pmb->mbxOwner = OWN_HOST; + pmb->un.varWords[0] = 0x1; /* reset request */ + pmboxq->context1 = NULL; ++ pmboxq->vport = vport; + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || + (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) + rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); + else + rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); + + if (rc != 
MBX_SUCCESS) { +- if (rc == MBX_TIMEOUT) +- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- else ++ if (rc != MBX_TIMEOUT) + mempool_free(pmboxq, phba->mbox_mem_pool); + return; + } +@@ -1750,17 +2020,16 @@ + pmb->mbxCommand = MBX_READ_LNK_STAT; + pmb->mbxOwner = OWN_HOST; + pmboxq->context1 = NULL; ++ pmboxq->vport = vport; + +- if ((phba->fc_flag & FC_OFFLINE_MODE) || ++ if ((vport->fc_flag & FC_OFFLINE_MODE) || + (!(psli->sli_flag & LPFC_SLI2_ACTIVE))) + rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL); + else + rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2); + + if (rc != MBX_SUCCESS) { +- if (rc == MBX_TIMEOUT) +- pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- else ++ if (rc != MBX_TIMEOUT) + mempool_free( pmboxq, phba->mbox_mem_pool); + return; + } +@@ -1790,12 +2059,12 @@ + lpfc_get_node_by_target(struct scsi_target *starget) + { + struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); +- struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; + struct lpfc_nodelist *ndlp; + + spin_lock_irq(shost->host_lock); + /* Search for this, mapped, target ID */ +- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && + starget->id == ndlp->nlp_sid) { + spin_unlock_irq(shost->host_lock); +@@ -1885,8 +2154,66 @@ + .get_host_fabric_name = lpfc_get_host_fabric_name, + .show_host_fabric_name = 1, + +- .get_host_symbolic_name = lpfc_get_host_symbolic_name, +- .show_host_symbolic_name = 1, ++ /* ++ * The LPFC driver treats linkdown handling as target loss events ++ * so there are no sysfs handlers for link_down_tmo. ++ */ ++ ++ .get_fc_host_stats = lpfc_get_stats, ++ .reset_fc_host_stats = lpfc_reset_stats, ++ ++ .dd_fcrport_size = sizeof(struct lpfc_rport_data), ++ .show_rport_maxframe_size = 1, ++ .show_rport_supported_classes = 1, ++ ++ .set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo, ++ .show_rport_dev_loss_tmo = 1, ++ ++ .get_starget_port_id = lpfc_get_starget_port_id, ++ .show_starget_port_id = 1, ++ ++ .get_starget_node_name = lpfc_get_starget_node_name, ++ .show_starget_node_name = 1, ++ ++ .get_starget_port_name = lpfc_get_starget_port_name, ++ .show_starget_port_name = 1, ++ ++ .issue_fc_host_lip = lpfc_issue_lip, ++ .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk, ++ .terminate_rport_io = lpfc_terminate_rport_io, ++ ++ .vport_create = lpfc_vport_create, ++ .vport_delete = lpfc_vport_delete, ++ .dd_fcvport_size = sizeof(struct lpfc_vport *), ++}; ++ ++struct fc_function_template lpfc_vport_transport_functions = { ++ /* fixed attributes the driver supports */ ++ .show_host_node_name = 1, ++ .show_host_port_name = 1, ++ .show_host_supported_classes = 1, ++ .show_host_supported_fc4s = 1, ++ .show_host_supported_speeds = 1, ++ .show_host_maxframe_size = 1, ++ ++ /* dynamic attributes the driver supports */ ++ .get_host_port_id = lpfc_get_host_port_id, ++ .show_host_port_id = 1, ++ ++ .get_host_port_type = lpfc_get_host_port_type, ++ .show_host_port_type = 1, ++ ++ .get_host_port_state = lpfc_get_host_port_state, ++ .show_host_port_state = 1, ++ ++ /* active_fc4s is shown but doesn't change (thus no get function) */ ++ .show_host_active_fc4s = 1, ++ ++ .get_host_speed = lpfc_get_host_speed, ++ .show_host_speed = 1, ++ ++ .get_host_fabric_name = lpfc_get_host_fabric_name, ++ .show_host_fabric_name = 1, + + /* + * The LPFC driver treats linkdown handling as target loss events +@@ -1915,6 
+2242,8 @@ + .issue_fc_host_lip = lpfc_issue_lip, + .dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk, + .terminate_rport_io = lpfc_terminate_rport_io, ++ ++ .vport_disable = lpfc_vport_disable, + }; + + void +@@ -1937,6 +2266,9 @@ + lpfc_discovery_threads_init(phba, lpfc_discovery_threads); + lpfc_max_luns_init(phba, lpfc_max_luns); + lpfc_poll_tmo_init(phba, lpfc_poll_tmo); ++ lpfc_peer_port_login_init(phba, lpfc_peer_port_login); ++ lpfc_npiv_enable_init(phba, lpfc_npiv_enable); ++ lpfc_vport_restrict_login_init(phba, lpfc_vport_restrict_login); + lpfc_use_msi_init(phba, lpfc_use_msi); + lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo); + lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo); +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_crtn.h 2007-12-21 15:36:12.000000000 -0500 +@@ -23,92 +23,114 @@ + struct fc_rport; + void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t); + void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *); ++void lpfc_heart_beat(struct lpfc_hba *, LPFC_MBOXQ_t *); + int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb, + struct lpfc_dmabuf *mp); + void lpfc_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *); ++void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport); + void lpfc_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *); +-int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *); ++int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int); + void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *); +-int lpfc_reg_login(struct lpfc_hba *, uint32_t, uint8_t *, LPFC_MBOXQ_t *, +- uint32_t); +-void lpfc_unreg_login(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *); +-void lpfc_unreg_did(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *); ++int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *, ++ LPFC_MBOXQ_t *, uint32_t); ++void lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); ++void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); ++void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *); ++void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *); + void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t); + +- ++void lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove); + int lpfc_linkdown(struct lpfc_hba *); + void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *); + + void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); ++void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *); +-void lpfc_dequeue_node(struct lpfc_hba *, struct lpfc_nodelist *); +-void lpfc_nlp_set_state(struct lpfc_hba *, struct lpfc_nodelist *, int); +-void lpfc_drop_node(struct lpfc_hba *, struct lpfc_nodelist *); +-void lpfc_set_disctmo(struct lpfc_hba *); +-int lpfc_can_disctmo(struct lpfc_hba *); +-int lpfc_unreg_rpi(struct lpfc_hba *, struct lpfc_nodelist *); ++void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *); ++void lpfc_nlp_set_state(struct lpfc_vport *, struct lpfc_nodelist *, int); ++void 
lpfc_drop_node(struct lpfc_vport *, struct lpfc_nodelist *); ++void lpfc_set_disctmo(struct lpfc_vport *); ++int lpfc_can_disctmo(struct lpfc_vport *); ++int lpfc_unreg_rpi(struct lpfc_vport *, struct lpfc_nodelist *); ++void lpfc_unreg_all_rpis(struct lpfc_vport *); ++void lpfc_unreg_default_rpis(struct lpfc_vport *); ++void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *); ++ + int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_iocbq *, struct lpfc_nodelist *); +-void lpfc_nlp_init(struct lpfc_hba *, struct lpfc_nodelist *, uint32_t); ++void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t); + struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); + int lpfc_nlp_put(struct lpfc_nodelist *); +-struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_hba *, uint32_t); +-void lpfc_disc_list_loopmap(struct lpfc_hba *); +-void lpfc_disc_start(struct lpfc_hba *); +-void lpfc_disc_flush_list(struct lpfc_hba *); ++struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t); ++void lpfc_disc_list_loopmap(struct lpfc_vport *); ++void lpfc_disc_start(struct lpfc_vport *); ++void lpfc_disc_flush_list(struct lpfc_vport *); ++void lpfc_cleanup_discovery_resources(struct lpfc_vport *); + void lpfc_disc_timeout(unsigned long); + +-struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi); +-struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi); ++struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_vport *, uint16_t); ++struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_vport *, uint16_t); + ++void lpfc_worker_wake_up(struct lpfc_hba *); + int lpfc_workq_post_event(struct lpfc_hba *, void *, void *, uint32_t); + int lpfc_do_work(void *); +-int lpfc_disc_state_machine(struct lpfc_hba *, struct lpfc_nodelist *, void *, ++int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *, + uint32_t); + +-int lpfc_check_sparm(struct lpfc_hba *, struct lpfc_nodelist *, ++void lpfc_register_new_vport(struct lpfc_hba *, struct lpfc_vport *, ++ struct lpfc_nodelist *); ++void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *); ++int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *, + struct serv_parm *, uint32_t); +-int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist * ndlp); ++int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *); ++int lpfc_els_chk_latt(struct lpfc_vport *); + int lpfc_els_abort_flogi(struct lpfc_hba *); +-int lpfc_initial_flogi(struct lpfc_hba *); +-int lpfc_issue_els_plogi(struct lpfc_hba *, uint32_t, uint8_t); +-int lpfc_issue_els_prli(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); +-int lpfc_issue_els_adisc(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); +-int lpfc_issue_els_logo(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t); +-int lpfc_issue_els_scr(struct lpfc_hba *, uint32_t, uint8_t); ++int lpfc_initial_flogi(struct lpfc_vport *); ++int lpfc_initial_fdisc(struct lpfc_vport *); ++int lpfc_issue_els_fdisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); ++int lpfc_issue_els_plogi(struct lpfc_vport *, uint32_t, uint8_t); ++int lpfc_issue_els_prli(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); ++int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); ++int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t); ++int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *); ++int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, 
uint8_t); + int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); +-int lpfc_els_rsp_acc(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, ++int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *); ++int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *, + struct lpfc_nodelist *, LPFC_MBOXQ_t *, uint8_t); +-int lpfc_els_rsp_reject(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *, ++int lpfc_els_rsp_reject(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *, ++ struct lpfc_nodelist *, LPFC_MBOXQ_t *); ++int lpfc_els_rsp_adisc_acc(struct lpfc_vport *, struct lpfc_iocbq *, + struct lpfc_nodelist *); +-int lpfc_els_rsp_adisc_acc(struct lpfc_hba *, struct lpfc_iocbq *, ++int lpfc_els_rsp_prli_acc(struct lpfc_vport *, struct lpfc_iocbq *, + struct lpfc_nodelist *); +-int lpfc_els_rsp_prli_acc(struct lpfc_hba *, struct lpfc_iocbq *, +- struct lpfc_nodelist *); +-void lpfc_cancel_retry_delay_tmo(struct lpfc_hba *, struct lpfc_nodelist *); ++void lpfc_cancel_retry_delay_tmo(struct lpfc_vport *, struct lpfc_nodelist *); + void lpfc_els_retry_delay(unsigned long); + void lpfc_els_retry_delay_handler(struct lpfc_nodelist *); ++void lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *); + void lpfc_els_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_iocbq *); +-int lpfc_els_handle_rscn(struct lpfc_hba *); +-int lpfc_els_flush_rscn(struct lpfc_hba *); +-int lpfc_rscn_payload_check(struct lpfc_hba *, uint32_t); +-void lpfc_els_flush_cmd(struct lpfc_hba *); +-int lpfc_els_disc_adisc(struct lpfc_hba *); +-int lpfc_els_disc_plogi(struct lpfc_hba *); ++int lpfc_els_handle_rscn(struct lpfc_vport *); ++void lpfc_els_flush_rscn(struct lpfc_vport *); ++int lpfc_rscn_payload_check(struct lpfc_vport *, uint32_t); ++void lpfc_els_flush_cmd(struct lpfc_vport *); ++int lpfc_els_disc_adisc(struct lpfc_vport *); ++int lpfc_els_disc_plogi(struct lpfc_vport *); + void lpfc_els_timeout(unsigned long); +-void lpfc_els_timeout_handler(struct lpfc_hba *); ++void lpfc_els_timeout_handler(struct lpfc_vport *); ++void lpfc_hb_timeout(unsigned long); ++void lpfc_hb_timeout_handler(struct lpfc_hba *); + + void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_iocbq *); +-int lpfc_ns_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int); +-int lpfc_fdmi_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int); ++int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); ++int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); + void lpfc_fdmi_tmo(unsigned long); +-void lpfc_fdmi_tmo_handler(struct lpfc_hba *); ++void lpfc_fdmi_timeout_handler(struct lpfc_vport *vport); + + int lpfc_config_port_prep(struct lpfc_hba *); + int lpfc_config_port_post(struct lpfc_hba *); +@@ -136,16 +158,23 @@ + void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *); + void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *); + LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *); ++void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *); + int lpfc_mbox_tmo_val(struct lpfc_hba *, int); + ++void lpfc_config_hbq(struct lpfc_hba *, struct lpfc_hbq_init *, uint32_t , ++ LPFC_MBOXQ_t *); ++struct lpfc_hbq_entry * lpfc_sli_next_hbq_slot(struct lpfc_hba *, uint32_t); ++ + int lpfc_mem_alloc(struct lpfc_hba *); + void lpfc_mem_free(struct lpfc_hba *); ++void lpfc_stop_vport_timers(struct lpfc_vport *); + + void lpfc_poll_timeout(unsigned long ptr); + void lpfc_poll_start_timer(struct lpfc_hba * phba); + void lpfc_sli_poll_fcp_ring(struct lpfc_hba * hba); + struct lpfc_iocbq * 
lpfc_sli_get_iocbq(struct lpfc_hba *); + void lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); ++void __lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); + uint16_t lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocb); + + void lpfc_reset_barrier(struct lpfc_hba * phba); +@@ -154,6 +183,7 @@ + int lpfc_sli_brdreset(struct lpfc_hba *); + int lpfc_sli_brdrestart(struct lpfc_hba *); + int lpfc_sli_hba_setup(struct lpfc_hba *); ++int lpfc_sli_host_down(struct lpfc_vport *); + int lpfc_sli_hba_down(struct lpfc_hba *); + int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t); + int lpfc_sli_handle_mb_event(struct lpfc_hba *); +@@ -164,12 +194,17 @@ + int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_iocbq *, uint32_t); + void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t); +-int lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); ++void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *); + int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_dmabuf *); + struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *, + struct lpfc_sli_ring *, + dma_addr_t); ++int lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *, uint32_t); ++int lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *, uint32_t); ++void lpfc_sli_hbqbuf_free_all(struct lpfc_hba *); ++struct hbq_dmabuf *lpfc_sli_hbqbuf_find(struct lpfc_hba *, uint32_t); ++int lpfc_sli_hbq_size(void); + int lpfc_sli_issue_abort_iotag(struct lpfc_hba *, struct lpfc_sli_ring *, + struct lpfc_iocbq *); + int lpfc_sli_sum_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t, +@@ -180,8 +215,12 @@ + void lpfc_mbox_timeout(unsigned long); + void lpfc_mbox_timeout_handler(struct lpfc_hba *); + +-struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_hba *, uint32_t); +-struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_hba *, struct lpfc_name *); ++struct lpfc_nodelist *__lpfc_find_node(struct lpfc_vport *, node_filter, ++ void *); ++struct lpfc_nodelist *lpfc_find_node(struct lpfc_vport *, node_filter, void *); ++struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_vport *, uint32_t); ++struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *, ++ struct lpfc_name *); + + int lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq, + uint32_t timeout); +@@ -195,25 +234,56 @@ + struct lpfc_iocbq * cmdiocb, + struct lpfc_iocbq * rspiocb); + ++void *lpfc_hbq_alloc(struct lpfc_hba *, int, dma_addr_t *); ++void lpfc_hbq_free(struct lpfc_hba *, void *, dma_addr_t); ++void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *); ++ + void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *); ++void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); + void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t); + ++void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *); + /* Function prototypes. 
*/ + const char* lpfc_info(struct Scsi_Host *); +-void lpfc_scan_start(struct Scsi_Host *); + int lpfc_scan_finished(struct Scsi_Host *, unsigned long); + + void lpfc_get_cfgparam(struct lpfc_hba *); +-int lpfc_alloc_sysfs_attr(struct lpfc_hba *); +-void lpfc_free_sysfs_attr(struct lpfc_hba *); +-extern struct class_device_attribute *lpfc_host_attrs[]; ++int lpfc_alloc_sysfs_attr(struct lpfc_vport *); ++void lpfc_free_sysfs_attr(struct lpfc_vport *); ++extern struct class_device_attribute *lpfc_hba_attrs[]; + extern struct scsi_host_template lpfc_template; + extern struct fc_function_template lpfc_transport_functions; ++extern struct fc_function_template lpfc_vport_transport_functions; ++extern int lpfc_sli_mode; + +-void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp); ++int lpfc_vport_symbolic_node_name(struct lpfc_vport *, char *, size_t); + void lpfc_terminate_rport_io(struct fc_rport *); + void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport); + ++struct lpfc_vport *lpfc_create_port(struct lpfc_hba *, int, struct fc_vport *); ++int lpfc_vport_disable(struct fc_vport *fc_vport, bool disable); ++void lpfc_mbx_unreg_vpi(struct lpfc_vport *); ++void destroy_port(struct lpfc_vport *); ++int lpfc_get_instance(void); ++void lpfc_host_attrib_init(struct Scsi_Host *); ++ ++extern void lpfc_debugfs_initialize(struct lpfc_vport *); ++extern void lpfc_debugfs_terminate(struct lpfc_vport *); ++extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t, ++ uint32_t, uint32_t); ++ ++/* Interface exported by fabric iocb scheduler */ ++int lpfc_issue_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *); ++void lpfc_fabric_abort_vport(struct lpfc_vport *); ++void lpfc_fabric_abort_nport(struct lpfc_nodelist *); ++void lpfc_fabric_abort_hba(struct lpfc_hba *); ++void lpfc_fabric_abort_flogi(struct lpfc_hba *); ++void lpfc_fabric_block_timeout(unsigned long); ++void lpfc_unblock_fabric_iocbs(struct lpfc_hba *); ++void lpfc_adjust_queue_depth(struct lpfc_hba *); ++void lpfc_ramp_down_queue_handler(struct lpfc_hba *); ++void lpfc_ramp_up_queue_handler(struct lpfc_hba *); ++ + #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code) + #define HBA_EVENT_RSCN 5 + #define HBA_EVENT_LINK_UP 2 +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_ct.c 2007-12-21 15:36:12.000000000 -0500 +@@ -40,6 +40,8 @@ + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" + #include "lpfc_version.h" ++#include "lpfc_vport.h" ++#include "lpfc_debugfs.h" + + #define HBA_PORTSPEED_UNKNOWN 0 /* Unknown - transceiver + * incapable of reporting */ +@@ -58,24 +60,68 @@ + /* + * lpfc_ct_unsol_event + */ ++static void ++lpfc_ct_unsol_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq, ++ struct lpfc_dmabuf *mp, uint32_t size) ++{ ++ if (!mp) { ++ printk(KERN_ERR "%s (%d): Unsolited CT, no buffer, " ++ "piocbq = %p, status = x%x, mp = %p, size = %d\n", ++ __FUNCTION__, __LINE__, ++ piocbq, piocbq->iocb.ulpStatus, mp, size); ++ } ++ ++ printk(KERN_ERR "%s (%d): Ignoring unsolicted CT piocbq = %p, " ++ "buffer = %p, size = %d, status = x%x\n", ++ __FUNCTION__, __LINE__, ++ piocbq, mp, size, ++ piocbq->iocb.ulpStatus); ++ ++} ++ ++static void ++lpfc_ct_ignore_hbq_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq, ++ struct lpfc_dmabuf *mp, uint32_t size) ++{ ++ if (!mp) { ++ printk(KERN_ERR "%s (%d): 
Unsolited CT, no " ++ "HBQ buffer, piocbq = %p, status = x%x\n", ++ __FUNCTION__, __LINE__, ++ piocbq, piocbq->iocb.ulpStatus); ++ } else { ++ lpfc_ct_unsol_buffer(phba, piocbq, mp, size); ++ printk(KERN_ERR "%s (%d): Ignoring unsolicted CT " ++ "piocbq = %p, buffer = %p, size = %d, " ++ "status = x%x\n", ++ __FUNCTION__, __LINE__, ++ piocbq, mp, size, piocbq->iocb.ulpStatus); ++ } ++} ++ + void +-lpfc_ct_unsol_event(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocbq) ++lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *piocbq) + { + +- struct lpfc_iocbq *next_piocbq; +- struct lpfc_dmabuf *pmbuf = NULL; +- struct lpfc_dmabuf *matp, *next_matp; +- uint32_t ctx = 0, size = 0, cnt = 0; ++ struct lpfc_dmabuf *mp = NULL; + IOCB_t *icmd = &piocbq->iocb; +- IOCB_t *save_icmd = icmd; +- int i, go_exit = 0; +- struct list_head head; ++ int i; ++ struct lpfc_iocbq *iocbq; ++ dma_addr_t paddr; ++ uint32_t size; ++ struct lpfc_dmabuf *bdeBuf1 = piocbq->context2; ++ struct lpfc_dmabuf *bdeBuf2 = piocbq->context3; ++ ++ piocbq->context2 = NULL; ++ piocbq->context3 = NULL; + +- if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && ++ if (unlikely(icmd->ulpStatus == IOSTAT_NEED_BUFFER)) { ++ lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ); ++ } else if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && + ((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) { + /* Not enough posted buffers; Try posting more buffers */ + phba->fc_stat.NoRcvBuf++; ++ if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) + lpfc_post_buffer(phba, pring, 0, 1); + return; + } +@@ -86,66 +132,56 @@ + if (icmd->ulpBdeCount == 0) + return; + +- INIT_LIST_HEAD(&head); +- list_add_tail(&head, &piocbq->list); +- +- list_for_each_entry_safe(piocbq, next_piocbq, &head, list) { +- icmd = &piocbq->iocb; +- if (ctx == 0) +- ctx = (uint32_t) (icmd->ulpContext); +- if (icmd->ulpBdeCount == 0) ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { ++ list_for_each_entry(iocbq, &piocbq->list, list) { ++ icmd = &iocbq->iocb; ++ if (icmd->ulpBdeCount == 0) { ++ printk(KERN_ERR "%s (%d): Unsolited CT, no " ++ "BDE, iocbq = %p, status = x%x\n", ++ __FUNCTION__, __LINE__, ++ iocbq, iocbq->iocb.ulpStatus); + continue; +- +- for (i = 0; i < icmd->ulpBdeCount; i++) { +- matp = lpfc_sli_ringpostbuf_get(phba, pring, +- getPaddr(icmd->un. +- cont64[i]. +- addrHigh, +- icmd->un. +- cont64[i]. 
+- addrLow)); +- if (!matp) { +- /* Insert lpfc log message here */ +- lpfc_post_buffer(phba, pring, cnt, 1); +- go_exit = 1; +- goto ct_unsol_event_exit_piocbq; +- } +- +- /* Typically for Unsolicited CT requests */ +- if (!pmbuf) { +- pmbuf = matp; +- INIT_LIST_HEAD(&pmbuf->list); +- } else +- list_add_tail(&matp->list, &pmbuf->list); +- +- size += icmd->un.cont64[i].tus.f.bdeSize; +- cnt++; + } + +- icmd->ulpBdeCount = 0; ++ size = icmd->un.cont64[0].tus.f.bdeSize; ++ lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf1, size); ++ lpfc_in_buf_free(phba, bdeBuf1); ++ if (icmd->ulpBdeCount == 2) { ++ lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf2, ++ size); ++ lpfc_in_buf_free(phba, bdeBuf2); ++ } + } ++ } else { ++ struct lpfc_iocbq *next; + +- lpfc_post_buffer(phba, pring, cnt, 1); +- if (save_icmd->ulpStatus) { +- go_exit = 1; ++ list_for_each_entry_safe(iocbq, next, &piocbq->list, list) { ++ icmd = &iocbq->iocb; ++ if (icmd->ulpBdeCount == 0) { ++ printk(KERN_ERR "%s (%d): Unsolited CT, no " ++ "BDE, iocbq = %p, status = x%x\n", ++ __FUNCTION__, __LINE__, ++ iocbq, iocbq->iocb.ulpStatus); ++ continue; + } + +-ct_unsol_event_exit_piocbq: +- list_del(&head); +- if (pmbuf) { +- list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) { +- lpfc_mbuf_free(phba, matp->virt, matp->phys); +- list_del(&matp->list); +- kfree(matp); ++ for (i = 0; i < icmd->ulpBdeCount; i++) { ++ paddr = getPaddr(icmd->un.cont64[i].addrHigh, ++ icmd->un.cont64[i].addrLow); ++ mp = lpfc_sli_ringpostbuf_get(phba, pring, ++ paddr); ++ size = icmd->un.cont64[i].tus.f.bdeSize; ++ lpfc_ct_unsol_buffer(phba, piocbq, mp, size); ++ lpfc_in_buf_free(phba, mp); ++ } ++ list_del(&iocbq->list); ++ lpfc_sli_release_iocbq(phba, iocbq); + } +- lpfc_mbuf_free(phba, pmbuf->virt, pmbuf->phys); +- kfree(pmbuf); + } +- return; + } + + static void +-lpfc_free_ct_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mlist) ++lpfc_free_ct_rsp(struct lpfc_hba *phba, struct lpfc_dmabuf *mlist) + { + struct lpfc_dmabuf *mlast, *next_mlast; + +@@ -160,7 +196,7 @@ + } + + static struct lpfc_dmabuf * +-lpfc_alloc_ct_rsp(struct lpfc_hba * phba, int cmdcode, struct ulp_bde64 * bpl, ++lpfc_alloc_ct_rsp(struct lpfc_hba *phba, int cmdcode, struct ulp_bde64 *bpl, + uint32_t size, int *entries) + { + struct lpfc_dmabuf *mlist = NULL; +@@ -181,7 +217,8 @@ + + INIT_LIST_HEAD(&mp->list); + +- if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT)) ++ if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT) || ++ cmdcode == be16_to_cpu(SLI_CTNS_GFF_ID)) + mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys)); + else + mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys)); +@@ -201,8 +238,8 @@ + + bpl->tus.f.bdeFlags = BUFF_USE_RCV; + /* build buffer ptr list for IOCB */ +- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); +- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); ++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); ++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); + bpl->tus.f.bdeSize = (uint16_t) cnt; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + bpl++; +@@ -215,24 +252,49 @@ + return mlist; + } + ++int ++lpfc_ct_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *ctiocb) ++{ ++ struct lpfc_dmabuf *buf_ptr; ++ ++ if (ctiocb->context1) { ++ buf_ptr = (struct lpfc_dmabuf *) ctiocb->context1; ++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); ++ kfree(buf_ptr); ++ ctiocb->context1 = NULL; ++ } ++ if (ctiocb->context2) { ++ lpfc_free_ct_rsp(phba, (struct lpfc_dmabuf *) ctiocb->context2); ++ ctiocb->context2 = NULL; ++ } ++ ++ if (ctiocb->context3) { ++ 
buf_ptr = (struct lpfc_dmabuf *) ctiocb->context3; ++ lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); ++ kfree(buf_ptr); ++ ctiocb->context1 = NULL; ++ } ++ lpfc_sli_release_iocbq(phba, ctiocb); ++ return 0; ++} ++ + static int +-lpfc_gen_req(struct lpfc_hba *phba, struct lpfc_dmabuf *bmp, ++lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, + struct lpfc_dmabuf *inp, struct lpfc_dmabuf *outp, + void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *), + struct lpfc_nodelist *ndlp, uint32_t usr_flg, uint32_t num_entry, +- uint32_t tmo) ++ uint32_t tmo, uint8_t retry) + { +- ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + IOCB_t *icmd; + struct lpfc_iocbq *geniocb; ++ int rc; + + /* Allocate buffer for command iocb */ +- spin_lock_irq(phba->host->host_lock); + geniocb = lpfc_sli_get_iocbq(phba); +- spin_unlock_irq(phba->host->host_lock); + + if (geniocb == NULL) + return 1; +@@ -272,31 +334,40 @@ + icmd->ulpClass = CLASS3; + icmd->ulpContext = ndlp->nlp_rpi; + ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ /* For GEN_REQUEST64_CR, use the RPI */ ++ icmd->ulpCt_h = 0; ++ icmd->ulpCt_l = 0; ++ } ++ + /* Issue GEN REQ IOCB for NPORT */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0119 Issue GEN REQ IOCB for NPORT x%x " +- "Data: x%x x%x\n", phba->brd_no, icmd->un.ulpWord[5], +- icmd->ulpIoTag, phba->hba_state); ++ "%d (%d):0119 Issue GEN REQ IOCB to NPORT x%x " ++ "Data: x%x x%x\n", phba->brd_no, vport->vpi, ++ ndlp->nlp_DID, icmd->ulpIoTag, ++ vport->port_state); + geniocb->iocb_cmpl = cmpl; + geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT; +- spin_lock_irq(phba->host->host_lock); +- if (lpfc_sli_issue_iocb(phba, pring, geniocb, 0) == IOCB_ERROR) { ++ geniocb->vport = vport; ++ geniocb->retry = retry; ++ rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0); ++ ++ if (rc == IOCB_ERROR) { + lpfc_sli_release_iocbq(phba, geniocb); +- spin_unlock_irq(phba->host->host_lock); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + + return 0; + } + + static int +-lpfc_ct_cmd(struct lpfc_hba *phba, struct lpfc_dmabuf *inmp, ++lpfc_ct_cmd(struct lpfc_vport *vport, struct lpfc_dmabuf *inmp, + struct lpfc_dmabuf *bmp, struct lpfc_nodelist *ndlp, + void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *), +- uint32_t rsp_size) ++ uint32_t rsp_size, uint8_t retry) + { ++ struct lpfc_hba *phba = vport->phba; + struct ulp_bde64 *bpl = (struct ulp_bde64 *) bmp->virt; + struct lpfc_dmabuf *outmp; + int cnt = 0, status; +@@ -310,8 +381,8 @@ + if (!outmp) + return -ENOMEM; + +- status = lpfc_gen_req(phba, bmp, inmp, outmp, cmpl, ndlp, 0, +- cnt+1, 0); ++ status = lpfc_gen_req(vport, bmp, inmp, outmp, cmpl, ndlp, 0, ++ cnt+1, 0, retry); + if (status) { + lpfc_free_ct_rsp(phba, outmp); + return -ENOMEM; +@@ -319,20 +390,35 @@ + return 0; + } + ++static struct lpfc_vport * ++lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did) { ++ ++ struct lpfc_vport *vport_curr; ++ ++ list_for_each_entry(vport_curr, &phba->port_list, listentry) { ++ if ((vport_curr->fc_myDID) && ++ (vport_curr->fc_myDID == did)) ++ return vport_curr; ++ } ++ ++ return NULL; ++} ++ + static int +-lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size) ++lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ct_request *Response = + (struct lpfc_sli_ct_request 
*) mp->virt; + struct lpfc_nodelist *ndlp = NULL; + struct lpfc_dmabuf *mlast, *next_mp; + uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; +- uint32_t Did; +- uint32_t CTentry; ++ uint32_t Did, CTentry; + int Cnt; + struct list_head head; + +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); ++ vport->num_disc_nodes = 0; + + + list_add_tail(&head, &mp->list); +@@ -350,39 +436,96 @@ + + /* Loop through entire NameServer list of DIDs */ + while (Cnt >= sizeof (uint32_t)) { +- + /* Get next DID from NameServer List */ + CTentry = *ctptr++; + Did = ((be32_to_cpu(CTentry)) & Mask_DID); + + ndlp = NULL; +- if (Did != phba->fc_myDID) { +- /* Check for rscn processing or not */ +- ndlp = lpfc_setup_disc_node(phba, Did); +- } +- /* Mark all node table entries that are in the +- Nameserver */ ++ ++ /* ++ * Check for rscn processing or not ++ * To conserve rpi's, filter out addresses for other ++ * vports on the same physical HBAs. ++ */ ++ if ((Did != vport->fc_myDID) && ++ ((lpfc_find_vport_by_did(phba, Did) == NULL) || ++ phba->cfg_peer_port_login)) { ++ if ((vport->port_type != LPFC_NPIV_PORT) || ++ (vport->fc_flag & FC_RFF_NOT_SUPPORTED) || ++ (!phba->cfg_vport_restrict_login)) { ++ ndlp = lpfc_setup_disc_node(vport, Did); + if (ndlp) { +- /* NameServer Rsp */ +- lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0238 Process x%x NameServer" +- " Rsp Data: x%x x%x x%x\n", +- phba->brd_no, ++ lpfc_debugfs_disc_trc(vport, ++ LPFC_DISC_TRC_CT, ++ "Parse GID_FTrsp: " ++ "did:x%x flg:x%x x%x", + Did, ndlp->nlp_flag, +- phba->fc_flag, +- phba->fc_rscn_id_cnt); ++ vport->fc_flag); ++ ++ lpfc_printf_log(phba, KERN_INFO, ++ LOG_DISCOVERY, ++ "%d (%d):0238 Process " ++ "x%x NameServer Rsp" ++ "Data: x%x x%x x%x\n", ++ phba->brd_no, ++ vport->vpi, Did, ++ ndlp->nlp_flag, ++ vport->fc_flag, ++ vport->fc_rscn_id_cnt); + } else { +- /* NameServer Rsp */ +- lpfc_printf_log(phba, +- KERN_INFO, ++ lpfc_debugfs_disc_trc(vport, ++ LPFC_DISC_TRC_CT, ++ "Skip1 GID_FTrsp: " ++ "did:x%x flg:x%x cnt:%d", ++ Did, vport->fc_flag, ++ vport->fc_rscn_id_cnt); ++ ++ lpfc_printf_log(phba, KERN_INFO, + LOG_DISCOVERY, +- "%d:0239 Skip x%x NameServer " +- "Rsp Data: x%x x%x x%x\n", ++ "%d (%d):0239 Skip x%x " ++ "NameServer Rsp Data: " ++ "x%x x%x\n", + phba->brd_no, +- Did, Size, phba->fc_flag, +- phba->fc_rscn_id_cnt); ++ vport->vpi, Did, ++ vport->fc_flag, ++ vport->fc_rscn_id_cnt); + } + ++ } else { ++ if (!(vport->fc_flag & FC_RSCN_MODE) || ++ (lpfc_rscn_payload_check(vport, Did))) { ++ lpfc_debugfs_disc_trc(vport, ++ LPFC_DISC_TRC_CT, ++ "Query GID_FTrsp: " ++ "did:x%x flg:x%x cnt:%d", ++ Did, vport->fc_flag, ++ vport->fc_rscn_id_cnt); ++ ++ if (lpfc_ns_cmd(vport, ++ SLI_CTNS_GFF_ID, ++ 0, Did) == 0) ++ vport->num_disc_nodes++; ++ } ++ else { ++ lpfc_debugfs_disc_trc(vport, ++ LPFC_DISC_TRC_CT, ++ "Skip2 GID_FTrsp: " ++ "did:x%x flg:x%x cnt:%d", ++ Did, vport->fc_flag, ++ vport->fc_rscn_id_cnt); ++ ++ lpfc_printf_log(phba, KERN_INFO, ++ LOG_DISCOVERY, ++ "%d (%d):0245 Skip x%x " ++ "NameServer Rsp Data: " ++ "x%x x%x\n", ++ phba->brd_no, ++ vport->vpi, Did, ++ vport->fc_flag, ++ vport->fc_rscn_id_cnt); ++ } ++ } ++ } + if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY))) + goto nsout1; + Cnt -= sizeof (uint32_t); +@@ -393,190 +536,369 @@ + + nsout1: + list_del(&head); +- +- /* +- * The driver has cycled through all Nports in the RSCN payload. +- * Complete the handling by cleaning up and marking the +- * current driver state. 
+- */ +- if (phba->hba_state == LPFC_HBA_READY) { +- lpfc_els_flush_rscn(phba); +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */ +- spin_unlock_irq(phba->host->host_lock); +- } + return 0; + } + +- +- +- + static void +-lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; +- struct lpfc_sli *psli; + struct lpfc_dmabuf *bmp; +- struct lpfc_dmabuf *inp; + struct lpfc_dmabuf *outp; +- struct lpfc_nodelist *ndlp; + struct lpfc_sli_ct_request *CTrsp; ++ int rc; + +- psli = &phba->sli; + /* we pass cmdiocb to state machine which needs rspiocb as well */ + cmdiocb->context_un.rsp_iocb = rspiocb; + +- inp = (struct lpfc_dmabuf *) cmdiocb->context1; + outp = (struct lpfc_dmabuf *) cmdiocb->context2; + bmp = (struct lpfc_dmabuf *) cmdiocb->context3; +- + irsp = &rspiocb->iocb; +- if (irsp->ulpStatus) { +- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) || +- (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) { ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "GID_FT cmpl: status:x%x/x%x rtry:%d", ++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_ns_retry); ++ ++ /* Don't bother processing response if vport is being torn down. */ ++ if (vport->load_flag & FC_UNLOADING) ++ goto out; ++ ++ ++ if (lpfc_els_chk_latt(vport) || lpfc_error_lost_link(irsp)) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0216 Link event during NS query\n", ++ phba->brd_no, vport->vpi); ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); + goto out; + } + ++ if (irsp->ulpStatus) { + /* Check for retry */ +- if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) { +- phba->fc_ns_retry++; ++ if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { ++ if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || ++ (irsp->un.ulpWord[4] != IOERR_NO_RESOURCES)) ++ vport->fc_ns_retry++; + /* CT command is being retried */ +- ndlp = lpfc_findnode_did(phba, NameServer_DID); +- if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { +- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == +- 0) { ++ rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, ++ vport->fc_ns_retry, 0); ++ if (rc == 0) + goto out; + } +- } +- } ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0257 GID_FT Query error: 0x%x 0x%x\n", ++ phba->brd_no, vport->vpi, irsp->ulpStatus, ++ vport->fc_ns_retry); + } else { + /* Good status, continue checking */ + CTrsp = (struct lpfc_sli_ct_request *) outp->virt; + if (CTrsp->CommandResponse.bits.CmdRsp == + be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0208 NameServer Rsp " ++ "%d (%d):0208 NameServer Rsp " + "Data: x%x\n", +- phba->brd_no, +- phba->fc_flag); +- lpfc_ns_rsp(phba, outp, ++ phba->brd_no, vport->vpi, ++ vport->fc_flag); ++ lpfc_ns_rsp(vport, outp, + (uint32_t) (irsp->un.genreq64.bdl.bdeSize)); + } else if (CTrsp->CommandResponse.bits.CmdRsp == + be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { + /* NameServer Rsp Error */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0240 NameServer Rsp Error " ++ "%d (%d):0240 NameServer Rsp Error " + "Data: x%x x%x x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t) 
CTrsp->ReasonCode, + (uint32_t) CTrsp->Explanation, +- phba->fc_flag); ++ vport->fc_flag); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "GID_FT rsp err1 cmd:x%x rsn:x%x exp:x%x", ++ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, ++ (uint32_t) CTrsp->ReasonCode, ++ (uint32_t) CTrsp->Explanation); ++ + } else { + /* NameServer Rsp Error */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0241 NameServer Rsp Error " ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0241 NameServer Rsp Error " + "Data: x%x x%x x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + CTrsp->CommandResponse.bits.CmdRsp, + (uint32_t) CTrsp->ReasonCode, + (uint32_t) CTrsp->Explanation, +- phba->fc_flag); ++ vport->fc_flag); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "GID_FT rsp err2 cmd:x%x rsn:x%x exp:x%x", ++ (uint32_t)CTrsp->CommandResponse.bits.CmdRsp, ++ (uint32_t) CTrsp->ReasonCode, ++ (uint32_t) CTrsp->Explanation); + } + } + /* Link up / RSCN discovery */ +- lpfc_disc_start(phba); ++ if (vport->num_disc_nodes == 0) { ++ /* ++ * The driver has cycled through all Nports in the RSCN payload. ++ * Complete the handling by cleaning up and marking the ++ * current driver state. ++ */ ++ if (vport->port_state >= LPFC_DISC_AUTH) { ++ if (vport->fc_flag & FC_RSCN_MODE) { ++ lpfc_els_flush_rscn(vport); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */ ++ spin_unlock_irq(shost->host_lock); ++ } ++ else ++ lpfc_els_flush_rscn(vport); ++ } ++ ++ lpfc_disc_start(vport); ++ } + out: +- lpfc_free_ct_rsp(phba, outp); +- lpfc_mbuf_free(phba, inp->virt, inp->phys); +- lpfc_mbuf_free(phba, bmp->virt, bmp->phys); +- kfree(inp); +- kfree(bmp); +- spin_lock_irq(phba->host->host_lock); +- lpfc_sli_release_iocbq(phba, cmdiocb); +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_ct_free_iocb(phba, cmdiocb); ++ return; ++} ++ ++void ++lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) ++{ ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ IOCB_t *irsp = &rspiocb->iocb; ++ struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *) cmdiocb->context1; ++ struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *) cmdiocb->context2; ++ struct lpfc_sli_ct_request *CTrsp; ++ int did; ++ uint8_t fbits; ++ struct lpfc_nodelist *ndlp; ++ ++ did = ((struct lpfc_sli_ct_request *) inp->virt)->un.gff.PortId; ++ did = be32_to_cpu(did); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "GFF_ID cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], did); ++ ++ if (irsp->ulpStatus == IOSTAT_SUCCESS) { ++ /* Good status, continue checking */ ++ CTrsp = (struct lpfc_sli_ct_request *) outp->virt; ++ fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET]; ++ ++ if (CTrsp->CommandResponse.bits.CmdRsp == ++ be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) { ++ if ((fbits & FC4_FEATURE_INIT) && ++ !(fbits & FC4_FEATURE_TARGET)) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0245 Skip x%x GFF " ++ "NameServer Rsp Data: (init) " ++ "x%x x%x\n", phba->brd_no, ++ vport->vpi, did, fbits, ++ vport->fc_rscn_id_cnt); ++ goto out; ++ } ++ } ++ } ++ else { ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0267 NameServer GFF Rsp" ++ " x%x Error (%d %d) Data: x%x x%x\n", ++ phba->brd_no, vport->vpi, did, ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ vport->fc_flag, vport->fc_rscn_id_cnt); ++ } ++ ++ /* This is a target port, unregistered port, or
the GFF_ID failed */ ++ ndlp = lpfc_setup_disc_node(vport, did); ++ if (ndlp) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0242 Process x%x GFF " ++ "NameServer Rsp Data: x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, ++ did, ndlp->nlp_flag, vport->fc_flag, ++ vport->fc_rscn_id_cnt); ++ } else { ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0243 Skip x%x GFF " ++ "NameServer Rsp Data: x%x x%x\n", ++ phba->brd_no, vport->vpi, did, ++ vport->fc_flag, vport->fc_rscn_id_cnt); ++ } ++out: ++ /* Link up / RSCN discovery */ ++ if (vport->num_disc_nodes) ++ vport->num_disc_nodes--; ++ if (vport->num_disc_nodes == 0) { ++ /* ++ * The driver has cycled through all Nports in the RSCN payload. ++ * Complete the handling by cleaning up and marking the ++ * current driver state. ++ */ ++ if (vport->port_state >= LPFC_DISC_AUTH) { ++ if (vport->fc_flag & FC_RSCN_MODE) { ++ lpfc_els_flush_rscn(vport); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */ ++ spin_unlock_irq(shost->host_lock); ++ } ++ else ++ lpfc_els_flush_rscn(vport); ++ } ++ lpfc_disc_start(vport); ++ } ++ lpfc_ct_free_iocb(phba, cmdiocb); + return; + } + ++ + static void +-lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- struct lpfc_sli *psli; +- struct lpfc_dmabuf *bmp; ++ struct lpfc_vport *vport = cmdiocb->vport; + struct lpfc_dmabuf *inp; + struct lpfc_dmabuf *outp; + IOCB_t *irsp; + struct lpfc_sli_ct_request *CTrsp; ++ int cmdcode, rc; ++ uint8_t retry; ++ uint32_t latt; + +- psli = &phba->sli; + /* we pass cmdiocb to state machine which needs rspiocb as well */ + cmdiocb->context_un.rsp_iocb = rspiocb; + + inp = (struct lpfc_dmabuf *) cmdiocb->context1; + outp = (struct lpfc_dmabuf *) cmdiocb->context2; +- bmp = (struct lpfc_dmabuf *) cmdiocb->context3; + irsp = &rspiocb->iocb; + ++ cmdcode = be16_to_cpu(((struct lpfc_sli_ct_request *) inp->virt)-> ++ CommandResponse.bits.CmdRsp); + CTrsp = (struct lpfc_sli_ct_request *) outp->virt; + ++ latt = lpfc_els_chk_latt(vport); ++ + /* RFT request completes status CmdRsp */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0209 RFT request completes ulpStatus x%x " +- "CmdRsp x%x\n", phba->brd_no, irsp->ulpStatus, +- CTrsp->CommandResponse.bits.CmdRsp); ++ "%d (%d):0209 RFT request completes, latt %d, " ++ "ulpStatus x%x CmdRsp x%x, Context x%x, Tag x%x\n", ++ phba->brd_no, vport->vpi, latt, irsp->ulpStatus, ++ CTrsp->CommandResponse.bits.CmdRsp, ++ cmdiocb->iocb.ulpContext, cmdiocb->iocb.ulpIoTag); + +- lpfc_free_ct_rsp(phba, outp); +- lpfc_mbuf_free(phba, inp->virt, inp->phys); +- lpfc_mbuf_free(phba, bmp->virt, bmp->phys); +- kfree(inp); +- kfree(bmp); +- spin_lock_irq(phba->host->host_lock); +- lpfc_sli_release_iocbq(phba, cmdiocb); +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "CT cmd cmpl: status:x%x/x%x cmd:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], cmdcode); ++ ++ if (irsp->ulpStatus) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0268 NS cmd %x Error (%d %d)\n", ++ phba->brd_no, vport->vpi, cmdcode, ++ irsp->ulpStatus, irsp->un.ulpWord[4]); ++ ++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && ++ ((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) || ++ (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) ++ goto out; ++ ++ retry = cmdiocb->retry; ++ if (retry >= LPFC_MAX_NS_RETRY) 
++ goto out; ++ ++ retry++; ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0216 Retrying NS cmd %x\n", ++ phba->brd_no, vport->vpi, cmdcode); ++ rc = lpfc_ns_cmd(vport, cmdcode, retry, 0); ++ if (rc == 0) ++ goto out; ++ } ++ ++out: ++ lpfc_ct_free_iocb(phba, cmdiocb); + return; + } + + static void +-lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { + lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); + return; + } + + static void +-lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_rspn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { + lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); + return; + } + + static void +-lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { + lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); + return; + } + +-void +-lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp) ++static void ++lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- char fwrev[16]; ++ IOCB_t *irsp = &rspiocb->iocb; ++ struct lpfc_vport *vport = cmdiocb->vport; + +- lpfc_decode_firmware_rev(phba, fwrev, 0); ++ if (irsp->ulpStatus != IOSTAT_SUCCESS) ++ vport->fc_flag |= FC_RFF_NOT_SUPPORTED; + +- sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName, +- fwrev, lpfc_release_version); ++ lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb); + return; + } + ++int ++lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol, ++ size_t size) ++{ ++ int n; ++ uint8_t *wwn = vport->phba->wwpn; ++ ++ n = snprintf(symbol, size, ++ "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", ++ wwn[0], wwn[1], wwn[2], wwn[3], ++ wwn[4], wwn[5], wwn[6], wwn[7]); ++ ++ if (vport->port_type == LPFC_PHYSICAL_PORT) ++ return n; ++ ++ if (n < size) ++ n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi); ++ ++ if (n < size && vport->vname) ++ n += snprintf(symbol + n, size - n, " VName-%s", vport->vname); ++ return n; ++} ++ ++int ++lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol, ++ size_t size) ++{ ++ char fwrev[16]; ++ int n; ++ ++ lpfc_decode_firmware_rev(vport->phba, fwrev, 0); ++ ++ n = snprintf(symbol, size, "Emulex %s FV%s DV%s", ++ vport->phba->ModelName, fwrev, lpfc_release_version); ++ return n; ++} ++ + /* + * lpfc_ns_cmd + * Description: +@@ -585,55 +907,76 @@ + * LI_CTNS_RFT_ID + */ + int +-lpfc_ns_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode) ++lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode, ++ uint8_t retry, uint32_t context) + { ++ struct lpfc_nodelist * ndlp; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_dmabuf *mp, *bmp; + struct lpfc_sli_ct_request *CtReq; + struct ulp_bde64 *bpl; + void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *) = NULL; + uint32_t rsp_size = 1024; ++ size_t size; ++ int rc = 0; ++ ++ ndlp = lpfc_findnode_did(vport, NameServer_DID); ++ if (ndlp == NULL || ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) { ++ rc=1; ++ goto ns_cmd_exit; ++ } + + /* fill in BDEs for command */ + /* Allocate buffer for command payload */ + mp = kmalloc(sizeof (struct 
lpfc_dmabuf), GFP_KERNEL); +- if (!mp) ++ if (!mp) { ++ rc=2; + goto ns_cmd_exit; ++ } + + INIT_LIST_HEAD(&mp->list); + mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys)); +- if (!mp->virt) ++ if (!mp->virt) { ++ rc=3; + goto ns_cmd_free_mp; ++ } + + /* Allocate buffer for Buffer ptr list */ + bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); +- if (!bmp) ++ if (!bmp) { ++ rc=4; + goto ns_cmd_free_mpvirt; ++ } + + INIT_LIST_HEAD(&bmp->list); + bmp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(bmp->phys)); +- if (!bmp->virt) ++ if (!bmp->virt) { ++ rc=5; + goto ns_cmd_free_bmp; ++ } + + /* NameServer Req */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0236 NameServer Req Data: x%x x%x x%x\n", +- phba->brd_no, cmdcode, phba->fc_flag, +- phba->fc_rscn_id_cnt); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0236 NameServer Req Data: x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, cmdcode, vport->fc_flag, ++ vport->fc_rscn_id_cnt); + + bpl = (struct ulp_bde64 *) bmp->virt; + memset(bpl, 0, sizeof(struct ulp_bde64)); +- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); +- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); ++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); ++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); + bpl->tus.f.bdeFlags = 0; + if (cmdcode == SLI_CTNS_GID_FT) + bpl->tus.f.bdeSize = GID_REQUEST_SZ; ++ else if (cmdcode == SLI_CTNS_GFF_ID) ++ bpl->tus.f.bdeSize = GFF_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_RFT_ID) + bpl->tus.f.bdeSize = RFT_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_RNN_ID) + bpl->tus.f.bdeSize = RNN_REQUEST_SZ; ++ else if (cmdcode == SLI_CTNS_RSPN_ID) ++ bpl->tus.f.bdeSize = RSPN_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_RSNN_NN) + bpl->tus.f.bdeSize = RSNN_REQUEST_SZ; + else if (cmdcode == SLI_CTNS_RFF_ID) +@@ -654,56 +997,78 @@ + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_GID_FT); + CtReq->un.gid.Fc4Type = SLI_CTPT_FCP; +- if (phba->hba_state < LPFC_HBA_READY) +- phba->hba_state = LPFC_NS_QRY; +- lpfc_set_disctmo(phba); ++ if (vport->port_state < LPFC_NS_QRY) ++ vport->port_state = LPFC_NS_QRY; ++ lpfc_set_disctmo(vport); + cmpl = lpfc_cmpl_ct_cmd_gid_ft; + rsp_size = FC_MAX_NS_RSP; + break; + ++ case SLI_CTNS_GFF_ID: ++ CtReq->CommandResponse.bits.CmdRsp = ++ be16_to_cpu(SLI_CTNS_GFF_ID); ++ CtReq->un.gff.PortId = be32_to_cpu(context); ++ cmpl = lpfc_cmpl_ct_cmd_gff_id; ++ break; ++ + case SLI_CTNS_RFT_ID: + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_RFT_ID); +- CtReq->un.rft.PortId = be32_to_cpu(phba->fc_myDID); ++ CtReq->un.rft.PortId = be32_to_cpu(vport->fc_myDID); + CtReq->un.rft.fcpReg = 1; + cmpl = lpfc_cmpl_ct_cmd_rft_id; + break; + +- case SLI_CTNS_RFF_ID: +- CtReq->CommandResponse.bits.CmdRsp = +- be16_to_cpu(SLI_CTNS_RFF_ID); +- CtReq->un.rff.PortId = be32_to_cpu(phba->fc_myDID); +- CtReq->un.rff.feature_res = 0; +- CtReq->un.rff.feature_tgt = 0; +- CtReq->un.rff.type_code = FC_FCP_DATA; +- CtReq->un.rff.feature_init = 1; +- cmpl = lpfc_cmpl_ct_cmd_rff_id; +- break; +- + case SLI_CTNS_RNN_ID: + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_RNN_ID); +- CtReq->un.rnn.PortId = be32_to_cpu(phba->fc_myDID); +- memcpy(CtReq->un.rnn.wwnn, &phba->fc_nodename, ++ CtReq->un.rnn.PortId = be32_to_cpu(vport->fc_myDID); ++ memcpy(CtReq->un.rnn.wwnn, &vport->fc_nodename, + sizeof (struct lpfc_name)); + cmpl = lpfc_cmpl_ct_cmd_rnn_id; + break; + ++ case SLI_CTNS_RSPN_ID: ++ CtReq->CommandResponse.bits.CmdRsp = ++ be16_to_cpu(SLI_CTNS_RSPN_ID); ++
CtReq->un.rspn.PortId = be32_to_cpu(vport->fc_myDID); ++ size = sizeof(CtReq->un.rspn.symbname); ++ CtReq->un.rspn.len = ++ lpfc_vport_symbolic_port_name(vport, ++ CtReq->un.rspn.symbname, size); ++ cmpl = lpfc_cmpl_ct_cmd_rspn_id; ++ break; + case SLI_CTNS_RSNN_NN: + CtReq->CommandResponse.bits.CmdRsp = + be16_to_cpu(SLI_CTNS_RSNN_NN); +- memcpy(CtReq->un.rsnn.wwnn, &phba->fc_nodename, ++ memcpy(CtReq->un.rsnn.wwnn, &vport->fc_nodename, + sizeof (struct lpfc_name)); +- lpfc_get_hba_sym_node_name(phba, CtReq->un.rsnn.symbname); +- CtReq->un.rsnn.len = strlen(CtReq->un.rsnn.symbname); ++ size = sizeof(CtReq->un.rsnn.symbname); ++ CtReq->un.rsnn.len = ++ lpfc_vport_symbolic_node_name(vport, ++ CtReq->un.rsnn.symbname, size); + cmpl = lpfc_cmpl_ct_cmd_rsnn_nn; + break; ++ case SLI_CTNS_RFF_ID: ++ vport->fc_flag &= ~FC_RFF_NOT_SUPPORTED; ++ CtReq->CommandResponse.bits.CmdRsp = ++ be16_to_cpu(SLI_CTNS_RFF_ID); ++ CtReq->un.rff.PortId = be32_to_cpu(vport->fc_myDID); ++ CtReq->un.rff.fbits = FC4_FEATURE_INIT; ++ CtReq->un.rff.type_code = FC_FCP_DATA; ++ cmpl = lpfc_cmpl_ct_cmd_rff_id; ++ break; + } + +- if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, rsp_size)) ++ if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, rsp_size, retry)) { + /* On success, The cmpl function will free the buffers */ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "Issue CT cmd: cmd:x%x did:x%x", ++ cmdcode, ndlp->nlp_DID, 0); + return 0; ++ } + ++ rc=6; + lpfc_mbuf_free(phba, bmp->virt, bmp->phys); + ns_cmd_free_bmp: + kfree(bmp); +@@ -712,14 +1077,17 @@ + ns_cmd_free_mp: + kfree(mp); + ns_cmd_exit: ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0266 Issue NameServer Req x%x err %d Data: x%x x%x\n", ++ phba->brd_no, vport->vpi, cmdcode, rc, vport->fc_flag, ++ vport->fc_rscn_id_cnt); + return 1; + } + + static void +-lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq * rspiocb) + { +- struct lpfc_dmabuf *bmp = cmdiocb->context3; + struct lpfc_dmabuf *inp = cmdiocb->context1; + struct lpfc_dmabuf *outp = cmdiocb->context2; + struct lpfc_sli_ct_request *CTrsp = outp->virt; +@@ -727,48 +1095,60 @@ + struct lpfc_nodelist *ndlp; + uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp; + uint16_t fdmi_rsp = CTrsp->CommandResponse.bits.CmdRsp; ++ struct lpfc_vport *vport = cmdiocb->vport; ++ IOCB_t *irsp = &rspiocb->iocb; ++ uint32_t latt; ++ ++ latt = lpfc_els_chk_latt(vport); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT, ++ "FDMI cmpl: status:x%x/x%x latt:%d", ++ irsp->ulpStatus, irsp->un.ulpWord[4], latt); + +- ndlp = lpfc_findnode_did(phba, FDMI_DID); ++ if (latt || irsp->ulpStatus) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0229 FDMI cmd %04x failed, latt = %d " ++ "ulpStatus: x%x, rid x%x\n", ++ phba->brd_no, vport->vpi, ++ be16_to_cpu(fdmi_cmd), latt, irsp->ulpStatus, ++ irsp->un.ulpWord[4]); ++ lpfc_ct_free_iocb(phba, cmdiocb); ++ return; ++ } ++ ++ ndlp = lpfc_findnode_did(vport, FDMI_DID); + if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) { + /* FDMI rsp failed */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0220 FDMI rsp failed Data: x%x\n", +- phba->brd_no, ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0220 FDMI rsp failed Data: x%x\n", ++ phba->brd_no, vport->vpi, + be16_to_cpu(fdmi_cmd)); + } + + switch (be16_to_cpu(fdmi_cmd)) { + case SLI_MGMT_RHBA: +- lpfc_fdmi_cmd(phba, ndlp,
SLI_MGMT_RPA); ++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RPA); + break; + + case SLI_MGMT_RPA: + break; + + case SLI_MGMT_DHBA: +- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DPRT); ++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DPRT); + break; + + case SLI_MGMT_DPRT: +- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RHBA); ++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RHBA); + break; + } +- +- lpfc_free_ct_rsp(phba, outp); +- lpfc_mbuf_free(phba, inp->virt, inp->phys); +- lpfc_mbuf_free(phba, bmp->virt, bmp->phys); +- kfree(inp); +- kfree(bmp); +- spin_lock_irq(phba->host->host_lock); +- lpfc_sli_release_iocbq(phba, cmdiocb); +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_ct_free_iocb(phba, cmdiocb); + return; + } ++ + int +-lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode) ++lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_dmabuf *mp, *bmp; + struct lpfc_sli_ct_request *CtReq; + struct ulp_bde64 *bpl; +@@ -805,12 +1185,10 @@ + INIT_LIST_HEAD(&bmp->list); + + /* FDMI request */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0218 FDMI Request Data: x%x x%x x%x\n", +- phba->brd_no, +- phba->fc_flag, phba->hba_state, cmdcode); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0218 FDMI Request Data: x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, vport->fc_flag, ++ vport->port_state, cmdcode); + + CtReq = (struct lpfc_sli_ct_request *) mp->virt; + +@@ -833,11 +1211,11 @@ + be16_to_cpu(SLI_MGMT_RHBA); + CtReq->CommandResponse.bits.Size = 0; + rh = (REG_HBA *) & CtReq->un.PortID; +- memcpy(&rh->hi.PortName, &phba->fc_sparam.portName, ++ memcpy(&rh->hi.PortName, &vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + /* One entry (port) per adapter */ + rh->rpl.EntryCnt = be32_to_cpu(1); +- memcpy(&rh->rpl.pe, &phba->fc_sparam.portName, ++ memcpy(&rh->rpl.pe, &vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + + /* point to the HBA attribute block */ +@@ -853,7 +1231,7 @@ + ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME); + ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + + sizeof (struct lpfc_name)); +- memcpy(&ae->un.NodeName, &phba->fc_sparam.nodeName, ++ memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName, + sizeof (struct lpfc_name)); + ab->EntryCnt++; + size += FOURBYTES + sizeof (struct lpfc_name); +@@ -991,7 +1369,7 @@ + pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID; + size = sizeof (struct lpfc_name) + FOURBYTES; + memcpy((uint8_t *) & pab->PortName, +- (uint8_t *) & phba->fc_sparam.portName, ++ (uint8_t *) & vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + pab->ab.EntryCnt = 0; + +@@ -1053,7 +1431,7 @@ + ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size); + ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE); + ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4); +- hsp = (struct serv_parm *) & phba->fc_sparam; ++ hsp = (struct serv_parm *) & vport->fc_sparam; + ae->un.MaxFrameSize = + (((uint32_t) hsp->cmn. + bbRcvSizeMsb) << 8) | (uint32_t) hsp->cmn. 
+@@ -1097,7 +1475,7 @@ + CtReq->CommandResponse.bits.Size = 0; + pe = (PORT_ENTRY *) & CtReq->un.PortID; + memcpy((uint8_t *) & pe->PortName, +- (uint8_t *) & phba->fc_sparam.portName, ++ (uint8_t *) & vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); + break; +@@ -1107,22 +1485,22 @@ + CtReq->CommandResponse.bits.Size = 0; + pe = (PORT_ENTRY *) & CtReq->un.PortID; + memcpy((uint8_t *) & pe->PortName, +- (uint8_t *) & phba->fc_sparam.portName, ++ (uint8_t *) & vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name); + break; + } + + bpl = (struct ulp_bde64 *) bmp->virt; +- bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) ); +- bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) ); ++ bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys) ); ++ bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys) ); + bpl->tus.f.bdeFlags = 0; + bpl->tus.f.bdeSize = size; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + + cmpl = lpfc_cmpl_ct_cmd_fdmi; + +- if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP)) ++ if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP, 0)) + return 0; + + lpfc_mbuf_free(phba, bmp->virt, bmp->phys); +@@ -1134,49 +1512,50 @@ + kfree(mp); + fdmi_cmd_exit: + /* Issue FDMI request failed */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0244 Issue FDMI request failed Data: x%x\n", +- phba->brd_no, +- cmdcode); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0244 Issue FDMI request failed Data: x%x\n", ++ phba->brd_no, vport->vpi, cmdcode); + return 1; + } + + void + lpfc_fdmi_tmo(unsigned long ptr) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)ptr; ++ struct lpfc_vport *vport = (struct lpfc_vport *)ptr; ++ struct lpfc_hba *phba = vport->phba; + unsigned long iflag; + +- spin_lock_irqsave(phba->host->host_lock, iflag); +- if (!(phba->work_hba_events & WORKER_FDMI_TMO)) { +- phba->work_hba_events |= WORKER_FDMI_TMO; ++ spin_lock_irqsave(&vport->work_port_lock, iflag); ++ if (!(vport->work_port_events & WORKER_FDMI_TMO)) { ++ vport->work_port_events |= WORKER_FDMI_TMO; ++ spin_unlock_irqrestore(&vport->work_port_lock, iflag); ++ ++ spin_lock_irqsave(&phba->hbalock, iflag); + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + } +- spin_unlock_irqrestore(phba->host->host_lock,iflag); ++ else ++ spin_unlock_irqrestore(&vport->work_port_lock, iflag); + } + + void +-lpfc_fdmi_tmo_handler(struct lpfc_hba *phba) ++lpfc_fdmi_timeout_handler(struct lpfc_vport *vport) + { + struct lpfc_nodelist *ndlp; + +- ndlp = lpfc_findnode_did(phba, FDMI_DID); ++ ndlp = lpfc_findnode_did(vport, FDMI_DID); + if (ndlp) { +- if (init_utsname()->nodename[0] != '\0') { +- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA); +- } else { +- mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60); +- } ++ if (init_utsname()->nodename[0] != '\0') ++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA); ++ else ++ mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60); + } + return; + } + +- + void +-lpfc_decode_firmware_rev(struct lpfc_hba * phba, char *fwrevision, int flag) ++lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag) + { + struct lpfc_sli *psli = &phba->sli; + lpfc_vpd_t *vp = &phba->vpd; +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c 1969-12-31 19:00:00.000000000 -0500 
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,508 @@ ++/******************************************************************* ++ * This file is part of the Emulex Linux Device Driver for * ++ * Fibre Channel Host Bus Adapters. * ++ * Copyright (C) 2007 Emulex. All rights reserved. * ++ * EMULEX and SLI are trademarks of Emulex. * ++ * www.emulex.com * ++ * * ++ * This program is free software; you can redistribute it and/or * ++ * modify it under the terms of version 2 of the GNU General * ++ * Public License as published by the Free Software Foundation. * ++ * This program is distributed in the hope that it will be useful. * ++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * ++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * ++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * ++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * ++ * TO BE LEGALLY INVALID. See the GNU General Public License for * ++ * more details, a copy of which can be found in the file COPYING * ++ * included with this package. * ++ *******************************************************************/ ++ ++#include <linux/blkdev.h> ++#include <linux/delay.h> ++#include <linux/dma-mapping.h> ++#include <linux/idr.h> ++#include <linux/interrupt.h> ++#include <linux/kthread.h> ++#include <linux/pci.h> ++#include <linux/spinlock.h> ++#include <linux/ctype.h> ++#include <linux/version.h> ++ ++#include <scsi/scsi.h> ++#include <scsi/scsi_device.h> ++#include <scsi/scsi_host.h> ++#include <scsi/scsi_transport_fc.h> ++ ++#include "lpfc_hw.h" ++#include "lpfc_sli.h" ++#include "lpfc_disc.h" ++#include "lpfc_scsi.h" ++#include "lpfc.h" ++#include "lpfc_logmsg.h" ++#include "lpfc_crtn.h" ++#include "lpfc_vport.h" ++#include "lpfc_version.h" ++#include "lpfc_vport.h" ++#include "lpfc_debugfs.h" ++ ++#ifdef CONFIG_LPFC_DEBUG_FS ++/* debugfs interface ++ * ++ * To access this interface the user should: ++ * # mkdir /debug ++ * # mount -t debugfs none /debug ++ * ++ * The lpfc debugfs directory hierarchy is: ++ * lpfc/lpfcX/vportY ++ * where X is the lpfc hba unique_id ++ * where Y is the vport VPI on that hba ++ * ++ * Debugging services available per vport: ++ * discovery_trace ++ * This is an ASCII readable file that contains a trace of the last ++ * lpfc_debugfs_max_disc_trc events that happened on a specific vport. ++ * See lpfc_debugfs.h for different categories of ++ * discovery events. To enable the discovery trace, the following ++ * module parameters must be set: ++ * lpfc_debugfs_enable=1 Turns on lpfc debugfs filesystem support ++ * lpfc_debugfs_max_disc_trc=X Where X is the event trace depth for ++ * EACH vport. X MUST also be a power of 2. ++ * lpfc_debugfs_mask_disc_trc=Y Where Y is an event mask as defined in ++ * lpfc_debugfs.h .
++ */ ++static int lpfc_debugfs_enable = 0; ++module_param(lpfc_debugfs_enable, int, 0); ++MODULE_PARM_DESC(lpfc_debugfs_enable, "Enable debugfs services"); ++ ++static int lpfc_debugfs_max_disc_trc = 0; /* This MUST be a power of 2 */ ++module_param(lpfc_debugfs_max_disc_trc, int, 0); ++MODULE_PARM_DESC(lpfc_debugfs_max_disc_trc, ++ "Set debugfs discovery trace depth"); ++ ++static int lpfc_debugfs_mask_disc_trc = 0; ++module_param(lpfc_debugfs_mask_disc_trc, int, 0); ++MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc, ++ "Set debugfs discovery trace mask"); ++ ++#include <linux/debugfs.h> ++ ++/* size of discovery_trace output line */ ++#define LPFC_DISC_TRC_ENTRY_SIZE 80 ++ ++/* nodelist output buffer size */ ++#define LPFC_NODELIST_SIZE 8192 ++#define LPFC_NODELIST_ENTRY_SIZE 120 ++ ++struct lpfc_debug { ++ char *buffer; ++ int len; ++}; ++ ++atomic_t lpfc_debugfs_disc_trc_cnt = ATOMIC_INIT(0); ++unsigned long lpfc_debugfs_start_time = 0L; ++ ++static int ++lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size) ++{ ++ int i, index, len, enable; ++ uint32_t ms; ++ struct lpfc_disc_trc *dtp; ++ char buffer[80]; ++ ++ ++ enable = lpfc_debugfs_enable; ++ lpfc_debugfs_enable = 0; ++ ++ len = 0; ++ index = (atomic_read(&vport->disc_trc_cnt) + 1) & ++ (lpfc_debugfs_max_disc_trc - 1); ++ for (i = index; i < lpfc_debugfs_max_disc_trc; i++) { ++ dtp = vport->disc_trc + i; ++ if (!dtp->fmt) ++ continue; ++ ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time); ++ snprintf(buffer, 80, "%010d:%010d ms:%s\n", ++ dtp->seq_cnt, ms, dtp->fmt); ++ len += snprintf(buf+len, size-len, buffer, ++ dtp->data1, dtp->data2, dtp->data3); ++ } ++ for (i = 0; i < index; i++) { ++ dtp = vport->disc_trc + i; ++ if (!dtp->fmt) ++ continue; ++ ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time); ++ snprintf(buffer, 80, "%010d:%010d ms:%s\n", ++ dtp->seq_cnt, ms, dtp->fmt); ++ len += snprintf(buf+len, size-len, buffer, ++ dtp->data1, dtp->data2, dtp->data3); ++ } ++ ++ lpfc_debugfs_enable = enable; ++ return len; ++} ++ ++static int ++lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size) ++{ ++ int len = 0; ++ int cnt; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_nodelist *ndlp; ++ unsigned char *statep, *name; ++ ++ cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE); ++ ++ spin_lock_irq(shost->host_lock); ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { ++ if (!cnt) { ++ len += snprintf(buf+len, size-len, ++ "Missing Nodelist Entries\n"); ++ break; ++ } ++ cnt--; ++ switch (ndlp->nlp_state) { ++ case NLP_STE_UNUSED_NODE: ++ statep = "UNUSED"; ++ break; ++ case NLP_STE_PLOGI_ISSUE: ++ statep = "PLOGI "; ++ break; ++ case NLP_STE_ADISC_ISSUE: ++ statep = "ADISC "; ++ break; ++ case NLP_STE_REG_LOGIN_ISSUE: ++ statep = "REGLOG"; ++ break; ++ case NLP_STE_PRLI_ISSUE: ++ statep = "PRLI "; ++ break; ++ case NLP_STE_UNMAPPED_NODE: ++ statep = "UNMAP "; ++ break; ++ case NLP_STE_MAPPED_NODE: ++ statep = "MAPPED"; ++ break; ++ case NLP_STE_NPR_NODE: ++ statep = "NPR "; ++ break; ++ default: ++ statep = "UNKNOWN"; ++ } ++ len += snprintf(buf+len, size-len, "%s DID:x%06x ", ++ statep, ndlp->nlp_DID); ++ name = (unsigned char *)&ndlp->nlp_portname; ++ len += snprintf(buf+len, size-len, ++ "WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ", ++ *name, *(name+1), *(name+2), *(name+3), ++ *(name+4), *(name+5), *(name+6), *(name+7)); ++ name = (unsigned char *)&ndlp->nlp_nodename; ++ len += snprintf(buf+len, size-len, ++ "WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ",
++ *name, *(name+1), *(name+2), *(name+3), ++ *(name+4), *(name+5), *(name+6), *(name+7)); ++ len += snprintf(buf+len, size-len, "RPI:%03d flag:x%08x ", ++ ndlp->nlp_rpi, ndlp->nlp_flag); ++ if (!ndlp->nlp_type) ++ len += snprintf(buf+len, size-len, "UNKNOWN_TYPE"); ++ if (ndlp->nlp_type & NLP_FC_NODE) ++ len += snprintf(buf+len, size-len, "FC_NODE "); ++ if (ndlp->nlp_type & NLP_FABRIC) ++ len += snprintf(buf+len, size-len, "FABRIC "); ++ if (ndlp->nlp_type & NLP_FCP_TARGET) ++ len += snprintf(buf+len, size-len, "FCP_TGT sid:%d ", ++ ndlp->nlp_sid); ++ if (ndlp->nlp_type & NLP_FCP_INITIATOR) ++ len += snprintf(buf+len, size-len, "FCP_INITIATOR"); ++ len += snprintf(buf+len, size-len, "\n"); ++ } ++ spin_unlock_irq(shost->host_lock); ++ return len; ++} ++#endif ++ ++ ++inline void ++lpfc_debugfs_disc_trc(struct lpfc_vport *vport, int mask, char *fmt, ++ uint32_t data1, uint32_t data2, uint32_t data3) ++{ ++#ifdef CONFIG_LPFC_DEBUG_FS ++ struct lpfc_disc_trc *dtp; ++ int index; ++ ++ if (!(lpfc_debugfs_mask_disc_trc & mask)) ++ return; ++ ++ if (!lpfc_debugfs_enable || !lpfc_debugfs_max_disc_trc || ++ !vport || !vport->disc_trc) ++ return; ++ ++ index = atomic_inc_return(&vport->disc_trc_cnt) & ++ (lpfc_debugfs_max_disc_trc - 1); ++ dtp = vport->disc_trc + index; ++ dtp->fmt = fmt; ++ dtp->data1 = data1; ++ dtp->data2 = data2; ++ dtp->data3 = data3; ++ dtp->seq_cnt = atomic_inc_return(&lpfc_debugfs_disc_trc_cnt); ++ dtp->jif = jiffies; ++#endif ++ return; ++} ++ ++#ifdef CONFIG_LPFC_DEBUG_FS ++static int ++lpfc_debugfs_disc_trc_open(struct inode *inode, struct file *file) ++{ ++ struct lpfc_vport *vport = inode->i_private; ++ struct lpfc_debug *debug; ++ int size; ++ int rc = -ENOMEM; ++ ++ if (!lpfc_debugfs_max_disc_trc) { ++ rc = -ENOSPC; ++ goto out; ++ } ++ ++ debug = kmalloc(sizeof(*debug), GFP_KERNEL); ++ if (!debug) ++ goto out; ++ ++ /* Round to page boundary */ ++ size = (lpfc_debugfs_max_disc_trc * LPFC_DISC_TRC_ENTRY_SIZE); ++ size = PAGE_ALIGN(size); ++ ++ debug->buffer = kmalloc(size, GFP_KERNEL); ++ if (!debug->buffer) { ++ kfree(debug); ++ goto out; ++ } ++ ++ debug->len = lpfc_debugfs_disc_trc_data(vport, debug->buffer, size); ++ file->private_data = debug; ++ ++ rc = 0; ++out: ++ return rc; ++} ++ ++static int ++lpfc_debugfs_nodelist_open(struct inode *inode, struct file *file) ++{ ++ struct lpfc_vport *vport = inode->i_private; ++ struct lpfc_debug *debug; ++ int rc = -ENOMEM; ++ ++ debug = kmalloc(sizeof(*debug), GFP_KERNEL); ++ if (!debug) ++ goto out; ++ ++ /* Round to page boundary */ ++ debug->buffer = kmalloc(LPFC_NODELIST_SIZE, GFP_KERNEL); ++ if (!debug->buffer) { ++ kfree(debug); ++ goto out; ++ } ++ ++ debug->len = lpfc_debugfs_nodelist_data(vport, debug->buffer, ++ LPFC_NODELIST_SIZE); ++ file->private_data = debug; ++ ++ rc = 0; ++out: ++ return rc; ++} ++ ++static loff_t ++lpfc_debugfs_lseek(struct file *file, loff_t off, int whence) ++{ ++ struct lpfc_debug *debug; ++ loff_t pos = -1; ++ ++ debug = file->private_data; ++ ++ switch (whence) { ++ case 0: ++ pos = off; ++ break; ++ case 1: ++ pos = file->f_pos + off; ++ break; ++ case 2: ++ pos = debug->len - off; ++ } ++ return (pos < 0 || pos > debug->len) ?
-EINVAL : (file->f_pos = pos); ++} ++ ++static ssize_t ++lpfc_debugfs_read(struct file *file, char __user *buf, ++ size_t nbytes, loff_t *ppos) ++{ ++ struct lpfc_debug *debug = file->private_data; ++ return simple_read_from_buffer(buf, nbytes, ppos, debug->buffer, ++ debug->len); ++} ++ ++static int ++lpfc_debugfs_release(struct inode *inode, struct file *file) ++{ ++ struct lpfc_debug *debug = file->private_data; ++ ++ kfree(debug->buffer); ++ kfree(debug); ++ ++ return 0; ++} ++ ++#undef lpfc_debugfs_op_disc_trc ++static struct file_operations lpfc_debugfs_op_disc_trc = { ++ .owner = THIS_MODULE, ++ .open = lpfc_debugfs_disc_trc_open, ++ .llseek = lpfc_debugfs_lseek, ++ .read = lpfc_debugfs_read, ++ .release = lpfc_debugfs_release, ++}; ++ ++#undef lpfc_debugfs_op_nodelist ++static struct file_operations lpfc_debugfs_op_nodelist = { ++ .owner = THIS_MODULE, ++ .open = lpfc_debugfs_nodelist_open, ++ .llseek = lpfc_debugfs_lseek, ++ .read = lpfc_debugfs_read, ++ .release = lpfc_debugfs_release, ++}; ++ ++static struct dentry *lpfc_debugfs_root = NULL; ++static atomic_t lpfc_debugfs_hba_count; ++#endif ++ ++inline void ++lpfc_debugfs_initialize(struct lpfc_vport *vport) ++{ ++#ifdef CONFIG_LPFC_DEBUG_FS ++ struct lpfc_hba *phba = vport->phba; ++ char name[64]; ++ uint32_t num, i; ++ ++ if (!lpfc_debugfs_enable) ++ return; ++ ++ if (lpfc_debugfs_max_disc_trc) { ++ num = lpfc_debugfs_max_disc_trc - 1; ++ if (num & lpfc_debugfs_max_disc_trc) { ++ /* Change to be a power of 2 */ ++ num = lpfc_debugfs_max_disc_trc; ++ i = 0; ++ while (num > 1) { ++ num = num >> 1; ++ i++; ++ } ++ lpfc_debugfs_max_disc_trc = (1 << i); ++ printk(KERN_ERR ++ "lpfc_debugfs_max_disc_trc changed to %d\n", ++ lpfc_debugfs_max_disc_trc); ++ } ++ } ++ ++ if (!lpfc_debugfs_root) { ++ lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL); ++ atomic_set(&lpfc_debugfs_hba_count, 0); ++ if (!lpfc_debugfs_root) ++ goto debug_failed; ++ } ++ ++ snprintf(name, sizeof(name), "lpfc%d", phba->brd_no); ++ if (!phba->hba_debugfs_root) { ++ phba->hba_debugfs_root = ++ debugfs_create_dir(name, lpfc_debugfs_root); ++ if (!phba->hba_debugfs_root) ++ goto debug_failed; ++ atomic_inc(&lpfc_debugfs_hba_count); ++ atomic_set(&phba->debugfs_vport_count, 0); ++ } ++ ++ snprintf(name, sizeof(name), "vport%d", vport->vpi); ++ if (!vport->vport_debugfs_root) { ++ vport->vport_debugfs_root = ++ debugfs_create_dir(name, phba->hba_debugfs_root); ++ if (!vport->vport_debugfs_root) ++ goto debug_failed; ++ atomic_inc(&phba->debugfs_vport_count); ++ } ++ ++ if (!lpfc_debugfs_start_time) ++ lpfc_debugfs_start_time = jiffies; ++ ++ vport->disc_trc = kmalloc( ++ (sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc), ++ GFP_KERNEL); ++ ++ if (!vport->disc_trc) ++ goto debug_failed; ++ memset(vport->disc_trc, 0, ++ (sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc)); ++ ++ snprintf(name, sizeof(name), "discovery_trace"); ++ vport->debug_disc_trc = ++ debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR, ++ vport->vport_debugfs_root, ++ vport, &lpfc_debugfs_op_disc_trc); ++ if (!vport->debug_disc_trc) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "%d:0409 Cannot create debugfs", ++ phba->brd_no); ++ goto debug_failed; ++ } ++ snprintf(name, sizeof(name), "nodelist"); ++ vport->debug_nodelist = ++ debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR, ++ vport->vport_debugfs_root, ++ vport, &lpfc_debugfs_op_nodelist); ++ if (!vport->debug_nodelist) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "%d:0409 Cannot create debugfs", ++ phba->brd_no); 
++ goto debug_failed; ++ } ++debug_failed: ++ return; ++#endif ++} ++ ++ ++inline void ++lpfc_debugfs_terminate(struct lpfc_vport *vport) ++{ ++#ifdef CONFIG_LPFC_DEBUG_FS ++ struct lpfc_hba *phba = vport->phba; ++ ++ if (vport->disc_trc) { ++ kfree(vport->disc_trc); ++ vport->disc_trc = NULL; ++ } ++ if (vport->debug_disc_trc) { ++ debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ ++ vport->debug_disc_trc = NULL; ++ } ++ if (vport->debug_nodelist) { ++ debugfs_remove(vport->debug_nodelist); /* nodelist */ ++ vport->debug_nodelist = NULL; ++ } ++ if (vport->vport_debugfs_root) { ++ debugfs_remove(vport->vport_debugfs_root); /* vportX */ ++ vport->vport_debugfs_root = NULL; ++ atomic_dec(&phba->debugfs_vport_count); ++ } ++ if (atomic_read(&phba->debugfs_vport_count) == 0) { ++ debugfs_remove(vport->phba->hba_debugfs_root); /* lpfcX */ ++ vport->phba->hba_debugfs_root = NULL; ++ atomic_dec(&lpfc_debugfs_hba_count); ++ if (atomic_read(&lpfc_debugfs_hba_count) == 0) { ++ debugfs_remove(lpfc_debugfs_root); /* lpfc */ ++ lpfc_debugfs_root = NULL; ++ } ++ } ++#endif ++} ++ ++ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_debugfs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,50 @@ ++/******************************************************************* ++ * This file is part of the Emulex Linux Device Driver for * ++ * Fibre Channel Host Bus Adapters. * ++ * Copyright (C) 2007 Emulex. All rights reserved. * ++ * EMULEX and SLI are trademarks of Emulex. * ++ * www.emulex.com * ++ * * ++ * This program is free software; you can redistribute it and/or * ++ * modify it under the terms of version 2 of the GNU General * ++ * Public License as published by the Free Software Foundation. * ++ * This program is distributed in the hope that it will be useful. * ++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * ++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * ++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * ++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * ++ * TO BE LEGALLY INVALID. See the GNU General Public License for * ++ * more details, a copy of which can be found in the file COPYING * ++ * included with this package. 
* ++ *******************************************************************/ ++ ++#ifndef _H_LPFC_DEBUG_FS ++#define _H_LPFC_DEBUG_FS ++ ++#ifdef CONFIG_LPFC_DEBUG_FS ++struct lpfc_disc_trc { ++ char *fmt; ++ uint32_t data1; ++ uint32_t data2; ++ uint32_t data3; ++ uint32_t seq_cnt; ++ unsigned long jif; ++}; ++#endif ++ ++/* Mask for discovery_trace */ ++#define LPFC_DISC_TRC_ELS_CMD 0x1 /* Trace ELS commands */ ++#define LPFC_DISC_TRC_ELS_RSP 0x2 /* Trace ELS response */ ++#define LPFC_DISC_TRC_ELS_UNSOL 0x4 /* Trace ELS rcv'ed */ ++#define LPFC_DISC_TRC_ELS_ALL 0x7 /* Trace ELS */ ++#define LPFC_DISC_TRC_MBOX_VPORT 0x8 /* Trace vport MBOXs */ ++#define LPFC_DISC_TRC_MBOX 0x10 /* Trace other MBOXs */ ++#define LPFC_DISC_TRC_MBOX_ALL 0x18 /* Trace all MBOXs */ ++#define LPFC_DISC_TRC_CT 0x20 /* Trace disc CT requests */ ++#define LPFC_DISC_TRC_DSM 0x40 /* Trace DSM events */ ++#define LPFC_DISC_TRC_RPORT 0x80 /* Trace rport events */ ++#define LPFC_DISC_TRC_NODE 0x100 /* Trace ndlp state changes */ ++ ++#define LPFC_DISC_TRC_DISCOVERY 0xef /* common mask for general ++ * discovery */ ++#endif /* H_LPFC_DEBUG_FS */ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_disc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -36,21 +36,23 @@ + LPFC_EVT_WARM_START, + LPFC_EVT_KILL, + LPFC_EVT_ELS_RETRY, ++ LPFC_EVT_DEV_LOSS_DELAY, ++ LPFC_EVT_DEV_LOSS, + }; + + /* structure used to queue event to the discovery tasklet */ + struct lpfc_work_evt { + struct list_head evt_listp; +- void * evt_arg1; +- void * evt_arg2; ++ void *evt_arg1; ++ void *evt_arg2; + enum lpfc_work_type evt; + }; + + + struct lpfc_nodelist { + struct list_head nlp_listp; +- struct lpfc_name nlp_portname; /* port name */ +- struct lpfc_name nlp_nodename; /* node name */ ++ struct lpfc_name nlp_portname; ++ struct lpfc_name nlp_nodename; + uint32_t nlp_flag; /* entry flags */ + uint32_t nlp_DID; /* FC D_ID of entry */ + uint32_t nlp_last_elscmd; /* Last ELS cmd sent */ +@@ -75,8 +77,9 @@ + struct timer_list nlp_delayfunc; /* Used for delayed ELS cmds */ + struct fc_rport *rport; /* Corresponding FC transport + port structure */ +- struct lpfc_hba *nlp_phba; ++ struct lpfc_vport *vport; + struct lpfc_work_evt els_retry_evt; ++ struct lpfc_work_evt dev_loss_evt; + unsigned long last_ramp_up_time; /* jiffy of last ramp up */ + unsigned long last_q_full_time; /* jiffy of last queue full */ + struct kref kref; +@@ -98,7 +101,9 @@ + ACC */ + #define NLP_NPR_ADISC 0x2000000 /* Issue ADISC when dq'ed from + NPR list */ ++#define NLP_RM_DFLT_RPI 0x4000000 /* need to remove leftover dflt RPI */ + #define NLP_NODEV_REMOVE 0x8000000 /* Defer removal till discovery ends */ ++#define NLP_TARGET_REMOVE 0x10000000 /* Target remove in process */ + + /* There are 4 different double linked lists nodelist entries can reside on. 
+ * The Port Login (PLOGI) list and Address Discovery (ADISC) list are used +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_els.c 2007-12-21 15:36:12.000000000 -0500 +@@ -35,38 +35,38 @@ + #include "lpfc.h" + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" ++#include "lpfc_debugfs.h" + + static int lpfc_els_retry(struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *); ++static void lpfc_cmpl_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *, ++ struct lpfc_iocbq *); ++ + static int lpfc_max_els_tries = 3; + +-static int +-lpfc_els_chk_latt(struct lpfc_hba * phba) ++int ++lpfc_els_chk_latt(struct lpfc_vport *vport) + { +- struct lpfc_sli *psli; +- LPFC_MBOXQ_t *mbox; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + uint32_t ha_copy; +- int rc; +- +- psli = &phba->sli; + +- if ((phba->hba_state >= LPFC_HBA_READY) || +- (phba->hba_state == LPFC_LINK_DOWN)) ++ if (vport->port_state >= LPFC_VPORT_READY || ++ phba->link_state == LPFC_LINK_DOWN) + return 0; + + /* Read the HBA Host Attention Register */ +- spin_lock_irq(phba->host->host_lock); + ha_copy = readl(phba->HAregaddr); +- spin_unlock_irq(phba->host->host_lock); + + if (!(ha_copy & HA_LATT)) + return 0; + + /* Pending Link Event during Discovery */ +- lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY, +- "%d:0237 Pending Link Event during " ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0237 Pending Link Event during " + "Discovery: State x%x\n", +- phba->brd_no, phba->hba_state); ++ phba->brd_no, vport->vpi, phba->pport->port_state); + + /* CLEAR_LA should re-enable link attention events and + * we should then imediately take a LATT event. The +@@ -74,48 +74,34 @@ + * will cleanup any left over in-progress discovery + * events. 
+ */ +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_ABORT_DISCOVERY; +- spin_unlock_irq(phba->host->host_lock); +- +- if (phba->hba_state != LPFC_CLEAR_LA) { +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { +- phba->hba_state = LPFC_CLEAR_LA; +- lpfc_clear_la(phba, mbox); +- mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox (phba, mbox, +- (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free(mbox, phba->mbox_mem_pool); +- phba->hba_state = LPFC_HBA_ERROR; +- } +- } +- } ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_ABORT_DISCOVERY; ++ spin_unlock_irq(shost->host_lock); + +- return 1; ++ if (phba->link_state != LPFC_CLEAR_LA) ++ lpfc_issue_clear_la(phba, vport); + ++ return 1; + } + + static struct lpfc_iocbq * +-lpfc_prep_els_iocb(struct lpfc_hba * phba, uint8_t expectRsp, +- uint16_t cmdSize, uint8_t retry, struct lpfc_nodelist * ndlp, +- uint32_t did, uint32_t elscmd) ++lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp, ++ uint16_t cmdSize, uint8_t retry, ++ struct lpfc_nodelist *ndlp, uint32_t did, ++ uint32_t elscmd) + { +- struct lpfc_sli_ring *pring; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *elsiocb; + struct lpfc_dmabuf *pcmd, *prsp, *pbuflist; + struct ulp_bde64 *bpl; + IOCB_t *icmd; + +- pring = &phba->sli.ring[LPFC_ELS_RING]; + +- if (phba->hba_state < LPFC_LINK_UP) ++ if (!lpfc_is_link_up(phba)) + return NULL; + + /* Allocate buffer for command iocb */ +- spin_lock_irq(phba->host->host_lock); + elsiocb = lpfc_sli_get_iocbq(phba); +- spin_unlock_irq(phba->host->host_lock); + + if (elsiocb == NULL) + return NULL; +@@ -123,14 +109,12 @@ + + /* fill in BDEs for command */ + /* Allocate buffer for command payload */ +- if (((pcmd = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL)) == 0) || ++ if (((pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL)) == 0) || + ((pcmd->virt = lpfc_mbuf_alloc(phba, + MEM_PRI, &(pcmd->phys))) == 0)) { + kfree(pcmd); + +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, elsiocb); +- spin_unlock_irq(phba->host->host_lock); + return NULL; + } + +@@ -138,7 +122,7 @@ + + /* Allocate buffer for response payload */ + if (expectRsp) { +- prsp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); ++ prsp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (prsp) + prsp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, + &prsp->phys); +@@ -146,9 +130,7 @@ + kfree(prsp); + lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys); + kfree(pcmd); +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, elsiocb); +- spin_unlock_irq(phba->host->host_lock); + return NULL; + } + INIT_LIST_HEAD(&prsp->list); +@@ -157,14 +139,12 @@ + } + + /* Allocate buffer for Buffer ptr list */ +- pbuflist = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL); ++ pbuflist = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + if (pbuflist) + pbuflist->virt = lpfc_mbuf_alloc(phba, MEM_PRI, + &pbuflist->phys); + if (pbuflist == 0 || pbuflist->virt == 0) { +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, elsiocb); +- spin_unlock_irq(phba->host->host_lock); + lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys); + lpfc_mbuf_free(phba, prsp->virt, prsp->phys); + kfree(pcmd); +@@ -178,20 +158,28 @@ + icmd->un.elsreq64.bdl.addrHigh = putPaddrHigh(pbuflist->phys); + icmd->un.elsreq64.bdl.addrLow = putPaddrLow(pbuflist->phys); + icmd->un.elsreq64.bdl.bdeFlags = BUFF_TYPE_BDL; +- if (expectRsp) { +- icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof 
(struct ulp_bde64)); + icmd->un.elsreq64.remoteID = did; /* DID */ ++ if (expectRsp) { ++ icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64)); + icmd->ulpCommand = CMD_ELS_REQUEST64_CR; + icmd->ulpTimeout = phba->fc_ratov * 2; + } else { +- icmd->un.elsreq64.bdl.bdeSize = sizeof (struct ulp_bde64); ++ icmd->un.elsreq64.bdl.bdeSize = sizeof(struct ulp_bde64); + icmd->ulpCommand = CMD_XMIT_ELS_RSP64_CX; + } +- + icmd->ulpBdeCount = 1; + icmd->ulpLe = 1; + icmd->ulpClass = CLASS3; + ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ icmd->un.elsreq64.myID = vport->fc_myDID; ++ ++ /* For ELS_REQUEST64_CR, use the VPI by default */ ++ icmd->ulpContext = vport->vpi; ++ icmd->ulpCt_h = 0; ++ icmd->ulpCt_l = 1; ++ } ++ + bpl = (struct ulp_bde64 *) pbuflist->virt; + bpl->addrLow = le32_to_cpu(putPaddrLow(pcmd->phys)); + bpl->addrHigh = le32_to_cpu(putPaddrHigh(pcmd->phys)); +@@ -209,10 +197,12 @@ + } + + /* Save for completion so we can release these resources */ ++ if (elscmd != ELS_CMD_LS_RJT) + elsiocb->context1 = lpfc_nlp_get(ndlp); + elsiocb->context2 = pcmd; + elsiocb->context3 = pbuflist; + elsiocb->retry = retry; ++ elsiocb->vport = vport; + elsiocb->drvrTimeout = (phba->fc_ratov << 1) + LPFC_DRVR_TIMEOUT; + + if (prsp) { +@@ -222,16 +212,16 @@ + if (expectRsp) { + /* Xmit ELS command to remote NPORT */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0116 Xmit ELS command x%x to remote " +- "NPORT x%x I/O tag: x%x, HBA state: x%x\n", +- phba->brd_no, elscmd, +- did, elsiocb->iotag, phba->hba_state); ++ "%d (%d):0116 Xmit ELS command x%x to remote " ++ "NPORT x%x I/O tag: x%x, port state: x%x\n", ++ phba->brd_no, vport->vpi, elscmd, did, ++ elsiocb->iotag, vport->port_state); + } else { + /* Xmit ELS response to remote NPORT */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0117 Xmit ELS response x%x to remote " ++ "%d (%d):0117 Xmit ELS response x%x to remote " + "NPORT x%x I/O tag: x%x, size: x%x\n", +- phba->brd_no, elscmd, ++ phba->brd_no, vport->vpi, elscmd, + ndlp->nlp_DID, elsiocb->iotag, cmdSize); + } + +@@ -240,16 +230,79 @@ + + + static int +-lpfc_cmpl_els_flogi_fabric(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, +- struct serv_parm *sp, IOCB_t *irsp) ++lpfc_issue_fabric_reglogin(struct lpfc_vport *vport) + { ++ struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + struct lpfc_dmabuf *mp; ++ struct lpfc_nodelist *ndlp; ++ struct serv_parm *sp; + int rc; + +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_FABRIC; +- spin_unlock_irq(phba->host->host_lock); ++ sp = &phba->fc_fabparam; ++ ndlp = lpfc_findnode_did(vport, Fabric_DID); ++ if (!ndlp) ++ goto fail; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!mbox) ++ goto fail; ++ ++ vport->port_state = LPFC_FABRIC_CFG_LINK; ++ lpfc_config_link(phba, mbox); ++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ mbox->vport = vport; ++ ++ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); ++ if (rc == MBX_NOT_FINISHED) ++ goto fail_free_mbox; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!mbox) ++ goto fail; ++ rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox, ++ 0); ++ if (rc) ++ goto fail_free_mbox; ++ ++ mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login; ++ mbox->vport = vport; ++ mbox->context2 = lpfc_nlp_get(ndlp); ++ ++ rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); ++ if (rc == MBX_NOT_FINISHED) ++ goto fail_issue_reg_login; ++ ++ return 0; ++ ++fail_issue_reg_login: ++ lpfc_nlp_put(ndlp); ++ 
mp = (struct lpfc_dmabuf *) mbox->context1; ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++fail_free_mbox: ++ mempool_free(mbox, phba->mbox_mem_pool); ++ ++fail: ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0249 Cannot issue Register Fabric login\n", ++ phba->brd_no, vport->vpi); ++ return -ENXIO; ++} ++ ++static int ++lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ struct serv_parm *sp, IOCB_t *irsp) ++{ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_nodelist *np; ++ struct lpfc_nodelist *next_np; ++ ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_FABRIC; ++ spin_unlock_irq(shost->host_lock); + + phba->fc_edtov = be32_to_cpu(sp->cmn.e_d_tov); + if (sp->cmn.edtovResolution) /* E_D_TOV ticks are in nanoseconds */ +@@ -258,20 +311,20 @@ + phba->fc_ratov = (be32_to_cpu(sp->cmn.w2.r_a_tov) + 999) / 1000; + + if (phba->fc_topology == TOPOLOGY_LOOP) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_PUBLIC_LOOP; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_PUBLIC_LOOP; ++ spin_unlock_irq(shost->host_lock); + } else { + /* + * If we are a N-port connected to a Fabric, fixup sparam's so + * logins to devices on remote loops work. + */ +- phba->fc_sparam.cmn.altBbCredit = 1; ++ vport->fc_sparam.cmn.altBbCredit = 1; + } + +- phba->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; ++ vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; + memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name)); +- memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name)); ++ memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof(struct lpfc_name)); + ndlp->nlp_class_sup = 0; + if (sp->cls1.classValid) + ndlp->nlp_class_sup |= FC_COS_CLASS1; +@@ -285,68 +338,85 @@ + sp->cmn.bbRcvSizeLsb; + memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm)); + +- mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (!mbox) +- goto fail; +- +- phba->hba_state = LPFC_FABRIC_CFG_LINK; +- lpfc_config_link(phba, mbox); +- mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +- +- rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); +- if (rc == MBX_NOT_FINISHED) +- goto fail_free_mbox; ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ if (sp->cmn.response_multiple_NPort) { ++ lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT, ++ "%d:1816 FLOGI NPIV supported, " ++ "response data 0x%x\n", ++ phba->brd_no, ++ sp->cmn.response_multiple_NPort); ++ phba->link_flag |= LS_NPIV_FAB_SUPPORTED; + +- mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (!mbox) +- goto fail; ++ } else { ++ /* Because we asked f/w for NPIV it still expects us ++ to call reg_vnpid at least for the physical host */ ++ lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT, ++ "%d:1817 Fabric does not support NPIV " ++ "- configuring single port mode.\n", ++ phba->brd_no); ++ phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED; ++ } ++ } + +- if (lpfc_reg_login(phba, Fabric_DID, (uint8_t *) sp, mbox, 0)) +- goto fail_free_mbox; ++ if ((vport->fc_prevDID != vport->fc_myDID) && ++ !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) { + +- mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login; +- mbox->context2 = lpfc_nlp_get(ndlp); ++ /* If our NportID changed, we need to ensure all ++ * remaining NPORTs get unreg_login'ed.
++ */ ++ list_for_each_entry_safe(np, next_np, ++ &vport->fc_nodes, nlp_listp) { ++ if ((np->nlp_state != NLP_STE_NPR_NODE) || ++ !(np->nlp_flag & NLP_NPR_ADISC)) ++ continue; ++ spin_lock_irq(shost->host_lock); ++ np->nlp_flag &= ~NLP_NPR_ADISC; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_unreg_rpi(vport, np); ++ } ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ lpfc_mbx_unreg_vpi(vport); ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; ++ } ++ } + +- rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB); +- if (rc == MBX_NOT_FINISHED) +- goto fail_issue_reg_login; ++ ndlp->nlp_sid = irsp->un.ulpWord[4] & Mask_DID; ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE); + ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED && ++ vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) { ++ lpfc_register_new_vport(phba, vport, ndlp); ++ return 0; ++ } ++ lpfc_issue_fabric_reglogin(vport); + return 0; +- +- fail_issue_reg_login: +- lpfc_nlp_put(ndlp); +- mp = (struct lpfc_dmabuf *) mbox->context1; +- lpfc_mbuf_free(phba, mp->virt, mp->phys); +- kfree(mp); +- fail_free_mbox: +- mempool_free(mbox, phba->mbox_mem_pool); +- fail: +- return -ENXIO; + } + + /* + * We FLOGIed into an NPort, initiate pt2pt protocol + */ + static int +-lpfc_cmpl_els_flogi_nport(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, ++lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct serv_parm *sp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + int rc; + +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); ++ spin_unlock_irq(shost->host_lock); + + phba->fc_edtov = FF_DEF_EDTOV; + phba->fc_ratov = FF_DEF_RATOV; +- rc = memcmp(&phba->fc_portname, &sp->portName, +- sizeof(struct lpfc_name)); ++ rc = memcmp(&vport->fc_portname, &sp->portName, ++ sizeof(vport->fc_portname)); + if (rc >= 0) { + /* This side will initiate the PLOGI */ +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_PT2PT_PLOGI; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_PT2PT_PLOGI; ++ spin_unlock_irq(shost->host_lock); + + /* + * N_Port ID cannot be 0, set our to LocalID the other +@@ -355,7 +425,7 @@ + + /* not equal */ + if (rc) +- phba->fc_myDID = PT2PT_LocalID; ++ vport->fc_myDID = PT2PT_LocalID; + + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mbox) +@@ -364,6 +434,7 @@ + lpfc_config_link(phba, mbox); + + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ mbox->vport = vport; + rc = lpfc_sli_issue_mbox(phba, mbox, + MBX_NOWAIT | MBX_STOP_IOCB); + if (rc == MBX_NOT_FINISHED) { +@@ -372,7 +443,7 @@ + } + lpfc_nlp_put(ndlp); + +- ndlp = lpfc_findnode_did(phba, PT2PT_RemoteID); ++ ndlp = lpfc_findnode_did(vport, PT2PT_RemoteID); + if (!ndlp) { + /* + * Cannot find existing Fabric ndlp, so allocate a +@@ -382,28 +453,30 @@ + if (!ndlp) + goto fail; + +- lpfc_nlp_init(phba, ndlp, PT2PT_RemoteID); ++ lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID); + } + + memcpy(&ndlp->nlp_portname, &sp->portName, + sizeof(struct lpfc_name)); + memcpy(&ndlp->nlp_nodename, &sp->nodeName, + sizeof(struct lpfc_name)); +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; ++ 
spin_unlock_irq(shost->host_lock);
+ } else {
+ /* This side will wait for the PLOGI */
+ lpfc_nlp_put(ndlp);
+ }
+
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag |= FC_PT2PT;
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag |= FC_PT2PT;
++ spin_unlock_irq(shost->host_lock);
+
+ /* Start discovery - this should just do CLEAR_LA */
+- lpfc_disc_start(phba);
++ lpfc_disc_start(vport);
+ return 0;
+- fail:
++fail:
+ return -ENXIO;
+ }
+
+@@ -411,6 +484,8 @@
+ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ struct lpfc_iocbq *rspiocb)
+ {
++ struct lpfc_vport *vport = cmdiocb->vport;
++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ IOCB_t *irsp = &rspiocb->iocb;
+ struct lpfc_nodelist *ndlp = cmdiocb->context1;
+ struct lpfc_dmabuf *pcmd = cmdiocb->context2, *prsp;
+@@ -418,21 +493,25 @@
+ int rc;
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
++ if (lpfc_els_chk_latt(vport)) {
+ lpfc_nlp_put(ndlp);
+ goto out;
+ }
+
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++ "FLOGI cmpl: status:x%x/x%x state:x%x",
++ irsp->ulpStatus, irsp->un.ulpWord[4],
++ vport->port_state);
++
+ if (irsp->ulpStatus) {
+ /* Check for retry */
+- if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+- /* ELS command is being retried */
++ if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+ goto out;
+- }
++
+ /* FLOGI failed, so there is no fabric */
+- spin_lock_irq(phba->host->host_lock);
+- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+- spin_unlock_irq(phba->host->host_lock);
++ spin_lock_irq(shost->host_lock);
++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++ spin_unlock_irq(shost->host_lock);
+
+ /* If private loop, then allow max outstanding els to be
+ * LPFC_MAX_DISC_THREADS (32). Scanning in the case of no
+@@ -443,11 +522,10 @@
+ }
+
+ /* FLOGI failure */
+- lpfc_printf_log(phba,
+- KERN_INFO,
+- LOG_ELS,
+- "%d:0100 FLOGI failure Data: x%x x%x x%x\n",
+- phba->brd_no,
++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++ "%d (%d):0100 FLOGI failure Data: x%x x%x "
++ "x%x\n",
++ phba->brd_no, vport->vpi,
+ irsp->ulpStatus, irsp->un.ulpWord[4],
+ irsp->ulpTimeout);
+ goto flogifail;
+@@ -463,21 +541,21 @@
+
+ /* FLOGI completes successfully */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0101 FLOGI completes sucessfully "
++ "%d (%d):0101 FLOGI completes successfully "
+ "Data: x%x x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ irsp->un.ulpWord[4], sp->cmn.e_d_tov,
+ sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution);
+
+- if (phba->hba_state == LPFC_FLOGI) {
++ if (vport->port_state == LPFC_FLOGI) {
+ /*
+ * If Common Service Parameters indicate Nport
+ * we are point to point, if Fport we are Fabric.
+ */ + if (sp->cmn.fPort) +- rc = lpfc_cmpl_els_flogi_fabric(phba, ndlp, sp, irsp); ++ rc = lpfc_cmpl_els_flogi_fabric(vport, ndlp, sp, irsp); + else +- rc = lpfc_cmpl_els_flogi_nport(phba, ndlp, sp); ++ rc = lpfc_cmpl_els_flogi_nport(vport, ndlp, sp); + + if (!rc) + goto out; +@@ -486,14 +564,12 @@ + flogifail: + lpfc_nlp_put(ndlp); + +- if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT || +- (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED && +- irsp->un.ulpWord[4] != IOERR_SLI_DOWN)) { ++ if (!lpfc_error_lost_link(irsp)) { + /* FLOGI failed, so just use loop map to make discovery list */ +- lpfc_disc_list_loopmap(phba); ++ lpfc_disc_list_loopmap(vport); + + /* Start discovery */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); + } + + out: +@@ -501,9 +577,10 @@ + } + + static int +-lpfc_issue_els_flogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, ++lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint8_t retry) + { ++ struct lpfc_hba *phba = vport->phba; + struct serv_parm *sp; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; +@@ -515,9 +592,10 @@ + + pring = &phba->sli.ring[LPFC_ELS_RING]; + +- cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm)); +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_FLOGI); ++ + if (!elsiocb) + return 1; + +@@ -526,8 +604,8 @@ + + /* For FLOGI request, remainder of payload is service parameters */ + *((uint32_t *) (pcmd)) = ELS_CMD_FLOGI; +- pcmd += sizeof (uint32_t); +- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); ++ pcmd += sizeof(uint32_t); ++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); + sp = (struct serv_parm *) pcmd; + + /* Setup CSPs accordingly for Fabric */ +@@ -541,16 +619,32 @@ + if (sp->cmn.fcphHigh < FC_PH3) + sp->cmn.fcphHigh = FC_PH3; + ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ sp->cmn.request_multiple_Nport = 1; ++ ++ /* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */ ++ icmd->ulpCt_h = 1; ++ icmd->ulpCt_l = 0; ++ } ++ ++ if (phba->fc_topology != TOPOLOGY_LOOP) { ++ icmd->un.elsreq64.myID = 0; ++ icmd->un.elsreq64.fl = 1; ++ } ++ + tmo = phba->fc_ratov; + phba->fc_ratov = LPFC_DISC_FLOGI_TMO; +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); + phba->fc_ratov = tmo; + + phba->fc_stat.elsXmitFLOGI++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi; +- spin_lock_irq(phba->host->host_lock); +- rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue FLOGI: opt:x%x", ++ phba->sli3_options, 0, 0); ++ ++ rc = lpfc_issue_fabric_iocb(phba, elsiocb); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -559,7 +653,7 @@ + } + + int +-lpfc_els_abort_flogi(struct lpfc_hba * phba) ++lpfc_els_abort_flogi(struct lpfc_hba *phba) + { + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *iocb, *next_iocb; +@@ -577,73 +671,99 @@ + * Check the txcmplq for an iocb that matches the nport the driver is + * searching for. 
+ */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) { + icmd = &iocb->iocb; +- if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) { ++ if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR && ++ icmd->un.elsreq64.bdl.ulpIoTag32) { + ndlp = (struct lpfc_nodelist *)(iocb->context1); +- if (ndlp && (ndlp->nlp_DID == Fabric_DID)) ++ if (ndlp && (ndlp->nlp_DID == Fabric_DID)) { + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + } +- spin_unlock_irq(phba->host->host_lock); ++ } ++ spin_unlock_irq(&phba->hbalock); + + return 0; + } + + int +-lpfc_initial_flogi(struct lpfc_hba *phba) ++lpfc_initial_flogi(struct lpfc_vport *vport) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp; + + /* First look for the Fabric ndlp */ +- ndlp = lpfc_findnode_did(phba, Fabric_DID); ++ ndlp = lpfc_findnode_did(vport, Fabric_DID); + if (!ndlp) { + /* Cannot find existing Fabric ndlp, so allocate a new one */ + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + return 0; +- lpfc_nlp_init(phba, ndlp, Fabric_DID); ++ lpfc_nlp_init(vport, ndlp, Fabric_DID); + } else { +- lpfc_dequeue_node(phba, ndlp); ++ lpfc_dequeue_node(vport, ndlp); + } +- if (lpfc_issue_els_flogi(phba, ndlp, 0)) { ++ if (lpfc_issue_els_flogi(vport, ndlp, 0)) { + lpfc_nlp_put(ndlp); + } + return 1; + } + ++int ++lpfc_initial_fdisc(struct lpfc_vport *vport) ++{ ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_nodelist *ndlp; ++ ++ /* First look for the Fabric ndlp */ ++ ndlp = lpfc_findnode_did(vport, Fabric_DID); ++ if (!ndlp) { ++ /* Cannot find existing Fabric ndlp, so allocate a new one */ ++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); ++ if (!ndlp) ++ return 0; ++ lpfc_nlp_init(vport, ndlp, Fabric_DID); ++ } else { ++ lpfc_dequeue_node(vport, ndlp); ++ } ++ if (lpfc_issue_els_fdisc(vport, ndlp, 0)) { ++ lpfc_nlp_put(ndlp); ++ } ++ return 1; ++} + static void +-lpfc_more_plogi(struct lpfc_hba * phba) ++lpfc_more_plogi(struct lpfc_vport *vport) + { + int sentplogi; ++ struct lpfc_hba *phba = vport->phba; + +- if (phba->num_disc_nodes) +- phba->num_disc_nodes--; ++ if (vport->num_disc_nodes) ++ vport->num_disc_nodes--; + + /* Continue discovery with PLOGIs to go */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0232 Continue discovery with %d PLOGIs to go " ++ "%d (%d):0232 Continue discovery with %d PLOGIs to go " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->num_disc_nodes, phba->fc_plogi_cnt, +- phba->fc_flag, phba->hba_state); ++ phba->brd_no, vport->vpi, vport->num_disc_nodes, ++ vport->fc_plogi_cnt, vport->fc_flag, vport->port_state); + + /* Check to see if there are more PLOGIs to be sent */ +- if (phba->fc_flag & FC_NLP_MORE) { +- /* go thru NPR list and issue any remaining ELS PLOGIs */ +- sentplogi = lpfc_els_disc_plogi(phba); +- } ++ if (vport->fc_flag & FC_NLP_MORE) ++ /* go thru NPR nodes and issue any remaining ELS PLOGIs */ ++ sentplogi = lpfc_els_disc_plogi(vport); ++ + return; + } + + static struct lpfc_nodelist * +-lpfc_plogi_confirm_nport(struct lpfc_hba *phba, struct lpfc_dmabuf *prsp, ++lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, + struct lpfc_nodelist *ndlp) + { ++ struct lpfc_vport *vport = ndlp->vport; + struct lpfc_nodelist *new_ndlp; +- uint32_t *lp; + struct serv_parm *sp; +- uint8_t name[sizeof (struct lpfc_name)]; ++ uint8_t name[sizeof(struct lpfc_name)]; + uint32_t rc; + + /* Fabric nodes can have the same WWPN so we don't bother searching +@@ 
-652,50 +772,51 @@ + if (ndlp->nlp_type & NLP_FABRIC) + return ndlp; + +- lp = (uint32_t *) prsp->virt; +- sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); ++ sp = (struct serv_parm *) ((uint8_t *) prsp + sizeof(uint32_t)); + memset(name, 0, sizeof(struct lpfc_name)); + + /* Now we find out if the NPort we are logging into, matches the WWPN + * we have for that ndlp. If not, we have some work to do. + */ +- new_ndlp = lpfc_findnode_wwpn(phba, &sp->portName); ++ new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName); + + if (new_ndlp == ndlp) + return ndlp; + + if (!new_ndlp) { +- rc = +- memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)); ++ rc = memcmp(&ndlp->nlp_portname, name, ++ sizeof(struct lpfc_name)); + if (!rc) + return ndlp; + new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); + if (!new_ndlp) + return ndlp; + +- lpfc_nlp_init(phba, new_ndlp, ndlp->nlp_DID); ++ lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID); + } + +- lpfc_unreg_rpi(phba, new_ndlp); ++ lpfc_unreg_rpi(vport, new_ndlp); + new_ndlp->nlp_DID = ndlp->nlp_DID; + new_ndlp->nlp_prev_state = ndlp->nlp_prev_state; +- lpfc_nlp_set_state(phba, new_ndlp, ndlp->nlp_state); ++ lpfc_nlp_set_state(vport, new_ndlp, ndlp->nlp_state); + +- /* Move this back to NPR list */ ++ /* Move this back to NPR state */ + if (memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)) == 0) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + else { +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + ndlp->nlp_DID = 0; /* Two ndlps cannot have the same did */ +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } + return new_ndlp; + } + + static void +-lpfc_cmpl_els_plogi(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; + struct lpfc_nodelist *ndlp; + struct lpfc_dmabuf *prsp; +@@ -705,32 +826,43 @@ + cmdiocb->context_un.rsp_iocb = rspiocb; + + irsp = &rspiocb->iocb; +- ndlp = lpfc_findnode_did(phba, irsp->un.elsreq64.remoteID); +- if (!ndlp) ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "PLOGI cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ irsp->un.elsreq64.remoteID); ++ ++ ndlp = lpfc_findnode_did(vport, irsp->un.elsreq64.remoteID); ++ if (!ndlp) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0136 PLOGI completes to NPort x%x " ++ "with no ndlp. Data: x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, irsp->un.elsreq64.remoteID, ++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpIoTag); + goto out; ++ } + + /* Since ndlp can be freed in the disc state machine, note if this node + * is being used during discovery. 
+ */ ++ spin_lock_irq(shost->host_lock); + disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC); +- spin_lock_irq(phba->host->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + rc = 0; + + /* PLOGI completes to NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0102 PLOGI completes to NPort x%x " ++ "%d (%d):0102 PLOGI completes to NPort x%x " + "Data: x%x x%x x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, +- irsp->un.ulpWord[4], irsp->ulpTimeout, disc, +- phba->num_disc_nodes); ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ irsp->ulpTimeout, disc, vport->num_disc_nodes); + + /* Check to see if link went down during discovery */ +- if (lpfc_els_chk_latt(phba)) { +- spin_lock_irq(phba->host->host_lock); ++ if (lpfc_els_chk_latt(vport)) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + goto out; + } + +@@ -743,22 +875,28 @@ + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { + /* ELS command is being retried */ + if (disc) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + } + goto out; + } + + /* PLOGI failed */ ++ if (ndlp->nlp_DID == NameServer_DID) { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0250 Nameserver login error: " ++ "0x%x / 0x%x\n", ++ phba->brd_no, vport->vpi, ++ irsp->ulpStatus, irsp->un.ulpWord[4]); ++ } ++ + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ +- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || +- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || +- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { ++ if (lpfc_error_lost_link(irsp)) { + rc = NLP_STE_FREED_NODE; + } else { +- rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_PLOGI); + } + } else { +@@ -766,33 +904,33 @@ + prsp = list_entry(((struct lpfc_dmabuf *) + cmdiocb->context2)->list.next, + struct lpfc_dmabuf, list); +- ndlp = lpfc_plogi_confirm_nport(phba, prsp, ndlp); +- rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ ndlp = lpfc_plogi_confirm_nport(phba, prsp->virt, ndlp); ++ rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_PLOGI); + } + +- if (disc && phba->num_disc_nodes) { ++ if (disc && vport->num_disc_nodes) { + /* Check to see if there are more PLOGIs to be sent */ +- lpfc_more_plogi(phba); ++ lpfc_more_plogi(vport); + +- if (phba->num_disc_nodes == 0) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_NDISC_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ if (vport->num_disc_nodes == 0) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NDISC_ACTIVE; ++ spin_unlock_irq(shost->host_lock); + +- lpfc_can_disctmo(phba); +- if (phba->fc_flag & FC_RSCN_MODE) { ++ lpfc_can_disctmo(vport); ++ if (vport->fc_flag & FC_RSCN_MODE) { + /* + * Check to see if more RSCNs came in while + * we were processing this one. 
+ */ +- if ((phba->fc_rscn_id_cnt == 0) && +- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_RSCN_MODE; +- spin_unlock_irq(phba->host->host_lock); ++ if ((vport->fc_rscn_id_cnt == 0) && ++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); + } else { +- lpfc_els_handle_rscn(phba); ++ lpfc_els_handle_rscn(vport); + } + } + } +@@ -804,8 +942,9 @@ + } + + int +-lpfc_issue_els_plogi(struct lpfc_hba * phba, uint32_t did, uint8_t retry) ++lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry) + { ++ struct lpfc_hba *phba = vport->phba; + struct serv_parm *sp; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; +@@ -813,12 +952,13 @@ + struct lpfc_sli *psli; + uint8_t *pcmd; + uint16_t cmdsize; ++ int ret; + + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ + +- cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm)); +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, NULL, did, ++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, NULL, did, + ELS_CMD_PLOGI); + if (!elsiocb) + return 1; +@@ -828,8 +968,8 @@ + + /* For PLOGI request, remainder of payload is service parameters */ + *((uint32_t *) (pcmd)) = ELS_CMD_PLOGI; +- pcmd += sizeof (uint32_t); +- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); ++ pcmd += sizeof(uint32_t); ++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); + sp = (struct serv_parm *) pcmd; + + if (sp->cmn.fcphLow < FC_PH_4_3) +@@ -838,22 +978,27 @@ + if (sp->cmn.fcphHigh < FC_PH3) + sp->cmn.fcphHigh = FC_PH3; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue PLOGI: did:x%x", ++ did, 0, 0); ++ + phba->fc_stat.elsXmitPLOGI++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi; +- spin_lock_irq(phba->host->host_lock); +- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { +- spin_unlock_irq(phba->host->host_lock); ++ ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); ++ ++ if (ret == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + return 0; + } + + static void +-lpfc_cmpl_els_prli(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; + struct lpfc_sli *psli; + struct lpfc_nodelist *ndlp; +@@ -864,21 +1009,26 @@ + + irsp = &(rspiocb->iocb); + ndlp = (struct lpfc_nodelist *) cmdiocb->context1; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_PRLI_SND; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "PRLI cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ ndlp->nlp_DID); + + /* PRLI completes to NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0103 PRLI completes to NPort x%x " ++ "%d (%d):0103 PRLI completes to NPort x%x " + "Data: x%x x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, +- irsp->un.ulpWord[4], irsp->ulpTimeout, +- phba->num_disc_nodes); ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, ++ 
vport->num_disc_nodes); + +- phba->fc_prli_sent--; ++ vport->fc_prli_sent--; + /* Check to see if link went down during discovery */ +- if (lpfc_els_chk_latt(phba)) ++ if (lpfc_els_chk_latt(vport)) + goto out; + + if (irsp->ulpStatus) { +@@ -889,18 +1039,16 @@ + } + /* PRLI failed */ + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ +- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || +- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || +- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { ++ if (lpfc_error_lost_link(irsp)) { + goto out; + } else { +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_PRLI); + } + } else { + /* Good status, call state machine */ +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI); ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, ++ NLP_EVT_CMPL_PRLI); + } + + out: +@@ -909,9 +1057,11 @@ + } + + int +-lpfc_issue_els_prli(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, ++lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint8_t retry) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + PRLI *npr; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; +@@ -923,8 +1073,8 @@ + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ + +- cmdsize = (sizeof (uint32_t) + sizeof (PRLI)); +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ cmdsize = (sizeof(uint32_t) + sizeof(PRLI)); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_PRLI); + if (!elsiocb) + return 1; +@@ -933,9 +1083,9 @@ + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + /* For PRLI request, remainder of payload is service parameters */ +- memset(pcmd, 0, (sizeof (PRLI) + sizeof (uint32_t))); ++ memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t))); + *((uint32_t *) (pcmd)) = ELS_CMD_PRLI; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* For PRLI, remainder of payload is PRLI parameter page */ + npr = (PRLI *) pcmd; +@@ -955,81 +1105,88 @@ + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue PRLI: did:x%x", ++ ndlp->nlp_DID, 0, 0); ++ + phba->fc_stat.elsXmitPRLI++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_prli; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_PRLI_SND; ++ spin_unlock_irq(shost->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_PRLI_SND; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); +- phba->fc_prli_sent++; ++ vport->fc_prli_sent++; + return 0; + } + + static void +-lpfc_more_adisc(struct lpfc_hba * phba) ++lpfc_more_adisc(struct lpfc_vport *vport) + { + int sentadisc; ++ struct lpfc_hba *phba = vport->phba; + +- if (phba->num_disc_nodes) +- phba->num_disc_nodes--; ++ if (vport->num_disc_nodes) ++ vport->num_disc_nodes--; + + /* Continue discovery with ADISCs to go */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0210 Continue discovery with %d ADISCs to go " ++ "%d (%d):0210 Continue discovery with %d ADISCs to go " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->num_disc_nodes, phba->fc_adisc_cnt, +- phba->fc_flag, phba->hba_state); ++ 
phba->brd_no, vport->vpi, vport->num_disc_nodes, ++ vport->fc_adisc_cnt, vport->fc_flag, vport->port_state); + + /* Check to see if there are more ADISCs to be sent */ +- if (phba->fc_flag & FC_NLP_MORE) { +- lpfc_set_disctmo(phba); +- +- /* go thru NPR list and issue any remaining ELS ADISCs */ +- sentadisc = lpfc_els_disc_adisc(phba); ++ if (vport->fc_flag & FC_NLP_MORE) { ++ lpfc_set_disctmo(vport); ++ /* go thru NPR nodes and issue any remaining ELS ADISCs */ ++ sentadisc = lpfc_els_disc_adisc(vport); + } + return; + } + + static void +-lpfc_rscn_disc(struct lpfc_hba * phba) ++lpfc_rscn_disc(struct lpfc_vport *vport) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ lpfc_can_disctmo(vport); ++ + /* RSCN discovery */ +- /* go thru NPR list and issue ELS PLOGIs */ +- if (phba->fc_npr_cnt) { +- if (lpfc_els_disc_plogi(phba)) ++ /* go thru NPR nodes and issue ELS PLOGIs */ ++ if (vport->fc_npr_cnt) ++ if (lpfc_els_disc_plogi(vport)) + return; +- } +- if (phba->fc_flag & FC_RSCN_MODE) { ++ ++ if (vport->fc_flag & FC_RSCN_MODE) { + /* Check to see if more RSCNs came in while we were + * processing this one. + */ +- if ((phba->fc_rscn_id_cnt == 0) && +- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_RSCN_MODE; +- spin_unlock_irq(phba->host->host_lock); ++ if ((vport->fc_rscn_id_cnt == 0) && ++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); + } else { +- lpfc_els_handle_rscn(phba); ++ lpfc_els_handle_rscn(vport); + } + } + } + + static void +-lpfc_cmpl_els_adisc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; +- struct lpfc_sli *psli; + struct lpfc_nodelist *ndlp; +- LPFC_MBOXQ_t *mbox; +- int disc, rc; +- +- psli = &phba->sli; ++ int disc; + + /* we pass cmdiocb to state machine which needs rspiocb as well */ + cmdiocb->context_un.rsp_iocb = rspiocb; +@@ -1037,27 +1194,32 @@ + irsp = &(rspiocb->iocb); + ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "ADISC cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ ndlp->nlp_DID); ++ + /* Since ndlp can be freed in the disc state machine, note if this node + * is being used during discovery. 
+ */ ++ spin_lock_irq(shost->host_lock); + disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC); +- spin_lock_irq(phba->host->host_lock); + ndlp->nlp_flag &= ~(NLP_ADISC_SND | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + + /* ADISC completes to NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0104 ADISC completes to NPort x%x " ++ "%d (%d):0104 ADISC completes to NPort x%x " + "Data: x%x x%x x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, +- irsp->un.ulpWord[4], irsp->ulpTimeout, disc, +- phba->num_disc_nodes); ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, ++ disc, vport->num_disc_nodes); + + /* Check to see if link went down during discovery */ +- if (lpfc_els_chk_latt(phba)) { +- spin_lock_irq(phba->host->host_lock); ++ if (lpfc_els_chk_latt(vport)) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + goto out; + } + +@@ -1066,67 +1228,68 @@ + if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { + /* ELS command is being retried */ + if (disc) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; +- spin_unlock_irq(phba->host->host_lock); +- lpfc_set_disctmo(phba); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_set_disctmo(vport); + } + goto out; + } + /* ADISC failed */ + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ +- if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || +- ((irsp->un.ulpWord[4] != IOERR_SLI_ABORTED) && +- (irsp->un.ulpWord[4] != IOERR_LINK_DOWN) && +- (irsp->un.ulpWord[4] != IOERR_SLI_DOWN))) { +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ if (!lpfc_error_lost_link(irsp)) { ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_ADISC); + } + } else { + /* Good status, call state machine */ +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_ADISC); + } + +- if (disc && phba->num_disc_nodes) { ++ if (disc && vport->num_disc_nodes) { + /* Check to see if there are more ADISCs to be sent */ +- lpfc_more_adisc(phba); ++ lpfc_more_adisc(vport); + + /* Check to see if we are done with ADISC authentication */ +- if (phba->num_disc_nodes == 0) { +- lpfc_can_disctmo(phba); +- /* If we get here, there is nothing left to wait for */ +- if ((phba->hba_state < LPFC_HBA_READY) && +- (phba->hba_state != LPFC_CLEAR_LA)) { +- /* Link up discovery */ +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, +- GFP_KERNEL))) { +- phba->hba_state = LPFC_CLEAR_LA; +- lpfc_clear_la(phba, mbox); +- mbox->mbox_cmpl = +- lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox +- (phba, mbox, +- (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free(mbox, +- phba->mbox_mem_pool); +- lpfc_disc_flush_list(phba); +- psli->ring[(psli->extra_ring)]. +- flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->fcp_ring)]. +- flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->next_ring)]. +- flag &= +- ~LPFC_STOP_IOCB_EVENT; +- phba->hba_state = +- LPFC_HBA_READY; ++ if (vport->num_disc_nodes == 0) { ++ /* If we get here, there is nothing left to ADISC */ ++ /* ++ * For NPIV, cmpl_reg_vpi will set port_state to READY, ++ * and continue discovery. 
++ */ ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ !(vport->fc_flag & FC_RSCN_MODE)) { ++ lpfc_issue_reg_vpi(phba, vport); ++ goto out; ++ } ++ /* ++ * For SLI2, we need to set port_state to READY ++ * and continue discovery. ++ */ ++ if (vport->port_state < LPFC_VPORT_READY) { ++ /* If we get here, there is nothing to ADISC */ ++ if (vport->port_type == LPFC_PHYSICAL_PORT) ++ lpfc_issue_clear_la(phba, vport); ++ ++ if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { ++ vport->num_disc_nodes = 0; ++ /* go thru NPR list, issue ELS PLOGIs */ ++ if (vport->fc_npr_cnt) ++ lpfc_els_disc_plogi(vport); ++ ++ if (!vport->num_disc_nodes) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ++ ~FC_NDISC_ACTIVE; ++ spin_unlock_irq( ++ shost->host_lock); ++ lpfc_can_disctmo(vport); + } + } ++ vport->port_state = LPFC_VPORT_READY; + } else { +- lpfc_rscn_disc(phba); ++ lpfc_rscn_disc(vport); + } + } + } +@@ -1136,22 +1299,21 @@ + } + + int +-lpfc_issue_els_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, ++lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint8_t retry) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + ADISC *ap; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; +- struct lpfc_sli_ring *pring; +- struct lpfc_sli *psli; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + uint8_t *pcmd; + uint16_t cmdsize; + +- psli = &phba->sli; +- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ +- +- cmdsize = (sizeof (uint32_t) + sizeof (ADISC)); +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ cmdsize = (sizeof(uint32_t) + sizeof(ADISC)); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_ADISC); + if (!elsiocb) + return 1; +@@ -1161,81 +1323,97 @@ + + /* For ADISC request, remainder of payload is service parameters */ + *((uint32_t *) (pcmd)) = ELS_CMD_ADISC; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* Fill in ADISC payload */ + ap = (ADISC *) pcmd; + ap->hardAL_PA = phba->fc_pref_ALPA; +- memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name)); +- memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); +- ap->DID = be32_to_cpu(phba->fc_myDID); ++ memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name)); ++ memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); ++ ap->DID = be32_to_cpu(vport->fc_myDID); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue ADISC: did:x%x", ++ ndlp->nlp_DID, 0, 0); + + phba->fc_stat.elsXmitADISC++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_ADISC_SND; ++ spin_unlock_irq(shost->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_ADISC_SND; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + return 0; + } + + static void +-lpfc_cmpl_els_logo(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ struct lpfc_vport *vport = ndlp->vport; ++ struct 
Scsi_Host *shost = lpfc_shost_from_vport(vport); + IOCB_t *irsp; + struct lpfc_sli *psli; +- struct lpfc_nodelist *ndlp; + + psli = &phba->sli; + /* we pass cmdiocb to state machine which needs rspiocb as well */ + cmdiocb->context_un.rsp_iocb = rspiocb; + + irsp = &(rspiocb->iocb); +- ndlp = (struct lpfc_nodelist *) cmdiocb->context1; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_LOGO_SND; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "LOGO cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ ndlp->nlp_DID); + + /* LOGO completes to NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0105 LOGO completes to NPort x%x " ++ "%d (%d):0105 LOGO completes to NPort x%x " + "Data: x%x x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus, +- irsp->un.ulpWord[4], irsp->ulpTimeout, +- phba->num_disc_nodes); ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout, ++ vport->num_disc_nodes); + + /* Check to see if link went down during discovery */ +- if (lpfc_els_chk_latt(phba)) ++ if (lpfc_els_chk_latt(vport)) ++ goto out; ++ ++ if (ndlp->nlp_flag & NLP_TARGET_REMOVE) { ++ /* NLP_EVT_DEVICE_RM should unregister the RPI ++ * which should abort all outstanding IOs. ++ */ ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, ++ NLP_EVT_DEVICE_RM); + goto out; ++ } + + if (irsp->ulpStatus) { + /* Check for retry */ +- if (lpfc_els_retry(phba, cmdiocb, rspiocb)) { ++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) + /* ELS command is being retried */ + goto out; +- } + /* LOGO failed */ + /* Do not call DSM for lpfc_els_abort'ed ELS cmds */ +- if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || +- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || +- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) { ++ if (lpfc_error_lost_link(irsp)) + goto out; +- } else { +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, ++ else ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, + NLP_EVT_CMPL_LOGO); +- } + } else { + /* Good status, call state machine. + * This will unregister the rpi if needed. 
+ */ +- lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO); ++ lpfc_disc_state_machine(vport, ndlp, cmdiocb, ++ NLP_EVT_CMPL_LOGO); + } + + out: +@@ -1244,21 +1422,24 @@ + } + + int +-lpfc_issue_els_logo(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, ++lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + uint8_t retry) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; + struct lpfc_sli_ring *pring; + struct lpfc_sli *psli; + uint8_t *pcmd; + uint16_t cmdsize; ++ int rc; + + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; + +- cmdsize = (2 * sizeof (uint32_t)) + sizeof (struct lpfc_name); +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ cmdsize = (2 * sizeof(uint32_t)) + sizeof(struct lpfc_name); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_LOGO); + if (!elsiocb) + return 1; +@@ -1266,53 +1447,66 @@ + icmd = &elsiocb->iocb; + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_LOGO; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* Fill in LOGO payload */ +- *((uint32_t *) (pcmd)) = be32_to_cpu(phba->fc_myDID); +- pcmd += sizeof (uint32_t); +- memcpy(pcmd, &phba->fc_portname, sizeof (struct lpfc_name)); ++ *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID); ++ pcmd += sizeof(uint32_t); ++ memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name)); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue LOGO: did:x%x", ++ ndlp->nlp_DID, 0, 0); + + phba->fc_stat.elsXmitLOGO++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_logo; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_LOGO_SND; +- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { ++ spin_unlock_irq(shost->host_lock); ++ rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); ++ ++ if (rc == IOCB_ERROR) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_LOGO_SND; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + return 0; + } + + static void +-lpfc_cmpl_els_cmd(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; + IOCB_t *irsp; + + irsp = &rspiocb->iocb; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "ELS cmd cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ irsp->un.elsreq64.remoteID); ++ + /* ELS cmd tag completes */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_ELS, +- "%d:0106 ELS cmd tag x%x completes Data: x%x x%x x%x\n", +- phba->brd_no, ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0106 ELS cmd tag x%x completes Data: x%x x%x " ++ "x%x\n", ++ phba->brd_no, vport->vpi, + irsp->ulpIoTag, irsp->ulpStatus, + irsp->un.ulpWord[4], irsp->ulpTimeout); + + /* Check to see if link went down during discovery */ +- lpfc_els_chk_latt(phba); ++ lpfc_els_chk_latt(vport); + lpfc_els_free_iocb(phba, cmdiocb); + return; + } + + int +-lpfc_issue_els_scr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry) ++lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) + { ++ struct lpfc_hba *phba = vport->phba; + 
IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; + struct lpfc_sli_ring *pring; +@@ -1323,15 +1517,16 @@ + + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ +- cmdsize = (sizeof (uint32_t) + sizeof (SCR)); ++ cmdsize = (sizeof(uint32_t) + sizeof(SCR)); + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + return 1; + +- lpfc_nlp_init(phba, ndlp, nportid); ++ lpfc_nlp_init(vport, ndlp, nportid); + +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_SCR); ++ + if (!elsiocb) { + lpfc_nlp_put(ndlp); + return 1; +@@ -1341,29 +1536,31 @@ + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = ELS_CMD_SCR; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* For SCR, remainder of payload is SCR parameter page */ +- memset(pcmd, 0, sizeof (SCR)); ++ memset(pcmd, 0, sizeof(SCR)); + ((SCR *) pcmd)->Function = SCR_FUNC_FULL; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue SCR: did:x%x", ++ ndlp->nlp_DID, 0, 0); ++ + phba->fc_stat.elsXmitSCR++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; +- spin_lock_irq(phba->host->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { +- spin_unlock_irq(phba->host->host_lock); + lpfc_nlp_put(ndlp); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + lpfc_nlp_put(ndlp); + return 0; + } + + static int +-lpfc_issue_els_farpr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry) ++lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) + { ++ struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd; + struct lpfc_iocbq *elsiocb; + struct lpfc_sli_ring *pring; +@@ -1377,13 +1574,14 @@ + + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ +- cmdsize = (sizeof (uint32_t) + sizeof (FARP)); ++ cmdsize = (sizeof(uint32_t) + sizeof(FARP)); + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + return 1; +- lpfc_nlp_init(phba, ndlp, nportid); + +- elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp, ++ lpfc_nlp_init(vport, ndlp, nportid); ++ ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, + ndlp->nlp_DID, ELS_CMD_RNID); + if (!elsiocb) { + lpfc_nlp_put(ndlp); +@@ -1394,44 +1592,71 @@ + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = ELS_CMD_FARPR; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* Fill in FARPR payload */ + fp = (FARP *) (pcmd); +- memset(fp, 0, sizeof (FARP)); ++ memset(fp, 0, sizeof(FARP)); + lp = (uint32_t *) pcmd; + *lp++ = be32_to_cpu(nportid); +- *lp++ = be32_to_cpu(phba->fc_myDID); ++ *lp++ = be32_to_cpu(vport->fc_myDID); + fp->Rflags = 0; + fp->Mflags = (FARP_MATCH_PORT | FARP_MATCH_NODE); + +- memcpy(&fp->RportName, &phba->fc_portname, sizeof (struct lpfc_name)); +- memcpy(&fp->RnodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); +- if ((ondlp = lpfc_findnode_did(phba, nportid))) { ++ memcpy(&fp->RportName, &vport->fc_portname, sizeof(struct lpfc_name)); ++ memcpy(&fp->RnodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); ++ ondlp = lpfc_findnode_did(vport, nportid); ++ if (ondlp) { + memcpy(&fp->OportName, &ondlp->nlp_portname, +- sizeof (struct lpfc_name)); ++ sizeof(struct lpfc_name)); + memcpy(&fp->OnodeName, &ondlp->nlp_nodename, +- sizeof (struct lpfc_name)); ++ sizeof(struct lpfc_name)); + } + ++ 
lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue FARPR: did:x%x", ++ ndlp->nlp_DID, 0, 0); ++ + phba->fc_stat.elsXmitFARPR++; + elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd; +- spin_lock_irq(phba->host->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { +- spin_unlock_irq(phba->host->host_lock); + lpfc_nlp_put(ndlp); + lpfc_els_free_iocb(phba, elsiocb); + return 1; + } +- spin_unlock_irq(phba->host->host_lock); + lpfc_nlp_put(ndlp); + return 0; + } + ++static void ++lpfc_end_rscn(struct lpfc_vport *vport) ++{ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (vport->fc_flag & FC_RSCN_MODE) { ++ /* ++ * Check to see if more RSCNs came in while we were ++ * processing this one. ++ */ ++ if (vport->fc_rscn_id_cnt || ++ (vport->fc_flag & FC_RSCN_DISCOVERY) != 0) ++ lpfc_els_handle_rscn(vport); ++ else { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); ++ } ++ } ++} ++ + void +-lpfc_cancel_retry_delay_tmo(struct lpfc_hba *phba, struct lpfc_nodelist * nlp) ++lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ spin_lock_irq(shost->host_lock); + nlp->nlp_flag &= ~NLP_DELAY_TMO; ++ spin_unlock_irq(shost->host_lock); + del_timer_sync(&nlp->nlp_delayfunc); + nlp->nlp_last_elscmd = 0; + +@@ -1439,30 +1664,21 @@ + list_del_init(&nlp->els_retry_evt.evt_listp); + + if (nlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + nlp->nlp_flag &= ~NLP_NPR_2B_DISC; +- if (phba->num_disc_nodes) { ++ spin_unlock_irq(shost->host_lock); ++ if (vport->num_disc_nodes) { + /* Check to see if there are more + * PLOGIs to be sent + */ +- lpfc_more_plogi(phba); ++ lpfc_more_plogi(vport); + +- if (phba->num_disc_nodes == 0) { +- phba->fc_flag &= ~FC_NDISC_ACTIVE; +- lpfc_can_disctmo(phba); +- if (phba->fc_flag & FC_RSCN_MODE) { +- /* +- * Check to see if more RSCNs +- * came in while we were +- * processing this one. 
+- */ +- if((phba->fc_rscn_id_cnt==0) && +- !(phba->fc_flag & FC_RSCN_DISCOVERY)) { +- phba->fc_flag &= ~FC_RSCN_MODE; +- } +- else { +- lpfc_els_handle_rscn(phba); +- } +- } ++ if (vport->num_disc_nodes == 0) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NDISC_ACTIVE; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); ++ lpfc_end_rscn(vport); + } + } + } +@@ -1472,18 +1688,19 @@ + void + lpfc_els_retry_delay(unsigned long ptr) + { +- struct lpfc_nodelist *ndlp; +- struct lpfc_hba *phba; +- unsigned long iflag; +- struct lpfc_work_evt *evtp; ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr; ++ struct lpfc_vport *vport = ndlp->vport; ++ struct lpfc_hba *phba = vport->phba; ++ unsigned long flags; ++ struct lpfc_work_evt *evtp = &ndlp->els_retry_evt; + +- ndlp = (struct lpfc_nodelist *)ptr; +- phba = ndlp->nlp_phba; ++ ndlp = (struct lpfc_nodelist *) ptr; ++ phba = ndlp->vport->phba; + evtp = &ndlp->els_retry_evt; + +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, flags); + if (!list_empty(&evtp->evt_listp)) { +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, flags); + return; + } + +@@ -1491,33 +1708,31 @@ + evtp->evt = LPFC_EVT_ELS_RETRY; + list_add_tail(&evtp->evt_listp, &phba->work_list); + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); + +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, flags); + return; + } + + void + lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp) + { +- struct lpfc_hba *phba; +- uint32_t cmd; +- uint32_t did; +- uint8_t retry; ++ struct lpfc_vport *vport = ndlp->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ uint32_t cmd, did, retry; + +- phba = ndlp->nlp_phba; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + did = ndlp->nlp_DID; + cmd = ndlp->nlp_last_elscmd; + ndlp->nlp_last_elscmd = 0; + + if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + return; + } + + ndlp->nlp_flag &= ~NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + /* + * If a discovery event readded nlp_delayfunc after timer + * firing and before processing the timer, cancel the +@@ -1528,57 +1743,54 @@ + + switch (cmd) { + case ELS_CMD_FLOGI: +- lpfc_issue_els_flogi(phba, ndlp, retry); ++ lpfc_issue_els_flogi(vport, ndlp, retry); + break; + case ELS_CMD_PLOGI: +- if(!lpfc_issue_els_plogi(phba, ndlp->nlp_DID, retry)) { ++ if (!lpfc_issue_els_plogi(vport, ndlp->nlp_DID, retry)) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + } + break; + case ELS_CMD_ADISC: +- if (!lpfc_issue_els_adisc(phba, ndlp, retry)) { ++ if (!lpfc_issue_els_adisc(vport, ndlp, retry)) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); + } + break; + case ELS_CMD_PRLI: +- if (!lpfc_issue_els_prli(phba, ndlp, retry)) { ++ if (!lpfc_issue_els_prli(vport, ndlp, retry)) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); + } + break; + case ELS_CMD_LOGO: +- if (!lpfc_issue_els_logo(phba, ndlp, retry)) { ++ if 
(!lpfc_issue_els_logo(vport, ndlp, retry)) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } + break; ++ case ELS_CMD_FDISC: ++ lpfc_issue_els_fdisc(vport, ndlp, retry); ++ break; + } + return; + } + + static int +-lpfc_els_retry(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- IOCB_t *irsp; +- struct lpfc_dmabuf *pcmd; +- struct lpfc_nodelist *ndlp; ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ IOCB_t *irsp = &rspiocb->iocb; ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + uint32_t *elscmd; + struct ls_rjt stat; +- int retry, maxretry; +- int delay; +- uint32_t cmd; ++ int retry = 0, maxretry = lpfc_max_els_tries, delay = 0; ++ uint32_t cmd = 0; + uint32_t did; + +- retry = 0; +- delay = 0; +- maxretry = lpfc_max_els_tries; +- irsp = &rspiocb->iocb; +- ndlp = (struct lpfc_nodelist *) cmdiocb->context1; +- pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; +- cmd = 0; + + /* Note: context2 may be 0 for internal driver abort + * of delays ELS command. +@@ -1594,11 +1806,15 @@ + else { + /* We should only hit this case for retrying PLOGI */ + did = irsp->un.elsreq64.remoteID; +- ndlp = lpfc_findnode_did(phba, did); ++ ndlp = lpfc_findnode_did(vport, did); + if (!ndlp && (cmd != ELS_CMD_PLOGI)) + return 1; + } + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Retry ELS: wd7:x%x wd4:x%x did:x%x", ++ *(((uint32_t *) irsp) + 7), irsp->un.ulpWord[4], ndlp->nlp_DID); ++ + switch (irsp->ulpStatus) { + case IOSTAT_FCP_RSP_ERROR: + case IOSTAT_REMOTE_STOP: +@@ -1607,25 +1823,37 @@ + case IOSTAT_LOCAL_REJECT: + switch ((irsp->un.ulpWord[4] & 0xff)) { + case IOERR_LOOP_OPEN_FAILURE: +- if (cmd == ELS_CMD_PLOGI) { +- if (cmdiocb->retry == 0) { +- delay = 1; +- } +- } ++ if (cmd == ELS_CMD_PLOGI && cmdiocb->retry == 0) ++ delay = 1000; + retry = 1; + break; + +- case IOERR_SEQUENCE_TIMEOUT: ++ case IOERR_ILLEGAL_COMMAND: ++ if ((phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) && ++ (cmd == ELS_CMD_FDISC)) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0124 FDISC failed (3/6) retrying...\n", ++ phba->brd_no, vport->vpi); ++ lpfc_mbx_unreg_vpi(vport); + retry = 1; ++ /* Always retry for this case */ ++ cmdiocb->retry = 0; ++ } + break; + + case IOERR_NO_RESOURCES: +- if (cmd == ELS_CMD_PLOGI) { +- delay = 1; +- } ++ retry = 1; ++ if (cmdiocb->retry > 100) ++ delay = 100; ++ maxretry = 250; ++ break; ++ ++ case IOERR_ILLEGAL_FRAME: ++ delay = 100; + retry = 1; + break; + ++ case IOERR_SEQUENCE_TIMEOUT: + case IOERR_INVALID_RPI: + retry = 1; + break; +@@ -1655,27 +1883,57 @@ + if (stat.un.b.lsRjtRsnCodeExp == + LSEXP_CMD_IN_PROGRESS) { + if (cmd == ELS_CMD_PLOGI) { +- delay = 1; ++ delay = 1000; + maxretry = 48; + } + retry = 1; + break; + } + if (cmd == ELS_CMD_PLOGI) { +- delay = 1; ++ delay = 1000; + maxretry = lpfc_max_els_tries + 1; + retry = 1; + break; + } ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ (cmd == ELS_CMD_FDISC) && ++ (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0125 FDISC Failed (x%x)." 
++ " Fabric out of resources\n", ++ phba->brd_no, vport->vpi, stat.un.lsRjtError); ++ lpfc_vport_set_state(vport, ++ FC_VPORT_NO_FABRIC_RSCS); ++ } + break; + + case LSRJT_LOGICAL_BSY: +- if (cmd == ELS_CMD_PLOGI) { +- delay = 1; ++ if ((cmd == ELS_CMD_PLOGI) || ++ (cmd == ELS_CMD_PRLI)) { ++ delay = 1000; + maxretry = 48; ++ } else if (cmd == ELS_CMD_FDISC) { ++ /* Always retry for this case */ ++ cmdiocb->retry = 0; + } + retry = 1; + break; ++ ++ case LSRJT_LOGICAL_ERR: ++ case LSRJT_PROTOCOL_ERR: ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ (cmd == ELS_CMD_FDISC) && ++ ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) || ++ (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID)) ++ ) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0123 FDISC Failed (x%x)." ++ " Fabric Detected Bad WWN\n", ++ phba->brd_no, vport->vpi, stat.un.lsRjtError); ++ lpfc_vport_set_state(vport, ++ FC_VPORT_FABRIC_REJ_WWN); ++ } ++ break; + } + break; + +@@ -1695,21 +1953,27 @@ + retry = 0; + } + ++ if ((vport->load_flag & FC_UNLOADING) != 0) ++ retry = 0; ++ + if (retry) { + + /* Retry ELS command to remote NPORT */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0107 Retry ELS command x%x to remote " ++ "%d (%d):0107 Retry ELS command x%x to remote " + "NPORT x%x Data: x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + cmd, did, cmdiocb->retry, delay); + +- if ((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) { ++ if (((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) && ++ ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) || ++ ((irsp->un.ulpWord[4] & 0xff) != IOERR_NO_RESOURCES))) { ++ /* Don't reset timer for no resources */ ++ + /* If discovery / RSCN timer is running, reset it */ +- if (timer_pending(&phba->fc_disctmo) || +- (phba->fc_flag & FC_RSCN_MODE)) { +- lpfc_set_disctmo(phba); +- } ++ if (timer_pending(&vport->fc_disctmo) || ++ (vport->fc_flag & FC_RSCN_MODE)) ++ lpfc_set_disctmo(vport); + } + + phba->fc_stat.elsXmitRetry++; +@@ -1717,50 +1981,62 @@ + phba->fc_stat.elsDelayRetry++; + ndlp->nlp_retry = cmdiocb->retry; + +- mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); ++ /* delay is specified in milliseconds */ ++ mod_timer(&ndlp->nlp_delayfunc, ++ jiffies + msecs_to_jiffies(delay)); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; ++ spin_unlock_irq(shost->host_lock); + + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ if (cmd == ELS_CMD_PRLI) ++ lpfc_nlp_set_state(vport, ndlp, ++ NLP_STE_REG_LOGIN_ISSUE); ++ else ++ lpfc_nlp_set_state(vport, ndlp, ++ NLP_STE_NPR_NODE); + ndlp->nlp_last_elscmd = cmd; + + return 1; + } + switch (cmd) { + case ELS_CMD_FLOGI: +- lpfc_issue_els_flogi(phba, ndlp, cmdiocb->retry); ++ lpfc_issue_els_flogi(vport, ndlp, cmdiocb->retry); ++ return 1; ++ case ELS_CMD_FDISC: ++ lpfc_issue_els_fdisc(vport, ndlp, cmdiocb->retry); + return 1; + case ELS_CMD_PLOGI: + if (ndlp) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, ++ lpfc_nlp_set_state(vport, ndlp, + NLP_STE_PLOGI_ISSUE); + } +- lpfc_issue_els_plogi(phba, did, cmdiocb->retry); ++ lpfc_issue_els_plogi(vport, did, cmdiocb->retry); + return 1; + case ELS_CMD_ADISC: + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); +- lpfc_issue_els_adisc(phba, ndlp, cmdiocb->retry); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); ++ lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry); + return 1; + case ELS_CMD_PRLI: + ndlp->nlp_prev_state = ndlp->nlp_state; 
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); +- lpfc_issue_els_prli(phba, ndlp, cmdiocb->retry); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); ++ lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry); + return 1; + case ELS_CMD_LOGO: + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- lpfc_issue_els_logo(phba, ndlp, cmdiocb->retry); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ lpfc_issue_els_logo(vport, ndlp, cmdiocb->retry); + return 1; + } + } + + /* No retry ELS command to remote NPORT */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0108 No retry ELS command x%x to remote NPORT x%x " +- "Data: x%x\n", +- phba->brd_no, ++ "%d (%d):0108 No retry ELS command x%x to remote " ++ "NPORT x%x Data: x%x\n", ++ phba->brd_no, vport->vpi, + cmd, did, cmdiocb->retry); + + return 0; +@@ -1795,33 +2071,36 @@ + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); + kfree(buf_ptr); + } +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, elsiocb); +- spin_unlock_irq(phba->host->host_lock); + return 0; + } + + static void +-lpfc_cmpl_els_logo_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- struct lpfc_nodelist *ndlp; ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ struct lpfc_vport *vport = cmdiocb->vport; ++ IOCB_t *irsp; + +- ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ irsp = &rspiocb->iocb; ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "ACC LOGO cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID); + + /* ACC to LOGO completes to NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0109 ACC to LOGO completes to NPort x%x " ++ "%d (%d):0109 ACC to LOGO completes to NPort x%x " + "Data: x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag, +- ndlp->nlp_state, ndlp->nlp_rpi); ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ++ ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + + switch (ndlp->nlp_state) { + case NLP_STE_UNUSED_NODE: /* node is just allocated */ +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case NLP_STE_NPR_NODE: /* NPort Recovery mode */ +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + break; + default: + break; +@@ -1830,24 +2109,38 @@ + return; + } + ++void ++lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ++{ ++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; ++ ++ pmb->context1 = NULL; ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++ mempool_free(pmb, phba->mbox_mem_pool); ++ lpfc_nlp_put(ndlp); ++ return; ++} ++ + static void +-lpfc_cmpl_els_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) + { ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ struct lpfc_vport *vport = ndlp ? ndlp->vport : NULL; ++ struct Scsi_Host *shost = vport ? 
lpfc_shost_from_vport(vport) : NULL; + IOCB_t *irsp; +- struct lpfc_nodelist *ndlp; + LPFC_MBOXQ_t *mbox = NULL; +- struct lpfc_dmabuf *mp; ++ struct lpfc_dmabuf *mp = NULL; + + irsp = &rspiocb->iocb; + +- ndlp = (struct lpfc_nodelist *) cmdiocb->context1; + if (cmdiocb->context_un.mbox) + mbox = cmdiocb->context_un.mbox; + +- + /* Check to see if link went down during discovery */ +- if (lpfc_els_chk_latt(phba) || !ndlp) { ++ if (!ndlp || lpfc_els_chk_latt(vport)) { + if (mbox) { + mp = (struct lpfc_dmabuf *) mbox->context1; + if (mp) { +@@ -1859,11 +2152,16 @@ + goto out; + } + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "ACC cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], ++ irsp->un.rcvels.remoteID); ++ + /* ELS response tag completes */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0110 ELS response tag x%x completes " ++ "%d (%d):0110 ELS response tag x%x completes " + "Data: x%x x%x x%x x%x x%x x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus, + rspiocb->iocb.un.ulpWord[4], rspiocb->iocb.ulpTimeout, + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state, +@@ -1872,11 +2170,19 @@ + if (mbox) { + if ((rspiocb->iocb.ulpStatus == 0) + && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) { +- lpfc_unreg_rpi(phba, ndlp); +- mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; ++ lpfc_unreg_rpi(vport, ndlp); + mbox->context2 = lpfc_nlp_get(ndlp); ++ mbox->vport = vport; ++ if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) { ++ mbox->mbox_flag |= LPFC_MBX_IMED_UNREG; ++ mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; ++ } ++ else { ++ mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE); ++ lpfc_nlp_set_state(vport, ndlp, ++ NLP_STE_REG_LOGIN_ISSUE); ++ } + if (lpfc_sli_issue_mbox(phba, mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)) + != MBX_NOT_FINISHED) { +@@ -1886,17 +2192,13 @@ + /* NOTE: we should have messages for unsuccessful + reglogin */ + } else { +- /* Do not call NO_LIST for lpfc_els_abort'ed ELS cmds */ +- if (!((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) || +- (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) || +- (irsp->un.ulpWord[4] == IOERR_SLI_DOWN)))) { +- if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) { +- lpfc_drop_node(phba, ndlp); ++ /* Do not drop node for lpfc_els_abort'ed ELS cmds */ ++ if (!lpfc_error_lost_link(irsp) && ++ ndlp->nlp_flag & NLP_ACC_REGLOGIN) { ++ lpfc_drop_node(vport, ndlp); + ndlp = NULL; + } + } +- } + mp = (struct lpfc_dmabuf *) mbox->context1; + if (mp) { + lpfc_mbuf_free(phba, mp->virt, mp->phys); +@@ -1906,19 +2208,21 @@ + } + out: + if (ndlp) { +- spin_lock_irq(phba->host->host_lock); +- ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); ++ spin_unlock_irq(shost->host_lock); + } + lpfc_els_free_iocb(phba, cmdiocb); + return; + } + + int +-lpfc_els_rsp_acc(struct lpfc_hba * phba, uint32_t flag, +- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp, +- LPFC_MBOXQ_t * mbox, uint8_t newnode) ++lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag, ++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp, ++ LPFC_MBOXQ_t *mbox, uint8_t newnode) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd; + IOCB_t *oldcmd; + struct lpfc_iocbq *elsiocb; +@@ -1935,22 +2239,29 @@ + + switch 
(flag) { + case ELS_CMD_ACC: +- cmdsize = sizeof (uint32_t); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ++ cmdsize = sizeof(uint32_t); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, + ndlp, ndlp->nlp_DID, ELS_CMD_ACC); + if (!elsiocb) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_LOGO_ACC; ++ spin_unlock_irq(shost->host_lock); + return 1; + } ++ + icmd = &elsiocb->iocb; + icmd->ulpContext = oldcmd->ulpContext; /* Xri */ + pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); + break; + case ELS_CMD_PLOGI: +- cmdsize = (sizeof (struct serv_parm) + sizeof (uint32_t)); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ++ cmdsize = (sizeof(struct serv_parm) + sizeof(uint32_t)); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, + ndlp, ndlp->nlp_DID, ELS_CMD_ACC); + if (!elsiocb) + return 1; +@@ -1963,12 +2274,16 @@ + elsiocb->context_un.mbox = mbox; + + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint32_t); +- memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm)); ++ pcmd += sizeof(uint32_t); ++ memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm)); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC PLOGI: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); + break; + case ELS_CMD_PRLO: +- cmdsize = sizeof (uint32_t) + sizeof (PRLO); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ++ cmdsize = sizeof(uint32_t) + sizeof(PRLO); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, + ndlp, ndlp->nlp_DID, ELS_CMD_PRLO); + if (!elsiocb) + return 1; +@@ -1978,10 +2293,14 @@ + pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + memcpy(pcmd, ((struct lpfc_dmabuf *) oldiocb->context2)->virt, +- sizeof (uint32_t) + sizeof (PRLO)); ++ sizeof(uint32_t) + sizeof(PRLO)); + *((uint32_t *) (pcmd)) = ELS_CMD_PRLO_ACC; + els_pkt_ptr = (ELS_PKT *) pcmd; + els_pkt_ptr->un.prlo.acceptRspCode = PRLO_REQ_EXECUTED; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC PRLO: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); + break; + default: + return 1; +@@ -1994,25 +2313,23 @@ + + /* Xmit ELS ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0128 Xmit ELS ACC response tag x%x, XRI: x%x, " ++ "%d (%d):0128 Xmit ELS ACC response tag x%x, XRI: x%x, " + "DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x\n", +- phba->brd_no, elsiocb->iotag, ++ phba->brd_no, vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + + if (ndlp->nlp_flag & NLP_LOGO_ACC) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_LOGO_ACC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc; + } else { +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + } + + phba->fc_stat.elsXmitACC++; +- spin_lock_irq(phba->host->host_lock); + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -2021,9 +2338,11 @@ + } + + int +-lpfc_els_rsp_reject(struct lpfc_hba * phba, 
uint32_t rejectError, +- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError, ++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp, ++ LPFC_MBOXQ_t *mbox) + { ++ struct lpfc_hba *phba = vport->phba; + IOCB_t *icmd; + IOCB_t *oldcmd; + struct lpfc_iocbq *elsiocb; +@@ -2036,9 +2355,9 @@ + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ + +- cmdsize = 2 * sizeof (uint32_t); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, +- ndlp, ndlp->nlp_DID, ELS_CMD_LS_RJT); ++ cmdsize = 2 * sizeof(uint32_t); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ++ ndlp->nlp_DID, ELS_CMD_LS_RJT); + if (!elsiocb) + return 1; + +@@ -2048,22 +2367,30 @@ + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = ELS_CMD_LS_RJT; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + *((uint32_t *) (pcmd)) = rejectError; + ++ if (mbox) { ++ elsiocb->context_un.mbox = mbox; ++ elsiocb->context1 = lpfc_nlp_get(ndlp); ++ } ++ + /* Xmit ELS RJT response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0129 Xmit ELS RJT x%x response tag x%x xri x%x, " +- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", +- phba->brd_no, rejectError, elsiocb->iotag, ++ "%d (%d):0129 Xmit ELS RJT x%x response tag x%x " ++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " ++ "rpi x%x\n", ++ phba->brd_no, vport->vpi, rejectError, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue LS_RJT: did:x%x flg:x%x err:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, rejectError); ++ + phba->fc_stat.elsXmitLSRJT++; +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; +- spin_lock_irq(phba->host->host_lock); ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -2072,25 +2399,22 @@ + } + + int +-lpfc_els_rsp_adisc_acc(struct lpfc_hba * phba, +- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, ++ struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + ADISC *ap; +- IOCB_t *icmd; +- IOCB_t *oldcmd; ++ IOCB_t *icmd, *oldcmd; + struct lpfc_iocbq *elsiocb; +- struct lpfc_sli_ring *pring; +- struct lpfc_sli *psli; + uint8_t *pcmd; + uint16_t cmdsize; + int rc; + +- psli = &phba->sli; +- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ +- +- cmdsize = sizeof (uint32_t) + sizeof (ADISC); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, +- ndlp, ndlp->nlp_DID, ELS_CMD_ACC); ++ cmdsize = sizeof(uint32_t) + sizeof(ADISC); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ++ ndlp->nlp_DID, ELS_CMD_ACC); + if (!elsiocb) + return 1; + +@@ -2100,28 +2424,30 @@ + + /* Xmit ADISC ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0130 Xmit ADISC ACC response iotag x%x xri: " ++ "%d (%d):0130 Xmit ADISC ACC response iotag x%x xri: " + "x%x, did x%x, nlp_flag x%x, nlp_state x%x rpi x%x\n", +- phba->brd_no, elsiocb->iotag, ++ phba->brd_no, vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, 
ndlp->nlp_rpi); + + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + ap = (ADISC *) (pcmd); + ap->hardAL_PA = phba->fc_pref_ALPA; +- memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name)); +- memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); +- ap->DID = be32_to_cpu(phba->fc_myDID); ++ memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name)); ++ memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); ++ ap->DID = be32_to_cpu(vport->fc_myDID); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC ADISC: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); + + phba->fc_stat.elsXmitACC++; +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; +- spin_lock_irq(phba->host->host_lock); ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -2130,9 +2456,10 @@ + } + + int +-lpfc_els_rsp_prli_acc(struct lpfc_hba *phba, struct lpfc_iocbq *oldiocb, ++lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, + struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; + PRLI *npr; + lpfc_vpd_t *vpd; + IOCB_t *icmd; +@@ -2147,8 +2474,8 @@ + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ + +- cmdsize = sizeof (uint32_t) + sizeof (PRLI); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ndlp, ++ cmdsize = sizeof(uint32_t) + sizeof(PRLI); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, + ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK))); + if (!elsiocb) + return 1; +@@ -2159,19 +2486,19 @@ + + /* Xmit PRLI ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0131 Xmit PRLI ACC response tag x%x xri x%x, " ++ "%d (%d):0131 Xmit PRLI ACC response tag x%x xri x%x, " + "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", +- phba->brd_no, elsiocb->iotag, ++ phba->brd_no, vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + + /* For PRLI, remainder of payload is PRLI parameter page */ +- memset(pcmd, 0, sizeof (PRLI)); ++ memset(pcmd, 0, sizeof(PRLI)); + + npr = (PRLI *) pcmd; + vpd = &phba->vpd; +@@ -2193,12 +2520,14 @@ + npr->prliType = PRLI_FCP_TYPE; + npr->initiatorFunc = 1; + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC PRLI: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); ++ + phba->fc_stat.elsXmitACC++; +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + +- spin_lock_irq(phba->host->host_lock); + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -2207,12 +2536,12 @@ + } + + static int +-lpfc_els_rsp_rnid_acc(struct lpfc_hba *phba, uint8_t format, ++lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format, + struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; + RNID *rn; +- IOCB_t *icmd; +- IOCB_t *oldcmd; ++ 
IOCB_t *icmd, *oldcmd; + struct lpfc_iocbq *elsiocb; + struct lpfc_sli_ring *pring; + struct lpfc_sli *psli; +@@ -2223,13 +2552,13 @@ + psli = &phba->sli; + pring = &psli->ring[LPFC_ELS_RING]; + +- cmdsize = sizeof (uint32_t) + sizeof (uint32_t) +- + (2 * sizeof (struct lpfc_name)); ++ cmdsize = sizeof(uint32_t) + sizeof(uint32_t) ++ + (2 * sizeof(struct lpfc_name)); + if (format) +- cmdsize += sizeof (RNID_TOP_DISC); ++ cmdsize += sizeof(RNID_TOP_DISC); + +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, +- ndlp, ndlp->nlp_DID, ELS_CMD_ACC); ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ++ ndlp->nlp_DID, ELS_CMD_ACC); + if (!elsiocb) + return 1; + +@@ -2239,30 +2568,30 @@ + + /* Xmit RNID ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0132 Xmit RNID ACC response tag x%x " ++ "%d (%d):0132 Xmit RNID ACC response tag x%x " + "xri x%x\n", +- phba->brd_no, elsiocb->iotag, ++ phba->brd_no, vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext); + + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint32_t); ++ pcmd += sizeof(uint32_t); + +- memset(pcmd, 0, sizeof (RNID)); ++ memset(pcmd, 0, sizeof(RNID)); + rn = (RNID *) (pcmd); + rn->Format = format; +- rn->CommonLen = (2 * sizeof (struct lpfc_name)); +- memcpy(&rn->portName, &phba->fc_portname, sizeof (struct lpfc_name)); +- memcpy(&rn->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name)); ++ rn->CommonLen = (2 * sizeof(struct lpfc_name)); ++ memcpy(&rn->portName, &vport->fc_portname, sizeof(struct lpfc_name)); ++ memcpy(&rn->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); + switch (format) { + case 0: + rn->SpecificLen = 0; + break; + case RNID_TOPOLOGY_DISC: +- rn->SpecificLen = sizeof (RNID_TOP_DISC); ++ rn->SpecificLen = sizeof(RNID_TOP_DISC); + memcpy(&rn->un.topologyDisc.portName, +- &phba->fc_portname, sizeof (struct lpfc_name)); ++ &vport->fc_portname, sizeof(struct lpfc_name)); + rn->un.topologyDisc.unitType = RNID_HBA; + rn->un.topologyDisc.physPort = 0; + rn->un.topologyDisc.attachedNodes = 0; +@@ -2273,15 +2602,17 @@ + break; + } + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP, ++ "Issue ACC RNID: did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); ++ + phba->fc_stat.elsXmitACC++; +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + lpfc_nlp_put(ndlp); + elsiocb->context1 = NULL; /* Don't need ndlp for cmpl, + * it could be freed */ + +- spin_lock_irq(phba->host->host_lock); + rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0); +- spin_unlock_irq(phba->host->host_lock); + if (rc == IOCB_ERROR) { + lpfc_els_free_iocb(phba, elsiocb); + return 1; +@@ -2290,168 +2621,153 @@ + } + + int +-lpfc_els_disc_adisc(struct lpfc_hba *phba) ++lpfc_els_disc_adisc(struct lpfc_vport *vport) + { +- int sentadisc; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp, *next_ndlp; ++ int sentadisc = 0; + +- sentadisc = 0; + /* go thru NPR nodes and issue any remaining ELS ADISCs */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_NPR_NODE && + (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && + (ndlp->nlp_flag & NLP_NPR_ADISC) != 0) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- 
spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); +- lpfc_issue_els_adisc(phba, ndlp, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); ++ lpfc_issue_els_adisc(vport, ndlp, 0); + sentadisc++; +- phba->num_disc_nodes++; +- if (phba->num_disc_nodes >= +- phba->cfg_discovery_threads) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_NLP_MORE; +- spin_unlock_irq(phba->host->host_lock); ++ vport->num_disc_nodes++; ++ if (vport->num_disc_nodes >= ++ vport->phba->cfg_discovery_threads) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_NLP_MORE; ++ spin_unlock_irq(shost->host_lock); + break; + } + } + } + if (sentadisc == 0) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_NLP_MORE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NLP_MORE; ++ spin_unlock_irq(shost->host_lock); + } + return sentadisc; + } + + int +-lpfc_els_disc_plogi(struct lpfc_hba * phba) ++lpfc_els_disc_plogi(struct lpfc_vport *vport) + { +- int sentplogi; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp, *next_ndlp; ++ int sentplogi = 0; + +- sentplogi = 0; +- /* go thru NPR list and issue any remaining ELS PLOGIs */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { ++ /* go thru NPR nodes and issue any remaining ELS PLOGIs */ ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_NPR_NODE && + (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && + (ndlp->nlp_flag & NLP_DELAY_TMO) == 0 && + (ndlp->nlp_flag & NLP_NPR_ADISC) == 0) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + sentplogi++; +- phba->num_disc_nodes++; +- if (phba->num_disc_nodes >= +- phba->cfg_discovery_threads) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_NLP_MORE; +- spin_unlock_irq(phba->host->host_lock); ++ vport->num_disc_nodes++; ++ if (vport->num_disc_nodes >= ++ vport->phba->cfg_discovery_threads) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_NLP_MORE; ++ spin_unlock_irq(shost->host_lock); + break; + } + } + } + if (sentplogi == 0) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_NLP_MORE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NLP_MORE; ++ spin_unlock_irq(shost->host_lock); + } + return sentplogi; + } + +-int +-lpfc_els_flush_rscn(struct lpfc_hba * phba) ++void ++lpfc_els_flush_rscn(struct lpfc_vport *vport) + { +- struct lpfc_dmabuf *mp; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + int i; + +- for (i = 0; i < phba->fc_rscn_id_cnt; i++) { +- mp = phba->fc_rscn_id_list[i]; +- lpfc_mbuf_free(phba, mp->virt, mp->phys); +- kfree(mp); +- phba->fc_rscn_id_list[i] = NULL; +- } +- phba->fc_rscn_id_cnt = 0; +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY); +- spin_unlock_irq(phba->host->host_lock); +- lpfc_can_disctmo(phba); +- return 0; ++ for (i = 0; i < vport->fc_rscn_id_cnt; i++) { ++ lpfc_in_buf_free(phba, vport->fc_rscn_id_list[i]); ++ vport->fc_rscn_id_list[i] = NULL; ++ } ++ 
spin_lock_irq(shost->host_lock); ++ vport->fc_rscn_id_cnt = 0; ++ vport->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); + } + + int +-lpfc_rscn_payload_check(struct lpfc_hba * phba, uint32_t did) ++lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did) + { + D_ID ns_did; + D_ID rscn_did; +- struct lpfc_dmabuf *mp; + uint32_t *lp; +- uint32_t payload_len, cmd, i, match; ++ uint32_t payload_len, i; ++ struct lpfc_hba *phba = vport->phba; + + ns_did.un.word = did; +- match = 0; + + /* Never match fabric nodes for RSCNs */ + if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) +- return(0); ++ return 0; + + /* If we are doing a FULL RSCN rediscovery, match everything */ +- if (phba->fc_flag & FC_RSCN_DISCOVERY) { ++ if (vport->fc_flag & FC_RSCN_DISCOVERY) + return did; +- } + +- for (i = 0; i < phba->fc_rscn_id_cnt; i++) { +- mp = phba->fc_rscn_id_list[i]; +- lp = (uint32_t *) mp->virt; +- cmd = *lp++; +- payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */ +- payload_len -= sizeof (uint32_t); /* take off word 0 */ ++ for (i = 0; i < vport->fc_rscn_id_cnt; i++) { ++ lp = vport->fc_rscn_id_list[i]->virt; ++ payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK); ++ payload_len -= sizeof(uint32_t); /* take off word 0 */ + while (payload_len) { +- rscn_did.un.word = *lp++; +- rscn_did.un.word = be32_to_cpu(rscn_did.un.word); +- payload_len -= sizeof (uint32_t); ++ rscn_did.un.word = be32_to_cpu(*lp++); ++ payload_len -= sizeof(uint32_t); + switch (rscn_did.un.b.resv) { + case 0: /* Single N_Port ID effected */ +- if (ns_did.un.word == rscn_did.un.word) { +- match = did; +- } ++ if (ns_did.un.word == rscn_did.un.word) ++ return did; + break; + case 1: /* Whole N_Port Area effected */ + if ((ns_did.un.b.domain == rscn_did.un.b.domain) + && (ns_did.un.b.area == rscn_did.un.b.area)) +- { +- match = did; +- } ++ return did; + break; + case 2: /* Whole N_Port Domain effected */ + if (ns_did.un.b.domain == rscn_did.un.b.domain) +- { +- match = did; +- } +- break; +- case 3: /* Whole Fabric effected */ +- match = did; ++ return did; + break; + default: +- /* Unknown Identifier in RSCN list */ ++ /* Unknown Identifier in RSCN node */ + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0217 Unknown Identifier in " +- "RSCN payload Data: x%x\n", +- phba->brd_no, rscn_did.un.word); +- break; +- } +- if (match) { +- break; ++ "%d (%d):0217 Unknown " ++ "Identifier in RSCN payload " ++ "Data: x%x\n", ++ phba->brd_no, vport->vpi, ++ rscn_did.un.word); ++ case 3: /* Whole Fabric effected */ ++ return did; + } + } + } +- return match; ++ return 0; + } + + static int +-lpfc_rscn_recovery_check(struct lpfc_hba *phba) ++lpfc_rscn_recovery_check(struct lpfc_vport *vport) + { + struct lpfc_nodelist *ndlp = NULL; + +@@ -2459,12 +2775,12 @@ + * them to NPR state. + */ + +- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE || +- lpfc_rscn_payload_check(phba, ndlp->nlp_DID) == 0) ++ lpfc_rscn_payload_check(vport, ndlp->nlp_DID) == 0) + continue; + +- lpfc_disc_state_machine(phba, ndlp, NULL, ++ lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + + /* +@@ -2472,175 +2788,248 @@ + * recovery event. 
+ */ + if (ndlp->nlp_flag & NLP_DELAY_TMO) +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + } + + return 0; + } + + static int +-lpfc_els_rcv_rscn(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, +- struct lpfc_nodelist * ndlp, uint8_t newnode) ++lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp, uint8_t newnode) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_dmabuf *pcmd; +- uint32_t *lp; ++ struct lpfc_vport *next_vport; ++ uint32_t *lp, *datap; + IOCB_t *icmd; +- uint32_t payload_len, cmd; ++ uint32_t payload_len, length, nportid, *cmd; ++ int rscn_cnt = vport->fc_rscn_id_cnt; ++ int rscn_id = 0, hba_id = 0; + int i; + + icmd = &cmdiocb->iocb; + pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + lp = (uint32_t *) pcmd->virt; + +- cmd = *lp++; +- payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */ +- payload_len -= sizeof (uint32_t); /* take off word 0 */ +- cmd &= ELS_CMD_MASK; ++ payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK); ++ payload_len -= sizeof(uint32_t); /* take off word 0 */ + + /* RSCN received */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0214 RSCN received Data: x%x x%x x%x x%x\n", +- phba->brd_no, +- phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0214 RSCN received Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, vport->fc_flag, payload_len, ++ *lp, rscn_cnt); + + for (i = 0; i < payload_len/sizeof(uint32_t); i++) +- fc_host_post_event(phba->host, fc_get_event_number(), ++ fc_host_post_event(shost, fc_get_event_number(), + FCH_EVT_RSCN, lp[i]); + + /* If we are about to begin discovery, just ACC the RSCN. + * Discovery processing will satisfy it. + */ +- if (phba->hba_state <= LPFC_NS_QRY) { +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, ++ if (vport->port_state <= LPFC_NS_QRY) { ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RSCN ignore: did:x%x/ste:x%x flg:x%x", ++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); ++ ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, + newnode); + return 0; + } + ++ /* If this RSCN just contains NPortIDs for other vports on this HBA, ++ * just ACC and ignore it. ++ */ ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ !(phba->cfg_peer_port_login)) { ++ i = payload_len; ++ datap = lp; ++ while (i > 0) { ++ nportid = *datap++; ++ nportid = ((be32_to_cpu(nportid)) & Mask_DID); ++ i -= sizeof(uint32_t); ++ rscn_id++; ++ list_for_each_entry(next_vport, &phba->port_list, ++ listentry) { ++ if (nportid == next_vport->fc_myDID) { ++ hba_id++; ++ break; ++ } ++ } ++ } ++ if (rscn_id == hba_id) { ++ /* ALL NPortIDs in RSCN are on HBA */ ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0214 Ignore RSCN Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, vport->fc_flag, payload_len, ++ *lp, rscn_cnt); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RSCN vport: did:x%x/ste:x%x flg:x%x", ++ ndlp->nlp_DID, vport->port_state, ++ ndlp->nlp_flag); ++ ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ++ ndlp, NULL, newnode); ++ return 0; ++ } ++ } ++ + /* If we are already processing an RSCN, save the received + * RSCN payload buffer, cmdiocb->context2 to process later. 
+ */ +- if (phba->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) { +- if ((phba->fc_rscn_id_cnt < FC_MAX_HOLD_RSCN) && +- !(phba->fc_flag & FC_RSCN_DISCOVERY)) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_RSCN_MODE; +- spin_unlock_irq(phba->host->host_lock); +- phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd; +- ++ if (vport->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) { ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RSCN defer: did:x%x/ste:x%x flg:x%x", ++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); ++ ++ vport->fc_flag |= FC_RSCN_DEFERRED; ++ if ((rscn_cnt < FC_MAX_HOLD_RSCN) && ++ !(vport->fc_flag & FC_RSCN_DISCOVERY)) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); ++ if (rscn_cnt) { ++ cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt; ++ length = be32_to_cpu(*cmd & ~ELS_CMD_MASK); ++ } ++ if ((rscn_cnt) && ++ (payload_len + length <= LPFC_BPL_SIZE)) { ++ *cmd &= ELS_CMD_MASK; ++ *cmd |= be32_to_cpu(payload_len + length); ++ memcpy(((uint8_t *)cmd) + length, lp, ++ payload_len); ++ } else { ++ vport->fc_rscn_id_list[rscn_cnt] = pcmd; ++ vport->fc_rscn_id_cnt++; + /* If we zero, cmdiocb->context2, the calling + * routine will not try to free it. + */ + cmdiocb->context2 = NULL; ++ } + + /* Deferred RSCN */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0235 Deferred RSCN " ++ "%d (%d):0235 Deferred RSCN " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->fc_rscn_id_cnt, +- phba->fc_flag, phba->hba_state); ++ phba->brd_no, vport->vpi, ++ vport->fc_rscn_id_cnt, vport->fc_flag, ++ vport->port_state); + } else { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_RSCN_DISCOVERY; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_RSCN_DISCOVERY; ++ spin_unlock_irq(shost->host_lock); + /* ReDiscovery RSCN */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0234 ReDiscovery RSCN " ++ "%d (%d):0234 ReDiscovery RSCN " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->fc_rscn_id_cnt, +- phba->fc_flag, phba->hba_state); ++ phba->brd_no, vport->vpi, ++ vport->fc_rscn_id_cnt, vport->fc_flag, ++ vport->port_state); + } + /* Send back ACC */ +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, + newnode); + + /* send RECOVERY event for ALL nodes that match RSCN payload */ +- lpfc_rscn_recovery_check(phba); ++ lpfc_rscn_recovery_check(vport); ++ vport->fc_flag &= ~FC_RSCN_DEFERRED; + return 0; + } + +- phba->fc_flag |= FC_RSCN_MODE; +- phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd; ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RSCN: did:x%x/ste:x%x flg:x%x", ++ ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag); ++ ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); ++ vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd; + /* + * If we zero, cmdiocb->context2, the calling routine will + * not try to free it. 
+ */ + cmdiocb->context2 = NULL; + +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); + + /* Send back ACC */ +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode); + + /* send RECOVERY event for ALL nodes that match RSCN payload */ +- lpfc_rscn_recovery_check(phba); ++ lpfc_rscn_recovery_check(vport); + +- return lpfc_els_handle_rscn(phba); ++ return lpfc_els_handle_rscn(vport); + } + + int +-lpfc_els_handle_rscn(struct lpfc_hba * phba) ++lpfc_els_handle_rscn(struct lpfc_vport *vport) + { + struct lpfc_nodelist *ndlp; ++ struct lpfc_hba *phba = vport->phba; ++ ++ /* Ignore RSCN if the port is being torn down. */ ++ if (vport->load_flag & FC_UNLOADING) { ++ lpfc_els_flush_rscn(vport); ++ return 0; ++ } + + /* Start timer for RSCN processing */ +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); + + /* RSCN processed */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0215 RSCN processed Data: x%x x%x x%x x%x\n", +- phba->brd_no, +- phba->fc_flag, 0, phba->fc_rscn_id_cnt, +- phba->hba_state); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0215 RSCN processed Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, ++ vport->fc_flag, 0, vport->fc_rscn_id_cnt, ++ vport->port_state); + + /* To process RSCN, first compare RSCN data with NameServer */ +- phba->fc_ns_retry = 0; +- ndlp = lpfc_findnode_did(phba, NameServer_DID); ++ vport->fc_ns_retry = 0; ++ ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { + /* Good ndlp, issue CT Request to NameServer */ +- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == 0) { ++ if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0) + /* Wait for NameServer query cmpl before we can + continue */ + return 1; +- } + } else { + /* If login to NameServer does not exist, issue one */ + /* Good status, issue PLOGI to NameServer */ +- ndlp = lpfc_findnode_did(phba, NameServer_DID); +- if (ndlp) { ++ ndlp = lpfc_findnode_did(vport, NameServer_DID); ++ if (ndlp) + /* Wait for NameServer login cmpl before we can + continue */ + return 1; +- } ++ + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { +- lpfc_els_flush_rscn(phba); ++ lpfc_els_flush_rscn(vport); + return 0; + } else { +- lpfc_nlp_init(phba, ndlp, NameServer_DID); ++ lpfc_nlp_init(vport, ndlp, NameServer_DID); + ndlp->nlp_type |= NLP_FABRIC; + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, NameServer_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_issue_els_plogi(vport, NameServer_DID, 0); + /* Wait for NameServer login cmpl before we can + continue */ + return 1; + } + } + +- lpfc_els_flush_rscn(phba); ++ lpfc_els_flush_rscn(vport); + return 0; + } + + static int +-lpfc_els_rcv_flogi(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, +- struct lpfc_nodelist * ndlp, uint8_t newnode) ++lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp, uint8_t newnode) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + uint32_t *lp = (uint32_t *) pcmd->virt; + IOCB_t *icmd = &cmdiocb->iocb; +@@ -2655,7 +3044,7 @@ + + /* FLOGI received */ + +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); + + if (phba->fc_topology == TOPOLOGY_LOOP) { + /* We should 
never receive a FLOGI in loop mode, ignore it */ +@@ -2664,33 +3053,34 @@ + /* An FLOGI ELS command was received from DID in + Loop Mode */ + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, +- "%d:0113 An FLOGI ELS command x%x was received " +- "from DID x%x in Loop Mode\n", +- phba->brd_no, cmd, did); ++ "%d (%d):0113 An FLOGI ELS command x%x was " ++ "received from DID x%x in Loop Mode\n", ++ phba->brd_no, vport->vpi, cmd, did); + return 1; + } + + did = Fabric_DID; + +- if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) { ++ if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3))) { + /* For a FLOGI we accept, then if our portname is greater + * then the remote portname we initiate Nport login. + */ + +- rc = memcmp(&phba->fc_portname, &sp->portName, +- sizeof (struct lpfc_name)); ++ rc = memcmp(&vport->fc_portname, &sp->portName, ++ sizeof(struct lpfc_name)); + + if (!rc) { +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, +- GFP_KERNEL)) == 0) { ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!mbox) + return 1; +- } ++ + lpfc_linkdown(phba); + lpfc_init_link(phba, mbox, + phba->cfg_topology, + phba->cfg_link_speed); + mbox->mb.un.varInitLnk.lipsr_AL_PA = 0; + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ mbox->vport = vport; + rc = lpfc_sli_issue_mbox + (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); + lpfc_set_loopback_flag(phba); +@@ -2699,31 +3089,34 @@ + } + return 1; + } else if (rc > 0) { /* greater than */ +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_PT2PT_PLOGI; +- spin_unlock_irq(phba->host->host_lock); +- } +- phba->fc_flag |= FC_PT2PT; +- phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_PT2PT_PLOGI; ++ spin_unlock_irq(shost->host_lock); ++ } ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_PT2PT; ++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); ++ spin_unlock_irq(shost->host_lock); + } else { + /* Reject this request because invalid parameters */ + stat.un.b.lsRjtRsvd0 = 0; + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + return 1; + } + + /* Send back ACC */ +- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode); ++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode); + + return 0; + } + + static int +-lpfc_els_rcv_rnid(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rcv_rnid(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp) + { + struct lpfc_dmabuf *pcmd; + uint32_t *lp; +@@ -2746,7 +3139,7 @@ + case 0: + case RNID_TOPOLOGY_DISC: + /* Send back ACC */ +- lpfc_els_rsp_rnid_acc(phba, rn->Format, cmdiocb, ndlp); ++ lpfc_els_rsp_rnid_acc(vport, rn->Format, cmdiocb, ndlp); + break; + default: + /* Reject this request because format not supported */ +@@ -2754,13 +3147,14 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + } + return 0; + } + + static int +-lpfc_els_rcv_lirr(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++lpfc_els_rcv_lirr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, + struct lpfc_nodelist *ndlp) + { + struct ls_rjt 
stat; +@@ -2770,15 +3164,15 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + return 0; + } + + static void + lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli; +- struct lpfc_sli_ring *pring; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + MAILBOX_t *mb; + IOCB_t *icmd; + RPS_RSP *rps_rsp; +@@ -2788,8 +3182,6 @@ + uint16_t xri, status; + uint32_t cmdsize; + +- psli = &phba->sli; +- pring = &psli->ring[LPFC_ELS_RING]; + mb = &pmb->mb; + + ndlp = (struct lpfc_nodelist *) pmb->context2; +@@ -2804,7 +3196,8 @@ + + cmdsize = sizeof(RPS_RSP) + sizeof(uint32_t); + mempool_free(pmb, phba->mbox_mem_pool); +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, lpfc_max_els_tries, ndlp, ++ elsiocb = lpfc_prep_els_iocb(phba->pport, 0, cmdsize, ++ lpfc_max_els_tries, ndlp, + ndlp->nlp_DID, ELS_CMD_ACC); + lpfc_nlp_put(ndlp); + if (!elsiocb) +@@ -2815,14 +3208,14 @@ + + pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint32_t); /* Skip past command */ ++ pcmd += sizeof(uint32_t); /* Skip past command */ + rps_rsp = (RPS_RSP *)pcmd; + + if (phba->fc_topology != TOPOLOGY_LOOP) + status = 0x10; + else + status = 0x8; +- if (phba->fc_flag & FC_FABRIC) ++ if (phba->pport->fc_flag & FC_FABRIC) + status |= 0x4; + + rps_rsp->rsvd1 = 0; +@@ -2836,25 +3229,25 @@ + + /* Xmit ELS RPS ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0118 Xmit ELS RPS ACC response tag x%x xri x%x, " +- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", +- phba->brd_no, elsiocb->iotag, ++ "%d (%d):0118 Xmit ELS RPS ACC response tag x%x " ++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " ++ "rpi x%x\n", ++ phba->brd_no, ndlp->vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + phba->fc_stat.elsXmitACC++; +- +- if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { ++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) + lpfc_els_free_iocb(phba, elsiocb); +- } + return; + } + + static int +-lpfc_els_rcv_rps(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_nodelist * ndlp) ++lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; + uint32_t *lp; + uint8_t flag; + LPFC_MBOXQ_t *mbox; +@@ -2868,7 +3261,8 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + } + + pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; +@@ -2878,19 +3272,24 @@ + + if ((flag == 0) || + ((flag == 1) && (be32_to_cpu(rps->un.portNum) == 0)) || +- ((flag == 2) && (memcmp(&rps->un.portName, &phba->fc_portname, +- sizeof (struct lpfc_name)) == 0))) { +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) { ++ ((flag == 2) && (memcmp(&rps->un.portName, &vport->fc_portname, ++ sizeof(struct lpfc_name)) == 0))) { ++ ++ printk("Fix me....\n"); ++ 
dump_stack(); ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC); ++ if (mbox) { + lpfc_read_lnk_stat(phba, mbox); + mbox->context1 = +- (void *)((unsigned long)cmdiocb->iocb.ulpContext); ++ (void *)((unsigned long) cmdiocb->iocb.ulpContext); + mbox->context2 = lpfc_nlp_get(ndlp); ++ mbox->vport = vport; + mbox->mbox_cmpl = lpfc_els_rsp_rps_acc; + if (lpfc_sli_issue_mbox (phba, mbox, +- (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) { ++ (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) + /* Mbox completion will send ELS Response */ + return 0; +- } ++ + lpfc_nlp_put(ndlp); + mempool_free(mbox, phba->mbox_mem_pool); + } +@@ -2899,27 +3298,25 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + return 0; + } + + static int +-lpfc_els_rsp_rpl_acc(struct lpfc_hba * phba, uint16_t cmdsize, +- struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize, ++ struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp) + { +- IOCB_t *icmd; +- IOCB_t *oldcmd; ++ struct lpfc_hba *phba = vport->phba; ++ IOCB_t *icmd, *oldcmd; + RPL_RSP rpl_rsp; + struct lpfc_iocbq *elsiocb; +- struct lpfc_sli_ring *pring; +- struct lpfc_sli *psli; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING]; + uint8_t *pcmd; + +- psli = &phba->sli; +- pring = &psli->ring[LPFC_ELS_RING]; /* ELS ring */ ++ elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp, ++ ndlp->nlp_DID, ELS_CMD_ACC); + +- elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, +- ndlp, ndlp->nlp_DID, ELS_CMD_ACC); + if (!elsiocb) + return 1; + +@@ -2929,7 +3326,7 @@ + + pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt); + *((uint32_t *) (pcmd)) = ELS_CMD_ACC; +- pcmd += sizeof (uint16_t); ++ pcmd += sizeof(uint16_t); + *((uint16_t *)(pcmd)) = be16_to_cpu(cmdsize); + pcmd += sizeof(uint16_t); + +@@ -2937,8 +3334,8 @@ + rpl_rsp.listLen = be32_to_cpu(1); + rpl_rsp.index = 0; + rpl_rsp.port_num_blk.portNum = 0; +- rpl_rsp.port_num_blk.portID = be32_to_cpu(phba->fc_myDID); +- memcpy(&rpl_rsp.port_num_blk.portName, &phba->fc_portname, ++ rpl_rsp.port_num_blk.portID = be32_to_cpu(vport->fc_myDID); ++ memcpy(&rpl_rsp.port_num_blk.portName, &vport->fc_portname, + sizeof(struct lpfc_name)); + + memcpy(pcmd, &rpl_rsp, cmdsize - sizeof(uint32_t)); +@@ -2946,13 +3343,14 @@ + + /* Xmit ELS RPL ACC response tag */ + lpfc_printf_log(phba, KERN_INFO, LOG_ELS, +- "%d:0120 Xmit ELS RPL ACC response tag x%x xri x%x, " +- "did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n", +- phba->brd_no, elsiocb->iotag, ++ "%d (%d):0120 Xmit ELS RPL ACC response tag x%x " ++ "xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, " ++ "rpi x%x\n", ++ phba->brd_no, vport->vpi, elsiocb->iotag, + elsiocb->iocb.ulpContext, ndlp->nlp_DID, + ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi); + +- elsiocb->iocb_cmpl = lpfc_cmpl_els_acc; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp; + + phba->fc_stat.elsXmitACC++; + if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { +@@ -2963,8 +3361,8 @@ + } + + static int +-lpfc_els_rcv_rpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_nodelist * ndlp) ++lpfc_els_rcv_rpl(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp) + { + struct lpfc_dmabuf 
*pcmd; + uint32_t *lp; +@@ -2979,7 +3377,8 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + } + + pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; +@@ -2996,15 +3395,16 @@ + } else { + cmdsize = sizeof(uint32_t) + maxsize * sizeof(uint32_t); + } +- lpfc_els_rsp_rpl_acc(phba, cmdsize, cmdiocb, ndlp); ++ lpfc_els_rsp_rpl_acc(vport, cmdsize, cmdiocb, ndlp); + + return 0; + } + + static int +-lpfc_els_rcv_farp(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rcv_farp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_dmabuf *pcmd; + uint32_t *lp; + IOCB_t *icmd; +@@ -3020,11 +3420,9 @@ + fp = (FARP *) lp; + + /* FARP-REQ received from DID */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_ELS, +- "%d:0601 FARP-REQ received from DID x%x\n", +- phba->brd_no, did); ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0601 FARP-REQ received from DID x%x\n", ++ phba->brd_no, vport->vpi, did); + + /* We will only support match on WWPN or WWNN */ + if (fp->Mflags & ~(FARP_MATCH_NODE | FARP_MATCH_PORT)) { +@@ -3034,15 +3432,15 @@ + cnt = 0; + /* If this FARP command is searching for my portname */ + if (fp->Mflags & FARP_MATCH_PORT) { +- if (memcmp(&fp->RportName, &phba->fc_portname, +- sizeof (struct lpfc_name)) == 0) ++ if (memcmp(&fp->RportName, &vport->fc_portname, ++ sizeof(struct lpfc_name)) == 0) + cnt = 1; + } + + /* If this FARP command is searching for my nodename */ + if (fp->Mflags & FARP_MATCH_NODE) { +- if (memcmp(&fp->RnodeName, &phba->fc_nodename, +- sizeof (struct lpfc_name)) == 0) ++ if (memcmp(&fp->RnodeName, &vport->fc_nodename, ++ sizeof(struct lpfc_name)) == 0) + cnt = 1; + } + +@@ -3052,28 +3450,28 @@ + /* Log back into the node before sending the FARP. 
*/ + if (fp->Rflags & FARP_REQUEST_PLOGI) { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, ++ lpfc_nlp_set_state(vport, ndlp, + NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } + + /* Send a FARP response to that node */ +- if (fp->Rflags & FARP_REQUEST_FARPR) { +- lpfc_issue_els_farpr(phba, did, 0); +- } ++ if (fp->Rflags & FARP_REQUEST_FARPR) ++ lpfc_issue_els_farpr(vport, did, 0); + } + } + return 0; + } + + static int +-lpfc_els_rcv_farpr(struct lpfc_hba * phba, +- struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp) ++lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *ndlp) + { + struct lpfc_dmabuf *pcmd; + uint32_t *lp; + IOCB_t *icmd; + uint32_t cmd, did; ++ struct lpfc_hba *phba = vport->phba; + + icmd = &cmdiocb->iocb; + did = icmd->un.elsreq64.remoteID; +@@ -3082,21 +3480,18 @@ + + cmd = *lp++; + /* FARP-RSP received from DID */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_ELS, +- "%d:0600 FARP-RSP received from DID x%x\n", +- phba->brd_no, did); +- ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0600 FARP-RSP received from DID x%x\n", ++ phba->brd_no, vport->vpi, did); + /* ACCEPT the Farp resp request */ +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + + return 0; + } + + static int +-lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_nodelist * fan_ndlp) ++lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_nodelist *fan_ndlp) + { + struct lpfc_dmabuf *pcmd; + uint32_t *lp; +@@ -3104,10 +3499,12 @@ + uint32_t cmd, did; + FAN *fp; + struct lpfc_nodelist *ndlp, *next_ndlp; ++ struct lpfc_hba *phba = vport->phba; + + /* FAN received */ +- lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:0265 FAN received\n", +- phba->brd_no); ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0265 FAN received\n", ++ phba->brd_no, vport->vpi); + + icmd = &cmdiocb->iocb; + did = icmd->un.elsreq64.remoteID; +@@ -3115,11 +3512,11 @@ + lp = (uint32_t *)pcmd->virt; + + cmd = *lp++; +- fp = (FAN *)lp; ++ fp = (FAN *) lp; + + /* FAN received; Fan does not have a reply sequence */ + +- if (phba->hba_state == LPFC_LOCAL_CFG_LINK) { ++ if (phba->pport->port_state == LPFC_LOCAL_CFG_LINK) { + if ((memcmp(&phba->fc_fabparam.nodeName, &fp->FnodeName, + sizeof(struct lpfc_name)) != 0) || + (memcmp(&phba->fc_fabparam.portName, &fp->FportName, +@@ -3130,7 +3527,7 @@ + */ + + list_for_each_entry_safe(ndlp, next_ndlp, +- &phba->fc_nodes, nlp_listp) { ++ &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state != NLP_STE_NPR_NODE) + continue; + if (ndlp->nlp_type & NLP_FABRIC) { +@@ -3138,24 +3535,24 @@ + * Clean up old Fabric, Nameserver and + * other NLP_FABRIC logins + */ +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { + /* Fail outstanding I/O now since this + * device is marked for PLOGI + */ +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + } + } + +- phba->hba_state = LPFC_FLOGI; +- lpfc_set_disctmo(phba); +- lpfc_initial_flogi(phba); ++ vport->port_state = LPFC_FLOGI; ++ lpfc_set_disctmo(vport); ++ lpfc_initial_flogi(vport); + return 0; + } + /* Discovery not needed, + * move the nodes to their original state. 
+ */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + if (ndlp->nlp_state != NLP_STE_NPR_NODE) + continue; +@@ -3163,13 +3560,13 @@ + switch (ndlp->nlp_prev_state) { + case NLP_STE_UNMAPPED_NODE: + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, ++ lpfc_nlp_set_state(vport, ndlp, + NLP_STE_UNMAPPED_NODE); + break; + + case NLP_STE_MAPPED_NODE: + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, ++ lpfc_nlp_set_state(vport, ndlp, + NLP_STE_MAPPED_NODE); + break; + +@@ -3179,7 +3576,7 @@ + } + + /* Start discovery - this should just do CLEAR_LA */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); + } + return 0; + } +@@ -3187,42 +3584,42 @@ + void + lpfc_els_timeout(unsigned long ptr) + { +- struct lpfc_hba *phba; ++ struct lpfc_vport *vport = (struct lpfc_vport *) ptr; ++ struct lpfc_hba *phba = vport->phba; + unsigned long iflag; + +- phba = (struct lpfc_hba *)ptr; +- if (phba == 0) +- return; +- spin_lock_irqsave(phba->host->host_lock, iflag); +- if (!(phba->work_hba_events & WORKER_ELS_TMO)) { +- phba->work_hba_events |= WORKER_ELS_TMO; ++ spin_lock_irqsave(&vport->work_port_lock, iflag); ++ if ((vport->work_port_events & WORKER_ELS_TMO) == 0) { ++ vport->work_port_events |= WORKER_ELS_TMO; ++ spin_unlock_irqrestore(&vport->work_port_lock, iflag); ++ ++ spin_lock_irqsave(&phba->hbalock, iflag); + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + } +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ else ++ spin_unlock_irqrestore(&vport->work_port_lock, iflag); + return; + } + + void +-lpfc_els_timeout_handler(struct lpfc_hba *phba) ++lpfc_els_timeout_handler(struct lpfc_vport *vport) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *tmp_iocb, *piocb; + IOCB_t *cmd = NULL; + struct lpfc_dmabuf *pcmd; +- uint32_t *elscmd; +- uint32_t els_command=0; ++ uint32_t els_command = 0; + uint32_t timeout; +- uint32_t remote_ID; ++ uint32_t remote_ID = 0xffffffff; + +- if (phba == 0) +- return; +- spin_lock_irq(phba->host->host_lock); + /* If the timer is already canceled do nothing */ +- if (!(phba->work_hba_events & WORKER_ELS_TMO)) { +- spin_unlock_irq(phba->host->host_lock); ++ if ((vport->work_port_events & WORKER_ELS_TMO) == 0) { + return; + } ++ spin_lock_irq(&phba->hbalock); + timeout = (uint32_t)(phba->fc_ratov << 1); + + pring = &phba->sli.ring[LPFC_ELS_RING]; +@@ -3230,63 +3627,70 @@ + list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { + cmd = &piocb->iocb; + +- if ((piocb->iocb_flag & LPFC_IO_LIBDFC) || +- (piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN) || +- (piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)) { ++ if ((piocb->iocb_flag & LPFC_IO_LIBDFC) != 0 || ++ piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN || ++ piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN) + continue; +- } ++ ++ if (piocb->vport != vport) ++ continue; ++ + pcmd = (struct lpfc_dmabuf *) piocb->context2; +- if (pcmd) { +- elscmd = (uint32_t *) (pcmd->virt); +- els_command = *elscmd; +- } ++ if (pcmd) ++ els_command = *(uint32_t *) (pcmd->virt); + +- if ((els_command == ELS_CMD_FARP) +- || (els_command == ELS_CMD_FARPR)) { ++ if (els_command == ELS_CMD_FARP || ++ els_command == ELS_CMD_FARPR || ++ els_command == ELS_CMD_FDISC) ++ continue; ++ ++ if (vport != piocb->vport) + continue; +- } + + if (piocb->drvrTimeout > 0) { +- if 
(piocb->drvrTimeout >= timeout) { ++ if (piocb->drvrTimeout >= timeout) + piocb->drvrTimeout -= timeout; +- } else { ++ else + piocb->drvrTimeout = 0; +- } + continue; + } + +- if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) { ++ remote_ID = 0xffffffff; ++ if (cmd->ulpCommand != CMD_GEN_REQUEST64_CR) ++ remote_ID = cmd->un.elsreq64.remoteID; ++ else { + struct lpfc_nodelist *ndlp; +- ndlp = __lpfc_findnode_rpi(phba, cmd->ulpContext); ++ ndlp = __lpfc_findnode_rpi(vport, cmd->ulpContext); ++ if (ndlp) + remote_ID = ndlp->nlp_DID; +- } else { +- remote_ID = cmd->un.elsreq64.remoteID; + } + +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_ELS, +- "%d:0127 ELS timeout Data: x%x x%x x%x x%x\n", +- phba->brd_no, els_command, ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0127 ELS timeout Data: x%x x%x x%x " ++ "x%x\n", ++ phba->brd_no, vport->vpi, els_command, + remote_ID, cmd->ulpCommand, cmd->ulpIoTag); + + lpfc_sli_issue_abort_iotag(phba, pring, piocb); + } +- if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt) +- mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout); ++ spin_unlock_irq(&phba->hbalock); + +- spin_unlock_irq(phba->host->host_lock); ++ if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt) ++ mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout); + } + + void +-lpfc_els_flush_cmd(struct lpfc_hba *phba) ++lpfc_els_flush_cmd(struct lpfc_vport *vport) + { + LIST_HEAD(completions); ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + struct lpfc_iocbq *tmp_iocb, *piocb; + IOCB_t *cmd = NULL; + +- spin_lock_irq(phba->host->host_lock); ++ lpfc_fabric_abort_vport(vport); ++ ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) { + cmd = &piocb->iocb; + +@@ -3301,271 +3705,1042 @@ + cmd->ulpCommand == CMD_ABORT_XRI_CN) + continue; + ++ if (piocb->vport != vport) ++ continue; ++ + list_move_tail(&piocb->list, &completions); + pring->txq_cnt--; +- + } + + list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) { +- cmd = &piocb->iocb; +- + if (piocb->iocb_flag & LPFC_IO_LIBDFC) { + continue; + } + ++ if (piocb->vport != vport) ++ continue; ++ + lpfc_sli_issue_abort_iotag(phba, pring, piocb); + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + +- while(!list_empty(&completions)) { ++ while (!list_empty(&completions)) { + piocb = list_get_first(&completions, struct lpfc_iocbq, list); + cmd = &piocb->iocb; +- list_del(&piocb->list); ++ list_del_init(&piocb->list); + +- if (piocb->iocb_cmpl) { ++ if (!piocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, piocb); ++ else { + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (piocb->iocb_cmpl) (phba, piocb, piocb); +- } else +- lpfc_sli_release_iocbq(phba, piocb); ++ } + } + + return; + } + +-void +-lpfc_els_unsol_event(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, struct lpfc_iocbq * elsiocb) ++static void ++lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_vport *vport, struct lpfc_iocbq *elsiocb) + { +- struct lpfc_sli *psli; + struct lpfc_nodelist *ndlp; +- struct lpfc_dmabuf *mp; +- uint32_t *lp; +- IOCB_t *icmd; + struct ls_rjt stat; +- uint32_t cmd; +- uint32_t did; +- uint32_t newnode; +- uint32_t drop_cmd = 0; /* by default do NOT drop received cmd */ +- uint32_t rjt_err = 0; +- +- psli = &phba->sli; +- icmd = &elsiocb->iocb; +- +- if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) && +- ((icmd->un.ulpWord[4] & 0xff) == 
IOERR_RCV_BUFFER_WAITING)) {
+- /* Not enough posted buffers; Try posting more buffers */
+- phba->fc_stat.NoRcvBuf++;
+- lpfc_post_buffer(phba, pring, 0, 1);
+- return;
+- }
+-
+- /* If there are no BDEs associated with this IOCB,
+- * there is nothing to do.
+- */
+- if (icmd->ulpBdeCount == 0)
+- return;
++ uint32_t *payload;
++ uint32_t cmd, did, newnode, rjt_err = 0;
++ IOCB_t *icmd = &elsiocb->iocb;
+
+- /* type of ELS cmd is first 32bit word in packet */
+- mp = lpfc_sli_ringpostbuf_get(phba, pring, getPaddr(icmd->un.
+- cont64[0].
+- addrHigh,
+- icmd->un.
+- cont64[0].addrLow));
+- if (mp == 0) {
+- drop_cmd = 1;
++ if (vport == NULL || elsiocb->context2 == NULL)
+ goto dropit;
+- }
+
+ newnode = 0;
+- lp = (uint32_t *) mp->virt;
+- cmd = *lp++;
+- lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], 1, 1);
++ payload = ((struct lpfc_dmabuf *)elsiocb->context2)->virt;
++ cmd = *payload;
++ if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) == 0)
++ lpfc_post_buffer(phba, pring, 1, 1);
+
++ did = icmd->un.rcvels.remoteID;
+ if (icmd->ulpStatus) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV Unsol ELS: status:x%x/x%x did:x%x",
++ icmd->ulpStatus, icmd->un.ulpWord[4], did);
+ goto dropit;
+ }
+
+ /* Check to see if link went down during discovery */
+- if (lpfc_els_chk_latt(phba)) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ if (lpfc_els_chk_latt(vport))
+ goto dropit;
+- }
+
+- did = icmd->un.rcvels.remoteID;
+- ndlp = lpfc_findnode_did(phba, did);
++ /* Ignore traffic received during vport shutdown. */
++ if (vport->load_flag & FC_UNLOADING)
++ goto dropit;
++
++ ndlp = lpfc_findnode_did(vport, did);
+ if (!ndlp) {
+ /* Cannot find existing Fabric ndlp, so allocate a new one */
+ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+- if (!ndlp) {
+- lpfc_mbuf_free(phba, mp->virt, mp->phys);
+- kfree(mp);
+- drop_cmd = 1;
++ if (!ndlp)
+ goto dropit;
+- }
+
+- lpfc_nlp_init(phba, ndlp, did);
++ lpfc_nlp_init(vport, ndlp, did);
+ newnode = 1;
+ if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) {
+ ndlp->nlp_type |= NLP_FABRIC;
+ }
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ }
+
+ phba->fc_stat.elsRcvFrame++;
+ if (elsiocb->context1)
+ lpfc_nlp_put(elsiocb->context1);
+ elsiocb->context1 = lpfc_nlp_get(ndlp);
+- elsiocb->context2 = mp;
++ elsiocb->vport = vport;
+
+ if ((cmd & ELS_CMD_MASK) == ELS_CMD_RSCN) {
+ cmd &= ELS_CMD_MASK;
+ }
+ /* ELS command received from NPORT */
+ lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+- "%d:0112 ELS command x%x received from NPORT x%x "
+- "Data: x%x\n", phba->brd_no, cmd, did, phba->hba_state);
++ "%d (%d):0112 ELS command x%x received from NPORT x%x "
++ "Data: x%x\n", phba->brd_no, vport->vpi, cmd, did,
++ vport->port_state);
+
+ switch (cmd) {
+ case ELS_CMD_PLOGI:
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++ "RCV PLOGI: did:x%x/ste:x%x flg:x%x",
++ did, vport->port_state, ndlp->nlp_flag);
++
+ phba->fc_stat.elsRcvPLOGI++;
+- if (phba->hba_state < LPFC_DISC_AUTH) {
+- rjt_err = 1;
++ ndlp = lpfc_plogi_confirm_nport(phba, payload, ndlp);
++
++ if (vport->port_state < LPFC_DISC_AUTH) {
++ rjt_err = LSRJT_UNABLE_TPC;
+ break;
+ }
+- ndlp = lpfc_plogi_confirm_nport(phba, mp, ndlp);
+- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PLOGI);
++ lpfc_disc_state_machine(vport, ndlp, elsiocb,
++ NLP_EVT_RCV_PLOGI);
++
+ break;
+ case
ELS_CMD_FLOGI: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV FLOGI: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvFLOGI++; +- lpfc_els_rcv_flogi(phba, elsiocb, ndlp, newnode); ++ lpfc_els_rcv_flogi(vport, elsiocb, ndlp, newnode); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case ELS_CMD_LOGO: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV LOGO: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvLOGO++; +- if (phba->hba_state < LPFC_DISC_AUTH) { +- rjt_err = 1; ++ if (vport->port_state < LPFC_DISC_AUTH) { ++ rjt_err = LSRJT_UNABLE_TPC; + break; + } +- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_LOGO); ++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); + break; + case ELS_CMD_PRLO: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV PRLO: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvPRLO++; +- if (phba->hba_state < LPFC_DISC_AUTH) { +- rjt_err = 1; ++ if (vport->port_state < LPFC_DISC_AUTH) { ++ rjt_err = LSRJT_UNABLE_TPC; + break; + } +- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLO); ++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); + break; + case ELS_CMD_RSCN: + phba->fc_stat.elsRcvRSCN++; +- lpfc_els_rcv_rscn(phba, elsiocb, ndlp, newnode); ++ lpfc_els_rcv_rscn(vport, elsiocb, ndlp, newnode); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case ELS_CMD_ADISC: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV ADISC: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvADISC++; +- if (phba->hba_state < LPFC_DISC_AUTH) { +- rjt_err = 1; ++ if (vport->port_state < LPFC_DISC_AUTH) { ++ rjt_err = LSRJT_UNABLE_TPC; + break; + } +- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_ADISC); ++ lpfc_disc_state_machine(vport, ndlp, elsiocb, ++ NLP_EVT_RCV_ADISC); + break; + case ELS_CMD_PDISC: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV PDISC: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvPDISC++; +- if (phba->hba_state < LPFC_DISC_AUTH) { +- rjt_err = 1; ++ if (vport->port_state < LPFC_DISC_AUTH) { ++ rjt_err = LSRJT_UNABLE_TPC; + break; + } +- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PDISC); ++ lpfc_disc_state_machine(vport, ndlp, elsiocb, ++ NLP_EVT_RCV_PDISC); + break; + case ELS_CMD_FARPR: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV FARPR: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvFARPR++; +- lpfc_els_rcv_farpr(phba, elsiocb, ndlp); ++ lpfc_els_rcv_farpr(vport, elsiocb, ndlp); + break; + case ELS_CMD_FARP: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV FARP: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvFARP++; +- lpfc_els_rcv_farp(phba, elsiocb, ndlp); ++ lpfc_els_rcv_farp(vport, elsiocb, ndlp); + break; + case ELS_CMD_FAN: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV FAN: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvFAN++; +- lpfc_els_rcv_fan(phba, elsiocb, ndlp); ++ lpfc_els_rcv_fan(vport, elsiocb, ndlp); + break; + case ELS_CMD_PRLI: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV 
PRLI: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvPRLI++; +- if (phba->hba_state < LPFC_DISC_AUTH) { +- rjt_err = 1; ++ if (vport->port_state < LPFC_DISC_AUTH) { ++ rjt_err = LSRJT_UNABLE_TPC; + break; + } +- lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLI); ++ lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); + break; + case ELS_CMD_LIRR: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV LIRR: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvLIRR++; +- lpfc_els_rcv_lirr(phba, elsiocb, ndlp); ++ lpfc_els_rcv_lirr(vport, elsiocb, ndlp); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case ELS_CMD_RPS: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RPS: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvRPS++; +- lpfc_els_rcv_rps(phba, elsiocb, ndlp); ++ lpfc_els_rcv_rps(vport, elsiocb, ndlp); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case ELS_CMD_RPL: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RPL: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvRPL++; +- lpfc_els_rcv_rpl(phba, elsiocb, ndlp); ++ lpfc_els_rcv_rpl(vport, elsiocb, ndlp); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + case ELS_CMD_RNID: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV RNID: did:x%x/ste:x%x flg:x%x", ++ did, vport->port_state, ndlp->nlp_flag); ++ + phba->fc_stat.elsRcvRNID++; +- lpfc_els_rcv_rnid(phba, elsiocb, ndlp); ++ lpfc_els_rcv_rnid(vport, elsiocb, ndlp); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + default: ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, ++ "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", ++ cmd, did, vport->port_state); ++ + /* Unsupported ELS command, reject */ +- rjt_err = 1; ++ rjt_err = LSRJT_INVALID_CMD; + + /* Unknown ELS command received from NPORT */ + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, +- "%d:0115 Unknown ELS command x%x received from " +- "NPORT x%x\n", phba->brd_no, cmd, did); ++ "%d (%d):0115 Unknown ELS command x%x " ++ "received from NPORT x%x\n", ++ phba->brd_no, vport->vpi, cmd, did); + if (newnode) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + break; + } + + /* check if need to LS_RJT received ELS cmd */ + if (rjt_err) { +- stat.un.b.lsRjtRsvd0 = 0; +- stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; ++ memset(&stat, 0, sizeof(stat)); ++ stat.un.b.lsRjtRsnCode = rjt_err; + stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; +- stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, elsiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, ++ NULL); ++ if (newnode) ++ lpfc_drop_node(vport, ndlp); + } + +- lpfc_nlp_put(elsiocb->context1); +- elsiocb->context1 = NULL; +- if (elsiocb->context2) { +- lpfc_mbuf_free(phba, mp->virt, mp->phys); +- kfree(mp); +- } ++ return; ++ + dropit: +- /* check if need to drop received ELS cmd */ +- if (drop_cmd == 1) { + lpfc_printf_log(phba, KERN_ERR, LOG_ELS, +- "%d:0111 Dropping received ELS cmd " +- "Data: x%x x%x x%x\n", phba->brd_no, ++ "%d (%d):0111 Dropping received ELS cmd " ++ "Data: x%x x%x x%x\n", ++ phba->brd_no, vport ? 
vport->vpi : 0xffff, + icmd->ulpStatus, icmd->un.ulpWord[4], + icmd->ulpTimeout); + phba->fc_stat.elsRcvDrop++; ++} ++ ++static struct lpfc_vport * ++lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi) ++{ ++ struct lpfc_vport *vport; ++ ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ if (vport->vpi == vpi) ++ return vport; ++ } ++ return NULL; ++} ++ ++void ++lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *elsiocb) ++{ ++ struct lpfc_vport *vport = phba->pport; ++ IOCB_t *icmd = &elsiocb->iocb; ++ dma_addr_t paddr; ++ struct lpfc_dmabuf *bdeBuf1 = elsiocb->context2; ++ struct lpfc_dmabuf *bdeBuf2 = elsiocb->context3; ++ ++ elsiocb->context2 = NULL; ++ elsiocb->context3 = NULL; ++ ++ if (icmd->ulpStatus == IOSTAT_NEED_BUFFER) { ++ lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ); ++ } else if (icmd->ulpStatus == IOSTAT_LOCAL_REJECT && ++ (icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING) { ++ phba->fc_stat.NoRcvBuf++; ++ /* Not enough posted buffers; Try posting more buffers */ ++ if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)) ++ lpfc_post_buffer(phba, pring, 0, 1); ++ return; ++ } ++ ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ (icmd->ulpCommand == CMD_IOCB_RCV_ELS64_CX || ++ icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) { ++ if (icmd->unsli3.rcvsli3.vpi == 0xffff) ++ vport = phba->pport; ++ else { ++ uint16_t vpi = icmd->unsli3.rcvsli3.vpi; ++ vport = lpfc_find_vport_by_vpid(phba, vpi); ++ } ++ } ++ /* If there are no BDEs associated ++ * with this IOCB, there is nothing to do. ++ */ ++ if (icmd->ulpBdeCount == 0) ++ return; ++ ++ /* type of ELS cmd is first 32bit word ++ * in packet ++ */ ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { ++ elsiocb->context2 = bdeBuf1; ++ } else { ++ paddr = getPaddr(icmd->un.cont64[0].addrHigh, ++ icmd->un.cont64[0].addrLow); ++ elsiocb->context2 = lpfc_sli_ringpostbuf_get(phba, pring, ++ paddr); ++ } ++ ++ lpfc_els_unsol_buffer(phba, pring, vport, elsiocb); ++ /* ++ * The different unsolicited event handlers would tell us ++ * if they are done with "mp" by setting context2 to NULL. 
++ */ ++ lpfc_nlp_put(elsiocb->context1); ++ elsiocb->context1 = NULL; ++ if (elsiocb->context2) { ++ lpfc_in_buf_free(phba, (struct lpfc_dmabuf *)elsiocb->context2); ++ elsiocb->context2 = NULL; ++ } ++ ++ /* RCV_ELS64_CX provide for 2 BDEs - process 2nd if included */ ++ if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) && ++ icmd->ulpBdeCount == 2) { ++ elsiocb->context2 = bdeBuf2; ++ lpfc_els_unsol_buffer(phba, pring, vport, elsiocb); ++ /* free mp if we are done with it */ ++ if (elsiocb->context2) { ++ lpfc_in_buf_free(phba, elsiocb->context2); ++ elsiocb->context2 = NULL; ++ } ++ } ++} ++ ++void ++lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) ++{ ++ struct lpfc_nodelist *ndlp, *ndlp_fdmi; ++ ++ ndlp = lpfc_findnode_did(vport, NameServer_DID); ++ if (!ndlp) { ++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); ++ if (!ndlp) { ++ if (phba->fc_topology == TOPOLOGY_LOOP) { ++ lpfc_disc_start(vport); ++ return; ++ } ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0251 NameServer login: no memory\n", ++ phba->brd_no, vport->vpi); ++ return; ++ } ++ lpfc_nlp_init(vport, ndlp, NameServer_DID); ++ ndlp->nlp_type |= NLP_FABRIC; ++ } ++ ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ ++ if (lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0)) { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0252 Cannot issue NameServer login\n", ++ phba->brd_no, vport->vpi); ++ return; ++ } ++ ++ if (phba->cfg_fdmi_on) { ++ ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool, ++ GFP_KERNEL); ++ if (ndlp_fdmi) { ++ lpfc_nlp_init(vport, ndlp_fdmi, FDMI_DID); ++ ndlp_fdmi->nlp_type |= NLP_FABRIC; ++ ndlp_fdmi->nlp_state = ++ NLP_STE_PLOGI_ISSUE; ++ lpfc_issue_els_plogi(vport, ndlp_fdmi->nlp_DID, ++ 0); ++ } ++ } ++ return; ++} ++ ++static void ++lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ++{ ++ struct lpfc_vport *vport = pmb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; ++ MAILBOX_t *mb = &pmb->mb; ++ ++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; ++ lpfc_nlp_put(ndlp); ++ ++ if (mb->mbxStatus) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, ++ "%d (%d):0915 Register VPI failed: 0x%x\n", ++ phba->brd_no, vport->vpi, mb->mbxStatus); ++ ++ switch (mb->mbxStatus) { ++ case 0x11: /* unsupported feature */ ++ case 0x9603: /* max_vpi exceeded */ ++ /* giving up on vport registration */ ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); ++ break; ++ default: ++ /* Try to recover from this error */ ++ lpfc_mbx_unreg_vpi(vport); ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; ++ lpfc_initial_fdisc(vport); ++ break; ++ } ++ ++ } else { ++ if (vport == phba->pport) ++ lpfc_issue_fabric_reglogin(vport); ++ else ++ lpfc_do_scr_ns_plogi(phba, vport); + } ++ mempool_free(pmb, phba->mbox_mem_pool); + return; + } ++ ++void ++lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp) ++{ ++ LPFC_MBOXQ_t *mbox; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mbox) { ++ lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox); ++ mbox->vport = vport; ++ mbox->context2 = lpfc_nlp_get(ndlp); ++ mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport; ++ if (lpfc_sli_issue_mbox(phba, mbox, 
++ MBX_NOWAIT | MBX_STOP_IOCB) ++ == MBX_NOT_FINISHED) { ++ mempool_free(mbox, phba->mbox_mem_pool); ++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; ++ ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, ++ "%d (%d):0253 Register VPI: Cannot send mbox\n", ++ phba->brd_no, vport->vpi); ++ } ++ } else { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, ++ "%d (%d):0254 Register VPI: no memory\n", ++ phba->brd_no, vport->vpi); ++ ++ vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI; ++ lpfc_nlp_put(ndlp); ++ } ++} ++ ++static void ++lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) ++{ ++ struct lpfc_vport *vport = cmdiocb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1; ++ struct lpfc_nodelist *np; ++ struct lpfc_nodelist *next_np; ++ IOCB_t *irsp = &rspiocb->iocb; ++ struct lpfc_iocbq *piocb; ++ ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0123 FDISC completes. x%x/x%x prevDID: x%x\n", ++ phba->brd_no, vport->vpi, ++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID); ++ ++ /* Since all FDISCs are being single threaded, we ++ * must reset the discovery timer for ALL vports ++ * waiting to send FDISC when one completes. ++ */ ++ list_for_each_entry(piocb, &phba->fabric_iocb_list, list) { ++ lpfc_set_disctmo(piocb->vport); ++ } ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "FDISC cmpl: status:x%x/x%x prevdid:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID); ++ ++ if (irsp->ulpStatus) { ++ /* Check for retry */ ++ if (lpfc_els_retry(phba, cmdiocb, rspiocb)) ++ goto out; ++ ++ /* FDISC failed */ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0124 FDISC failed. (%d/%d)\n", ++ phba->brd_no, vport->vpi, ++ irsp->ulpStatus, irsp->un.ulpWord[4]); ++ ++ if (vport->fc_vport->vport_state == FC_VPORT_INITIALIZING) ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ ++ lpfc_nlp_put(ndlp); ++ /* giving up on FDISC. Cancel discovery timer */ ++ lpfc_can_disctmo(vport); ++ } else { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_FABRIC; ++ if (vport->phba->fc_topology == TOPOLOGY_LOOP) ++ vport->fc_flag |= FC_PUBLIC_LOOP; ++ spin_unlock_irq(shost->host_lock); ++ ++ vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID; ++ lpfc_vport_set_state(vport, FC_VPORT_ACTIVE); ++ if ((vport->fc_prevDID != vport->fc_myDID) && ++ !(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) { ++ /* If our NportID changed, we need to ensure all ++ * remaining NPORTs get unreg_login'ed so we can ++ * issue unreg_vpi. 
++ */ ++ list_for_each_entry_safe(np, next_np, ++ &vport->fc_nodes, nlp_listp) { ++ if (np->nlp_state != NLP_STE_NPR_NODE ++ || !(np->nlp_flag & NLP_NPR_ADISC)) ++ continue; ++ spin_lock_irq(shost->host_lock); ++ np->nlp_flag &= ~NLP_NPR_ADISC; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_unreg_rpi(vport, np); ++ } ++ lpfc_mbx_unreg_vpi(vport); ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; ++ } ++ ++ if (vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) ++ lpfc_register_new_vport(phba, vport, ndlp); ++ else ++ lpfc_do_scr_ns_plogi(phba, vport); ++ ++ lpfc_nlp_put(ndlp); /* Free Fabric ndlp for vports */ ++ } ++ ++out: ++ lpfc_els_free_iocb(phba, cmdiocb); ++} ++ ++int ++lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ uint8_t retry) ++{ ++ struct lpfc_hba *phba = vport->phba; ++ IOCB_t *icmd; ++ struct lpfc_iocbq *elsiocb; ++ struct serv_parm *sp; ++ uint8_t *pcmd; ++ uint16_t cmdsize; ++ int did = ndlp->nlp_DID; ++ int rc; ++ ++ cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm)); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, did, ++ ELS_CMD_FDISC); ++ if (!elsiocb) { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0255 Issue FDISC: no IOCB\n", ++ phba->brd_no, vport->vpi); ++ return 1; ++ } ++ ++ icmd = &elsiocb->iocb; ++ icmd->un.elsreq64.myID = 0; ++ icmd->un.elsreq64.fl = 1; ++ ++ /* For FDISC, Let FDISC rsp set the NPortID for this VPI */ ++ icmd->ulpCt_h = 1; ++ icmd->ulpCt_l = 0; ++ ++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); ++ *((uint32_t *) (pcmd)) = ELS_CMD_FDISC; ++ pcmd += sizeof(uint32_t); /* CSP Word 1 */ ++ memcpy(pcmd, &vport->phba->pport->fc_sparam, sizeof(struct serv_parm)); ++ sp = (struct serv_parm *) pcmd; ++ /* Setup CSPs accordingly for Fabric */ ++ sp->cmn.e_d_tov = 0; ++ sp->cmn.w2.r_a_tov = 0; ++ sp->cls1.classValid = 0; ++ sp->cls2.seqDelivery = 1; ++ sp->cls3.seqDelivery = 1; ++ ++ pcmd += sizeof(uint32_t); /* CSP Word 2 */ ++ pcmd += sizeof(uint32_t); /* CSP Word 3 */ ++ pcmd += sizeof(uint32_t); /* CSP Word 4 */ ++ pcmd += sizeof(uint32_t); /* Port Name */ ++ memcpy(pcmd, &vport->fc_portname, 8); ++ pcmd += sizeof(uint32_t); /* Node Name */ ++ pcmd += sizeof(uint32_t); /* Node Name */ ++ memcpy(pcmd, &vport->fc_nodename, 8); ++ ++ lpfc_set_disctmo(vport); ++ ++ phba->fc_stat.elsXmitFDISC++; ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_fdisc; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue FDISC: did:x%x", ++ did, 0, 0); ++ ++ rc = lpfc_issue_fabric_iocb(phba, elsiocb); ++ if (rc == IOCB_ERROR) { ++ lpfc_els_free_iocb(phba, elsiocb); ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0256 Issue FDISC: Cannot send IOCB\n", ++ phba->brd_no, vport->vpi); ++ ++ return 1; ++ } ++ lpfc_vport_set_state(vport, FC_VPORT_INITIALIZING); ++ vport->port_state = LPFC_FDISC; ++ return 0; ++} ++ ++static void ++lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) ++{ ++ struct lpfc_vport *vport = cmdiocb->vport; ++ IOCB_t *irsp; ++ ++ irsp = &rspiocb->iocb; ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "LOGO npiv cmpl: status:x%x/x%x did:x%x", ++ irsp->ulpStatus, irsp->un.ulpWord[4], irsp->un.rcvels.remoteID); ++ ++ lpfc_els_free_iocb(phba, cmdiocb); ++ vport->unreg_vpi_cmpl = VPORT_ERROR; ++} ++ ++int ++lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ++{ ++ struct Scsi_Host *shost 
= lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; ++ IOCB_t *icmd; ++ struct lpfc_iocbq *elsiocb; ++ uint8_t *pcmd; ++ uint16_t cmdsize; ++ ++ cmdsize = 2 * sizeof(uint32_t) + sizeof(struct lpfc_name); ++ elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, 0, ndlp, ndlp->nlp_DID, ++ ELS_CMD_LOGO); ++ if (!elsiocb) ++ return 1; ++ ++ icmd = &elsiocb->iocb; ++ pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); ++ *((uint32_t *) (pcmd)) = ELS_CMD_LOGO; ++ pcmd += sizeof(uint32_t); ++ ++ /* Fill in LOGO payload */ ++ *((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID); ++ pcmd += sizeof(uint32_t); ++ memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name)); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Issue LOGO npiv did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); ++ ++ elsiocb->iocb_cmpl = lpfc_cmpl_els_npiv_logo; ++ spin_lock_irq(shost->host_lock); ++ ndlp->nlp_flag |= NLP_LOGO_SND; ++ spin_unlock_irq(shost->host_lock); ++ if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) { ++ spin_lock_irq(shost->host_lock); ++ ndlp->nlp_flag &= ~NLP_LOGO_SND; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_els_free_iocb(phba, elsiocb); ++ return 1; ++ } ++ return 0; ++} ++ ++void ++lpfc_fabric_block_timeout(unsigned long ptr) ++{ ++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr; ++ unsigned long iflags; ++ uint32_t tmo_posted; ++ spin_lock_irqsave(&phba->pport->work_port_lock, iflags); ++ tmo_posted = phba->pport->work_port_events & WORKER_FABRIC_BLOCK_TMO; ++ if (!tmo_posted) ++ phba->pport->work_port_events |= WORKER_FABRIC_BLOCK_TMO; ++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags); ++ ++ if (!tmo_posted) { ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ if (phba->work_wait) ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ } ++} ++ ++static void ++lpfc_resume_fabric_iocbs(struct lpfc_hba *phba) ++{ ++ struct lpfc_iocbq *iocb; ++ unsigned long iflags; ++ int ret; ++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; ++ IOCB_t *cmd; ++ ++repeat: ++ iocb = NULL; ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ /* Post any pending iocb to the SLI layer */ ++ if (atomic_read(&phba->fabric_iocb_count) == 0) { ++ list_remove_head(&phba->fabric_iocb_list, iocb, typeof(*iocb), ++ list); ++ if (iocb) ++ atomic_inc(&phba->fabric_iocb_count); ++ } ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ if (iocb) { ++ iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; ++ iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; ++ iocb->iocb_flag |= LPFC_IO_FABRIC; ++ ++ lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, ++ "Fabric sched1: ste:x%x", ++ iocb->vport->port_state, 0, 0); ++ ++ ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0); ++ ++ if (ret == IOCB_ERROR) { ++ iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; ++ iocb->fabric_iocb_cmpl = NULL; ++ iocb->iocb_flag &= ~LPFC_IO_FABRIC; ++ cmd = &iocb->iocb; ++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT; ++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; ++ iocb->iocb_cmpl(phba, iocb, iocb); ++ ++ atomic_dec(&phba->fabric_iocb_count); ++ goto repeat; ++ } ++ } ++ ++ return; ++} ++ ++void ++lpfc_unblock_fabric_iocbs(struct lpfc_hba *phba) ++{ ++ clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); ++ ++ lpfc_resume_fabric_iocbs(phba); ++ return; ++} ++ ++static void ++lpfc_block_fabric_iocbs(struct lpfc_hba *phba) ++{ ++ int blocked; ++ ++ blocked = test_and_set_bit(FABRIC_COMANDS_BLOCKED, 
&phba->bit_flags); ++ /* Start a timer to unblock fabric ++ * iocbs after 100ms ++ */ ++ if (!blocked) ++ mod_timer(&phba->fabric_block_timer, jiffies + HZ/10 ); ++ ++ return; ++} ++ ++static void ++lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) ++{ ++ struct ls_rjt stat; ++ ++ if ((cmdiocb->iocb_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC) ++ BUG(); ++ ++ switch (rspiocb->iocb.ulpStatus) { ++ case IOSTAT_NPORT_RJT: ++ case IOSTAT_FABRIC_RJT: ++ if (rspiocb->iocb.un.ulpWord[4] & RJT_UNAVAIL_TEMP) { ++ lpfc_block_fabric_iocbs(phba); ++ } ++ break; ++ ++ case IOSTAT_NPORT_BSY: ++ case IOSTAT_FABRIC_BSY: ++ lpfc_block_fabric_iocbs(phba); ++ break; ++ ++ case IOSTAT_LS_RJT: ++ stat.un.lsRjtError = ++ be32_to_cpu(rspiocb->iocb.un.ulpWord[4]); ++ if ((stat.un.b.lsRjtRsnCode == LSRJT_UNABLE_TPC) || ++ (stat.un.b.lsRjtRsnCode == LSRJT_LOGICAL_BSY)) ++ lpfc_block_fabric_iocbs(phba); ++ break; ++ } ++ ++ if (atomic_read(&phba->fabric_iocb_count) == 0) ++ BUG(); ++ ++ cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl; ++ cmdiocb->fabric_iocb_cmpl = NULL; ++ cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC; ++ cmdiocb->iocb_cmpl(phba, cmdiocb, rspiocb); ++ ++ atomic_dec(&phba->fabric_iocb_count); ++ if (!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags)) { ++ /* Post any pending iocbs to HBA */ ++ lpfc_resume_fabric_iocbs(phba); ++ } ++} ++ ++int ++lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb) ++{ ++ unsigned long iflags; ++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; ++ int ready; ++ int ret; ++ ++ if (atomic_read(&phba->fabric_iocb_count) > 1) ++ BUG(); ++ ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ ready = atomic_read(&phba->fabric_iocb_count) == 0 && ++ !test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); ++ ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ if (ready) { ++ iocb->fabric_iocb_cmpl = iocb->iocb_cmpl; ++ iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb; ++ iocb->iocb_flag |= LPFC_IO_FABRIC; ++ ++ lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD, ++ "Fabric sched2: ste:x%x", ++ iocb->vport->port_state, 0, 0); ++ ++ atomic_inc(&phba->fabric_iocb_count); ++ ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0); ++ ++ if (ret == IOCB_ERROR) { ++ iocb->iocb_cmpl = iocb->fabric_iocb_cmpl; ++ iocb->fabric_iocb_cmpl = NULL; ++ iocb->iocb_flag &= ~LPFC_IO_FABRIC; ++ atomic_dec(&phba->fabric_iocb_count); ++ } ++ } else { ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ list_add_tail(&iocb->list, &phba->fabric_iocb_list); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ ret = IOCB_SUCCESS; ++ } ++ return ret; ++} ++ ++ ++void lpfc_fabric_abort_vport(struct lpfc_vport *vport) ++{ ++ LIST_HEAD(completions); ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_iocbq *tmp_iocb, *piocb; ++ IOCB_t *cmd; ++ ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, ++ list) { ++ ++ if (piocb->vport != vport) ++ continue; ++ ++ list_move_tail(&piocb->list, &completions); ++ } ++ spin_unlock_irq(&phba->hbalock); ++ ++ while (!list_empty(&completions)) { ++ piocb = list_get_first(&completions, struct lpfc_iocbq, list); ++ list_del_init(&piocb->list); ++ ++ cmd = &piocb->iocb; ++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT; ++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; ++ (piocb->iocb_cmpl) (phba, piocb, piocb); ++ } ++} ++ ++void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp) ++{ ++ LIST_HEAD(completions); ++ struct lpfc_hba *phba = ndlp->vport->phba; ++ struct lpfc_iocbq 
*tmp_iocb, *piocb; ++ struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; ++ IOCB_t *cmd; ++ ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, ++ list) { ++ if ((lpfc_check_sli_ndlp(phba, pring, piocb, ndlp))) { ++ ++ list_move_tail(&piocb->list, &completions); ++ } ++ } ++ spin_unlock_irq(&phba->hbalock); ++ ++ while (!list_empty(&completions)) { ++ piocb = list_get_first(&completions, struct lpfc_iocbq, list); ++ list_del_init(&piocb->list); ++ ++ cmd = &piocb->iocb; ++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT; ++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; ++ (piocb->iocb_cmpl) (phba, piocb, piocb); ++ } ++} ++ ++void lpfc_fabric_abort_hba(struct lpfc_hba *phba) ++{ ++ LIST_HEAD(completions); ++ struct lpfc_iocbq *piocb; ++ IOCB_t *cmd; ++ ++ spin_lock_irq(&phba->hbalock); ++ list_splice_init(&phba->fabric_iocb_list, &completions); ++ spin_unlock_irq(&phba->hbalock); ++ ++ while (!list_empty(&completions)) { ++ piocb = list_get_first(&completions, struct lpfc_iocbq, list); ++ list_del_init(&piocb->list); ++ ++ cmd = &piocb->iocb; ++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT; ++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; ++ (piocb->iocb_cmpl) (phba, piocb, piocb); ++ } ++} ++ ++ ++void lpfc_fabric_abort_flogi(struct lpfc_hba *phba) ++{ ++ LIST_HEAD(completions); ++ struct lpfc_iocbq *tmp_iocb, *piocb; ++ IOCB_t *cmd; ++ struct lpfc_nodelist *ndlp; ++ ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list, ++ list) { ++ ++ cmd = &piocb->iocb; ++ ndlp = (struct lpfc_nodelist *) piocb->context1; ++ if (cmd->ulpCommand == CMD_ELS_REQUEST64_CR && ++ ndlp != NULL && ++ ndlp->nlp_DID == Fabric_DID) ++ list_move_tail(&piocb->list, &completions); ++ } ++ spin_unlock_irq(&phba->hbalock); ++ ++ while (!list_empty(&completions)) { ++ piocb = list_get_first(&completions, struct lpfc_iocbq, list); ++ list_del_init(&piocb->list); ++ ++ cmd = &piocb->iocb; ++ cmd->ulpStatus = IOSTAT_LOCAL_REJECT; ++ cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; ++ (piocb->iocb_cmpl) (phba, piocb, piocb); ++ } ++} ++ ++ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hbadisc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -36,6 +36,8 @@ + #include "lpfc.h" + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" ++#include "lpfc_debugfs.h" + + /* AlpaArray for assignment of scsid for scan-down and bind_method */ + static uint8_t lpfcAlpaArray[] = { +@@ -54,7 +56,7 @@ + 0x10, 0x0F, 0x08, 0x04, 0x02, 0x01 + }; + +-static void lpfc_disc_timeout_handler(struct lpfc_hba *); ++static void lpfc_disc_timeout_handler(struct lpfc_vport *); + + void + lpfc_terminate_rport_io(struct fc_rport *rport) +@@ -74,14 +76,16 @@ + return; + } + +- phba = ndlp->nlp_phba; ++ phba = ndlp->vport->phba; ++ ++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, ++ "rport terminate: sid:x%x did:x%x flg:x%x", ++ ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag); + +- spin_lock_irq(phba->host->host_lock); + if (ndlp->nlp_sid != NLP_NO_SID) { + lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], + ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); + } +- spin_unlock_irq(phba->host->host_lock); + + return; + } +@@ -94,28 +98,98 @@ + { + struct lpfc_rport_data *rdata; + struct lpfc_nodelist * ndlp; +- uint8_t *name; +- int warn_on = 0; ++ struct lpfc_vport 
*vport;
+ struct lpfc_hba *phba;
++ struct completion devloss_compl;
++ struct lpfc_work_evt *evtp;
+
+ rdata = rport->dd_data;
+ ndlp = rdata->pnode;
+
+ if (!ndlp) {
+- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++ if (rport->scsi_target_id != -1) {
+ printk(KERN_ERR "Cannot find remote node"
+ " for rport in dev_loss_tmo_callbk x%x\n",
+ rport->port_id);
++ }
+ return;
+ }
+
+- if (ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++ vport = ndlp->vport;
++ phba = vport->phba;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport devlosscb: sid:x%x did:x%x flg:x%x",
++ ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
++
++ init_completion(&devloss_compl);
++ evtp = &ndlp->dev_loss_evt;
++
++ if (!list_empty(&evtp->evt_listp))
++ return;
++
++ spin_lock_irq(&phba->hbalock);
++ evtp->evt_arg1 = ndlp;
++ evtp->evt_arg2 = &devloss_compl;
++ evtp->evt = LPFC_EVT_DEV_LOSS;
++ list_add_tail(&evtp->evt_listp, &phba->work_list);
++ if (phba->work_wait)
++ wake_up(phba->work_wait);
++
++ spin_unlock_irq(&phba->hbalock);
++
++ wait_for_completion(&devloss_compl);
++
++ return;
++}
++
++/*
++ * This function is called from the worker thread when dev_loss_tmo
++ * expires.
++ */
++void
++lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
++{
++ struct lpfc_rport_data *rdata;
++ struct fc_rport *rport;
++ struct lpfc_vport *vport;
++ struct lpfc_hba *phba;
++ uint8_t *name;
++ int warn_on = 0;
++
++ rport = ndlp->rport;
++
++ if (!rport)
+ return;
+
+- name = (uint8_t *)&ndlp->nlp_portname;
+- phba = ndlp->nlp_phba;
++ rdata = rport->dd_data;
++ name = (uint8_t *) &ndlp->nlp_portname;
++ vport = ndlp->vport;
++ phba = vport->phba;
++
++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++ "rport devlosstmo:did:x%x type:x%x id:x%x",
++ ndlp->nlp_DID, ndlp->nlp_type, rport->scsi_target_id);
+
+- spin_lock_irq(phba->host->host_lock);
++ if (!(vport->load_flag & FC_UNLOADING) &&
++ ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++ return;
++
++ if (ndlp->nlp_type & NLP_FABRIC) {
++ int put_node;
++ int put_rport;
++
++ /* We will clean up these Nodes in linkup */
++ put_node = rdata->pnode != NULL;
++ put_rport = ndlp->rport != NULL;
++ rdata->pnode = NULL;
++ ndlp->rport = NULL;
++ if (put_node)
++ lpfc_nlp_put(ndlp);
++ if (put_rport)
++ put_device(&rport->dev);
++ return;
++ }
+
+ if (ndlp->nlp_sid != NLP_NO_SID) {
+ warn_on = 1;
+@@ -123,76 +197,114 @@
+ lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+ }
+- if (phba->fc_flag & FC_UNLOADING)
++ if (vport->load_flag & FC_UNLOADING)
+ warn_on = 0;
+
+- spin_unlock_irq(phba->host->host_lock);
+-
+ if (warn_on) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+- "%d:0203 Devloss timeout on "
++ "%d (%d):0203 Devloss timeout on "
+ "WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ "NPort x%x Data: x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ *name, *(name+1), *(name+2), *(name+3),
+ *(name+4), *(name+5), *(name+6), *(name+7),
+ ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->nlp_state, ndlp->nlp_rpi);
+ } else {
+ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+- "%d:0204 Devloss timeout on "
++ "%d (%d):0204 Devloss timeout on "
+ "WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ "NPort x%x Data: x%x x%x x%x\n",
+- phba->brd_no,
++ phba->brd_no, vport->vpi,
+ *name, *(name+1), *(name+2), *(name+3),
+ *(name+4), *(name+5), *(name+6), *(name+7),
+ ndlp->nlp_DID, ndlp->nlp_flag,
+ ndlp->nlp_state, ndlp->nlp_rpi);
+ }
+
+- if (!(phba->fc_flag & FC_UNLOADING) &&
++ if (!(vport->load_flag & FC_UNLOADING) &&
+ !(ndlp->nlp_flag &
NLP_DELAY_TMO) && + !(ndlp->nlp_flag & NLP_NPR_2B_DISC) && + (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE)) +- lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM); ++ lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM); + else { ++ int put_node; ++ int put_rport; ++ ++ put_node = rdata->pnode != NULL; ++ put_rport = ndlp->rport != NULL; + rdata->pnode = NULL; + ndlp->rport = NULL; ++ if (put_node) + lpfc_nlp_put(ndlp); ++ if (put_rport) + put_device(&rport->dev); + } ++} ++ + ++void ++lpfc_worker_wake_up(struct lpfc_hba *phba) ++{ ++ wake_up(phba->work_wait); + return; + } + + static void +-lpfc_work_list_done(struct lpfc_hba * phba) ++lpfc_work_list_done(struct lpfc_hba *phba) + { + struct lpfc_work_evt *evtp = NULL; + struct lpfc_nodelist *ndlp; ++ struct lpfc_vport *vport; + int free_evt; + +- spin_lock_irq(phba->host->host_lock); +- while(!list_empty(&phba->work_list)) { ++ spin_lock_irq(&phba->hbalock); ++ while (!list_empty(&phba->work_list)) { + list_remove_head((&phba->work_list), evtp, typeof(*evtp), + evt_listp); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + free_evt = 1; + switch (evtp->evt) { ++ case LPFC_EVT_DEV_LOSS_DELAY: ++ free_evt = 0; /* evt is part of ndlp */ ++ ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); ++ vport = ndlp->vport; ++ if (!vport) ++ break; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, ++ "rport devlossdly:did:x%x flg:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, 0); ++ ++ if (!(vport->load_flag & FC_UNLOADING) && ++ !(ndlp->nlp_flag & NLP_DELAY_TMO) && ++ !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) { ++ lpfc_disc_state_machine(vport, ndlp, NULL, ++ NLP_EVT_DEVICE_RM); ++ } ++ break; + case LPFC_EVT_ELS_RETRY: +- ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); ++ ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1); + lpfc_els_retry_delay_handler(ndlp); ++ free_evt = 0; /* evt is part of ndlp */ ++ break; ++ case LPFC_EVT_DEV_LOSS: ++ ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1); ++ lpfc_nlp_get(ndlp); ++ lpfc_dev_loss_tmo_handler(ndlp); + free_evt = 0; ++ complete((struct completion *)(evtp->evt_arg2)); ++ lpfc_nlp_put(ndlp); + break; + case LPFC_EVT_ONLINE: +- if (phba->hba_state < LPFC_LINK_DOWN) +- *(int *)(evtp->evt_arg1) = lpfc_online(phba); ++ if (phba->link_state < LPFC_LINK_DOWN) ++ *(int *) (evtp->evt_arg1) = lpfc_online(phba); + else +- *(int *)(evtp->evt_arg1) = 0; ++ *(int *) (evtp->evt_arg1) = 0; + complete((struct completion *)(evtp->evt_arg2)); + break; + case LPFC_EVT_OFFLINE_PREP: +- if (phba->hba_state >= LPFC_LINK_DOWN) ++ if (phba->link_state >= LPFC_LINK_DOWN) + lpfc_offline_prep(phba); + *(int *)(evtp->evt_arg1) = 0; + complete((struct completion *)(evtp->evt_arg2)); +@@ -218,33 +330,31 @@ + case LPFC_EVT_KILL: + lpfc_offline(phba); + *(int *)(evtp->evt_arg1) +- = (phba->stopped) ? 0 : lpfc_sli_brdkill(phba); ++ = (phba->pport->stopped) ++ ? 
0 : lpfc_sli_brdkill(phba); + lpfc_unblock_mgmt_io(phba); + complete((struct completion *)(evtp->evt_arg2)); + break; + } + if (free_evt) + kfree(evtp); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + } + +-static void +-lpfc_work_done(struct lpfc_hba * phba) ++void ++lpfc_work_done(struct lpfc_hba *phba) + { + struct lpfc_sli_ring *pring; +- int i; +- uint32_t ha_copy; +- uint32_t control; +- uint32_t work_hba_events; ++ uint32_t ha_copy, status, control, work_port_events; ++ struct lpfc_vport *vport; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + ha_copy = phba->work_ha; + phba->work_ha = 0; +- work_hba_events=phba->work_hba_events; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + if (ha_copy & HA_ERATT) + lpfc_handle_eratt(phba); +@@ -255,66 +365,111 @@ + if (ha_copy & HA_LATT) + lpfc_handle_latt(phba); + +- if (work_hba_events & WORKER_DISC_TMO) +- lpfc_disc_timeout_handler(phba); ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (!scsi_host_get(shost)) { ++ continue; ++ } ++ spin_unlock_irq(&phba->hbalock); ++ work_port_events = vport->work_port_events; ++ ++ if (work_port_events & WORKER_DISC_TMO) ++ lpfc_disc_timeout_handler(vport); + +- if (work_hba_events & WORKER_ELS_TMO) +- lpfc_els_timeout_handler(phba); ++ if (work_port_events & WORKER_ELS_TMO) ++ lpfc_els_timeout_handler(vport); + +- if (work_hba_events & WORKER_MBOX_TMO) ++ if (work_port_events & WORKER_HB_TMO) ++ lpfc_hb_timeout_handler(phba); ++ ++ if (work_port_events & WORKER_MBOX_TMO) + lpfc_mbox_timeout_handler(phba); + +- if (work_hba_events & WORKER_FDMI_TMO) +- lpfc_fdmi_tmo_handler(phba); ++ if (work_port_events & WORKER_FABRIC_BLOCK_TMO) ++ lpfc_unblock_fabric_iocbs(phba); ++ ++ if (work_port_events & WORKER_FDMI_TMO) ++ lpfc_fdmi_timeout_handler(vport); + +- spin_lock_irq(phba->host->host_lock); +- phba->work_hba_events &= ~work_hba_events; +- spin_unlock_irq(phba->host->host_lock); +- +- for (i = 0; i < phba->sli.num_rings; i++, ha_copy >>= 4) { +- pring = &phba->sli.ring[i]; +- if ((ha_copy & HA_RXATT) ++ if (work_port_events & WORKER_RAMP_DOWN_QUEUE) ++ lpfc_ramp_down_queue_handler(phba); ++ ++ if (work_port_events & WORKER_RAMP_UP_QUEUE) ++ lpfc_ramp_up_queue_handler(phba); ++ ++ spin_lock_irq(&vport->work_port_lock); ++ vport->work_port_events &= ~work_port_events; ++ spin_unlock_irq(&vport->work_port_lock); ++ scsi_host_put(shost); ++ spin_lock_irq(&phba->hbalock); ++ } ++ spin_unlock_irq(&phba->hbalock); ++ ++ pring = &phba->sli.ring[LPFC_ELS_RING]; ++ status = (ha_copy & (HA_RXMASK << (4*LPFC_ELS_RING))); ++ status >>= (4*LPFC_ELS_RING); ++ if ((status & HA_RXMASK) + || (pring->flag & LPFC_DEFERRED_RING_EVENT)) { + if (pring->flag & LPFC_STOP_IOCB_MASK) { + pring->flag |= LPFC_DEFERRED_RING_EVENT; + } else { + lpfc_sli_handle_slow_ring_event(phba, pring, +- (ha_copy & ++ (status & + HA_RXMASK)); + pring->flag &= ~LPFC_DEFERRED_RING_EVENT; + } + /* + * Turn on Ring interrupts + */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + control = readl(phba->HCregaddr); +- control |= (HC_R0INT_ENA << i); ++ if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) { ++ control |= (HC_R0INT_ENA << LPFC_ELS_RING); + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- 
spin_unlock_irq(phba->host->host_lock);
+ }
++ spin_unlock_irq(&phba->hbalock);
+ }
+-
+- lpfc_work_list_done (phba);
+-
++ lpfc_work_list_done(phba);
+ }
+
+ static int
+-check_work_wait_done(struct lpfc_hba *phba) {
++check_work_wait_done(struct lpfc_hba *phba)
++{
++ struct lpfc_vport *vport;
++ struct lpfc_sli_ring *pring;
++ int rc = 0;
+
+- spin_lock_irq(phba->host->host_lock);
+- if (phba->work_ha ||
+- phba->work_hba_events ||
+- (!list_empty(&phba->work_list)) ||
++ spin_lock_irq(&phba->hbalock);
++ list_for_each_entry(vport, &phba->port_list, listentry) {
++ if (vport->work_port_events) {
++ rc = 1;
++ goto exit;
++ }
++ }
++
++ if (phba->work_ha || (!list_empty(&phba->work_list)) ||
+ kthread_should_stop()) {
+- spin_unlock_irq(phba->host->host_lock);
+- return 1;
+- } else {
+- spin_unlock_irq(phba->host->host_lock);
+- return 0;
++ rc = 1;
++ goto exit;
+ }
++
++ pring = &phba->sli.ring[LPFC_ELS_RING];
++ if (pring->flag & LPFC_DEFERRED_RING_EVENT)
++ rc = 1;
++exit:
++ if (rc)
++ phba->work_found++;
++ else
++ phba->work_found = 0;
++
++ spin_unlock_irq(&phba->hbalock);
++ return rc;
+ }
+
++
+ int
+ lpfc_do_work(void *p)
+ {
+@@ -324,11 +479,13 @@
+
+ set_user_nice(current, -20);
+ phba->work_wait = &work_waitq;
++ phba->work_found = 0;
+
+ while (1) {
+
+ rc = wait_event_interruptible(work_waitq,
+ check_work_wait_done(phba));
++
+ BUG_ON(rc);
+
+ if (kthread_should_stop())
+@@ -336,6 +493,17 @@
+
+ lpfc_work_done(phba);
+
++ /* If there is a lot of slow ring work, like during link up,
++ * check_work_wait_done() may cause this thread to not give
++ * up the CPU for very long periods of time. This may cause
++ * soft lockups or other problems. To avoid these situations
++ * give up the CPU here after LPFC_MAX_WORKER_ITERATION
++ * consecutive iterations.
++ */
++ if (phba->work_found >= LPFC_MAX_WORKER_ITERATION) {
++ phba->work_found = 0;
++ schedule();
++ }
+ }
+ phba->work_wait = NULL;
+ return 0;
+@@ -347,16 +515,17 @@
+ * embedding it in the IOCB.
+ */ + int +-lpfc_workq_post_event(struct lpfc_hba * phba, void *arg1, void *arg2, ++lpfc_workq_post_event(struct lpfc_hba *phba, void *arg1, void *arg2, + uint32_t evt) + { + struct lpfc_work_evt *evtp; ++ unsigned long flags; + + /* + * All Mailbox completions and LPFC_ELS_RING rcv ring IOCB events will + * be queued to worker thread for processing + */ +- evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_KERNEL); ++ evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_ATOMIC); + if (!evtp) + return 0; + +@@ -364,136 +533,210 @@ + evtp->evt_arg2 = arg2; + evtp->evt = evt; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irqsave(&phba->hbalock, flags); + list_add_tail(&evtp->evt_listp, &phba->work_list); + if (phba->work_wait) +- wake_up(phba->work_wait); +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, flags); + + return 1; + } + +-int +-lpfc_linkdown(struct lpfc_hba *phba) ++void ++lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove) + { +- struct lpfc_sli *psli; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp, *next_ndlp; +- LPFC_MBOXQ_t *mb; + int rc; + +- psli = &phba->sli; +- /* sysfs or selective reset may call this routine to clean up */ +- if (phba->hba_state >= LPFC_LINK_DOWN) { +- if (phba->hba_state == LPFC_LINK_DOWN) +- return 0; ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { ++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) ++ continue; ++ ++ if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) ++ lpfc_unreg_rpi(vport, ndlp); + +- spin_lock_irq(phba->host->host_lock); +- phba->hba_state = LPFC_LINK_DOWN; +- spin_unlock_irq(phba->host->host_lock); ++ /* Leave Fabric nodes alone on link down */ ++ if (!remove && ndlp->nlp_type & NLP_FABRIC) ++ continue; ++ rc = lpfc_disc_state_machine(vport, ndlp, NULL, ++ remove ++ ? NLP_EVT_DEVICE_RM ++ : NLP_EVT_DEVICE_RECOVERY); ++ } ++ if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) { ++ lpfc_mbx_unreg_vpi(vport); ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + } ++} ++ ++static void ++lpfc_linkdown_port(struct lpfc_vport *vport) ++{ ++ struct lpfc_nodelist *ndlp, *next_ndlp; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + +- fc_host_post_event(phba->host, fc_get_event_number(), +- FCH_EVT_LINKDOWN, 0); ++ fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0); + +- /* Clean up any firmware default rpi's */ +- if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { +- lpfc_unreg_did(phba, 0xffffffff, mb); +- mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; +- if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) +- == MBX_NOT_FINISHED) { +- mempool_free( mb, phba->mbox_mem_pool); +- } +- } ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Link Down: state:x%x rtry:x%x flg:x%x", ++ vport->port_state, vport->fc_ns_retry, vport->fc_flag); + + /* Cleanup any outstanding RSCN activity */ +- lpfc_els_flush_rscn(phba); ++ lpfc_els_flush_rscn(vport); + + /* Cleanup any outstanding ELS commands */ +- lpfc_els_flush_cmd(phba); ++ lpfc_els_flush_cmd(vport); ++ ++ lpfc_cleanup_rpis(vport, 0); + +- /* +- * Issue a LINK DOWN event to all nodes. +- */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { + /* free any ndlp's on unused list */ ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) ++ /* free any ndlp's in unused state */ + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) +- lpfc_drop_node(phba, ndlp); +- else /* otherwise, force node recovery. 
*/ +- rc = lpfc_disc_state_machine(phba, ndlp, NULL, +- NLP_EVT_DEVICE_RECOVERY); ++ lpfc_drop_node(vport, ndlp); ++ ++ /* Turn off discovery timer if its running */ ++ lpfc_can_disctmo(vport); ++} ++ ++int ++lpfc_linkdown(struct lpfc_hba *phba) ++{ ++ struct lpfc_vport *vport = phba->pport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_vport *port_iterator; ++ LPFC_MBOXQ_t *mb; ++ ++ if (phba->link_state == LPFC_LINK_DOWN) { ++ return 0; ++ } ++ spin_lock_irq(&phba->hbalock); ++ if (phba->link_state > LPFC_LINK_DOWN) { ++ phba->link_state = LPFC_LINK_DOWN; ++ phba->pport->fc_flag &= ~FC_LBIT; ++ } ++ spin_unlock_irq(&phba->hbalock); ++ ++ list_for_each_entry(port_iterator, &phba->port_list, listentry) { ++ ++ /* Issue a LINK DOWN event to all nodes */ ++ lpfc_linkdown_port(port_iterator); ++ } ++ ++ /* Clean up any firmware default rpi's */ ++ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mb) { ++ lpfc_unreg_did(phba, 0xffff, 0xffffffff, mb); ++ mb->vport = vport; ++ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) ++ == MBX_NOT_FINISHED) { ++ mempool_free(mb, phba->mbox_mem_pool); ++ } + } + + /* Setup myDID for link up if we are in pt2pt mode */ +- if (phba->fc_flag & FC_PT2PT) { +- phba->fc_myDID = 0; +- if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { ++ if (phba->pport->fc_flag & FC_PT2PT) { ++ phba->pport->fc_myDID = 0; ++ mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mb) { + lpfc_config_link(phba, mb); +- mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; +- if (lpfc_sli_issue_mbox +- (phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB)) ++ mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ mb->vport = vport; ++ if (lpfc_sli_issue_mbox(phba, mb, ++ (MBX_NOWAIT | MBX_STOP_IOCB)) + == MBX_NOT_FINISHED) { +- mempool_free( mb, phba->mbox_mem_pool); ++ mempool_free(mb, phba->mbox_mem_pool); + } + } +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI); ++ spin_unlock_irq(shost->host_lock); + } +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_LBIT; +- spin_unlock_irq(phba->host->host_lock); +- +- /* Turn off discovery timer if its running */ +- lpfc_can_disctmo(phba); + +- /* Must process IOCBs on all rings to handle ABORTed I/Os */ + return 0; + } + +-static int +-lpfc_linkup(struct lpfc_hba *phba) ++static void ++lpfc_linkup_cleanup_nodes(struct lpfc_vport *vport) + { +- struct lpfc_nodelist *ndlp, *next_ndlp; +- +- fc_host_post_event(phba->host, fc_get_event_number(), +- FCH_EVT_LINKUP, 0); +- +- spin_lock_irq(phba->host->host_lock); +- phba->hba_state = LPFC_LINK_UP; +- phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | +- FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY); +- phba->fc_flag |= FC_NDISC_ACTIVE; +- phba->fc_ns_retry = 0; +- spin_unlock_irq(phba->host->host_lock); ++ struct lpfc_nodelist *ndlp; + ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { ++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) ++ continue; + +- if (phba->fc_flag & FC_LBIT) { +- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { +- if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) { + if (ndlp->nlp_type & NLP_FABRIC) { +- /* +- * On Linkup its safe to clean up the +- * ndlp from Fabric connections. ++ /* On Linkup its safe to clean up the ndlp ++ * from Fabric connections. 
+ */ +- lpfc_nlp_set_state(phba, ndlp, +- NLP_STE_UNUSED_NODE); ++ if (ndlp->nlp_DID != Fabric_DID) ++ lpfc_unreg_rpi(vport, ndlp); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { +- /* +- * Fail outstanding IO now since +- * device is marked for PLOGI. ++ /* Fail outstanding IO now since device is ++ * marked for PLOGI. + */ +- lpfc_unreg_rpi(phba, ndlp); +- } +- } ++ lpfc_unreg_rpi(vport, ndlp); + } + } ++} + +- /* free any ndlp's on unused list */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, +- nlp_listp) { ++static void ++lpfc_linkup_port(struct lpfc_vport *vport) ++{ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_nodelist *ndlp, *next_ndlp; ++ struct lpfc_hba *phba = vport->phba; ++ ++ if ((vport->load_flag & FC_UNLOADING) != 0) ++ return; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "Link Up: top:x%x speed:x%x flg:x%x", ++ phba->fc_topology, phba->fc_linkspeed, phba->link_flag); ++ ++ /* If NPIV is not enabled, only bring the physical port up */ ++ if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ (vport != phba->pport)) ++ return; ++ ++ fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0); ++ ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | ++ FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY); ++ vport->fc_flag |= FC_NDISC_ACTIVE; ++ vport->fc_ns_retry = 0; ++ spin_unlock_irq(shost->host_lock); ++ ++ if (vport->fc_flag & FC_LBIT) ++ lpfc_linkup_cleanup_nodes(vport); ++ ++ /* free any ndlp's in unused state */ ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, ++ nlp_listp) + if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); ++} ++ ++static int ++lpfc_linkup(struct lpfc_hba *phba) ++{ ++ struct lpfc_vport *vport; ++ ++ phba->link_state = LPFC_LINK_UP; ++ ++ /* Unblock fabric iocbs if they are blocked */ ++ clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags); ++ del_timer_sync(&phba->fabric_block_timer); ++ ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ lpfc_linkup_port(vport); + } ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) ++ lpfc_issue_clear_la(phba, phba->pport); + + return 0; + } +@@ -505,14 +748,14 @@ + * handed off to the SLI layer. 
+ */ + void +-lpfc_mbx_cmpl_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli; +- MAILBOX_t *mb; ++ struct lpfc_vport *vport = pmb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_sli *psli = &phba->sli; ++ MAILBOX_t *mb = &pmb->mb; + uint32_t control; + +- psli = &phba->sli; +- mb = &pmb->mb; + /* Since we don't do discovery right now, turn these off here */ + psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT; + psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT; +@@ -522,69 +765,74 @@ + if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) { + /* CLEAR_LA mbox error state */ + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, +- "%d:0320 CLEAR_LA mbxStatus error x%x hba " ++ "%d (%d):0320 CLEAR_LA mbxStatus error x%x hba " + "state x%x\n", +- phba->brd_no, mb->mbxStatus, phba->hba_state); ++ phba->brd_no, vport->vpi, mb->mbxStatus, ++ vport->port_state); + +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + goto out; + } + +- if (phba->fc_flag & FC_ABORT_DISCOVERY) +- goto out; ++ if (vport->port_type == LPFC_PHYSICAL_PORT) ++ phba->link_state = LPFC_HBA_READY; + +- phba->num_disc_nodes = 0; +- /* go thru NPR list and issue ELS PLOGIs */ +- if (phba->fc_npr_cnt) { +- lpfc_els_disc_plogi(phba); +- } ++ spin_lock_irq(&phba->hbalock); ++ psli->sli_flag |= LPFC_PROCESS_LA; ++ control = readl(phba->HCregaddr); ++ control |= HC_LAINT_ENA; ++ writel(control, phba->HCregaddr); ++ readl(phba->HCregaddr); /* flush */ ++ spin_unlock_irq(&phba->hbalock); ++ return; ++ ++ vport->num_disc_nodes = 0; ++ /* go thru NPR nodes and issue ELS PLOGIs */ ++ if (vport->fc_npr_cnt) ++ lpfc_els_disc_plogi(vport); + +- if (!phba->num_disc_nodes) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_NDISC_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ if (!vport->num_disc_nodes) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NDISC_ACTIVE; ++ spin_unlock_irq(shost->host_lock); + } + +- phba->hba_state = LPFC_HBA_READY; ++ vport->port_state = LPFC_VPORT_READY; + + out: + /* Device Discovery completes */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0225 Device Discovery completes\n", +- phba->brd_no); +- +- mempool_free( pmb, phba->mbox_mem_pool); +- +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_ABORT_DISCOVERY; +- if (phba->fc_flag & FC_ESTABLISH_LINK) { +- phba->fc_flag &= ~FC_ESTABLISH_LINK; +- } +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0225 Device Discovery completes\n", ++ phba->brd_no, vport->vpi); ++ ++ mempool_free(pmb, phba->mbox_mem_pool); ++ ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~(FC_ABORT_DISCOVERY | FC_ESTABLISH_LINK); ++ spin_unlock_irq(shost->host_lock); + + del_timer_sync(&phba->fc_estabtmo); + +- lpfc_can_disctmo(phba); ++ lpfc_can_disctmo(vport); + + /* turn on Link Attention interrupts */ +- spin_lock_irq(phba->host->host_lock); ++ ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag |= LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control |= HC_LAINT_ENA; + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return; + } + ++ + static void + lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli = &phba->sli; +- int rc; ++ struct 
lpfc_vport *vport = pmb->vport; + + if (pmb->mb.mbxStatus) + goto out; +@@ -592,127 +840,110 @@ + mempool_free(pmb, phba->mbox_mem_pool); + + if (phba->fc_topology == TOPOLOGY_LOOP && +- phba->fc_flag & FC_PUBLIC_LOOP && +- !(phba->fc_flag & FC_LBIT)) { ++ vport->fc_flag & FC_PUBLIC_LOOP && ++ !(vport->fc_flag & FC_LBIT)) { + /* Need to wait for FAN - use discovery timer +- * for timeout. hba_state is identically ++ * for timeout. port_state is identically + * LPFC_LOCAL_CFG_LINK while waiting for FAN + */ +- lpfc_set_disctmo(phba); ++ lpfc_set_disctmo(vport); + return; + } + +- /* Start discovery by sending a FLOGI. hba_state is identically ++ /* Start discovery by sending a FLOGI. port_state is identically + * LPFC_FLOGI while waiting for FLOGI cmpl + */ +- phba->hba_state = LPFC_FLOGI; +- lpfc_set_disctmo(phba); +- lpfc_initial_flogi(phba); ++ if (vport->port_state != LPFC_FLOGI) { ++ vport->port_state = LPFC_FLOGI; ++ lpfc_set_disctmo(vport); ++ lpfc_initial_flogi(vport); ++ } + return; + + out: + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, +- "%d:0306 CONFIG_LINK mbxStatus error x%x " ++ "%d (%d):0306 CONFIG_LINK mbxStatus error x%x " + "HBA state x%x\n", +- phba->brd_no, pmb->mb.mbxStatus, phba->hba_state); ++ phba->brd_no, vport->vpi, pmb->mb.mbxStatus, ++ vport->port_state); + +- lpfc_linkdown(phba); ++ mempool_free(pmb, phba->mbox_mem_pool); + +- phba->hba_state = LPFC_HBA_ERROR; ++ lpfc_linkdown(phba); + + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0200 CONFIG_LINK bad hba state x%x\n", +- phba->brd_no, phba->hba_state); ++ "%d (%d):0200 CONFIG_LINK bad hba state x%x\n", ++ phba->brd_no, vport->vpi, vport->port_state); + +- lpfc_clear_la(phba, pmb); +- pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free(pmb, phba->mbox_mem_pool); +- lpfc_disc_flush_list(phba); +- psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT; +- phba->hba_state = LPFC_HBA_READY; +- } ++ lpfc_issue_clear_la(phba, vport); + return; + } + + static void +-lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli = &phba->sli; + MAILBOX_t *mb = &pmb->mb; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; ++ struct lpfc_vport *vport = pmb->vport; + + + /* Check for error */ + if (mb->mbxStatus) { + /* READ_SPARAM mbox error state */ + lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, +- "%d:0319 READ_SPARAM mbxStatus error x%x " ++ "%d (%d):0319 READ_SPARAM mbxStatus error x%x " + "hba state x%x>\n", +- phba->brd_no, mb->mbxStatus, phba->hba_state); ++ phba->brd_no, vport->vpi, mb->mbxStatus, ++ vport->port_state); + + lpfc_linkdown(phba); +- phba->hba_state = LPFC_HBA_ERROR; + goto out; + } + +- memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt, ++ memcpy((uint8_t *) &vport->fc_sparam, (uint8_t *) mp->virt, + sizeof (struct serv_parm)); + if (phba->cfg_soft_wwnn) +- u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn); ++ u64_to_wwn(phba->cfg_soft_wwnn, ++ vport->fc_sparam.nodeName.u.wwn); + if (phba->cfg_soft_wwpn) +- u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); +- memcpy((uint8_t *) & phba->fc_nodename, +- (uint8_t *) & phba->fc_sparam.nodeName, +- sizeof (struct lpfc_name)); +- memcpy((uint8_t *) & 
phba->fc_portname, +- (uint8_t *) & phba->fc_sparam.portName, +- sizeof (struct lpfc_name)); ++ u64_to_wwn(phba->cfg_soft_wwpn, ++ vport->fc_sparam.portName.u.wwn); ++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, ++ sizeof(vport->fc_nodename)); ++ memcpy(&vport->fc_portname, &vport->fc_sparam.portName, ++ sizeof(vport->fc_portname)); ++ if (vport->port_type == LPFC_PHYSICAL_PORT) { ++ memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn)); ++ memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn)); ++ } ++ + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); +- mempool_free( pmb, phba->mbox_mem_pool); ++ mempool_free(pmb, phba->mbox_mem_pool); + return; + + out: + pmb->context1 = NULL; + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); +- if (phba->hba_state != LPFC_CLEAR_LA) { +- lpfc_clear_la(phba, pmb); +- pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)) +- == MBX_NOT_FINISHED) { +- mempool_free( pmb, phba->mbox_mem_pool); +- lpfc_disc_flush_list(phba); +- psli->ring[(psli->extra_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->fcp_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->next_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- phba->hba_state = LPFC_HBA_READY; +- } +- } else { +- mempool_free( pmb, phba->mbox_mem_pool); +- } ++ lpfc_issue_clear_la(phba, vport); ++ mempool_free(pmb, phba->mbox_mem_pool); + return; + } + + static void + lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la) + { +- int i; ++ struct lpfc_vport *vport = phba->pport; + LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox; ++ int i; + struct lpfc_dmabuf *mp; + int rc; + + sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + switch (la->UlnkSpeed) { + case LA_1GHZ_LINK: + phba->fc_linkspeed = LA_1GHZ_LINK; +@@ -732,14 +963,16 @@ + } + + phba->fc_topology = la->topology; ++ phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED; + + if (phba->fc_topology == TOPOLOGY_LOOP) { +- /* Get Loop Map information */ ++ phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED; + ++ /* Get Loop Map information */ + if (la->il) +- phba->fc_flag |= FC_LBIT; ++ vport->fc_flag |= FC_LBIT; + +- phba->fc_myDID = la->granted_AL_PA; ++ vport->fc_myDID = la->granted_AL_PA; + i = la->un.lilpBde64.tus.f.bdeSize; + + if (i == 0) { +@@ -781,14 +1014,20 @@ + } + } + } else { +- phba->fc_myDID = phba->fc_pref_DID; +- phba->fc_flag |= FC_LBIT; ++ if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) { ++ if (phba->max_vpi && phba->cfg_npiv_enable && ++ (phba->sli_rev == 3)) ++ phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED; ++ } ++ vport->fc_myDID = phba->fc_pref_DID; ++ vport->fc_flag |= FC_LBIT; + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + lpfc_linkup(phba); + if (sparam_mbox) { +- lpfc_read_sparam(phba, sparam_mbox); ++ lpfc_read_sparam(phba, sparam_mbox, 0); ++ sparam_mbox->vport = vport; + sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam; + rc = lpfc_sli_issue_mbox(phba, sparam_mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); +@@ -799,36 +1038,48 @@ + mempool_free(sparam_mbox, phba->mbox_mem_pool); + if (cfglink_mbox) + mempool_free(cfglink_mbox, phba->mbox_mem_pool); +- return; ++ goto out; + } + } + + if (cfglink_mbox) { +- phba->hba_state = LPFC_LOCAL_CFG_LINK; ++ vport->port_state = LPFC_LOCAL_CFG_LINK; + lpfc_config_link(phba, cfglink_mbox); ++ 
cfglink_mbox->vport = vport; + cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; + rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) ++ if (rc != MBX_NOT_FINISHED) ++ return; + mempool_free(cfglink_mbox, phba->mbox_mem_pool); + } ++out: ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, ++ "%d (%d):0263 Discovery Mailbox error: state: 0x%x : %p %p\n", ++ phba->brd_no, vport->vpi, ++ vport->port_state, sparam_mbox, cfglink_mbox); ++ ++ lpfc_issue_clear_la(phba, vport); ++ return; + } + + static void +-lpfc_mbx_issue_link_down(struct lpfc_hba *phba) { ++lpfc_mbx_issue_link_down(struct lpfc_hba *phba) ++{ + uint32_t control; + struct lpfc_sli *psli = &phba->sli; + + lpfc_linkdown(phba); + + /* turn on Link Attention interrupts - no CLEAR_LA needed */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag |= LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control |= HC_LAINT_ENA; + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + } + + /* +@@ -838,22 +1089,21 @@ + * handed off to the SLI layer. + */ + void +-lpfc_mbx_cmpl_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { ++ struct lpfc_vport *vport = pmb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + READ_LA_VAR *la; + MAILBOX_t *mb = &pmb->mb; + struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + + /* Check for error */ + if (mb->mbxStatus) { +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_LINK_EVENT, ++ lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, + "%d:1307 READ_LA mbox error x%x state x%x\n", +- phba->brd_no, +- mb->mbxStatus, phba->hba_state); ++ phba->brd_no, mb->mbxStatus, vport->port_state); + lpfc_mbx_issue_link_down(phba); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + goto lpfc_mbx_cmpl_read_la_free_mbuf; + } + +@@ -861,27 +1111,26 @@ + + memcpy(&phba->alpa_map[0], mp->virt, 128); + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + if (la->pb) +- phba->fc_flag |= FC_BYPASSED_MODE; ++ vport->fc_flag |= FC_BYPASSED_MODE; + else +- phba->fc_flag &= ~FC_BYPASSED_MODE; +- spin_unlock_irq(phba->host->host_lock); ++ vport->fc_flag &= ~FC_BYPASSED_MODE; ++ spin_unlock_irq(shost->host_lock); + + if (((phba->fc_eventTag + 1) < la->eventTag) || + (phba->fc_eventTag == la->eventTag)) { + phba->fc_stat.LinkMultiEvent++; +- if (la->attType == AT_LINK_UP) { ++ if (la->attType == AT_LINK_UP) + if (phba->fc_eventTag != 0) + lpfc_linkdown(phba); + } +- } + + phba->fc_eventTag = la->eventTag; + + if (la->attType == AT_LINK_UP) { + phba->fc_stat.LinkUp++; +- if (phba->fc_flag & FC_LOOPBACK_MODE) { ++ if (phba->link_flag & LS_LOOPBACK_MODE) { + lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT, + "%d:1306 Link Up Event in loop back mode " + "x%x received Data: x%x x%x x%x x%x\n", +@@ -903,7 +1152,7 @@ + "%d:1305 Link Down Event x%x received " + "Data: x%x x%x x%x\n", + phba->brd_no, la->eventTag, phba->fc_eventTag, +- phba->hba_state, phba->fc_flag); ++ phba->pport->port_state, vport->fc_flag); + lpfc_mbx_issue_link_down(phba); + } + +@@ -921,31 +1170,115 @@ + * handed off to the SLI layer. 
+ */ + void +-lpfc_mbx_cmpl_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli; +- MAILBOX_t *mb; +- struct lpfc_dmabuf *mp; +- struct lpfc_nodelist *ndlp; +- +- psli = &phba->sli; +- mb = &pmb->mb; +- +- ndlp = (struct lpfc_nodelist *) pmb->context2; +- mp = (struct lpfc_dmabuf *) (pmb->context1); ++ struct lpfc_vport *vport = pmb->vport; ++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; + + pmb->context1 = NULL; + + /* Good status, call state machine */ +- lpfc_disc_state_machine(phba, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN); ++ lpfc_disc_state_machine(vport, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN); + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); +- mempool_free( pmb, phba->mbox_mem_pool); ++ mempool_free(pmb, phba->mbox_mem_pool); + lpfc_nlp_put(ndlp); + + return; + } + ++static void ++lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ++{ ++ MAILBOX_t *mb = &pmb->mb; ++ struct lpfc_vport *vport = pmb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ switch (mb->mbxStatus) { ++ case 0x0011: ++ case 0x0020: ++ case 0x9700: ++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE, ++ "%d (%d):0911 cmpl_unreg_vpi, " ++ "mb status = 0x%x\n", ++ phba->brd_no, vport->vpi, mb->mbxStatus); ++ break; ++ } ++ vport->unreg_vpi_cmpl = VPORT_OK; ++ mempool_free(pmb, phba->mbox_mem_pool); ++ /* ++ * This shost reference might have been taken at the beginning of ++ * lpfc_vport_delete() ++ */ ++ if (vport->load_flag & FC_UNLOADING) ++ scsi_host_put(shost); ++} ++ ++void ++lpfc_mbx_unreg_vpi(struct lpfc_vport *vport) ++{ ++ struct lpfc_hba *phba = vport->phba; ++ LPFC_MBOXQ_t *mbox; ++ int rc; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!mbox) ++ return; ++ ++ lpfc_unreg_vpi(phba, vport->vpi, mbox); ++ mbox->vport = vport; ++ mbox->mbox_cmpl = lpfc_mbx_cmpl_unreg_vpi; ++ rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); ++ if (rc == MBX_NOT_FINISHED) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT, ++ "%d (%d):1800 Could not issue unreg_vpi\n", ++ phba->brd_no, vport->vpi); ++ mempool_free(mbox, phba->mbox_mem_pool); ++ vport->unreg_vpi_cmpl = VPORT_ERROR; ++ } ++} ++ ++static void ++lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) ++{ ++ struct lpfc_vport *vport = pmb->vport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ MAILBOX_t *mb = &pmb->mb; ++ ++ switch (mb->mbxStatus) { ++ case 0x0011: ++ case 0x9601: ++ case 0x9602: ++ lpfc_printf_log(phba, KERN_INFO, LOG_NODE, ++ "%d (%d):0912 cmpl_reg_vpi, mb status = 0x%x\n", ++ phba->brd_no, vport->vpi, mb->mbxStatus); ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); ++ spin_unlock_irq(shost->host_lock); ++ vport->fc_myDID = 0; ++ goto out; ++ } ++ ++ vport->num_disc_nodes = 0; ++ /* go thru NPR list and issue ELS PLOGIs */ ++ if (vport->fc_npr_cnt) ++ lpfc_els_disc_plogi(vport); ++ ++ if (!vport->num_disc_nodes) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NDISC_ACTIVE; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); ++ } ++ vport->port_state = LPFC_VPORT_READY; ++ ++out: ++ mempool_free(pmb, phba->mbox_mem_pool); ++ return; ++} ++ + /* + * This routine handles processing a Fabric REG_LOGIN mailbox + * command upon completion. 
It is setup in the LPFC_MBOXQ +@@ -953,20 +1286,14 @@ + * handed off to the SLI layer. + */ + void +-lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli; +- MAILBOX_t *mb; +- struct lpfc_dmabuf *mp; ++ struct lpfc_vport *vport = pmb->vport; ++ struct lpfc_vport *next_vport; ++ MAILBOX_t *mb = &pmb->mb; ++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + struct lpfc_nodelist *ndlp; +- struct lpfc_nodelist *ndlp_fdmi; +- +- +- psli = &phba->sli; +- mb = &pmb->mb; +- + ndlp = (struct lpfc_nodelist *) pmb->context2; +- mp = (struct lpfc_dmabuf *) (pmb->context1); + + pmb->context1 = NULL; + pmb->context2 = NULL; +@@ -977,60 +1304,46 @@ + mempool_free(pmb, phba->mbox_mem_pool); + lpfc_nlp_put(ndlp); + +- /* FLOGI failed, so just use loop map to make discovery list */ +- lpfc_disc_list_loopmap(phba); ++ if (phba->fc_topology == TOPOLOGY_LOOP) { ++ /* FLOGI failed, use loop map to make discovery list */ ++ lpfc_disc_list_loopmap(vport); + + /* Start discovery */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); ++ return; ++ } ++ ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, ++ "%d (%d):0258 Register Fabric login error: 0x%x\n", ++ phba->brd_no, vport->vpi, mb->mbxStatus); ++ + return; + } + + ndlp->nlp_rpi = mb->un.varWords[0]; + ndlp->nlp_type |= NLP_FABRIC; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + + lpfc_nlp_put(ndlp); /* Drop the reference from the mbox */ + +- if (phba->hba_state == LPFC_FABRIC_CFG_LINK) { +- /* This NPort has been assigned an NPort_ID by the fabric as a +- * result of the completed fabric login. Issue a State Change +- * Registration (SCR) ELS request to the fabric controller +- * (SCR_DID) so that this NPort gets RSCN events from the +- * fabric. +- */ +- lpfc_issue_els_scr(phba, SCR_DID, 0); ++ if (vport->port_state == LPFC_FABRIC_CFG_LINK) { ++ list_for_each_entry(next_vport, &phba->port_list, listentry) { ++ if (next_vport->port_type == LPFC_PHYSICAL_PORT) ++ continue; + +- ndlp = lpfc_findnode_did(phba, NameServer_DID); +- if (!ndlp) { +- /* Allocate a new node instance. If the pool is empty, +- * start the discovery process and skip the Nameserver +- * login process. This is attempted again later on. +- * Otherwise, issue a Port Login (PLOGI) to NameServer. 
+- */ +- ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); +- if (!ndlp) { +- lpfc_disc_start(phba); +- lpfc_mbuf_free(phba, mp->virt, mp->phys); +- kfree(mp); +- mempool_free(pmb, phba->mbox_mem_pool); +- return; +- } else { +- lpfc_nlp_init(phba, ndlp, NameServer_DID); +- ndlp->nlp_type |= NLP_FABRIC; +- } +- } +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, NameServer_DID, 0); +- if (phba->cfg_fdmi_on) { +- ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool, +- GFP_KERNEL); +- if (ndlp_fdmi) { +- lpfc_nlp_init(phba, ndlp_fdmi, FDMI_DID); +- ndlp_fdmi->nlp_type |= NLP_FABRIC; +- ndlp_fdmi->nlp_state = NLP_STE_PLOGI_ISSUE; +- lpfc_issue_els_plogi(phba, FDMI_DID, 0); ++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) ++ lpfc_initial_fdisc(next_vport); ++ else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) { ++ lpfc_vport_set_state(vport, ++ FC_VPORT_NO_FABRIC_SUPP); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0259 No NPIV Fabric " ++ "support\n", ++ phba->brd_no, vport->vpi); + } + } ++ lpfc_do_scr_ns_plogi(phba, vport); + } + + lpfc_mbuf_free(phba, mp->virt, mp->phys); +@@ -1046,32 +1359,36 @@ + * handed off to the SLI layer. + */ + void +-lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { +- struct lpfc_sli *psli; +- MAILBOX_t *mb; +- struct lpfc_dmabuf *mp; +- struct lpfc_nodelist *ndlp; +- +- psli = &phba->sli; +- mb = &pmb->mb; +- +- ndlp = (struct lpfc_nodelist *) pmb->context2; +- mp = (struct lpfc_dmabuf *) (pmb->context1); ++ MAILBOX_t *mb = &pmb->mb; ++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2; ++ struct lpfc_vport *vport = pmb->vport; + + if (mb->mbxStatus) { ++out: + lpfc_nlp_put(ndlp); + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + mempool_free(pmb, phba->mbox_mem_pool); +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + +- /* RegLogin failed, so just use loop map to make discovery +- list */ +- lpfc_disc_list_loopmap(phba); ++ if (phba->fc_topology == TOPOLOGY_LOOP) { ++ /* ++ * RegLogin failed, use loop map to make discovery ++ * list ++ */ ++ lpfc_disc_list_loopmap(vport); + + /* Start discovery */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); ++ return; ++ } ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0260 Register NameServer error: 0x%x\n", ++ phba->brd_no, vport->vpi, mb->mbxStatus); + return; + } + +@@ -1079,37 +1396,43 @@ + + ndlp->nlp_rpi = mb->un.varWords[0]; + ndlp->nlp_type |= NLP_FABRIC; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + +- if (phba->hba_state < LPFC_HBA_READY) { +- /* Link up discovery requires Fabrib registration. */ +- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RNN_ID); +- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RSNN_NN); +- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFT_ID); +- lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFF_ID); ++ if (vport->port_state < LPFC_VPORT_READY) { ++ /* Link up discovery requires Fabric registration. */ ++ lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); /* Do this first! 
*/ ++ lpfc_ns_cmd(vport, SLI_CTNS_RNN_ID, 0, 0); ++ lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0); ++ lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0); ++ lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0); ++ ++ /* Issue SCR just before NameServer GID_FT Query */ ++ lpfc_issue_els_scr(vport, SCR_DID, 0); + } + +- phba->fc_ns_retry = 0; ++ vport->fc_ns_retry = 0; + /* Good status, issue CT Request to NameServer */ +- if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT)) { ++ if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) { + /* Cannot issue NameServer Query, so finish up discovery */ +- lpfc_disc_start(phba); ++ goto out; + } + + lpfc_nlp_put(ndlp); + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); +- mempool_free( pmb, phba->mbox_mem_pool); ++ mempool_free(pmb, phba->mbox_mem_pool); + + return; + } + + static void +-lpfc_register_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ++lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct fc_rport *rport; + struct lpfc_rport_data *rdata; + struct fc_rport_identifiers rport_ids; ++ struct lpfc_hba *phba = vport->phba; + + /* Remote port has reappeared. Re-register w/ FC transport */ + rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); +@@ -1125,10 +1448,15 @@ + * registered the port. + */ + if (ndlp->rport && ndlp->rport->dd_data && +- *(struct lpfc_rport_data **) ndlp->rport->dd_data) { ++ ((struct lpfc_rport_data *) ndlp->rport->dd_data)->pnode == ndlp) { + lpfc_nlp_put(ndlp); + } +- ndlp->rport = rport = fc_remote_port_add(phba->host, 0, &rport_ids); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, ++ "rport add: did:x%x flg:x%x type x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); ++ ++ ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids); + if (!rport || !get_device(&rport->dev)) { + dev_printk(KERN_WARNING, &phba->pcidev->dev, + "Warning: fc_remote_port_add failed\n"); +@@ -1154,22 +1482,17 @@ + (rport->scsi_target_id < LPFC_MAX_TARGET)) { + ndlp->nlp_sid = rport->scsi_target_id; + } +- + return; + } + + static void +-lpfc_unregister_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ++lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp) + { + struct fc_rport *rport = ndlp->rport; +- struct lpfc_rport_data *rdata = rport->dd_data; + +- if (rport->scsi_target_id == -1) { +- ndlp->rport = NULL; +- rdata->pnode = NULL; +- lpfc_nlp_put(ndlp); +- put_device(&rport->dev); +- } ++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT, ++ "rport delete: did:x%x flg:x%x type x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); + + fc_remote_port_delete(rport); + +@@ -1177,42 +1500,46 @@ + } + + static void +-lpfc_nlp_counters(struct lpfc_hba *phba, int state, int count) ++lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count) + { +- spin_lock_irq(phba->host->host_lock); ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ spin_lock_irq(shost->host_lock); + switch (state) { + case NLP_STE_UNUSED_NODE: +- phba->fc_unused_cnt += count; ++ vport->fc_unused_cnt += count; + break; + case NLP_STE_PLOGI_ISSUE: +- phba->fc_plogi_cnt += count; ++ vport->fc_plogi_cnt += count; + break; + case NLP_STE_ADISC_ISSUE: +- phba->fc_adisc_cnt += count; ++ vport->fc_adisc_cnt += count; + break; + case NLP_STE_REG_LOGIN_ISSUE: +- phba->fc_reglogin_cnt += count; ++ vport->fc_reglogin_cnt += count; + break; + case NLP_STE_PRLI_ISSUE: +- phba->fc_prli_cnt += count; ++ vport->fc_prli_cnt += count; + break; 
+ case NLP_STE_UNMAPPED_NODE: +- phba->fc_unmap_cnt += count; ++ vport->fc_unmap_cnt += count; + break; + case NLP_STE_MAPPED_NODE: +- phba->fc_map_cnt += count; ++ vport->fc_map_cnt += count; + break; + case NLP_STE_NPR_NODE: +- phba->fc_npr_cnt += count; ++ vport->fc_npr_cnt += count; + break; + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + } + + static void +-lpfc_nlp_state_cleanup(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, ++lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + int old_state, int new_state) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + if (new_state == NLP_STE_UNMAPPED_NODE) { + ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); + ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; +@@ -1226,21 +1553,20 @@ + /* Transport interface */ + if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE || + old_state == NLP_STE_UNMAPPED_NODE)) { +- phba->nport_event_cnt++; +- lpfc_unregister_remote_port(phba, ndlp); ++ vport->phba->nport_event_cnt++; ++ lpfc_unregister_remote_port(ndlp); + } + + if (new_state == NLP_STE_MAPPED_NODE || + new_state == NLP_STE_UNMAPPED_NODE) { +- phba->nport_event_cnt++; ++ vport->phba->nport_event_cnt++; + /* + * Tell the fc transport about the port, if we haven't + * already. If we have, and it's a scsi entity, be + * sure to unblock any attached scsi devices + */ +- lpfc_register_remote_port(phba, ndlp); ++ lpfc_register_remote_port(vport, ndlp); + } +- + /* + * if we added to Mapped list, but the remote port + * registration failed or assigned a target id outside +@@ -1251,10 +1577,10 @@ + (!ndlp->rport || + ndlp->rport->scsi_target_id == -1 || + ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_TGT_NO_SCSIID; +- spin_unlock_irq(phba->host->host_lock); +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } + } + +@@ -1280,61 +1606,74 @@ + } + + void +-lpfc_nlp_set_state(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int state) ++lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ int state) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + int old_state = ndlp->nlp_state; + char name1[16], name2[16]; + +- lpfc_printf_log(phba, KERN_INFO, LOG_NODE, +- "%d:0904 NPort state transition x%06x, %s -> %s\n", +- phba->brd_no, ++ lpfc_printf_log(vport->phba, KERN_INFO, LOG_NODE, ++ "%d (%d):0904 NPort state transition x%06x, %s -> %s\n", ++ vport->phba->brd_no, vport->vpi, + ndlp->nlp_DID, + lpfc_nlp_state_name(name1, sizeof(name1), old_state), + lpfc_nlp_state_name(name2, sizeof(name2), state)); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE, ++ "node statechg did:x%x old:%d ste:%d", ++ ndlp->nlp_DID, old_state, state); ++ + if (old_state == NLP_STE_NPR_NODE && + (ndlp->nlp_flag & NLP_DELAY_TMO) != 0 && + state != NLP_STE_NPR_NODE) +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + if (old_state == NLP_STE_UNMAPPED_NODE) { + ndlp->nlp_flag &= ~NLP_TGT_NO_SCSIID; + ndlp->nlp_type &= ~NLP_FC_NODE; + } + + if (list_empty(&ndlp->nlp_listp)) { +- spin_lock_irq(phba->host->host_lock); +- list_add_tail(&ndlp->nlp_listp, &phba->fc_nodes); +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes); ++ 
spin_unlock_irq(shost->host_lock); + } else if (old_state) +- lpfc_nlp_counters(phba, old_state, -1); ++ lpfc_nlp_counters(vport, old_state, -1); + + ndlp->nlp_state = state; +- lpfc_nlp_counters(phba, state, 1); +- lpfc_nlp_state_cleanup(phba, ndlp, old_state, state); ++ lpfc_nlp_counters(vport, state, 1); ++ lpfc_nlp_state_cleanup(vport, ndlp, old_state, state); + } + + void +-lpfc_dequeue_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ++lpfc_dequeue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0) +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp)) +- lpfc_nlp_counters(phba, ndlp->nlp_state, -1); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_counters(vport, ndlp->nlp_state, -1); ++ spin_lock_irq(shost->host_lock); + list_del_init(&ndlp->nlp_listp); +- spin_unlock_irq(phba->host->host_lock); +- lpfc_nlp_state_cleanup(phba, ndlp, ndlp->nlp_state, 0); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_nlp_state_cleanup(vport, ndlp, ndlp->nlp_state, ++ NLP_STE_UNUSED_NODE); + } + + void +-lpfc_drop_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ++lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0) +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp)) +- lpfc_nlp_counters(phba, ndlp->nlp_state, -1); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_counters(vport, ndlp->nlp_state, -1); ++ spin_lock_irq(shost->host_lock); + list_del_init(&ndlp->nlp_listp); +- spin_unlock_irq(phba->host->host_lock); ++ ndlp->nlp_flag &= ~NLP_TARGET_REMOVE; ++ spin_unlock_irq(shost->host_lock); + lpfc_nlp_put(ndlp); + } + +@@ -1342,11 +1681,13 @@ + * Start / ReStart rescue timer for Discovery / RSCN handling + */ + void +-lpfc_set_disctmo(struct lpfc_hba * phba) ++lpfc_set_disctmo(struct lpfc_vport *vport) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + uint32_t tmo; + +- if (phba->hba_state == LPFC_LOCAL_CFG_LINK) { ++ if (vport->port_state == LPFC_LOCAL_CFG_LINK) { + /* For FAN, timeout should be greater then edtov */ + tmo = (((phba->fc_edtov + 999) / 1000) + 1); + } else { +@@ -1356,18 +1697,25 @@ + tmo = ((phba->fc_ratov * 3) + 3); + } + +- mod_timer(&phba->fc_disctmo, jiffies + HZ * tmo); +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_DISC_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ ++ if (!timer_pending(&vport->fc_disctmo)) { ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "set disc timer: tmo:x%x state:x%x flg:x%x", ++ tmo, vport->port_state, vport->fc_flag); ++ } ++ ++ mod_timer(&vport->fc_disctmo, jiffies + HZ * tmo); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_DISC_TMO; ++ spin_unlock_irq(shost->host_lock); + + /* Start Discovery Timer state */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0247 Start Discovery Timer state x%x " ++ "%d (%d):0247 Start Discovery Timer state x%x " + "Data: x%x x%lx x%x x%x\n", +- phba->brd_no, +- phba->hba_state, tmo, (unsigned long)&phba->fc_disctmo, +- phba->fc_plogi_cnt, phba->fc_adisc_cnt); ++ phba->brd_no, vport->vpi, vport->port_state, tmo, ++ (unsigned long)&vport->fc_disctmo, vport->fc_plogi_cnt, ++ 
vport->fc_adisc_cnt); + + return; + } +@@ -1376,23 +1724,34 @@ + * Cancel rescue timer for Discovery / RSCN handling + */ + int +-lpfc_can_disctmo(struct lpfc_hba * phba) ++lpfc_can_disctmo(struct lpfc_vport *vport) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ unsigned long iflags; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "can disc timer: state:x%x rtry:x%x flg:x%x", ++ vport->port_state, vport->fc_ns_retry, vport->fc_flag); ++ + /* Turn off discovery timer if its running */ +- if (phba->fc_flag & FC_DISC_TMO) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_DISC_TMO; +- spin_unlock_irq(phba->host->host_lock); +- del_timer_sync(&phba->fc_disctmo); +- phba->work_hba_events &= ~WORKER_DISC_TMO; ++ if (vport->fc_flag & FC_DISC_TMO) { ++ spin_lock_irqsave(shost->host_lock, iflags); ++ vport->fc_flag &= ~FC_DISC_TMO; ++ spin_unlock_irqrestore(shost->host_lock, iflags); ++ del_timer_sync(&vport->fc_disctmo); ++ spin_lock_irqsave(&vport->work_port_lock, iflags); ++ vport->work_port_events &= ~WORKER_DISC_TMO; ++ spin_unlock_irqrestore(&vport->work_port_lock, iflags); + } + + /* Cancel Discovery Timer state */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0248 Cancel Discovery Timer state x%x " ++ "%d (%d):0248 Cancel Discovery Timer state x%x " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->hba_state, phba->fc_flag, +- phba->fc_plogi_cnt, phba->fc_adisc_cnt); ++ phba->brd_no, vport->vpi, vport->port_state, ++ vport->fc_flag, vport->fc_plogi_cnt, ++ vport->fc_adisc_cnt); + + return 0; + } +@@ -1402,15 +1761,18 @@ + * Return true if iocb matches the specified nport + */ + int +-lpfc_check_sli_ndlp(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, +- struct lpfc_iocbq * iocb, struct lpfc_nodelist * ndlp) ++lpfc_check_sli_ndlp(struct lpfc_hba *phba, ++ struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *iocb, ++ struct lpfc_nodelist *ndlp) + { +- struct lpfc_sli *psli; +- IOCB_t *icmd; ++ struct lpfc_sli *psli = &phba->sli; ++ IOCB_t *icmd = &iocb->iocb; ++ struct lpfc_vport *vport = ndlp->vport; ++ ++ if (iocb->vport != vport) ++ return 0; + +- psli = &phba->sli; +- icmd = &iocb->iocb; + if (pring->ringno == LPFC_ELS_RING) { + switch (icmd->ulpCommand) { + case CMD_GEN_REQUEST64_CR: +@@ -1445,7 +1807,7 @@ + * associated with nlp_rpi in the LPFC_NODELIST entry. + */ + static int +-lpfc_no_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) ++lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) + { + LIST_HEAD(completions); + struct lpfc_sli *psli; +@@ -1454,6 +1816,8 @@ + IOCB_t *icmd; + uint32_t rpi, i; + ++ lpfc_fabric_abort_nport(ndlp); ++ + /* + * Everything that matches on txcmplq will be returned + * by firmware with a no rpi error. 
+@@ -1465,15 +1829,15 @@ + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->ring[i]; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(iocb, next_iocb, &pring->txq, + list) { + /* + * Check to see if iocb matches the nport we are + * looking for + */ +- if ((lpfc_check_sli_ndlp +- (phba, pring, iocb, ndlp))) { ++ if ((lpfc_check_sli_ndlp(phba, pring, iocb, ++ ndlp))) { + /* It matches, so deque and call compl + with an error */ + list_move_tail(&iocb->list, +@@ -1481,22 +1845,22 @@ + pring->txq_cnt--; + } + } +- spin_unlock_irq(phba->host->host_lock); +- ++ spin_unlock_irq(&phba->hbalock); + } + } + + while (!list_empty(&completions)) { + iocb = list_get_first(&completions, struct lpfc_iocbq, list); +- list_del(&iocb->list); ++ list_del_init(&iocb->list); + +- if (iocb->iocb_cmpl) { ++ if (!iocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, iocb); ++ else { + icmd = &iocb->iocb; + icmd->ulpStatus = IOSTAT_LOCAL_REJECT; + icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; +- (iocb->iocb_cmpl) (phba, iocb, iocb); +- } else +- lpfc_sli_release_iocbq(phba, iocb); ++ (iocb->iocb_cmpl)(phba, iocb, iocb); ++ } + } + + return 0; +@@ -1512,19 +1876,22 @@ + * we are waiting to PLOGI back to the remote NPort. + */ + int +-lpfc_unreg_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) ++lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { ++ struct lpfc_hba *phba = vport->phba; + LPFC_MBOXQ_t *mbox; + int rc; + + if (ndlp->nlp_rpi) { +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { +- lpfc_unreg_login(phba, ndlp->nlp_rpi, mbox); +- mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl; +- rc = lpfc_sli_issue_mbox +- (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mbox) { ++ lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox); ++ mbox->vport = vport; ++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ rc = lpfc_sli_issue_mbox(phba, mbox, ++ (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) +- mempool_free( mbox, phba->mbox_mem_pool); ++ mempool_free(mbox, phba->mbox_mem_pool); + } + lpfc_no_rpi(phba, ndlp); + ndlp->nlp_rpi = 0; +@@ -1533,25 +1900,70 @@ + return 0; + } + ++void ++lpfc_unreg_all_rpis(struct lpfc_vport *vport) ++{ ++ struct lpfc_hba *phba = vport->phba; ++ LPFC_MBOXQ_t *mbox; ++ int rc; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mbox) { ++ lpfc_unreg_login(phba, vport->vpi, 0xffff, mbox); ++ mbox->vport = vport; ++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ rc = lpfc_sli_issue_mbox(phba, mbox, ++ (MBX_NOWAIT | MBX_STOP_IOCB)); ++ if (rc == MBX_NOT_FINISHED) { ++ mempool_free(mbox, phba->mbox_mem_pool); ++ } ++ } ++} ++ ++void ++lpfc_unreg_default_rpis(struct lpfc_vport *vport) ++{ ++ struct lpfc_hba *phba = vport->phba; ++ LPFC_MBOXQ_t *mbox; ++ int rc; ++ ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (mbox) { ++ lpfc_unreg_did(phba, vport->vpi, 0xffffffff, mbox); ++ mbox->vport = vport; ++ mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ rc = lpfc_sli_issue_mbox(phba, mbox, ++ (MBX_NOWAIT | MBX_STOP_IOCB)); ++ if (rc == MBX_NOT_FINISHED) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT, ++ "%d (%d):1815 Could not issue " ++ "unreg_did (default rpis)\n", ++ phba->brd_no, vport->vpi); ++ mempool_free(mbox, phba->mbox_mem_pool); ++ } ++ } ++} ++ + /* + * Free resources associated with LPFC_NODELIST entry + * so it can be freed. 
+ */ + static int +-lpfc_cleanup_node(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) ++lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { +- LPFC_MBOXQ_t *mb; +- LPFC_MBOXQ_t *nextmb; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ LPFC_MBOXQ_t *mb, *nextmb; + struct lpfc_dmabuf *mp; + + /* Cleanup node for NPort */ + lpfc_printf_log(phba, KERN_INFO, LOG_NODE, +- "%d:0900 Cleanup node for NPort x%x " ++ "%d (%d):0900 Cleanup node for NPort x%x " + "Data: x%x x%x x%x\n", +- phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag, ++ phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_state, ndlp->nlp_rpi); + +- lpfc_dequeue_node(phba, ndlp); ++ lpfc_dequeue_node(vport, ndlp); + + /* cleanup any ndlp on mbox q waiting for reglogin cmpl */ + if ((mb = phba->sli.mbox_active)) { +@@ -1562,13 +1974,13 @@ + } + } + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { + if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && + (ndlp == (struct lpfc_nodelist *) mb->context2)) { + mp = (struct lpfc_dmabuf *) (mb->context1); + if (mp) { +- lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ __lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + } + list_del(&mb->list); +@@ -1576,20 +1988,27 @@ + lpfc_nlp_put(ndlp); + } + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + lpfc_els_abort(phba,ndlp); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + + ndlp->nlp_last_elscmd = 0; + del_timer_sync(&ndlp->nlp_delayfunc); + + if (!list_empty(&ndlp->els_retry_evt.evt_listp)) + list_del_init(&ndlp->els_retry_evt.evt_listp); ++ if (!list_empty(&ndlp->dev_loss_evt.evt_listp)) ++ list_del_init(&ndlp->dev_loss_evt.evt_listp); ++ ++ if (!list_empty(&ndlp->dev_loss_evt.evt_listp)) { ++ list_del_init(&ndlp->dev_loss_evt.evt_listp); ++ complete((struct completion *)(ndlp->dev_loss_evt.evt_arg2)); ++ } + +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + + return 0; + } +@@ -1600,18 +2019,22 @@ + * machine, defer the free till we reach the end of the state machine. + */ + static void +-lpfc_nlp_remove(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) ++lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { + struct lpfc_rport_data *rdata; + + if (ndlp->nlp_flag & NLP_DELAY_TMO) { +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + } + +- lpfc_cleanup_node(phba, ndlp); ++ lpfc_cleanup_node(vport, ndlp); + +- if ((ndlp->rport) && !(phba->fc_flag & FC_UNLOADING)) { +- put_device(&ndlp->rport->dev); ++ /* ++ * We can get here with a non-NULL ndlp->rport because when we ++ * unregister a rport we don't break the rport/node linkage. So if we ++ * do, make sure we don't leaving any dangling pointers behind. 
++ */ ++ if (ndlp->rport) { + rdata = ndlp->rport->dd_data; + rdata->pnode = NULL; + ndlp->rport = NULL; +@@ -1619,11 +2042,10 @@ + } + + static int +-lpfc_matchdid(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did) ++lpfc_matchdid(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ uint32_t did) + { +- D_ID mydid; +- D_ID ndlpdid; +- D_ID matchdid; ++ D_ID mydid, ndlpdid, matchdid; + + if (did == Bcast_DID) + return 0; +@@ -1637,7 +2059,7 @@ + return 1; + + /* Next check for area/domain identically equals 0 match */ +- mydid.un.word = phba->fc_myDID; ++ mydid.un.word = vport->fc_myDID; + if ((mydid.un.b.domain == 0) && (mydid.un.b.area == 0)) { + return 0; + } +@@ -1669,101 +2091,116 @@ + } + + /* Search for a nodelist entry */ +-struct lpfc_nodelist * +-lpfc_findnode_did(struct lpfc_hba *phba, uint32_t did) ++static struct lpfc_nodelist * ++__lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp; + uint32_t data1; + +- spin_lock_irq(phba->host->host_lock); +- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { +- if (lpfc_matchdid(phba, ndlp, did)) { ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { ++ if (lpfc_matchdid(vport, ndlp, did)) { + data1 = (((uint32_t) ndlp->nlp_state << 24) | + ((uint32_t) ndlp->nlp_xri << 16) | + ((uint32_t) ndlp->nlp_type << 8) | + ((uint32_t) ndlp->nlp_rpi & 0xff)); + lpfc_printf_log(phba, KERN_INFO, LOG_NODE, +- "%d:0929 FIND node DID " ++ "%d (%d):0929 FIND node DID " + " Data: x%p x%x x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + ndlp, ndlp->nlp_DID, + ndlp->nlp_flag, data1); +- spin_unlock_irq(phba->host->host_lock); + return ndlp; + } + } +- spin_unlock_irq(phba->host->host_lock); + + /* FIND node did NOT FOUND */ + lpfc_printf_log(phba, KERN_INFO, LOG_NODE, +- "%d:0932 FIND node did x%x NOT FOUND.\n", +- phba->brd_no, did); ++ "%d (%d):0932 FIND node did x%x NOT FOUND.\n", ++ phba->brd_no, vport->vpi, did); + return NULL; + } + + struct lpfc_nodelist * +-lpfc_setup_disc_node(struct lpfc_hba * phba, uint32_t did) ++lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did) ++{ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_nodelist *ndlp; ++ ++ spin_lock_irq(shost->host_lock); ++ ndlp = __lpfc_findnode_did(vport, did); ++ spin_unlock_irq(shost->host_lock); ++ return ndlp; ++} ++ ++struct lpfc_nodelist * ++lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + +- ndlp = lpfc_findnode_did(phba, did); ++ ndlp = lpfc_findnode_did(vport, did); + if (!ndlp) { +- if ((phba->fc_flag & FC_RSCN_MODE) && +- ((lpfc_rscn_payload_check(phba, did) == 0))) ++ if ((vport->fc_flag & FC_RSCN_MODE) != 0 && ++ lpfc_rscn_payload_check(vport, did) == 0) + return NULL; + ndlp = (struct lpfc_nodelist *) +- mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); ++ mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) + return NULL; +- lpfc_nlp_init(phba, ndlp, did); +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_init(vport, ndlp, did); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; ++ spin_unlock_irq(shost->host_lock); + return ndlp; + } +- if (phba->fc_flag & FC_RSCN_MODE) { +- if (lpfc_rscn_payload_check(phba, did)) { ++ if (vport->fc_flag & FC_RSCN_MODE) { ++ if (lpfc_rscn_payload_check(vport, did)) { ++ 
spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; ++ spin_unlock_irq(shost->host_lock); + + /* Since this node is marked for discovery, + * delay timeout is not needed. + */ + if (ndlp->nlp_flag & NLP_DELAY_TMO) +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + } else + ndlp = NULL; + } else { + if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE || + ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) + return NULL; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_2B_DISC; ++ spin_unlock_irq(shost->host_lock); + } + return ndlp; + } + + /* Build a list of nodes to discover based on the loopmap */ + void +-lpfc_disc_list_loopmap(struct lpfc_hba * phba) ++lpfc_disc_list_loopmap(struct lpfc_vport *vport) + { ++ struct lpfc_hba *phba = vport->phba; + int j; + uint32_t alpa, index; + +- if (phba->hba_state <= LPFC_LINK_DOWN) { ++ if (!lpfc_is_link_up(phba)) + return; +- } +- if (phba->fc_topology != TOPOLOGY_LOOP) { ++ ++ if (phba->fc_topology != TOPOLOGY_LOOP) + return; +- } + + /* Check for loop map present or not */ + if (phba->alpa_map[0]) { + for (j = 1; j <= phba->alpa_map[0]; j++) { + alpa = phba->alpa_map[j]; +- +- if (((phba->fc_myDID & 0xff) == alpa) || (alpa == 0)) { ++ if (((vport->fc_myDID & 0xff) == alpa) || (alpa == 0)) + continue; +- } +- lpfc_setup_disc_node(phba, alpa); ++ lpfc_setup_disc_node(vport, alpa); + } + } else { + /* No alpamap, so try all alpa's */ +@@ -1776,113 +2213,167 @@ + else + index = FC_MAXLOOP - j - 1; + alpa = lpfcAlpaArray[index]; +- if ((phba->fc_myDID & 0xff) == alpa) { ++ if ((vport->fc_myDID & 0xff) == alpa) + continue; +- } +- +- lpfc_setup_disc_node(phba, alpa); ++ lpfc_setup_disc_node(vport, alpa); + } + } + return; + } + +-/* Start Link up / RSCN discovery on NPR list */ + void +-lpfc_disc_start(struct lpfc_hba * phba) ++lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport) + { +- struct lpfc_sli *psli; + LPFC_MBOXQ_t *mbox; +- struct lpfc_nodelist *ndlp, *next_ndlp; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring]; ++ struct lpfc_sli_ring *fcp_ring = &psli->ring[psli->fcp_ring]; ++ struct lpfc_sli_ring *next_ring = &psli->ring[psli->next_ring]; ++ int rc; ++ ++ /* ++ * if it's not a physical port or if we already send ++ * clear_la then don't send it. 
++ */ ++ if ((phba->link_state >= LPFC_CLEAR_LA) || ++ (vport->port_type != LPFC_PHYSICAL_PORT)) ++ return; ++ ++ /* Link up discovery */ ++ if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) { ++ phba->link_state = LPFC_CLEAR_LA; ++ lpfc_clear_la(phba, mbox); ++ mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; ++ mbox->vport = vport; ++ rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | ++ MBX_STOP_IOCB)); ++ if (rc == MBX_NOT_FINISHED) { ++ mempool_free(mbox, phba->mbox_mem_pool); ++ lpfc_disc_flush_list(vport); ++ extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT; ++ fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT; ++ next_ring->flag &= ~LPFC_STOP_IOCB_EVENT; ++ phba->link_state = LPFC_HBA_ERROR; ++ } ++ } ++} ++ ++/* Reg_vpi to tell firmware to resume normal operations */ ++void ++lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport) ++{ ++ LPFC_MBOXQ_t *regvpimbox; ++ ++ regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (regvpimbox) { ++ lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox); ++ regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi; ++ regvpimbox->vport = vport; ++ if (lpfc_sli_issue_mbox(phba, regvpimbox, ++ (MBX_NOWAIT | MBX_STOP_IOCB)) ++ == MBX_NOT_FINISHED) { ++ mempool_free(regvpimbox, phba->mbox_mem_pool); ++ } ++ } ++} ++ ++/* Start Link up / RSCN discovery on NPR nodes */ ++void ++lpfc_disc_start(struct lpfc_vport *vport) ++{ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + uint32_t num_sent; + uint32_t clear_la_pending; + int did_changed; +- int rc; +- +- psli = &phba->sli; + +- if (phba->hba_state <= LPFC_LINK_DOWN) { ++ if (!lpfc_is_link_up(phba)) + return; +- } +- if (phba->hba_state == LPFC_CLEAR_LA) ++ ++ if (phba->link_state == LPFC_CLEAR_LA) + clear_la_pending = 1; + else + clear_la_pending = 0; + +- if (phba->hba_state < LPFC_HBA_READY) { +- phba->hba_state = LPFC_DISC_AUTH; +- } +- lpfc_set_disctmo(phba); ++ if (vport->port_state < LPFC_VPORT_READY) ++ vport->port_state = LPFC_DISC_AUTH; ++ ++ lpfc_set_disctmo(vport); + +- if (phba->fc_prevDID == phba->fc_myDID) { ++ if (vport->fc_prevDID == vport->fc_myDID) + did_changed = 0; +- } else { ++ else + did_changed = 1; +- } +- phba->fc_prevDID = phba->fc_myDID; +- phba->num_disc_nodes = 0; ++ ++ vport->fc_prevDID = vport->fc_myDID; ++ vport->num_disc_nodes = 0; + + /* Start Discovery state */ + lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, +- "%d:0202 Start Discovery hba state x%x " ++ "%d (%d):0202 Start Discovery hba state x%x " + "Data: x%x x%x x%x\n", +- phba->brd_no, phba->hba_state, phba->fc_flag, +- phba->fc_plogi_cnt, phba->fc_adisc_cnt); +- +- /* If our did changed, we MUST do PLOGI */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) { +- if (ndlp->nlp_state == NLP_STE_NPR_NODE && +- (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 && +- did_changed) { +- spin_lock_irq(phba->host->host_lock); +- ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); +- } +- } ++ phba->brd_no, vport->vpi, vport->port_state, ++ vport->fc_flag, vport->fc_plogi_cnt, ++ vport->fc_adisc_cnt); + + /* First do ADISCs - if any */ +- num_sent = lpfc_els_disc_adisc(phba); ++ num_sent = lpfc_els_disc_adisc(vport); + + if (num_sent) + return; + +- if ((phba->hba_state < LPFC_HBA_READY) && (!clear_la_pending)) { ++ /* ++ * For SLI3, cmpl_reg_vpi will set port_state to READY, and ++ * continue discovery. 
++ */ ++ if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && ++ !(vport->fc_flag & FC_RSCN_MODE)) { ++ lpfc_issue_reg_vpi(phba, vport); ++ return; ++ } ++ ++ /* ++ * For SLI2, we need to set port_state to READY and continue ++ * discovery. ++ */ ++ if (vport->port_state < LPFC_VPORT_READY && !clear_la_pending) { + /* If we get here, there is nothing to ADISC */ +- if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) { +- phba->hba_state = LPFC_CLEAR_LA; +- lpfc_clear_la(phba, mbox); +- mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox(phba, mbox, +- (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free( mbox, phba->mbox_mem_pool); +- lpfc_disc_flush_list(phba); +- psli->ring[(psli->extra_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->fcp_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- psli->ring[(psli->next_ring)].flag &= +- ~LPFC_STOP_IOCB_EVENT; +- phba->hba_state = LPFC_HBA_READY; ++ if (vport->port_type == LPFC_PHYSICAL_PORT) ++ lpfc_issue_clear_la(phba, vport); ++ ++ if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) { ++ vport->num_disc_nodes = 0; ++ /* go thru NPR nodes and issue ELS PLOGIs */ ++ if (vport->fc_npr_cnt) ++ lpfc_els_disc_plogi(vport); ++ ++ if (!vport->num_disc_nodes) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_NDISC_ACTIVE; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); + } + } ++ vport->port_state = LPFC_VPORT_READY; + } else { + /* Next do PLOGIs - if any */ +- num_sent = lpfc_els_disc_plogi(phba); ++ num_sent = lpfc_els_disc_plogi(vport); + + if (num_sent) + return; + +- if (phba->fc_flag & FC_RSCN_MODE) { ++ if (vport->fc_flag & FC_RSCN_MODE) { + /* Check to see if more RSCNs came in while we + * were processing this one. + */ +- if ((phba->fc_rscn_id_cnt == 0) && +- (!(phba->fc_flag & FC_RSCN_DISCOVERY))) { +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_RSCN_MODE; +- spin_unlock_irq(phba->host->host_lock); ++ if ((vport->fc_rscn_id_cnt == 0) && ++ (!(vport->fc_flag & FC_RSCN_DISCOVERY))) { ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_RSCN_MODE; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_can_disctmo(vport); + } else +- lpfc_els_handle_rscn(phba); ++ lpfc_els_handle_rscn(vport); + } + } + return; +@@ -1893,7 +2384,7 @@ + * ring the match the sppecified nodelist. + */ + static void +-lpfc_free_tx(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp) ++lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) + { + LIST_HEAD(completions); + struct lpfc_sli *psli; +@@ -1907,7 +2398,7 @@ + /* Error matching iocb on txq or txcmplq + * First check the txq. 
+ */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { + if (iocb->context1 != ndlp) { + continue; +@@ -1927,36 +2418,36 @@ + continue; + } + icmd = &iocb->iocb; +- if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) || +- (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) { ++ if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR || ++ icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX) { + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + } + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + iocb = list_get_first(&completions, struct lpfc_iocbq, list); +- list_del(&iocb->list); ++ list_del_init(&iocb->list); + +- if (iocb->iocb_cmpl) { ++ if (!iocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, iocb); ++ else { + icmd = &iocb->iocb; + icmd->ulpStatus = IOSTAT_LOCAL_REJECT; + icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (iocb->iocb_cmpl) (phba, iocb, iocb); +- } else +- lpfc_sli_release_iocbq(phba, iocb); + } +- +- return; ++ } + } + + void +-lpfc_disc_flush_list(struct lpfc_hba * phba) ++lpfc_disc_flush_list(struct lpfc_vport *vport) + { + struct lpfc_nodelist *ndlp, *next_ndlp; ++ struct lpfc_hba *phba = vport->phba; + +- if (phba->fc_plogi_cnt || phba->fc_adisc_cnt) { +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, ++ if (vport->fc_plogi_cnt || vport->fc_adisc_cnt) { ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || + ndlp->nlp_state == NLP_STE_ADISC_ISSUE) { +@@ -1967,6 +2458,14 @@ + } + } + ++void ++lpfc_cleanup_discovery_resources(struct lpfc_vport *vport) ++{ ++ lpfc_els_flush_rscn(vport); ++ lpfc_els_flush_cmd(vport); ++ lpfc_disc_flush_list(vport); ++} ++ + /*****************************************************************************/ + /* + * NAME: lpfc_disc_timeout +@@ -1985,158 +2484,154 @@ + void + lpfc_disc_timeout(unsigned long ptr) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)ptr; ++ struct lpfc_vport *vport = (struct lpfc_vport *) ptr; ++ struct lpfc_hba *phba = vport->phba; + unsigned long flags = 0; + + if (unlikely(!phba)) + return; + +- spin_lock_irqsave(phba->host->host_lock, flags); +- if (!(phba->work_hba_events & WORKER_DISC_TMO)) { +- phba->work_hba_events |= WORKER_DISC_TMO; ++ if ((vport->work_port_events & WORKER_DISC_TMO) == 0) { ++ spin_lock_irqsave(&vport->work_port_lock, flags); ++ vport->work_port_events |= WORKER_DISC_TMO; ++ spin_unlock_irqrestore(&vport->work_port_lock, flags); ++ ++ spin_lock_irqsave(&phba->hbalock, flags); + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, flags); + } +- spin_unlock_irqrestore(phba->host->host_lock, flags); + return; + } + + static void +-lpfc_disc_timeout_handler(struct lpfc_hba *phba) ++lpfc_disc_timeout_handler(struct lpfc_vport *vport) + { +- struct lpfc_sli *psli; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_sli *psli = &phba->sli; + struct lpfc_nodelist *ndlp, *next_ndlp; +- LPFC_MBOXQ_t *clearlambox, *initlinkmbox; ++ LPFC_MBOXQ_t *initlinkmbox; + int rc, clrlaerr = 0; + +- if (unlikely(!phba)) +- return; +- +- if (!(phba->fc_flag & FC_DISC_TMO)) ++ if (!(vport->fc_flag & FC_DISC_TMO)) + return; + +- psli = &phba->sli; ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_DISC_TMO; ++ spin_unlock_irq(shost->host_lock); + +- 
spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_DISC_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD, ++ "disc timeout: state:x%x rtry:x%x flg:x%x", ++ vport->port_state, vport->fc_ns_retry, vport->fc_flag); + +- switch (phba->hba_state) { ++ switch (vport->port_state) { + + case LPFC_LOCAL_CFG_LINK: +- /* hba_state is identically LPFC_LOCAL_CFG_LINK while waiting for FAN */ ++ /* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for ++ * FAN ++ */ + /* FAN timeout */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_DISCOVERY, +- "%d:0221 FAN timeout\n", +- phba->brd_no); ++ lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY, ++ "%d (%d):0221 FAN timeout\n", ++ phba->brd_no, vport->vpi); + + /* Start discovery by sending FLOGI, clean up old rpis */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, + nlp_listp) { + if (ndlp->nlp_state != NLP_STE_NPR_NODE) + continue; + if (ndlp->nlp_type & NLP_FABRIC) { + /* Clean up the ndlp on Fabric connections */ +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + } else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) { + /* Fail outstanding IO now since device + * is marked for PLOGI. + */ +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + } + } +- phba->hba_state = LPFC_FLOGI; +- lpfc_set_disctmo(phba); +- lpfc_initial_flogi(phba); ++ if (vport->port_state != LPFC_FLOGI) { ++ vport->port_state = LPFC_FLOGI; ++ lpfc_set_disctmo(vport); ++ lpfc_initial_flogi(vport); ++ } + break; + ++ case LPFC_FDISC: + case LPFC_FLOGI: +- /* hba_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */ ++ /* port_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */ + /* Initial FLOGI timeout */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0222 Initial FLOGI timeout\n", +- phba->brd_no); ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0222 Initial %s timeout\n", ++ phba->brd_no, vport->vpi, ++ vport->vpi ? "FLOGI" : "FDISC"); + + /* Assume no Fabric and go on with discovery. + * Check for outstanding ELS FLOGI to abort. 
+ */ + + /* FLOGI failed, so just use loop map to make discovery list */ +- lpfc_disc_list_loopmap(phba); ++ lpfc_disc_list_loopmap(vport); + + /* Start discovery */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); + break; + + case LPFC_FABRIC_CFG_LINK: + /* hba_state is identically LPFC_FABRIC_CFG_LINK while waiting for + NameServer login */ + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0223 Timeout while waiting for NameServer " +- "login\n", phba->brd_no); ++ "%d (%d):0223 Timeout while waiting for " ++ "NameServer login\n", ++ phba->brd_no, vport->vpi); + + /* Next look for NameServer ndlp */ +- ndlp = lpfc_findnode_did(phba, NameServer_DID); ++ ndlp = lpfc_findnode_did(vport, NameServer_DID); + if (ndlp) + lpfc_nlp_put(ndlp); + /* Start discovery */ +- lpfc_disc_start(phba); ++ lpfc_disc_start(vport); + break; + + case LPFC_NS_QRY: + /* Check for wait for NameServer Rsp timeout */ + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0224 NameServer Query timeout " ++ "%d (%d):0224 NameServer Query timeout " + "Data: x%x x%x\n", +- phba->brd_no, +- phba->fc_ns_retry, LPFC_MAX_NS_RETRY); ++ phba->brd_no, vport->vpi, ++ vport->fc_ns_retry, LPFC_MAX_NS_RETRY); + +- ndlp = lpfc_findnode_did(phba, NameServer_DID); +- if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { +- if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) { ++ if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) { + /* Try it one more time */ +- rc = lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT); ++ vport->fc_ns_retry++; ++ rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, ++ vport->fc_ns_retry, 0); + if (rc == 0) + break; + } +- phba->fc_ns_retry = 0; +- } +- +- /* Nothing to authenticate, so CLEAR_LA right now */ +- clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (!clearlambox) { +- clrlaerr = 1; +- lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0226 Device Discovery " +- "completion error\n", +- phba->brd_no); +- phba->hba_state = LPFC_HBA_ERROR; +- break; +- } ++ vport->fc_ns_retry = 0; + +- phba->hba_state = LPFC_CLEAR_LA; +- lpfc_clear_la(phba, clearlambox); +- clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox(phba, clearlambox, +- (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free(clearlambox, phba->mbox_mem_pool); +- clrlaerr = 1; +- break; ++ /* ++ * Discovery is over. ++ * set port_state to PORT_READY if SLI2. ++ * cmpl_reg_vpi will set port_state to READY for SLI3. 
++ */ ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) ++ lpfc_issue_reg_vpi(phba, vport); ++ else { /* NPIV Not enabled */ ++ lpfc_issue_clear_la(phba, vport); ++ vport->port_state = LPFC_VPORT_READY; + } + + /* Setup and issue mailbox INITIALIZE LINK command */ + initlinkmbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!initlinkmbox) { + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0206 Device Discovery " ++ "%d (%d):0206 Device Discovery " + "completion error\n", +- phba->brd_no); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->brd_no, vport->vpi); ++ phba->link_state = LPFC_HBA_ERROR; + break; + } + +@@ -2144,6 +2639,8 @@ + lpfc_init_link(phba, initlinkmbox, phba->cfg_topology, + phba->cfg_link_speed); + initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0; ++ initlinkmbox->vport = vport; ++ initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + rc = lpfc_sli_issue_mbox(phba, initlinkmbox, + (MBX_NOWAIT | MBX_STOP_IOCB)); + lpfc_set_loopback_flag(phba); +@@ -2154,67 +2651,81 @@ + + case LPFC_DISC_AUTH: + /* Node Authentication timeout */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0227 Node Authentication timeout\n", +- phba->brd_no); +- lpfc_disc_flush_list(phba); +- clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (!clearlambox) { +- clrlaerr = 1; + lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, +- "%d:0207 Device Discovery " +- "completion error\n", +- phba->brd_no); +- phba->hba_state = LPFC_HBA_ERROR; +- break; ++ "%d (%d):0227 Node Authentication timeout\n", ++ phba->brd_no, vport->vpi); ++ lpfc_disc_flush_list(vport); ++ ++ /* ++ * set port_state to PORT_READY if SLI2. ++ * cmpl_reg_vpi will set port_state to READY for SLI3. ++ */ ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) ++ lpfc_issue_reg_vpi(phba, vport); ++ else { /* NPIV Not enabled */ ++ lpfc_issue_clear_la(phba, vport); ++ vport->port_state = LPFC_VPORT_READY; + } +- phba->hba_state = LPFC_CLEAR_LA; +- lpfc_clear_la(phba, clearlambox); +- clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la; +- rc = lpfc_sli_issue_mbox(phba, clearlambox, +- (MBX_NOWAIT | MBX_STOP_IOCB)); +- if (rc == MBX_NOT_FINISHED) { +- mempool_free(clearlambox, phba->mbox_mem_pool); +- clrlaerr = 1; ++ break; ++ ++ case LPFC_VPORT_READY: ++ if (vport->fc_flag & FC_RSCN_MODE) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0231 RSCN timeout Data: x%x " ++ "x%x\n", ++ phba->brd_no, vport->vpi, ++ vport->fc_ns_retry, LPFC_MAX_NS_RETRY); ++ ++ /* Cleanup any outstanding ELS commands */ ++ lpfc_els_flush_cmd(vport); ++ ++ lpfc_els_flush_rscn(vport); ++ lpfc_disc_flush_list(vport); + } + break; + ++ default: ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0229 Unexpected discovery timeout, " ++ "vport State x%x\n", ++ phba->brd_no, vport->vpi, vport->port_state); ++ ++ break; ++ } ++ ++ switch (phba->link_state) { + case LPFC_CLEAR_LA: + /* CLEAR LA timeout */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0228 CLEAR LA timeout\n", +- phba->brd_no); ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0228 CLEAR LA timeout\n", ++ phba->brd_no, vport->vpi); + clrlaerr = 1; + break; + +- case LPFC_HBA_READY: +- if (phba->fc_flag & FC_RSCN_MODE) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0231 RSCN timeout Data: x%x x%x\n", +- phba->brd_no, +- phba->fc_ns_retry, LPFC_MAX_NS_RETRY); +- +- /* Cleanup any outstanding ELS commands */ +- lpfc_els_flush_cmd(phba); ++ case LPFC_LINK_UNKNOWN: ++ case LPFC_WARM_START: ++ case 
LPFC_INIT_START:
++ case LPFC_INIT_MBX_CMDS:
++ case LPFC_LINK_DOWN:
++ case LPFC_LINK_UP:
++ case LPFC_HBA_ERROR:
++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++ "%d (%d):0230 Unexpected timeout, hba link "
++ "state x%x\n",
++ phba->brd_no, vport->vpi, phba->link_state);
++ clrlaerr = 1;
++ break;
+
+- lpfc_els_flush_rscn(phba);
+- lpfc_disc_flush_list(phba);
+- }
++ case LPFC_HBA_READY:
+ break;
+ }
+
+ if (clrlaerr) {
+- lpfc_disc_flush_list(phba);
++ lpfc_disc_flush_list(vport);
+ psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+- phba->hba_state = LPFC_HBA_READY;
++ vport->port_state = LPFC_VPORT_READY;
+ }
+
+ return;
+@@ -2227,37 +2738,29 @@
+ * handed off to the SLI layer.
+ */
+ void
+-lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+- struct lpfc_sli *psli;
+- MAILBOX_t *mb;
+- struct lpfc_dmabuf *mp;
+- struct lpfc_nodelist *ndlp;
+-
+- psli = &phba->sli;
+- mb = &pmb->mb;
+-
+- ndlp = (struct lpfc_nodelist *) pmb->context2;
+- mp = (struct lpfc_dmabuf *) (pmb->context1);
++ MAILBOX_t *mb = &pmb->mb;
++ struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++ struct lpfc_vport *vport = pmb->vport;
+
+ pmb->context1 = NULL;
+
+ ndlp->nlp_rpi = mb->un.varWords[0];
+ ndlp->nlp_type |= NLP_FABRIC;
+- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+
+- /* Start issuing Fabric-Device Management Interface (FDMI)
+- * command to 0xfffffa (FDMI well known port)
+- */
+- if (phba->cfg_fdmi_on == 1) {
+- lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
+- } else {
+ /*
+- * Delay issuing FDMI command if fdmi-on=2
+- * (supporting RPA/hostnmae)
++ * Start issuing Fabric-Device Management Interface (FDMI) command to
++ * 0xfffffa (FDMI well known port) or Delay issuing FDMI command if
++ * fdmi-on=2 (supporting RPA/hostname)
+ */
+- mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
+- }
++
++ if (phba->cfg_fdmi_on == 1)
++ lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
++ else
++ mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
+
+ /* Mailbox took a reference to the node */
+ lpfc_nlp_put(ndlp);
+@@ -2283,16 +2786,12 @@
+ sizeof(ndlp->nlp_portname)) == 0;
+ }
+
+-/*
+- * Search node lists for a remote port matching filter criteria
+- * Caller needs to hold host_lock before calling this routine.
+- */
+ struct lpfc_nodelist *
+-__lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param)
++__lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
+ {
+ struct lpfc_nodelist *ndlp;
+
+- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ if (ndlp->nlp_state != NLP_STE_UNUSED_NODE &&
+ filter(ndlp, param))
+ return ndlp;
+@@ -2302,68 +2801,104 @@
+
+ /*
+ * Search node lists for a remote port matching filter criteria
+- * This routine is used when the caller does NOT have host_lock.
++ * Caller needs to hold host_lock before calling this routine.
+ */ + struct lpfc_nodelist * +-lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param) ++lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + +- spin_lock_irq(phba->host->host_lock); +- ndlp = __lpfc_find_node(phba, filter, param); +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ ndlp = __lpfc_find_node(vport, filter, param); ++ spin_unlock_irq(shost->host_lock); + return ndlp; + } + + /* + * This routine looks up the ndlp lists for the given RPI. If rpi found it +- * returns the node list pointer else return NULL. ++ * returns the node list element pointer else return NULL. + */ + struct lpfc_nodelist * +-__lpfc_findnode_rpi(struct lpfc_hba *phba, uint16_t rpi) ++__lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi) + { +- return __lpfc_find_node(phba, lpfc_filter_by_rpi, &rpi); ++ return __lpfc_find_node(vport, lpfc_filter_by_rpi, &rpi); + } + + struct lpfc_nodelist * +-lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi) ++lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + +- spin_lock_irq(phba->host->host_lock); +- ndlp = __lpfc_findnode_rpi(phba, rpi); +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); ++ ndlp = __lpfc_findnode_rpi(vport, rpi); ++ spin_unlock_irq(shost->host_lock); + return ndlp; + } + + /* + * This routine looks up the ndlp lists for the given WWPN. If WWPN found it +- * returns the node list pointer else return NULL. ++ * returns the node element list pointer else return NULL. + */ + struct lpfc_nodelist * +-lpfc_findnode_wwpn(struct lpfc_hba *phba, struct lpfc_name *wwpn) ++lpfc_findnode_wwpn(struct lpfc_vport *vport, struct lpfc_name *wwpn) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp; + +- spin_lock_irq(phba->host->host_lock); +- ndlp = __lpfc_find_node(phba, lpfc_filter_by_wwpn, wwpn); +- spin_unlock_irq(phba->host->host_lock); +- return NULL; ++ spin_lock_irq(shost->host_lock); ++ ndlp = __lpfc_find_node(vport, lpfc_filter_by_wwpn, wwpn); ++ spin_unlock_irq(shost->host_lock); ++ return ndlp; + } + + void +-lpfc_nlp_init(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did) ++lpfc_dev_loss_delay(unsigned long ptr) ++{ ++ struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr; ++ struct lpfc_vport *vport = ndlp->vport; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_work_evt *evtp = &ndlp->dev_loss_evt; ++ unsigned long flags; ++ ++ evtp = &ndlp->dev_loss_evt; ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ if (!list_empty(&evtp->evt_listp)) { ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ return; ++ } ++ ++ evtp->evt_arg1 = ndlp; ++ evtp->evt = LPFC_EVT_DEV_LOSS_DELAY; ++ list_add_tail(&evtp->evt_listp, &phba->work_list); ++ if (phba->work_wait) ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ return; ++} ++ ++void ++lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ uint32_t did) + { + memset(ndlp, 0, sizeof (struct lpfc_nodelist)); + INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); ++ INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); + init_timer(&ndlp->nlp_delayfunc); + ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; + ndlp->nlp_delayfunc.data = (unsigned long)ndlp; + ndlp->nlp_DID = did; +- ndlp->nlp_phba = phba; ++ ndlp->vport = 
vport; + ndlp->nlp_sid = NLP_NO_SID; + INIT_LIST_HEAD(&ndlp->nlp_listp); + kref_init(&ndlp->kref); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE, ++ "node init: did:x%x", ++ ndlp->nlp_DID, 0, 0); ++ + return; + } + +@@ -2372,8 +2907,13 @@ + { + struct lpfc_nodelist *ndlp = container_of(kref, struct lpfc_nodelist, + kref); +- lpfc_nlp_remove(ndlp->nlp_phba, ndlp); +- mempool_free(ndlp, ndlp->nlp_phba->nlp_mem_pool); ++ ++ lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, ++ "node release: did:x%x flg:x%x type:x%x", ++ ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type); ++ ++ lpfc_nlp_remove(ndlp->vport, ndlp); ++ mempool_free(ndlp, ndlp->vport->phba->nlp_mem_pool); + } + + struct lpfc_nodelist * +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_hw.h 2007-12-21 15:36:12.000000000 -0500 +@@ -59,6 +59,12 @@ + #define SLI2_IOCB_CMD_R3XTRA_ENTRIES 24 + #define SLI2_IOCB_RSP_R3XTRA_ENTRIES 32 + ++#define SLI2_IOCB_CMD_SIZE 32 ++#define SLI2_IOCB_RSP_SIZE 32 ++#define SLI3_IOCB_CMD_SIZE 128 ++#define SLI3_IOCB_RSP_SIZE 64 ++ ++ + /* Common Transport structures and definitions */ + + union CtRevisionId { +@@ -79,6 +85,9 @@ + uint32_t word; + }; + ++#define FC4_FEATURE_INIT 0x2 ++#define FC4_FEATURE_TARGET 0x1 ++ + struct lpfc_sli_ct_request { + /* Structure is in Big Endian format */ + union CtRevisionId RevisionId; +@@ -121,20 +130,6 @@ + + uint32_t rsvd[7]; + } rft; +- struct rff { +- uint32_t PortId; +- uint8_t reserved[2]; +-#ifdef __BIG_ENDIAN_BITFIELD +- uint8_t feature_res:6; +- uint8_t feature_init:1; +- uint8_t feature_tgt:1; +-#else /* __LITTLE_ENDIAN_BITFIELD */ +- uint8_t feature_tgt:1; +- uint8_t feature_init:1; +- uint8_t feature_res:6; +-#endif +- uint8_t type_code; /* type=8 for FCP */ +- } rff; + struct rnn { + uint32_t PortId; /* For RNN_ID requests */ + uint8_t wwnn[8]; +@@ -144,15 +139,42 @@ + uint8_t len; + uint8_t symbname[255]; + } rsnn; ++ struct rspn { /* For RSPN_ID requests */ ++ uint32_t PortId; ++ uint8_t len; ++ uint8_t symbname[255]; ++ } rspn; ++ struct gff { ++ uint32_t PortId; ++ } gff; ++ struct gff_acc { ++ uint8_t fbits[128]; ++ } gff_acc; ++#define FCP_TYPE_FEATURE_OFFSET 4 ++ struct rff { ++ uint32_t PortId; ++ uint8_t reserved[2]; ++ uint8_t fbits; ++ uint8_t type_code; /* type=8 for FCP */ ++ } rff; + } un; + }; + + #define SLI_CT_REVISION 1 +-#define GID_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 260) +-#define RFT_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 228) +-#define RFF_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 235) +-#define RNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request) - 252) +-#define RSNN_REQUEST_SZ (sizeof(struct lpfc_sli_ct_request)) ++#define GID_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct gid)) ++#define GFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct gff)) ++#define RFT_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct rft)) ++#define RFF_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct rff)) ++#define RNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct rnn)) ++#define RSNN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct rsnn)) ++#define RSPN_REQUEST_SZ (offsetof(struct lpfc_sli_ct_request, un) + \ ++ sizeof(struct rspn)) + + /* + * FsType Definitions +@@ -227,6 +249,7 @@ 
+ #define SLI_CTNS_GFT_ID 0x0117 + #define SLI_CTNS_GSPN_ID 0x0118 + #define SLI_CTNS_GPT_ID 0x011A ++#define SLI_CTNS_GFF_ID 0x011F + #define SLI_CTNS_GID_PN 0x0121 + #define SLI_CTNS_GID_NN 0x0131 + #define SLI_CTNS_GIP_NN 0x0135 +@@ -240,9 +263,9 @@ + #define SLI_CTNS_RNN_ID 0x0213 + #define SLI_CTNS_RCS_ID 0x0214 + #define SLI_CTNS_RFT_ID 0x0217 +-#define SLI_CTNS_RFF_ID 0x021F + #define SLI_CTNS_RSPN_ID 0x0218 + #define SLI_CTNS_RPT_ID 0x021A ++#define SLI_CTNS_RFF_ID 0x021F + #define SLI_CTNS_RIP_NN 0x0235 + #define SLI_CTNS_RIPA_NN 0x0236 + #define SLI_CTNS_RSNN_NN 0x0239 +@@ -311,9 +334,9 @@ + uint8_t bbCreditlsb; /* FC Word 0, byte 3 */ + + #ifdef __BIG_ENDIAN_BITFIELD +- uint16_t increasingOffset:1; /* FC Word 1, bit 31 */ ++ uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */ + uint16_t randomOffset:1; /* FC Word 1, bit 30 */ +- uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */ ++ uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */ + uint16_t fPort:1; /* FC Word 1, bit 28 */ + uint16_t altBbCredit:1; /* FC Word 1, bit 27 */ + uint16_t edtovResolution:1; /* FC Word 1, bit 26 */ +@@ -332,9 +355,9 @@ + uint16_t edtovResolution:1; /* FC Word 1, bit 26 */ + uint16_t altBbCredit:1; /* FC Word 1, bit 27 */ + uint16_t fPort:1; /* FC Word 1, bit 28 */ +- uint16_t word1Reserved2:1; /* FC Word 1, bit 29 */ ++ uint16_t response_multiple_NPort:1; /* FC Word 1, bit 29 */ + uint16_t randomOffset:1; /* FC Word 1, bit 30 */ +- uint16_t increasingOffset:1; /* FC Word 1, bit 31 */ ++ uint16_t request_multiple_Nport:1; /* FC Word 1, bit 31 */ + + uint16_t payloadlength:1; /* FC Word 1, bit 16 */ + uint16_t contIncSeqCnt:1; /* FC Word 1, bit 17 */ +@@ -1255,7 +1278,9 @@ + #define MBX_KILL_BOARD 0x24 + #define MBX_CONFIG_FARP 0x25 + #define MBX_BEACON 0x2A ++#define MBX_HEARTBEAT 0x31 + ++#define MBX_CONFIG_HBQ 0x7C + #define MBX_LOAD_AREA 0x81 + #define MBX_RUN_BIU_DIAG64 0x84 + #define MBX_CONFIG_PORT 0x88 +@@ -1263,6 +1288,10 @@ + #define MBX_READ_RPI64 0x8F + #define MBX_REG_LOGIN64 0x93 + #define MBX_READ_LA64 0x95 ++#define MBX_REG_VPI 0x96 ++#define MBX_UNREG_VPI 0x97 ++#define MBX_REG_VNPID 0x96 ++#define MBX_UNREG_VNPID 0x97 + + #define MBX_FLASH_WR_ULA 0x98 + #define MBX_SET_DEBUG 0x99 +@@ -1335,6 +1364,10 @@ + #define CMD_FCP_TRECEIVE64_CX 0xA1 + #define CMD_FCP_TRSP64_CX 0xA3 + ++#define CMD_IOCB_RCV_SEQ64_CX 0xB5 ++#define CMD_IOCB_RCV_ELS64_CX 0xB7 ++#define CMD_IOCB_RCV_CONT64_CX 0xBB ++ + #define CMD_GEN_REQUEST64_CR 0xC2 + #define CMD_GEN_REQUEST64_CX 0xC3 + +@@ -1561,6 +1594,7 @@ + #define FLAGS_TOPOLOGY_MODE_PT_PT 0x02 /* Attempt pt-pt only */ + #define FLAGS_TOPOLOGY_MODE_LOOP 0x04 /* Attempt loop only */ + #define FLAGS_TOPOLOGY_MODE_PT_LOOP 0x06 /* Attempt pt-pt then loop */ ++#define FLAGS_UNREG_LOGIN_ALL 0x08 /* UNREG_LOGIN all on link down */ + #define FLAGS_LIRP_LILP 0x80 /* LIRP / LILP is disabled */ + + #define FLAGS_TOPOLOGY_FAILOVER 0x0400 /* Bit 10 */ +@@ -1744,8 +1778,6 @@ + #define LMT_4Gb 0x040 + #define LMT_8Gb 0x080 + #define LMT_10Gb 0x100 +- +- + uint32_t rsvd2; + uint32_t rsvd3; + uint32_t max_xri; +@@ -1754,7 +1786,10 @@ + uint32_t avail_xri; + uint32_t avail_iocb; + uint32_t avail_rpi; +- uint32_t default_rpi; ++ uint32_t max_vpi; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++ uint32_t avail_vpi; + } READ_CONFIG_VAR; + + /* Structure for MB Command READ_RCONFIG (12) */ +@@ -1818,6 +1853,13 @@ + structure */ + struct ulp_bde64 sp64; + } un; ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint16_t rsvd3; ++ uint16_t vpi; ++#else /* __LITTLE_ENDIAN_BITFIELD */ ++ 
uint16_t vpi; ++ uint16_t rsvd3; ++#endif + } READ_SPARM_VAR; + + /* Structure for MB Command READ_STATUS (14) */ +@@ -1918,11 +1960,17 @@ + #ifdef __BIG_ENDIAN_BITFIELD + uint32_t cv:1; + uint32_t rr:1; +- uint32_t rsvd1:29; ++ uint32_t rsvd2:2; ++ uint32_t v3req:1; ++ uint32_t v3rsp:1; ++ uint32_t rsvd1:25; + uint32_t rv:1; + #else /* __LITTLE_ENDIAN_BITFIELD */ + uint32_t rv:1; +- uint32_t rsvd1:29; ++ uint32_t rsvd1:25; ++ uint32_t v3rsp:1; ++ uint32_t v3req:1; ++ uint32_t rsvd2:2; + uint32_t rr:1; + uint32_t cv:1; + #endif +@@ -1972,8 +2020,8 @@ + uint8_t sli1FwName[16]; + uint32_t sli2FwRev; + uint8_t sli2FwName[16]; +- uint32_t rsvd2; +- uint32_t RandomData[7]; ++ uint32_t sli3Feat; ++ uint32_t RandomData[6]; + } READ_REV_VAR; + + /* Structure for MB Command READ_LINK_STAT (18) */ +@@ -2013,6 +2061,14 @@ + struct ulp_bde64 sp64; + } un; + ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint16_t rsvd6; ++ uint16_t vpi; ++#else /* __LITTLE_ENDIAN_BITFIELD */ ++ uint16_t vpi; ++ uint16_t rsvd6; ++#endif ++ + } REG_LOGIN_VAR; + + /* Word 30 contents for REG_LOGIN */ +@@ -2037,16 +2093,78 @@ + #ifdef __BIG_ENDIAN_BITFIELD + uint16_t rsvd1; + uint16_t rpi; ++ uint32_t rsvd2; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++ uint16_t rsvd6; ++ uint16_t vpi; + #else /* __LITTLE_ENDIAN_BITFIELD */ + uint16_t rpi; + uint16_t rsvd1; ++ uint32_t rsvd2; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++ uint16_t vpi; ++ uint16_t rsvd6; + #endif + } UNREG_LOGIN_VAR; + ++/* Structure for MB Command REG_VPI (0x96) */ ++typedef struct { ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint32_t rsvd1; ++ uint32_t rsvd2:8; ++ uint32_t sid:24; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++ uint16_t rsvd6; ++ uint16_t vpi; ++#else /* __LITTLE_ENDIAN */ ++ uint32_t rsvd1; ++ uint32_t sid:24; ++ uint32_t rsvd2:8; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++ uint16_t vpi; ++ uint16_t rsvd6; ++#endif ++} REG_VPI_VAR; ++ ++/* Structure for MB Command UNREG_VPI (0x97) */ ++typedef struct { ++ uint32_t rsvd1; ++ uint32_t rsvd2; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint16_t rsvd6; ++ uint16_t vpi; ++#else /* __LITTLE_ENDIAN */ ++ uint16_t vpi; ++ uint16_t rsvd6; ++#endif ++} UNREG_VPI_VAR; ++ + /* Structure for MB Command UNREG_D_ID (0x23) */ + + typedef struct { + uint32_t did; ++ uint32_t rsvd2; ++ uint32_t rsvd3; ++ uint32_t rsvd4; ++ uint32_t rsvd5; ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint16_t rsvd6; ++ uint16_t vpi; ++#else ++ uint16_t vpi; ++ uint16_t rsvd6; ++#endif + } UNREG_D_ID_VAR; + + /* Structure for MB Command READ_LA (21) */ +@@ -2178,13 +2296,240 @@ + #define DMP_RSP_OFFSET 0x14 /* word 5 contains first word of rsp */ + #define DMP_RSP_SIZE 0x6C /* maximum of 27 words of rsp data */ + +-/* Structure for MB Command CONFIG_PORT (0x88) */ ++struct hbq_mask { ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint8_t tmatch; ++ uint8_t tmask; ++ uint8_t rctlmatch; ++ uint8_t rctlmask; ++#else /* __LITTLE_ENDIAN */ ++ uint8_t rctlmask; ++ uint8_t rctlmatch; ++ uint8_t tmask; ++ uint8_t tmatch; ++#endif ++}; + ++ ++/* Structure for MB Command CONFIG_HBQ (7c) */ ++ ++struct config_hbq_var { ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint32_t rsvd1 :7; ++ uint32_t recvNotify :1; /* Receive Notification */ ++ uint32_t numMask :8; /* # Mask Entries */ ++ uint32_t profile :8; /* Selection Profile */ ++ uint32_t rsvd2 :8; ++#else /* __LITTLE_ENDIAN */ ++ uint32_t rsvd2 :8; ++ uint32_t profile :8; /* Selection Profile */ ++ uint32_t numMask :8; /* # Mask Entries */ 
++ uint32_t recvNotify :1; /* Receive Notification */
++ uint32_t rsvd1 :7;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t hbqId :16;
++ uint32_t rsvd3 :12;
++ uint32_t ringMask :4;
++#else /* __LITTLE_ENDIAN */
++ uint32_t ringMask :4;
++ uint32_t rsvd3 :12;
++ uint32_t hbqId :16;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t entry_count :16;
++ uint32_t rsvd4 :8;
++ uint32_t headerLen :8;
++#else /* __LITTLE_ENDIAN */
++ uint32_t headerLen :8;
++ uint32_t rsvd4 :8;
++ uint32_t entry_count :16;
++#endif
++
++ uint32_t hbqaddrLow;
++ uint32_t hbqaddrHigh;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd5 :31;
++ uint32_t logEntry :1;
++#else /* __LITTLE_ENDIAN */
++ uint32_t logEntry :1;
++ uint32_t rsvd5 :31;
++#endif
++
++ uint32_t rsvd6; /* w7 */
++ uint32_t rsvd7; /* w8 */
++ uint32_t rsvd8; /* w9 */
++
++ struct hbq_mask hbqMasks[6];
++
++
++ union {
++ uint32_t allprofiles[12];
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd1 :28;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :28;
++ #endif
++ uint32_t rsvd[10];
++ } profile2;
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cmdcodeoff :28;
++ uint32_t rsvd1 :12;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :12;
++ uint32_t cmdcodeoff :28;
++ #endif
++ uint32_t cmdmatch[8];
++
++ uint32_t rsvd[2];
++ } profile3;
++
++ struct {
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t seqlenoff :16;
++ uint32_t maxlen :16;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t maxlen :16;
++ uint32_t seqlenoff :16;
++ #endif
++ #ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cmdcodeoff :28;
++ uint32_t rsvd1 :12;
++ uint32_t seqlenbcnt :4;
++ #else /* __LITTLE_ENDIAN */
++ uint32_t seqlenbcnt :4;
++ uint32_t rsvd1 :12;
++ uint32_t cmdcodeoff :28;
++ #endif
++ uint32_t cmdmatch[8];
++
++ uint32_t rsvd[2];
++ } profile5;
++
++ } profiles;
++
++};
++
++
++
++/* Structure for MB Command CONFIG_PORT (0x88) */
+ typedef struct {
+- uint32_t pcbLen;
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t cBE : 1;
++ uint32_t cET : 1;
++ uint32_t cHpcb : 1;
++ uint32_t cMA : 1;
++ uint32_t sli_mode : 4;
++ uint32_t pcbLen : 24; /* bit 23:0 of memory based port
++ * config block */
++#else /* __LITTLE_ENDIAN */
++ uint32_t pcbLen : 24; /* bit 23:0 of memory based port
++ * config block */
++ uint32_t sli_mode : 4;
++ uint32_t cMA : 1;
++ uint32_t cHpcb : 1;
++ uint32_t cET : 1;
++ uint32_t cBE : 1;
++#endif
++
+ uint32_t pcbLow; /* bit 31:0 of memory based port config block */
+ uint32_t pcbHigh; /* bit 63:32 of memory based port config block */
+- uint32_t hbainit[5];
++ uint32_t hbainit[6];
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd : 24; /* Reserved */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++#else /* __LITTLE_ENDIAN */
++ uint32_t cmr : 1; /* Configure Max RPIs */
++ uint32_t cmx : 1; /* Configure Max XRIs */
++ uint32_t cerbm : 1; /* Configure Enhanced Receive Buf Mgmt */
++ uint32_t cinb : 1; /* Enable Interrupt Notification Block */
++ uint32_t chbs : 1; /* Configure Host Backing store */
++ uint32_t csah : 1; /* Configure Synchronous Abort Handling */
++ uint32_t ccrp : 1; /* Config Command Ring Polling */
++ uint32_t cmv : 1; /* Configure Max VPIs */
++ uint32_t rsvd : 24; /* Reserved */
++#endif
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd2 : 24; /* Reserved */
++ uint32_t gmv : 1; /* Grant Max VPIs */
++ uint32_t gcrp : 1; /* Grant Command Ring Polling */
++ uint32_t gsah : 1; /* Grant Synchronous Abort Handling */
++ uint32_t ghbs : 1; /* Grant Host Backing Store */
++ uint32_t ginb : 1; /* Grant Interrupt Notification Block */
++ uint32_t gerbm : 1; /* Grant ERBM Request */
++ uint32_t gmx : 1; /* Grant Max XRIs */
++ uint32_t gmr : 1; /* Grant Max RPIs */
++#else /* __LITTLE_ENDIAN */
++ uint32_t gmr : 1; /* Grant Max RPIs */
++ uint32_t gmx : 1; /* Grant Max XRIs */
++ uint32_t gerbm : 1; /* Grant ERBM Request */
++ uint32_t ginb : 1; /* Grant Interrupt Notification Block */
++ uint32_t ghbs : 1; /* Grant Host Backing Store */
++ uint32_t gsah : 1; /* Grant Synchronous Abort Handling */
++ uint32_t gcrp : 1; /* Grant Command Ring Polling */
++ uint32_t gmv : 1; /* Grant Max VPIs */
++ uint32_t rsvd2 : 24; /* Reserved */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t max_rpi : 16; /* Max RPIs Port should configure */
++ uint32_t max_xri : 16; /* Max XRIs Port should configure */
++#else /* __LITTLE_ENDIAN */
++ uint32_t max_xri : 16; /* Max XRIs Port should configure */
++ uint32_t max_rpi : 16; /* Max RPIs Port should configure */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t max_hbq : 16; /* Max HBQs Host expects to configure */
++ uint32_t rsvd3 : 16; /* Reserved */
++#else /* __LITTLE_ENDIAN */
++ uint32_t rsvd3 : 16; /* Reserved */
++ uint32_t max_hbq : 16; /* Max HBQs Host expects to configure */
++#endif
++
++ uint32_t rsvd4; /* Reserved */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++ uint32_t rsvd5 : 16; /* Reserved */
++ uint32_t max_vpi : 16; /* Max number of virt N-Ports */
++#else /* __LITTLE_ENDIAN */
++ uint32_t max_vpi : 16; /* Max number of virt N-Ports */
++ uint32_t rsvd5 : 16; /* Reserved */
++#endif
++
+ } CONFIG_PORT_VAR;
+
+ /* SLI-2 Port Control Block */
+@@ -2262,7 +2607,9 @@
+ #define MAILBOX_CMD_SIZE (MAILBOX_CMD_WSIZE * sizeof(uint32_t))
+
+ typedef union {
+- uint32_t varWords[MAILBOX_CMD_WSIZE - 1];
++ uint32_t varWords[MAILBOX_CMD_WSIZE - 1]; /* first word is type/
++ * feature/max ring number
++ */
+ LOAD_SM_VAR varLdSM; /* cmd = 1 (LOAD_SM) */
+ READ_NV_VAR varRDnvp; /* cmd = 2 (READ_NVPARMS) */
+ WRITE_NV_VAR varWTnvp; /* cmd = 3 (WRITE_NVPARMS) */
+@@ -2287,8 +2634,13 @@
+ CLEAR_LA_VAR varClearLA; /* cmd = 22 (CLEAR_LA) */
+ DUMP_VAR varDmp; /* Warm Start DUMP mbx cmd */
+ UNREG_D_ID_VAR varUnregDID; /* cmd = 0x23 (UNREG_D_ID) */
+- CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP) NEW_FEATURE */
++ CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP)
++ * NEW_FEATURE
++ */
++ struct config_hbq_var varCfgHbq;/* cmd = 0x7c (CONFIG_HBQ) */
+ CONFIG_PORT_VAR varCfgPort; /* cmd = 0x88 (CONFIG_PORT) */
++ REG_VPI_VAR varRegVpi; /* cmd = 0x96 (REG_VPI) */
++ UNREG_VPI_VAR varUnregVpi; /* cmd = 0x97 (UNREG_VPI) */
+ } MAILVARIANTS;
+
+ /*
+@@ -2305,14 +2657,27 
@@ + __le32 rspPutInx; + }; + +-typedef struct _SLI2_DESC { +- struct lpfc_hgp host[MAX_RINGS]; ++struct sli2_desc { + uint32_t unused1[16]; ++ struct lpfc_hgp host[MAX_RINGS]; + struct lpfc_pgp port[MAX_RINGS]; +-} SLI2_DESC; ++}; ++ ++struct sli3_desc { ++ struct lpfc_hgp host[MAX_RINGS]; ++ uint32_t reserved[8]; ++ uint32_t hbq_put[16]; ++}; ++ ++struct sli3_pgp { ++ struct lpfc_pgp port[MAX_RINGS]; ++ uint32_t hbq_get[16]; ++}; + + typedef union { +- SLI2_DESC s2; ++ struct sli2_desc s2; ++ struct sli3_desc s3; ++ struct sli3_pgp s3_pgp; + } SLI_VAR; + + typedef struct { +@@ -2618,6 +2983,25 @@ + uint32_t fcpt_Length; /* transfer ready for IWRITE */ + } FCPT_FIELDS64; + ++/* IOCB Command template for CMD_IOCB_RCV_ELS64_CX (0xB7) ++ or CMD_IOCB_RCV_SEQ64_CX (0xB5) */ ++ ++struct rcv_sli3 { ++ uint32_t word8Rsvd; ++#ifdef __BIG_ENDIAN_BITFIELD ++ uint16_t vpi; ++ uint16_t word9Rsvd; ++#else /* __LITTLE_ENDIAN */ ++ uint16_t word9Rsvd; ++ uint16_t vpi; ++#endif ++ uint32_t word10Rsvd; ++ uint32_t acc_len; /* accumulated length */ ++ struct ulp_bde64 bde2; ++}; ++ ++ ++ + typedef struct _IOCB { /* IOCB structure */ + union { + GENERIC_RSP grsp; /* Generic response */ +@@ -2633,7 +3017,7 @@ + /* SLI-2 structures */ + + struct ulp_bde64 cont64[2]; /* up to 2 64 bit continuation +- bde_64s */ ++ * bde_64s */ + ELS_REQUEST64 elsreq64; /* ELS_REQUEST template */ + GEN_REQUEST64 genreq64; /* GEN_REQUEST template */ + RCV_ELS_REQ64 rcvels64; /* RCV_ELS_REQ template */ +@@ -2695,9 +3079,20 @@ + uint32_t ulpTimeout:8; + #endif + ++ union { ++ struct rcv_sli3 rcvsli3; /* words 8 - 15 */ ++ uint32_t sli3Words[24]; /* 96 extra bytes for SLI-3 */ ++ } unsli3; ++ ++#define ulpCt_h ulpXS ++#define ulpCt_l ulpFCP2Rcvy ++ ++#define IOCB_FCP 1 /* IOCB is used for FCP ELS cmds-ulpRsvByte */ ++#define IOCB_IP 2 /* IOCB is used for IP ELS cmds */ + #define PARM_UNUSED 0 /* PU field (Word 4) not used */ + #define PARM_REL_OFF 1 /* PU field (Word 4) = R. O. */ + #define PARM_READ_CHECK 2 /* PU field (Word 4) = Data Transfer Length */ ++#define PARM_NPIV_DID 3 + #define CLASS1 0 /* Class 1 */ + #define CLASS2 1 /* Class 2 */ + #define CLASS3 2 /* Class 3 */ +@@ -2718,39 +3113,51 @@ + #define IOSTAT_RSVD2 0xC + #define IOSTAT_RSVD3 0xD + #define IOSTAT_RSVD4 0xE +-#define IOSTAT_RSVD5 0xF ++#define IOSTAT_NEED_BUFFER 0xF + #define IOSTAT_DRIVER_REJECT 0x10 /* ulpStatus - Driver defined */ + #define IOSTAT_DEFAULT 0xF /* Same as rsvd5 for now */ + #define IOSTAT_CNT 0x11 + + } IOCB_t; + ++/* Structure used for a single HBQ entry */ ++struct lpfc_hbq_entry { ++ struct ulp_bde64 bde; ++ uint32_t buffer_tag; ++}; ++ + + #define SLI1_SLIM_SIZE (4 * 1024) + + /* Up to 498 IOCBs will fit into 16k + * 256 (MAILBOX_t) + 140 (PCB_t) + ( 32 (IOCB_t) * 498 ) = < 16384 + */ +-#define SLI2_SLIM_SIZE (16 * 1024) ++#define SLI2_SLIM_SIZE (64 * 1024) + + /* Maximum IOCBs that will fit in SLI2 slim */ + #define MAX_SLI2_IOCB 498 ++#define MAX_SLIM_IOCB_SIZE (SLI2_SLIM_SIZE - \ ++ (sizeof(MAILBOX_t) + sizeof(PCB_t))) ++ ++/* HBQ entries are 4 words each = 4k */ ++#define LPFC_TOTAL_HBQ_SIZE (sizeof(struct lpfc_hbq_entry) * \ ++ lpfc_sli_hbq_count()) + + struct lpfc_sli2_slim { + MAILBOX_t mbx; + PCB_t pcb; +- IOCB_t IOCBs[MAX_SLI2_IOCB]; ++ IOCB_t IOCBs[MAX_SLIM_IOCB_SIZE]; + }; + +-/******************************************************************* +-This macro check PCI device to allow special handling for LC HBAs. 
+- +-Parameters: +-device : struct pci_dev 's device field +- +-return 1 => TRUE +- 0 => FALSE +- *******************************************************************/ ++/* ++ * This function checks PCI device to allow special handling for LC HBAs. ++ * ++ * Parameters: ++ * device : struct pci_dev 's device field ++ * ++ * return 1 => TRUE ++ * 0 => FALSE ++ */ + static inline int + lpfc_is_LC_HBA(unsigned short device) + { +@@ -2766,3 +3173,16 @@ + else + return 0; + } ++ ++/* ++ * Determine if an IOCB failed because of a link event or firmware reset. ++ */ ++ ++static inline int ++lpfc_error_lost_link(IOCB_t *iocbp) ++{ ++ return (iocbp->ulpStatus == IOSTAT_LOCAL_REJECT && ++ (iocbp->un.ulpWord[4] == IOERR_SLI_ABORTED || ++ iocbp->un.ulpWord[4] == IOERR_LINK_DOWN || ++ iocbp->un.ulpWord[4] == IOERR_SLI_DOWN)); ++} +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_init.c 2007-12-21 15:36:14.000000000 -0500 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -40,15 +41,20 @@ + #include "lpfc.h" + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" + #include "lpfc_version.h" ++#include "lpfc_vport.h" + + static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int); + static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *); + static int lpfc_post_rcv_buf(struct lpfc_hba *); + + static struct scsi_transport_template *lpfc_transport_template = NULL; ++static struct scsi_transport_template *lpfc_vport_transport_template = NULL; + static DEFINE_IDR(lpfc_hba_index); + ++ ++ + /************************************************************************/ + /* */ + /* lpfc_config_port_prep */ +@@ -61,7 +67,7 @@ + /* */ + /************************************************************************/ + int +-lpfc_config_port_prep(struct lpfc_hba * phba) ++lpfc_config_port_prep(struct lpfc_hba *phba) + { + lpfc_vpd_t *vp = &phba->vpd; + int i = 0, rc; +@@ -75,12 +81,12 @@ + + pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmb) { +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + return -ENOMEM; + } + + mb = &pmb->mb; +- phba->hba_state = LPFC_INIT_MBX_CMDS; ++ phba->link_state = LPFC_INIT_MBX_CMDS; + + if (lpfc_is_LC_HBA(phba->pcidev->device)) { + if (init_key) { +@@ -100,9 +106,7 @@ + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); + + if (rc != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_MBOX, ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX, + "%d:0324 Config Port initialization " + "error, mbxCmd x%x READ_NVPARM, " + "mbxStatus x%x\n", +@@ -112,16 +116,18 @@ + return -ERESTART; + } + memcpy(phba->wwnn, (char *)mb->un.varRDnvp.nodename, +- sizeof (mb->un.varRDnvp.nodename)); ++ sizeof(phba->wwnn)); ++ memcpy(phba->wwpn, (char *)mb->un.varRDnvp.portname, ++ sizeof(phba->wwpn)); + } + ++ phba->sli3_options = 0x0; ++ + /* Setup and issue mailbox READ REV command */ + lpfc_read_rev(phba, pmb); + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); + if (rc != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0439 Adapter failed to init, mbxCmd x%x " + "READ_REV, mbxStatus x%x\n", + phba->brd_no, +@@ -130,6 +136,7 @@ + return -ERESTART; + } + ++ + /* + * The value of rr must be 1 since the driver set the cv field to 1. 
+ * This setting requires the FW to set all revision fields. +@@ -144,8 +151,12 @@ + return -ERESTART; + } + ++ if (phba->sli_rev == 3 && !mb->un.varRdRev.v3rsp) ++ return -EINVAL; ++ + /* Save information as VPD data */ + vp->rev.rBit = 1; ++ memcpy(&vp->sli3Feat, &mb->un.varRdRev.sli3Feat, sizeof(uint32_t)); + vp->rev.sli1FwRev = mb->un.varRdRev.sli1FwRev; + memcpy(vp->rev.sli1FwName, (char*) mb->un.varRdRev.sli1FwName, 16); + vp->rev.sli2FwRev = mb->un.varRdRev.sli2FwRev; +@@ -161,6 +172,13 @@ + vp->rev.postKernRev = mb->un.varRdRev.postKernRev; + vp->rev.opFwRev = mb->un.varRdRev.opFwRev; + ++ /* If the sli feature level is less then 9, we must ++ * tear down all RPIs and VPIs on link down if NPIV ++ * is enabled. ++ */ ++ if (vp->rev.feaLevelHigh < 9) ++ phba->sli3_options |= LPFC_SLI3_VPORT_TEARDOWN; ++ + if (lpfc_is_LC_HBA(phba->pcidev->device)) + memcpy(phba->RandomData, (char *)&mb->un.varWords[24], + sizeof (phba->RandomData)); +@@ -212,48 +230,34 @@ + /* */ + /************************************************************************/ + int +-lpfc_config_port_post(struct lpfc_hba * phba) ++lpfc_config_port_post(struct lpfc_hba *phba) + { ++ struct lpfc_vport *vport = phba->pport; + LPFC_MBOXQ_t *pmb; + MAILBOX_t *mb; + struct lpfc_dmabuf *mp; + struct lpfc_sli *psli = &phba->sli; + uint32_t status, timeout; +- int i, j, rc; ++ int i, j; ++ int rc; + + pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!pmb) { +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + return -ENOMEM; + } + mb = &pmb->mb; + +- lpfc_config_link(phba, pmb); +- rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); +- if (rc != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, +- "%d:0447 Adapter failed init, mbxCmd x%x " +- "CONFIG_LINK mbxStatus x%x\n", +- phba->brd_no, +- mb->mbxCommand, mb->mbxStatus); +- phba->hba_state = LPFC_HBA_ERROR; +- mempool_free( pmb, phba->mbox_mem_pool); +- return -EIO; +- } +- + /* Get login parameters for NID. 
*/ +- lpfc_read_sparam(phba, pmb); ++ lpfc_read_sparam(phba, pmb, 0); ++ pmb->vport = vport; + if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0448 Adapter failed init, mbxCmd x%x " + "READ_SPARM mbxStatus x%x\n", + phba->brd_no, + mb->mbxCommand, mb->mbxStatus); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + mp = (struct lpfc_dmabuf *) pmb->context1; + mempool_free( pmb, phba->mbox_mem_pool); + lpfc_mbuf_free(phba, mp->virt, mp->phys); +@@ -263,25 +267,27 @@ + + mp = (struct lpfc_dmabuf *) pmb->context1; + +- memcpy(&phba->fc_sparam, mp->virt, sizeof (struct serv_parm)); ++ memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + pmb->context1 = NULL; + + if (phba->cfg_soft_wwnn) +- u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn); ++ u64_to_wwn(phba->cfg_soft_wwnn, ++ vport->fc_sparam.nodeName.u.wwn); + if (phba->cfg_soft_wwpn) +- u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn); +- memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName, ++ u64_to_wwn(phba->cfg_soft_wwpn, ++ vport->fc_sparam.portName.u.wwn); ++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, + sizeof (struct lpfc_name)); +- memcpy(&phba->fc_portname, &phba->fc_sparam.portName, ++ memcpy(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof (struct lpfc_name)); + /* If no serial number in VPD data, use low 6 bytes of WWNN */ + /* This should be consolidated into parse_vpd ? - mr */ + if (phba->SerialNumber[0] == 0) { + uint8_t *outptr; + +- outptr = &phba->fc_nodename.u.s.IEEE[0]; ++ outptr = &vport->fc_nodename.u.s.IEEE[0]; + for (i = 0; i < 12; i++) { + status = *outptr++; + j = ((status & 0xf0) >> 4); +@@ -303,15 +309,14 @@ + } + + lpfc_read_config(phba, pmb); ++ pmb->vport = vport; + if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0453 Adapter failed to init, mbxCmd x%x " + "READ_CONFIG, mbxStatus x%x\n", + phba->brd_no, + mb->mbxCommand, mb->mbxStatus); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + mempool_free( pmb, phba->mbox_mem_pool); + return -EIO; + } +@@ -338,9 +343,7 @@ + || ((phba->cfg_link_speed == LINK_SPEED_10G) + && !(phba->lmt & LMT_10Gb))) { + /* Reset link speed to auto */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_LINK_EVENT, ++ lpfc_printf_log(phba, KERN_WARNING, LOG_LINK_EVENT, + "%d:1302 Invalid speed for this board: " + "Reset link speed to auto: x%x\n", + phba->brd_no, +@@ -348,7 +351,7 @@ + phba->cfg_link_speed = LINK_SPEED_AUTO; + } + +- phba->hba_state = LPFC_LINK_DOWN; ++ phba->link_state = LPFC_LINK_DOWN; + + /* Only process IOCBs on ring 0 till hba_state is READY */ + if (psli->ring[psli->extra_ring].cmdringaddr) +@@ -359,10 +362,11 @@ + psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT; + + /* Post receive buffers for desired rings */ ++ if (phba->sli_rev != 3) + lpfc_post_rcv_buf(phba); + + /* Enable appropriate host interrupts */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + status = readl(phba->HCregaddr); + status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA; + if (psli->num_rings > 0) +@@ -380,22 +384,24 @@ + + writel(status, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- spin_unlock_irq(phba->host->host_lock); ++ 
spin_unlock_irq(&phba->hbalock); + + /* + * Setup the ring 0 (els) timeout handler + */ + timeout = phba->fc_ratov << 1; +- mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout); ++ mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout); ++ mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL); ++ phba->hb_outstanding = 0; ++ phba->last_completion_time = jiffies; + + lpfc_init_link(phba, pmb, phba->cfg_topology, phba->cfg_link_speed); + pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ pmb->vport = vport; + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); + lpfc_set_loopback_flag(phba); + if (rc != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0454 Adapter failed to init, mbxCmd x%x " + "INIT_LINK, mbxStatus x%x\n", + phba->brd_no, +@@ -408,7 +414,7 @@ + writel(0xffffffff, phba->HAregaddr); + readl(phba->HAregaddr); /* flush */ + +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + if (rc != MBX_BUSY) + mempool_free(pmb, phba->mbox_mem_pool); + return -EIO; +@@ -429,18 +435,19 @@ + /* */ + /************************************************************************/ + int +-lpfc_hba_down_prep(struct lpfc_hba * phba) ++lpfc_hba_down_prep(struct lpfc_hba *phba) + { ++ struct lpfc_vport *vport = phba->pport; ++ + /* Disable interrupts */ + writel(0, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ + +- /* Cleanup potential discovery resources */ +- lpfc_els_flush_rscn(phba); +- lpfc_els_flush_cmd(phba); +- lpfc_disc_flush_list(phba); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ lpfc_cleanup_discovery_resources(vport); ++ } + +- return (0); ++ return 0; + } + + /************************************************************************/ +@@ -453,13 +460,16 @@ + /* */ + /************************************************************************/ + int +-lpfc_hba_down_post(struct lpfc_hba * phba) ++lpfc_hba_down_post(struct lpfc_hba *phba) + { + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + struct lpfc_dmabuf *mp, *next_mp; + int i; + ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) ++ lpfc_sli_hbqbuf_free_all(phba); ++ else { + /* Cleanup preposted buffers on the ELS ring */ + pring = &psli->ring[LPFC_ELS_RING]; + list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) { +@@ -468,6 +478,7 @@ + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + } ++ } + + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->ring[i]; +@@ -477,6 +488,119 @@ + return 0; + } + ++/* HBA heart beat timeout handler */ ++void ++lpfc_hb_timeout(unsigned long ptr) ++{ ++ struct lpfc_hba *phba; ++ unsigned long iflag; ++ ++ phba = (struct lpfc_hba *)ptr; ++ spin_lock_irqsave(&phba->pport->work_port_lock, iflag); ++ if (!(phba->pport->work_port_events & WORKER_HB_TMO)) ++ phba->pport->work_port_events |= WORKER_HB_TMO; ++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag); ++ ++ if (phba->work_wait) ++ wake_up(phba->work_wait); ++ return; ++} ++ ++static void ++lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) ++{ ++ unsigned long drvr_flag; ++ ++ spin_lock_irqsave(&phba->hbalock, drvr_flag); ++ phba->hb_outstanding = 0; ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); ++ ++ mempool_free(pmboxq, phba->mbox_mem_pool); ++ if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) && ++ !(phba->link_state == LPFC_HBA_ERROR) && ++ !(phba->pport->fc_flag & FC_UNLOADING)) ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL); ++ return; 
++} ++ ++void ++lpfc_hb_timeout_handler(struct lpfc_hba *phba) ++{ ++ LPFC_MBOXQ_t *pmboxq; ++ int retval; ++ struct lpfc_sli *psli = &phba->sli; ++ ++ if ((phba->link_state == LPFC_HBA_ERROR) || ++ (phba->pport->fc_flag & FC_UNLOADING) || ++ (phba->pport->fc_flag & FC_OFFLINE_MODE)) ++ return; ++ ++ spin_lock_irq(&phba->pport->work_port_lock); ++ /* If the timer is already canceled do nothing */ ++ if (!(phba->pport->work_port_events & WORKER_HB_TMO)) { ++ spin_unlock_irq(&phba->pport->work_port_lock); ++ return; ++ } ++ ++ if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ, ++ jiffies)) { ++ spin_unlock_irq(&phba->pport->work_port_lock); ++ if (!phba->hb_outstanding) ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL); ++ else ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_TIMEOUT); ++ return; ++ } ++ spin_unlock_irq(&phba->pport->work_port_lock); ++ ++ /* If there is no heart beat outstanding, issue a heartbeat command */ ++ if (!phba->hb_outstanding) { ++ pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL); ++ if (!pmboxq) { ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL); ++ return; ++ } ++ ++ lpfc_heart_beat(phba, pmboxq); ++ pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl; ++ pmboxq->vport = phba->pport; ++ retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT); ++ ++ if (retval != MBX_BUSY && retval != MBX_SUCCESS) { ++ mempool_free(pmboxq, phba->mbox_mem_pool); ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_INTERVAL); ++ return; ++ } ++ mod_timer(&phba->hb_tmofunc, ++ jiffies + HZ * LPFC_HB_MBOX_TIMEOUT); ++ phba->hb_outstanding = 1; ++ return; ++ } else { ++ /* ++ * If heart beat timeout called with hb_outstanding set we ++ * need to take the HBA offline. ++ */ ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "%d:0459 Adapter heartbeat failure, taking " ++ "this port offline.\n", phba->brd_no); ++ ++ spin_lock_irq(&phba->hbalock); ++ psli->sli_flag &= ~LPFC_SLI2_ACTIVE; ++ spin_unlock_irq(&phba->hbalock); ++ ++ lpfc_offline_prep(phba); ++ lpfc_offline(phba); ++ lpfc_unblock_mgmt_io(phba); ++ phba->link_state = LPFC_HBA_ERROR; ++ lpfc_hba_down_post(phba); ++ } ++} ++ + /************************************************************************/ + /* */ + /* lpfc_handle_eratt */ +@@ -486,11 +610,15 @@ + /* */ + /************************************************************************/ + void +-lpfc_handle_eratt(struct lpfc_hba * phba) ++lpfc_handle_eratt(struct lpfc_hba *phba) + { ++ struct lpfc_vport *vport = phba->pport; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; ++ struct lpfc_vport *port_iterator; + uint32_t event_data; ++ struct Scsi_Host *shost; ++ + /* If the pci channel is offline, ignore possible errors, + * since we cannot communicate with the pci card anyway. 
*/ + if (pci_channel_offline(phba->pcidev)) +@@ -504,10 +632,17 @@ + "Data: x%x x%x x%x\n", + phba->brd_no, phba->work_hs, + phba->work_status[0], phba->work_status[1]); +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_ESTABLISH_LINK; ++ list_for_each_entry(port_iterator, &phba->port_list, ++ listentry) { ++ shost = lpfc_shost_from_vport(port_iterator); ++ ++ spin_lock_irq(shost->host_lock); ++ port_iterator->fc_flag |= FC_ESTABLISH_LINK; ++ spin_unlock_irq(shost->host_lock); ++ } ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + /* + * Firmware stops when it triggled erratt with HS_FFER6. +@@ -544,15 +679,18 @@ + phba->work_status[0], phba->work_status[1]); + + event_data = FC_REG_DUMP_EVENT; +- fc_host_post_vendor_event(phba->host, fc_get_event_number(), ++ shost = lpfc_shost_from_vport(vport); ++ fc_host_post_vendor_event(shost, fc_get_event_number(), + sizeof(event_data), (char *) &event_data, + SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX); + ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; ++ spin_unlock_irq(&phba->hbalock); + lpfc_offline_prep(phba); + lpfc_offline(phba); + lpfc_unblock_mgmt_io(phba); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + lpfc_hba_down_post(phba); + } + } +@@ -566,9 +704,11 @@ + /* */ + /************************************************************************/ + void +-lpfc_handle_latt(struct lpfc_hba * phba) ++lpfc_handle_latt(struct lpfc_hba *phba) + { ++ struct lpfc_vport *vport = phba->pport; + struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_vport *port_iterator; + LPFC_MBOXQ_t *pmb; + volatile uint32_t control; + struct lpfc_dmabuf *mp; +@@ -589,20 +729,22 @@ + rc = -EIO; + + /* Cleanup any outstanding ELS commands */ +- lpfc_els_flush_cmd(phba); ++ list_for_each_entry(port_iterator, &phba->port_list, listentry) ++ lpfc_els_flush_cmd(port_iterator); + + psli->slistat.link_event++; + lpfc_read_la(phba, pmb, mp); + pmb->mbox_cmpl = lpfc_mbx_cmpl_read_la; ++ pmb->vport = vport; + rc = lpfc_sli_issue_mbox (phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) + goto lpfc_handle_latt_free_mbuf; + + /* Clear Link Attention in HA REG */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + writel(HA_LATT, phba->HAregaddr); + readl(phba->HAregaddr); /* flush */ +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return; + +@@ -614,7 +756,7 @@ + mempool_free(pmb, phba->mbox_mem_pool); + lpfc_handle_latt_err_exit: + /* Enable Link attention interrupts */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag |= LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control |= HC_LAINT_ENA; +@@ -624,15 +766,13 @@ + /* Clear Link Attention in HA REG */ + writel(HA_LATT, phba->HAregaddr); + readl(phba->HAregaddr); /* flush */ +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + lpfc_linkdown(phba); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + + /* The other case is an error from issue_mbox */ + if (rc == -ENOMEM) +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_MBOX, ++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX, + "%d:0300 READ_LA: no buffers\n", + phba->brd_no); + +@@ -646,7 +786,7 @@ + /* */ + /************************************************************************/ + static int +-lpfc_parse_vpd(struct lpfc_hba * phba, 
uint8_t * vpd, int len) ++lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len) + { + uint8_t lenlo, lenhi; + int Length; +@@ -658,9 +798,7 @@ + return 0; + + /* Vital Product */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "%d:0455 Vital Product Data: x%x x%x x%x x%x\n", + phba->brd_no, + (uint32_t) vpd[0], (uint32_t) vpd[1], (uint32_t) vpd[2], +@@ -785,7 +923,7 @@ + } + + static void +-lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp) ++lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp) + { + lpfc_vpd_t *vp; + uint16_t dev_id = phba->pcidev->device; +@@ -943,7 +1081,7 @@ + /* Returns the number of buffers NOT posted. */ + /**************************************************/ + int +-lpfc_post_buffer(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, int cnt, ++lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt, + int type) + { + IOCB_t *icmd; +@@ -955,9 +1093,7 @@ + /* While there are buffers to post */ + while (cnt > 0) { + /* Allocate buffer for command iocb */ +- spin_lock_irq(phba->host->host_lock); + iocb = lpfc_sli_get_iocbq(phba); +- spin_unlock_irq(phba->host->host_lock); + if (iocb == NULL) { + pring->missbufcnt = cnt; + return cnt; +@@ -972,9 +1108,7 @@ + &mp1->phys); + if (mp1 == 0 || mp1->virt == 0) { + kfree(mp1); +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, iocb); +- spin_unlock_irq(phba->host->host_lock); + pring->missbufcnt = cnt; + return cnt; + } +@@ -990,9 +1124,7 @@ + kfree(mp2); + lpfc_mbuf_free(phba, mp1->virt, mp1->phys); + kfree(mp1); +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_release_iocbq(phba, iocb); +- spin_unlock_irq(phba->host->host_lock); + pring->missbufcnt = cnt; + return cnt; + } +@@ -1018,7 +1150,6 @@ + icmd->ulpCommand = CMD_QUE_RING_BUF64_CN; + icmd->ulpLe = 1; + +- spin_lock_irq(phba->host->host_lock); + if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) { + lpfc_mbuf_free(phba, mp1->virt, mp1->phys); + kfree(mp1); +@@ -1030,15 +1161,12 @@ + } + lpfc_sli_release_iocbq(phba, iocb); + pring->missbufcnt = cnt; +- spin_unlock_irq(phba->host->host_lock); + return cnt; + } +- spin_unlock_irq(phba->host->host_lock); + lpfc_sli_ringpostbuf_put(phba, pring, mp1); +- if (mp2) { ++ if (mp2) + lpfc_sli_ringpostbuf_put(phba, pring, mp2); + } +- } + pring->missbufcnt = 0; + return 0; + } +@@ -1050,7 +1178,7 @@ + /* */ + /************************************************************************/ + static int +-lpfc_post_rcv_buf(struct lpfc_hba * phba) ++lpfc_post_rcv_buf(struct lpfc_hba *phba) + { + struct lpfc_sli *psli = &phba->sli; + +@@ -1151,7 +1279,7 @@ + { + int t; + uint32_t *HashWorking; +- uint32_t *pwwnn = phba->wwnn; ++ uint32_t *pwwnn = (uint32_t *) phba->wwnn; + + HashWorking = kmalloc(80 * sizeof(uint32_t), GFP_KERNEL); + if (!HashWorking) +@@ -1170,64 +1298,76 @@ + } + + static void +-lpfc_cleanup(struct lpfc_hba * phba) ++lpfc_cleanup(struct lpfc_vport *vport) + { + struct lpfc_nodelist *ndlp, *next_ndlp; + + /* clean up phba - lpfc specific */ +- lpfc_can_disctmo(phba); +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) ++ lpfc_can_disctmo(vport); ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) + lpfc_nlp_put(ndlp); +- +- INIT_LIST_HEAD(&phba->fc_nodes); +- + return; + } + + static void + lpfc_establish_link_tmo(unsigned long ptr) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)ptr; ++ struct lpfc_hba *phba = 
(struct lpfc_hba *) ptr; ++ struct lpfc_vport *vport = phba->pport; + unsigned long iflag; + +- + /* Re-establishing Link, timer expired */ + lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, + "%d:1300 Re-establishing Link, timer expired " + "Data: x%x x%x\n", +- phba->brd_no, phba->fc_flag, phba->hba_state); +- spin_lock_irqsave(phba->host->host_lock, iflag); +- phba->fc_flag &= ~FC_ESTABLISH_LINK; +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ phba->brd_no, vport->fc_flag, ++ vport->port_state); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ spin_lock_irqsave(shost->host_lock, iflag); ++ vport->fc_flag &= ~FC_ESTABLISH_LINK; ++ spin_unlock_irqrestore(shost->host_lock, iflag); ++ } + } + +-static int +-lpfc_stop_timer(struct lpfc_hba * phba) ++void ++lpfc_stop_vport_timers(struct lpfc_vport *vport) + { +- struct lpfc_sli *psli = &phba->sli; ++ del_timer_sync(&vport->els_tmofunc); ++ del_timer_sync(&vport->fc_fdmitmo); ++ lpfc_can_disctmo(vport); ++ return; ++} ++ ++static void ++lpfc_stop_phba_timers(struct lpfc_hba *phba) ++{ ++ struct lpfc_vport *vport; + + del_timer_sync(&phba->fcp_poll_timer); + del_timer_sync(&phba->fc_estabtmo); +- del_timer_sync(&phba->fc_disctmo); +- del_timer_sync(&phba->fc_fdmitmo); +- del_timer_sync(&phba->els_tmofunc); +- psli = &phba->sli; +- del_timer_sync(&psli->mbox_tmo); +- return(1); ++ list_for_each_entry(vport, &phba->port_list, listentry) ++ lpfc_stop_vport_timers(vport); ++ del_timer_sync(&phba->sli.mbox_tmo); ++ del_timer_sync(&phba->fabric_block_timer); ++ phba->hb_outstanding = 0; ++ del_timer_sync(&phba->hb_tmofunc); ++ return; + } + + int +-lpfc_online(struct lpfc_hba * phba) ++lpfc_online(struct lpfc_hba *phba) + { ++ struct lpfc_vport *vport = phba->pport; ++ + if (!phba) + return 0; + +- if (!(phba->fc_flag & FC_OFFLINE_MODE)) ++ if (!(vport->fc_flag & FC_OFFLINE_MODE)) + return 0; + +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "%d:0458 Bring Adapter online\n", + phba->brd_no); + +@@ -1243,9 +1383,14 @@ + return 1; + } + +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_OFFLINE_MODE; +- spin_unlock_irq(phba->host->host_lock); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag &= ~FC_OFFLINE_MODE; ++ if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; ++ spin_unlock_irq(shost->host_lock); ++ } + + lpfc_unblock_mgmt_io(phba); + return 0; +@@ -1256,9 +1401,9 @@ + { + unsigned long iflag; + +- spin_lock_irqsave(phba->host->host_lock, iflag); +- phba->fc_flag |= FC_BLOCK_MGMT_IO; +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); ++ phba->sli.sli_flag |= LPFC_BLOCK_MGMT_IO; ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + } + + void +@@ -1266,17 +1411,18 @@ + { + unsigned long iflag; + +- spin_lock_irqsave(phba->host->host_lock, iflag); +- phba->fc_flag &= ~FC_BLOCK_MGMT_IO; +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); ++ phba->sli.sli_flag &= ~LPFC_BLOCK_MGMT_IO; ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + } + + void + lpfc_offline_prep(struct lpfc_hba * phba) + { ++ struct lpfc_vport *vport = phba->pport; + struct lpfc_nodelist *ndlp, *next_ndlp; + +- if (phba->fc_flag & FC_OFFLINE_MODE) ++ if 
(vport->fc_flag & FC_OFFLINE_MODE) + return; + + lpfc_block_mgmt_io(phba); +@@ -1284,39 +1430,49 @@ + lpfc_linkdown(phba); + + /* Issue an unreg_login to all nodes */ +- list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) + if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + + lpfc_sli_flush_mbox_queue(phba); + } + + void +-lpfc_offline(struct lpfc_hba * phba) ++lpfc_offline(struct lpfc_hba *phba) + { +- unsigned long iflag; ++ struct lpfc_vport *vport = phba->pport; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_vport *port_iterator; + +- if (phba->fc_flag & FC_OFFLINE_MODE) ++ if (vport->fc_flag & FC_OFFLINE_MODE) + return; + + /* stop all timers associated with this hba */ +- lpfc_stop_timer(phba); ++ lpfc_stop_phba_timers(phba); ++ list_for_each_entry(port_iterator, &phba->port_list, listentry) { ++ port_iterator->work_port_events = 0; ++ } + +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, + "%d:0460 Bring Adapter offline\n", + phba->brd_no); + + /* Bring down the SLI Layer and cleanup. The HBA is offline + now. */ + lpfc_sli_hba_down(phba); +- lpfc_cleanup(phba); +- spin_lock_irqsave(phba->host->host_lock, iflag); +- phba->work_hba_events = 0; ++ spin_lock_irq(&phba->hbalock); + phba->work_ha = 0; +- phba->fc_flag |= FC_OFFLINE_MODE; +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ vport->fc_flag |= FC_OFFLINE_MODE; ++ spin_unlock_irq(&phba->hbalock); ++ list_for_each_entry(port_iterator, &phba->port_list, listentry) { ++ shost = lpfc_shost_from_vport(port_iterator); ++ ++ lpfc_cleanup(port_iterator); ++ spin_lock_irq(shost->host_lock); ++ vport->work_port_events = 0; ++ vport->fc_flag |= FC_OFFLINE_MODE; ++ spin_unlock_irq(shost->host_lock); ++ } + } + + /****************************************************************************** +@@ -1326,12 +1482,12 @@ + * + ******************************************************************************/ + static int +-lpfc_scsi_free(struct lpfc_hba * phba) ++lpfc_scsi_free(struct lpfc_hba *phba) + { + struct lpfc_scsi_buf *sb, *sb_next; + struct lpfc_iocbq *io, *io_next; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + /* Release all the lpfc_scsi_bufs maintained by this host. 
*/ + list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list, list) { + list_del(&sb->list); +@@ -1348,126 +1504,174 @@ + phba->total_iocbq_bufs--; + } + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + return 0; + } + +-void lpfc_remove_device(struct lpfc_hba *phba) +-{ +- unsigned long iflag; +- +- lpfc_free_sysfs_attr(phba); + +- spin_lock_irqsave(phba->host->host_lock, iflag); +- phba->fc_flag |= FC_UNLOADING; ++struct lpfc_vport * ++lpfc_create_port(struct lpfc_hba *phba, int instance, struct fc_vport *fc_vport) ++{ ++ struct lpfc_vport *vport; ++ struct Scsi_Host *shost; ++ int error = 0; + +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); ++ if (!shost) ++ goto out; + +- fc_remove_host(phba->host); +- scsi_remove_host(phba->host); ++ vport = (struct lpfc_vport *) shost->hostdata; ++ vport->phba = phba; + +- kthread_stop(phba->worker_thread); ++ vport->load_flag |= FC_LOADING; ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; + ++ shost->unique_id = instance; ++ shost->max_id = LPFC_MAX_TARGET; ++ shost->max_lun = phba->cfg_max_luns; ++ shost->this_id = -1; ++ shost->max_cmd_len = 16; + /* +- * Bring down the SLI Layer. This step disable all interrupts, +- * clears the rings, discards all mailbox commands, and resets +- * the HBA. ++ * Set initial can_queue value since 0 is no longer supported and ++ * scsi_add_host will fail. This will be adjusted later based on the ++ * max xri value determined in hba setup. + */ +- lpfc_sli_hba_down(phba); +- lpfc_sli_brdrestart(phba); ++ shost->can_queue = phba->cfg_hba_queue_depth - 10; ++ if (fc_vport != NULL) { ++ shost->transportt = lpfc_vport_transport_template; ++ vport->port_type = LPFC_NPIV_PORT; ++ } else { ++ shost->transportt = lpfc_transport_template; ++ vport->port_type = LPFC_PHYSICAL_PORT; ++ } + +- /* Release the irq reservation */ +- free_irq(phba->pcidev->irq, phba); +- pci_disable_msi(phba->pcidev); ++ /* Initialize all internally managed lists. */ ++ INIT_LIST_HEAD(&vport->fc_nodes); ++ spin_lock_init(&vport->work_port_lock); + +- lpfc_cleanup(phba); +- lpfc_stop_timer(phba); +- phba->work_hba_events = 0; ++ init_timer(&vport->fc_disctmo); ++ vport->fc_disctmo.function = lpfc_disc_timeout; ++ vport->fc_disctmo.data = (unsigned long)vport; + +- /* +- * Call scsi_free before mem_free since scsi bufs are released to their +- * corresponding pools here. 
+- */ +- lpfc_scsi_free(phba); +- lpfc_mem_free(phba); ++ init_timer(&vport->fc_fdmitmo); ++ vport->fc_fdmitmo.function = lpfc_fdmi_tmo; ++ vport->fc_fdmitmo.data = (unsigned long)vport; + +- /* Free resources associated with SLI2 interface */ +- dma_free_coherent(&phba->pcidev->dev, SLI2_SLIM_SIZE, +- phba->slim2p, phba->slim2p_mapping); ++ init_timer(&vport->els_tmofunc); ++ vport->els_tmofunc.function = lpfc_els_timeout; ++ vport->els_tmofunc.data = (unsigned long)vport; + +- /* unmap adapter SLIM and Control Registers */ +- iounmap(phba->ctrl_regs_memmap_p); +- iounmap(phba->slim_memmap_p); ++ if (fc_vport != NULL) { ++ error = scsi_add_host(shost, &fc_vport->dev); ++ } else { ++ error = scsi_add_host(shost, &phba->pcidev->dev); ++ } ++ if (error) ++ goto out_put_shost; + +- pci_release_regions(phba->pcidev); +- pci_disable_device(phba->pcidev); ++ list_add_tail(&vport->listentry, &phba->port_list); ++ return vport; + +- idr_remove(&lpfc_hba_index, phba->brd_no); +- scsi_host_put(phba->host); ++out_put_shost: ++ scsi_host_put(shost); ++out: ++ return NULL; + } + +-void lpfc_scan_start(struct Scsi_Host *host) ++void ++destroy_port(struct lpfc_vport *vport) + { +- struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + +- if (lpfc_alloc_sysfs_attr(phba)) +- goto error; ++ kfree(vport->vname); + +- phba->MBslimaddr = phba->slim_memmap_p; +- phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; +- phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; +- phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET; +- phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET; ++ lpfc_debugfs_terminate(vport); ++ fc_remove_host(shost); ++ scsi_remove_host(shost); + +- if (lpfc_sli_hba_setup(phba)) +- goto error; ++ spin_lock_irq(&phba->hbalock); ++ list_del_init(&vport->listentry); ++ spin_unlock_irq(&phba->hbalock); + +- /* +- * hba setup may have changed the hba_queue_depth so we need to adjust +- * the value of can_queue. +- */ +- host->can_queue = phba->cfg_hba_queue_depth - 10; ++ lpfc_cleanup(vport); + return; ++} + +-error: +- lpfc_remove_device(phba); ++int ++lpfc_get_instance(void) ++{ ++ int instance = 0; ++ ++ /* Assign an unused number */ ++ if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) ++ return -1; ++ if (idr_get_new(&lpfc_hba_index, NULL, &instance)) ++ return -1; ++ return instance; + } + ++/* ++ * Note: there is no scan_start function as adapter initialization ++ * will have asynchronously kicked off the link initialization. ++ */ ++ + int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ int stat = 0; + +- if (!phba->host) +- return 1; +- if (time >= 30 * HZ) ++ spin_lock_irq(shost->host_lock); ++ ++ if (vport->fc_flag & FC_UNLOADING) { ++ stat = 1; + goto finished; ++ } ++ if (time >= 30 * HZ) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, ++ "%d:0461 Scanning longer than 30 " ++ "seconds. Continuing initialization\n", ++ phba->brd_no); ++ stat = 1; ++ goto finished; ++ } ++ if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) { ++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, ++ "%d:0465 Link down longer than 15 " ++ "seconds. 
Continuing initialization\n", ++ phba->brd_no); ++ stat = 1; ++ goto finished; ++ } + +- if (phba->hba_state != LPFC_HBA_READY) +- return 0; +- if (phba->num_disc_nodes || phba->fc_prli_sent) +- return 0; +- if ((phba->fc_map_cnt == 0) && (time < 2 * HZ)) +- return 0; +- if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) +- return 0; +- if ((phba->hba_state > LPFC_LINK_DOWN) || (time < 15 * HZ)) +- return 0; ++ if (vport->port_state != LPFC_VPORT_READY) ++ goto finished; ++ if (vport->num_disc_nodes || vport->fc_prli_sent) ++ goto finished; ++ if (vport->fc_map_cnt == 0 && time < 2 * HZ) ++ goto finished; ++ if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0) ++ goto finished; ++ ++ stat = 1; + + finished: +- if (phba->cfg_poll & DISABLE_FCP_RING_INT) { +- spin_lock_irq(shost->host_lock); +- lpfc_poll_start_timer(phba); + spin_unlock_irq(shost->host_lock); +- } ++ return stat; ++} + ++void lpfc_host_attrib_init(struct Scsi_Host *shost) ++{ ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + /* +- * set fixed host attributes +- * Must done after lpfc_sli_hba_setup() ++ * Set fixed host attributes. Must done after lpfc_sli_hba_setup(). + */ + +- fc_host_node_name(shost) = wwn_to_u64(phba->fc_nodename.u.wwn); +- fc_host_port_name(shost) = wwn_to_u64(phba->fc_portname.u.wwn); ++ fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn); ++ fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); + fc_host_supported_classes(shost) = FC_COS_CLASS3; + + memset(fc_host_supported_fc4s(shost), 0, +@@ -1475,7 +1679,8 @@ + fc_host_supported_fc4s(shost)[2] = 1; + fc_host_supported_fc4s(shost)[7] = 1; + +- lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost)); ++ lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost), ++ sizeof fc_host_symbolic_name(shost)); + + fc_host_supported_speeds(shost) = 0; + if (phba->lmt & LMT_10Gb) +@@ -1488,8 +1693,8 @@ + fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT; + + fc_host_maxframe_size(shost) = +- ((((uint32_t) phba->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) | +- (uint32_t) phba->fc_sparam.cmn.bbRcvSizeLsb); ++ (((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) | ++ (uint32_t) vport->fc_sparam.cmn.bbRcvSizeLsb; + + /* This value is also unchanging */ + memset(fc_host_active_fc4s(shost), 0, +@@ -1497,20 +1702,20 @@ + fc_host_active_fc4s(shost)[2] = 1; + fc_host_active_fc4s(shost)[7] = 1; + ++ fc_host_max_npiv_vports(shost) = phba->max_vpi; + spin_lock_irq(shost->host_lock); +- phba->fc_flag &= ~FC_LOADING; ++ vport->fc_flag &= ~FC_LOADING; + spin_unlock_irq(shost->host_lock); +- +- return 1; + } + + static int __devinit + lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid) + { +- struct Scsi_Host *host; ++ struct lpfc_vport *vport = NULL; + struct lpfc_hba *phba; + struct lpfc_sli *psli; + struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL; ++ struct Scsi_Host *shost = NULL; + unsigned long bar0map_len, bar2map_len; + int error = -ENODEV, retval; + int i; +@@ -1521,61 +1726,46 @@ + if (pci_request_regions(pdev, LPFC_DRIVER_NAME)) + goto out_disable_device; + +- host = scsi_host_alloc(&lpfc_template, sizeof (struct lpfc_hba)); +- if (!host) ++ phba = kzalloc(sizeof (struct lpfc_hba), GFP_KERNEL); ++ if (!phba) + goto out_release_regions; + +- phba = (struct lpfc_hba*)host->hostdata; +- memset(phba, 0, sizeof (struct lpfc_hba)); +- phba->host = host; ++ spin_lock_init(&phba->hbalock); + +- phba->fc_flag |= FC_LOADING; + phba->pcidev = 
pdev; + + /* Assign an unused board number */ +- if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL)) +- goto out_put_host; ++ if ((phba->brd_no = lpfc_get_instance()) < 0) ++ goto out_free_phba; + +- error = idr_get_new(&lpfc_hba_index, NULL, &phba->brd_no); +- if (error) +- goto out_put_host; +- +- host->unique_id = phba->brd_no; ++ INIT_LIST_HEAD(&phba->port_list); ++ INIT_LIST_HEAD(&phba->hbq_buffer_list); ++ /* ++ * Get all the module params for configuring this host and then ++ * establish the host. ++ */ ++ lpfc_get_cfgparam(phba); ++ phba->max_vpi = LPFC_MAX_VPI; + + /* Initialize timers used by driver */ + init_timer(&phba->fc_estabtmo); + phba->fc_estabtmo.function = lpfc_establish_link_tmo; + phba->fc_estabtmo.data = (unsigned long)phba; +- init_timer(&phba->fc_disctmo); +- phba->fc_disctmo.function = lpfc_disc_timeout; +- phba->fc_disctmo.data = (unsigned long)phba; +- +- init_timer(&phba->fc_fdmitmo); +- phba->fc_fdmitmo.function = lpfc_fdmi_tmo; +- phba->fc_fdmitmo.data = (unsigned long)phba; +- init_timer(&phba->els_tmofunc); +- phba->els_tmofunc.function = lpfc_els_timeout; +- phba->els_tmofunc.data = (unsigned long)phba; ++ ++ init_timer(&phba->hb_tmofunc); ++ phba->hb_tmofunc.function = lpfc_hb_timeout; ++ phba->hb_tmofunc.data = (unsigned long)phba; ++ + psli = &phba->sli; + init_timer(&psli->mbox_tmo); + psli->mbox_tmo.function = lpfc_mbox_timeout; +- psli->mbox_tmo.data = (unsigned long)phba; +- ++ psli->mbox_tmo.data = (unsigned long) phba; + init_timer(&phba->fcp_poll_timer); + phba->fcp_poll_timer.function = lpfc_poll_timeout; +- phba->fcp_poll_timer.data = (unsigned long)phba; +- +- /* +- * Get all the module params for configuring this host and then +- * establish the host parameters. +- */ +- lpfc_get_cfgparam(phba); +- +- host->max_id = LPFC_MAX_TARGET; +- host->max_lun = phba->cfg_max_luns; +- host->this_id = -1; +- +- INIT_LIST_HEAD(&phba->fc_nodes); ++ phba->fcp_poll_timer.data = (unsigned long) phba; ++ init_timer(&phba->fabric_block_timer); ++ phba->fabric_block_timer.function = lpfc_fabric_block_timeout; ++ phba->fabric_block_timer.data = (unsigned long) phba; + + pci_set_master(pdev); + retval = pci_set_mwi(pdev); +@@ -1623,13 +1813,22 @@ + + memset(phba->slim2p, 0, SLI2_SLIM_SIZE); + ++ phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev, ++ lpfc_sli_hbq_size(), ++ &phba->hbqslimp.phys, ++ GFP_KERNEL); ++ if (!phba->hbqslimp.virt) ++ goto out_free_slim; ++ ++ memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size()); ++ + /* Initialize the SLI Layer to run with lpfc HBAs. */ + lpfc_sli_setup(phba); + lpfc_sli_queue_setup(phba); + + error = lpfc_mem_alloc(phba); + if (error) +- goto out_free_slim; ++ goto out_free_hbqslimp; + + /* Initialize and populate the iocb list per host. */ + INIT_LIST_HEAD(&phba->lpfc_iocb_list); +@@ -1653,10 +1852,11 @@ + error = -ENOMEM; + goto out_free_iocbq; + } +- spin_lock_irq(phba->host->host_lock); ++ ++ spin_lock_irq(&phba->hbalock); + list_add(&iocbq_entry->list, &phba->lpfc_iocb_list); + phba->total_iocbq_bufs++; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + } + + /* Initialize HBA structure */ +@@ -1677,22 +1877,22 @@ + goto out_free_iocbq; + } + +- /* +- * Set initial can_queue value since 0 is no longer supported and +- * scsi_add_host will fail. This will be adjusted later based on the +- * max xri value determined in hba setup. 
+- */ +- host->can_queue = phba->cfg_hba_queue_depth - 10; +- +- /* Tell the midlayer we support 16 byte commands */ +- host->max_cmd_len = 16; +- + /* Initialize the list of scsi buffers used by driver for scsi IO. */ + spin_lock_init(&phba->scsi_buf_list_lock); + INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list); + +- host->transportt = lpfc_transport_template; +- pci_set_drvdata(pdev, host); ++ /* Initialize list of fabric iocbs */ ++ INIT_LIST_HEAD(&phba->fabric_iocb_list); ++ ++ vport = lpfc_create_port(phba, phba->brd_no, NULL); ++ if (!vport) ++ goto out_kthread_stop; ++ ++ shost = lpfc_shost_from_vport(vport); ++ phba->pport = vport; ++ lpfc_debugfs_initialize(vport); ++ ++ pci_set_drvdata(pdev, shost); + + if (phba->cfg_use_msi) { + error = pci_enable_msi(phba->pcidev); +@@ -1708,33 +1908,63 @@ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0451 Enable interrupt handler failed\n", + phba->brd_no); +- goto out_kthread_stop; ++ goto out_disable_msi; + } + +- error = scsi_add_host(host, &pdev->dev); +- if (error) ++ phba->MBslimaddr = phba->slim_memmap_p; ++ phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET; ++ phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET; ++ phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET; ++ phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET; ++ ++ if (lpfc_alloc_sysfs_attr(vport)) + goto out_free_irq; + +- scsi_scan_host(host); ++ if (lpfc_sli_hba_setup(phba)) ++ goto out_remove_device; ++ ++ /* ++ * hba setup may have changed the hba_queue_depth so we need to adjust ++ * the value of can_queue. ++ */ ++ shost->can_queue = phba->cfg_hba_queue_depth - 10; ++ ++ lpfc_host_attrib_init(shost); ++ ++ if (phba->cfg_poll & DISABLE_FCP_RING_INT) { ++ spin_lock_irq(shost->host_lock); ++ lpfc_poll_start_timer(phba); ++ spin_unlock_irq(shost->host_lock); ++ } ++ ++ scsi_scan_host(shost); + + return 0; + ++out_remove_device: ++ lpfc_free_sysfs_attr(vport); ++ spin_lock_irq(shost->host_lock); ++ vport->fc_flag |= FC_UNLOADING; ++ spin_unlock_irq(shost->host_lock); + out_free_irq: +- lpfc_stop_timer(phba); +- phba->work_hba_events = 0; ++ lpfc_stop_phba_timers(phba); ++ phba->pport->work_port_events = 0; + free_irq(phba->pcidev->irq, phba); ++out_disable_msi: + pci_disable_msi(phba->pcidev); ++ destroy_port(vport); + out_kthread_stop: + kthread_stop(phba->worker_thread); + out_free_iocbq: + list_for_each_entry_safe(iocbq_entry, iocbq_next, + &phba->lpfc_iocb_list, list) { +- spin_lock_irq(phba->host->host_lock); + kfree(iocbq_entry); + phba->total_iocbq_bufs--; +- spin_unlock_irq(phba->host->host_lock); + } + lpfc_mem_free(phba); ++out_free_hbqslimp: ++ dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt, ++ phba->hbqslimp.phys); + out_free_slim: + dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, phba->slim2p, + phba->slim2p_mapping); +@@ -1744,27 +1974,85 @@ + iounmap(phba->slim_memmap_p); + out_idr_remove: + idr_remove(&lpfc_hba_index, phba->brd_no); +-out_put_host: +- phba->host = NULL; +- scsi_host_put(host); ++out_free_phba: ++ kfree(phba); + out_release_regions: + pci_release_regions(pdev); + out_disable_device: + pci_disable_device(pdev); + out: + pci_set_drvdata(pdev, NULL); ++ if (shost) ++ scsi_host_put(shost); + return error; + } + + static void __devexit + lpfc_pci_remove_one(struct pci_dev *pdev) + { +- struct Scsi_Host *host = pci_get_drvdata(pdev); +- struct lpfc_hba *phba = (struct lpfc_hba *)host->hostdata; ++ struct Scsi_Host *shost = pci_get_drvdata(pdev); ++ struct lpfc_vport *vport = (struct lpfc_vport *) 
shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_vport *port_iterator; ++ list_for_each_entry(port_iterator, &phba->port_list, listentry) ++ port_iterator->load_flag |= FC_UNLOADING; ++ ++ kfree(vport->vname); ++ lpfc_free_sysfs_attr(vport); + +- lpfc_remove_device(phba); ++ fc_remove_host(shost); ++ scsi_remove_host(shost); ++ ++ /* ++ * Bring down the SLI Layer. This step disable all interrupts, ++ * clears the rings, discards all mailbox commands, and resets ++ * the HBA. ++ */ ++ lpfc_sli_hba_down(phba); ++ lpfc_sli_brdrestart(phba); ++ ++ lpfc_stop_phba_timers(phba); ++ spin_lock_irq(&phba->hbalock); ++ list_del_init(&vport->listentry); ++ spin_unlock_irq(&phba->hbalock); ++ ++ ++ lpfc_debugfs_terminate(vport); ++ lpfc_cleanup(vport); ++ ++ kthread_stop(phba->worker_thread); ++ ++ /* Release the irq reservation */ ++ free_irq(phba->pcidev->irq, phba); ++ pci_disable_msi(phba->pcidev); + + pci_set_drvdata(pdev, NULL); ++ scsi_host_put(shost); ++ ++ /* ++ * Call scsi_free before mem_free since scsi bufs are released to their ++ * corresponding pools here. ++ */ ++ lpfc_scsi_free(phba); ++ lpfc_mem_free(phba); ++ ++ dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt, ++ phba->hbqslimp.phys); ++ ++ /* Free resources associated with SLI2 interface */ ++ dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, ++ phba->slim2p, phba->slim2p_mapping); ++ ++ /* unmap adapter SLIM and Control Registers */ ++ iounmap(phba->ctrl_regs_memmap_p); ++ iounmap(phba->slim_memmap_p); ++ ++ idr_remove(&lpfc_hba_index, phba->brd_no); ++ ++ kfree(phba); ++ ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); + } + + /** +@@ -1822,10 +2110,13 @@ + pci_set_master(pdev); + + /* Re-establishing Link */ +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag |= FC_ESTABLISH_LINK; ++ spin_lock_irq(host->host_lock); ++ phba->pport->fc_flag |= FC_ESTABLISH_LINK; ++ spin_unlock_irq(host->host_lock); ++ ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + + /* Take device offline; this will perform cleanup */ +@@ -1948,11 +2239,15 @@ + + lpfc_transport_template = + fc_attach_transport(&lpfc_transport_functions); +- if (!lpfc_transport_template) ++ lpfc_vport_transport_template = ++ fc_attach_transport(&lpfc_vport_transport_functions); ++ if (!lpfc_transport_template || !lpfc_vport_transport_template) + return -ENOMEM; + error = pci_register_driver(&lpfc_driver); +- if (error) ++ if (error) { + fc_release_transport(lpfc_transport_template); ++ fc_release_transport(lpfc_vport_transport_template); ++ } + + return error; + } +@@ -1962,6 +2257,7 @@ + { + pci_unregister_driver(&lpfc_driver); + fc_release_transport(lpfc_transport_template); ++ fc_release_transport(lpfc_vport_transport_template); + } + + module_init(lpfc_init); +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_logmsg.h 2007-12-21 15:36:12.000000000 -0500 +@@ -30,6 +30,7 @@ + #define LOG_SLI 0x800 /* SLI events */ + #define LOG_FCP_ERROR 0x1000 /* log errors, not underruns */ + #define LOG_LIBDFC 0x2000 /* Libdfc events */ ++#define LOG_VPORT 0x4000 /* NPIV events */ + #define LOG_ALL_MSG 0xffff /* LOG all messages */ + + #define lpfc_printf_log(phba, level, mask, fmt, arg...) 
\ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mbox.c 2007-12-21 15:36:12.000000000 -0500 +@@ -82,6 +82,22 @@ + } + + /**********************************************/ ++/* lpfc_heart_beat Issue a HEART_BEAT */ ++/* mailbox command */ ++/**********************************************/ ++void ++lpfc_heart_beat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++{ ++ MAILBOX_t *mb; ++ ++ mb = &pmb->mb; ++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); ++ mb->mbxCommand = MBX_HEARTBEAT; ++ mb->mbxOwner = OWN_HOST; ++ return; ++} ++ ++/**********************************************/ + /* lpfc_read_la Issue a READ LA */ + /* mailbox command */ + /**********************************************/ +@@ -134,6 +150,7 @@ + void + lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) + { ++ struct lpfc_vport *vport = phba->pport; + MAILBOX_t *mb = &pmb->mb; + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + +@@ -147,7 +164,7 @@ + mb->un.varCfgLnk.cr_count = phba->cfg_cr_count; + } + +- mb->un.varCfgLnk.myId = phba->fc_myDID; ++ mb->un.varCfgLnk.myId = vport->fc_myDID; + mb->un.varCfgLnk.edtov = phba->fc_edtov; + mb->un.varCfgLnk.arbtov = phba->fc_arbtov; + mb->un.varCfgLnk.ratov = phba->fc_ratov; +@@ -239,7 +256,7 @@ + /* mailbox command */ + /**********************************************/ + int +-lpfc_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi) + { + struct lpfc_dmabuf *mp; + MAILBOX_t *mb; +@@ -270,6 +287,7 @@ + mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm); + mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys); + mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys); ++ mb->un.varRdSparm.vpi = vpi; + + /* save address for completion */ + pmb->context1 = mp; +@@ -282,7 +300,8 @@ + /* mailbox command */ + /********************************************/ + void +-lpfc_unreg_did(struct lpfc_hba * phba, uint32_t did, LPFC_MBOXQ_t * pmb) ++lpfc_unreg_did(struct lpfc_hba * phba, uint16_t vpi, uint32_t did, ++ LPFC_MBOXQ_t * pmb) + { + MAILBOX_t *mb; + +@@ -290,6 +309,7 @@ + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + + mb->un.varUnregDID.did = did; ++ mb->un.varUnregDID.vpi = vpi; + + mb->mbxCommand = MBX_UNREG_D_ID; + mb->mbxOwner = OWN_HOST; +@@ -335,19 +355,17 @@ + /* mailbox command */ + /********************************************/ + int +-lpfc_reg_login(struct lpfc_hba * phba, +- uint32_t did, uint8_t * param, LPFC_MBOXQ_t * pmb, uint32_t flag) ++lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did, ++ uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag) + { ++ MAILBOX_t *mb = &pmb->mb; + uint8_t *sparam; + struct lpfc_dmabuf *mp; +- MAILBOX_t *mb; +- struct lpfc_sli *psli; + +- psli = &phba->sli; +- mb = &pmb->mb; + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + + mb->un.varRegLogin.rpi = 0; ++ mb->un.varRegLogin.vpi = vpi; + mb->un.varRegLogin.did = did; + mb->un.varWords[30] = flag; /* Set flag to issue action on cmpl */ + +@@ -359,12 +377,10 @@ + kfree(mp); + mb->mbxCommand = MBX_REG_LOGIN64; + /* REG_LOGIN: no buffers */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_MBOX, +- "%d:0302 REG_LOGIN: no buffers Data x%x x%x\n", +- phba->brd_no, +- (uint32_t) did, (uint32_t) flag); ++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX, ++ "%d (%d):0302 REG_LOGIN: no buffers, DID x%x, " ++ "flag x%x\n", ++ 
phba->brd_no, vpi, did, flag);
+ 		return (1);
+ 	}
+ 	INIT_LIST_HEAD(&mp->list);
+@@ -389,7 +405,8 @@
+ /* mailbox command */
+ /**********************************************/
+ void
+-lpfc_unreg_login(struct lpfc_hba * phba, uint32_t rpi, LPFC_MBOXQ_t * pmb)
++lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi,
++	   LPFC_MBOXQ_t * pmb)
+ {
+ 	MAILBOX_t *mb;
+ 
+@@ -398,12 +415,52 @@
+ 
+ 	mb->un.varUnregLogin.rpi = (uint16_t) rpi;
+ 	mb->un.varUnregLogin.rsvd1 = 0;
++	mb->un.varUnregLogin.vpi = vpi;
+ 
+ 	mb->mbxCommand = MBX_UNREG_LOGIN;
+ 	mb->mbxOwner = OWN_HOST;
+ 	return;
+ }
+ 
++/**************************************************/
++/*  lpfc_reg_vpi   Issue a REG_VPI                */
++/*                 mailbox command                */
++/**************************************************/
++void
++lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
++	     LPFC_MBOXQ_t *pmb)
++{
++	MAILBOX_t *mb = &pmb->mb;
++
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++	mb->un.varRegVpi.vpi = vpi;
++	mb->un.varRegVpi.sid = sid;
++
++	mb->mbxCommand = MBX_REG_VPI;
++	mb->mbxOwner = OWN_HOST;
++	return;
++
++}
++
++/**************************************************/
++/*  lpfc_unreg_vpi   Issue a UNREG_VPI            */
++/*                   mailbox command              */
++/**************************************************/
++void
++lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb)
++{
++	MAILBOX_t *mb = &pmb->mb;
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++	mb->un.varUnregVpi.vpi = vpi;
++
++	mb->mbxCommand = MBX_UNREG_VPI;
++	mb->mbxOwner = OWN_HOST;
++	return;
++
++}
++
+ static void
+ lpfc_config_pcb_setup(struct lpfc_hba * phba)
+ {
+@@ -412,14 +469,18 @@
+ 	PCB_t *pcbp = &phba->slim2p->pcb;
+ 	dma_addr_t pdma_addr;
+ 	uint32_t offset;
+-	uint32_t iocbCnt;
++	uint32_t iocbCnt = 0;
+ 	int i;
+ 
+ 	pcbp->maxRing = (psli->num_rings - 1);
+ 
+-	iocbCnt = 0;
+ 	for (i = 0; i < psli->num_rings; i++) {
+ 		pring = &psli->ring[i];
++
++		pring->sizeCiocb = phba->sli_rev == 3 ? SLI3_IOCB_CMD_SIZE:
++							SLI2_IOCB_CMD_SIZE;
++		pring->sizeRiocb = phba->sli_rev == 3 ? 
SLI3_IOCB_RSP_SIZE: ++ SLI2_IOCB_RSP_SIZE; + /* A ring MUST have both cmd and rsp entries defined to be + valid */ + if ((pring->numCiocb == 0) || (pring->numRiocb == 0)) { +@@ -434,20 +495,18 @@ + continue; + } + /* Command ring setup for ring */ +- pring->cmdringaddr = +- (void *)&phba->slim2p->IOCBs[iocbCnt]; ++ pring->cmdringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt]; + pcbp->rdsc[i].cmdEntries = pring->numCiocb; + +- offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] - +- (uint8_t *)phba->slim2p; ++ offset = (uint8_t *) &phba->slim2p->IOCBs[iocbCnt] - ++ (uint8_t *) phba->slim2p; + pdma_addr = phba->slim2p_mapping + offset; + pcbp->rdsc[i].cmdAddrHigh = putPaddrHigh(pdma_addr); + pcbp->rdsc[i].cmdAddrLow = putPaddrLow(pdma_addr); + iocbCnt += pring->numCiocb; + + /* Response ring setup for ring */ +- pring->rspringaddr = +- (void *)&phba->slim2p->IOCBs[iocbCnt]; ++ pring->rspringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt]; + + pcbp->rdsc[i].rspEntries = pring->numRiocb; + offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] - +@@ -462,16 +521,108 @@ + void + lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) + { +- MAILBOX_t *mb; +- +- mb = &pmb->mb; ++ MAILBOX_t *mb = &pmb->mb; + memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); + mb->un.varRdRev.cv = 1; ++ mb->un.varRdRev.v3req = 1; /* Request SLI3 info */ + mb->mbxCommand = MBX_READ_REV; + mb->mbxOwner = OWN_HOST; + return; + } + ++static void ++lpfc_build_hbq_profile2(struct config_hbq_var *hbqmb, ++ struct lpfc_hbq_init *hbq_desc) ++{ ++ hbqmb->profiles.profile2.seqlenbcnt = hbq_desc->seqlenbcnt; ++ hbqmb->profiles.profile2.maxlen = hbq_desc->maxlen; ++ hbqmb->profiles.profile2.seqlenoff = hbq_desc->seqlenoff; ++} ++ ++static void ++lpfc_build_hbq_profile3(struct config_hbq_var *hbqmb, ++ struct lpfc_hbq_init *hbq_desc) ++{ ++ hbqmb->profiles.profile3.seqlenbcnt = hbq_desc->seqlenbcnt; ++ hbqmb->profiles.profile3.maxlen = hbq_desc->maxlen; ++ hbqmb->profiles.profile3.cmdcodeoff = hbq_desc->cmdcodeoff; ++ hbqmb->profiles.profile3.seqlenoff = hbq_desc->seqlenoff; ++ memcpy(&hbqmb->profiles.profile3.cmdmatch, hbq_desc->cmdmatch, ++ sizeof(hbqmb->profiles.profile3.cmdmatch)); ++} ++ ++static void ++lpfc_build_hbq_profile5(struct config_hbq_var *hbqmb, ++ struct lpfc_hbq_init *hbq_desc) ++{ ++ hbqmb->profiles.profile5.seqlenbcnt = hbq_desc->seqlenbcnt; ++ hbqmb->profiles.profile5.maxlen = hbq_desc->maxlen; ++ hbqmb->profiles.profile5.cmdcodeoff = hbq_desc->cmdcodeoff; ++ hbqmb->profiles.profile5.seqlenoff = hbq_desc->seqlenoff; ++ memcpy(&hbqmb->profiles.profile5.cmdmatch, hbq_desc->cmdmatch, ++ sizeof(hbqmb->profiles.profile5.cmdmatch)); ++} ++ ++void ++lpfc_config_hbq(struct lpfc_hba *phba, struct lpfc_hbq_init *hbq_desc, ++ uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb) ++{ ++ int i; ++ MAILBOX_t *mb = &pmb->mb; ++ struct config_hbq_var *hbqmb = &mb->un.varCfgHbq; ++ ++ memset(pmb, 0, sizeof (LPFC_MBOXQ_t)); ++ hbqmb->entry_count = hbq_desc->entry_count; /* # entries in HBQ */ ++ hbqmb->recvNotify = hbq_desc->rn; /* Receive ++ * Notification */ ++ hbqmb->numMask = hbq_desc->mask_count; /* # R_CTL/TYPE masks ++ * # in words 0-19 */ ++ hbqmb->profile = hbq_desc->profile; /* Selection profile: ++ * 0 = all, ++ * 7 = logentry */ ++ hbqmb->ringMask = hbq_desc->ring_mask; /* Binds HBQ to a ring ++ * e.g. 
Ring0=b0001, ++ * ring2=b0100 */ ++ hbqmb->headerLen = hbq_desc->headerLen; /* 0 if not profile 4 ++ * or 5 */ ++ hbqmb->logEntry = hbq_desc->logEntry; /* Set to 1 if this ++ * HBQ will be used ++ * for LogEntry ++ * buffers */ ++ hbqmb->hbqaddrLow = putPaddrLow(phba->hbqslimp.phys) + ++ hbq_entry_index * sizeof(struct lpfc_hbq_entry); ++ hbqmb->hbqaddrHigh = putPaddrHigh(phba->hbqslimp.phys); ++ ++ mb->mbxCommand = MBX_CONFIG_HBQ; ++ mb->mbxOwner = OWN_HOST; ++ ++ /* Copy info for profiles 2,3,5. Other ++ * profiles this area is reserved ++ */ ++ if (hbq_desc->profile == 2) ++ lpfc_build_hbq_profile2(hbqmb, hbq_desc); ++ else if (hbq_desc->profile == 3) ++ lpfc_build_hbq_profile3(hbqmb, hbq_desc); ++ else if (hbq_desc->profile == 5) ++ lpfc_build_hbq_profile5(hbqmb, hbq_desc); ++ ++ /* Return if no rctl / type masks for this HBQ */ ++ if (!hbq_desc->mask_count) ++ return; ++ ++ /* Otherwise we setup specific rctl / type masks for this HBQ */ ++ for (i = 0; i < hbq_desc->mask_count; i++) { ++ hbqmb->hbqMasks[i].tmatch = hbq_desc->hbqMasks[i].tmatch; ++ hbqmb->hbqMasks[i].tmask = hbq_desc->hbqMasks[i].tmask; ++ hbqmb->hbqMasks[i].rctlmatch = hbq_desc->hbqMasks[i].rctlmatch; ++ hbqmb->hbqMasks[i].rctlmask = hbq_desc->hbqMasks[i].rctlmask; ++ } ++ ++ return; ++} ++ ++ ++ + void + lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb) + { +@@ -514,15 +665,16 @@ + } + + void +-lpfc_config_port(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { ++ MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr; + MAILBOX_t *mb = &pmb->mb; + dma_addr_t pdma_addr; + uint32_t bar_low, bar_high; + size_t offset; + struct lpfc_hgp hgp; +- void __iomem *to_slim; + int i; ++ uint32_t pgp_offset; + + memset(pmb, 0, sizeof(LPFC_MBOXQ_t)); + mb->mbxCommand = MBX_CONFIG_PORT; +@@ -535,12 +687,29 @@ + mb->un.varCfgPort.pcbLow = putPaddrLow(pdma_addr); + mb->un.varCfgPort.pcbHigh = putPaddrHigh(pdma_addr); + ++ /* If HBA supports SLI=3 ask for it */ ++ ++ if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) { ++ mb->un.varCfgPort.cerbm = 1; /* Request HBQs */ ++ mb->un.varCfgPort.max_hbq = 1; /* Requesting 2 HBQs */ ++ if (phba->max_vpi && phba->cfg_npiv_enable && ++ phba->vpd.sli3Feat.cmv) { ++ mb->un.varCfgPort.max_vpi = phba->max_vpi; ++ mb->un.varCfgPort.cmv = 1; ++ phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED; ++ } else ++ mb->un.varCfgPort.max_vpi = phba->max_vpi = 0; ++ } else ++ phba->sli_rev = 2; ++ mb->un.varCfgPort.sli_mode = phba->sli_rev; ++ + /* Now setup pcb */ + phba->slim2p->pcb.type = TYPE_NATIVE_SLI2; + phba->slim2p->pcb.feature = FEATURE_INITIAL_SLI2; + + /* Setup Mailbox pointers */ +- phba->slim2p->pcb.mailBoxSize = sizeof(MAILBOX_t); ++ phba->slim2p->pcb.mailBoxSize = offsetof(MAILBOX_t, us) + ++ sizeof(struct sli2_desc); + offset = (uint8_t *)&phba->slim2p->mbx - (uint8_t *)phba->slim2p; + pdma_addr = phba->slim2p_mapping + offset; + phba->slim2p->pcb.mbAddrHigh = putPaddrHigh(pdma_addr); +@@ -568,29 +737,70 @@ + pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_0, &bar_low); + pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_1, &bar_high); + ++ /* ++ * Set up HGP - Port Memory ++ * ++ * The port expects the host get/put pointers to reside in memory ++ * following the "non-diagnostic" mode mailbox (32 words, 0x80 bytes) ++ * area of SLIM. In SLI-2 mode, there's an additional 16 reserved ++ * words (0x40 bytes). This area is not reserved if HBQs are ++ * configured in SLI-3. 
++ *
++ * CR0Put    - SLI2(no HBQs) = 0xc0, With HBQs = 0x80
++ * RR0Get                      0xc4              0x84
++ * CR1Put                      0xc8              0x88
++ * RR1Get                      0xcc              0x8c
++ * CR2Put                      0xd0              0x90
++ * RR2Get                      0xd4              0x94
++ * CR3Put                      0xd8              0x98
++ * RR3Get                      0xdc              0x9c
++ *
++ * Reserved                    0xa0-0xbf
++ *    If HBQs configured:
++ *                         HBQ 0 Put ptr  0xc0
++ *                         HBQ 1 Put ptr  0xc4
++ *                         HBQ 2 Put ptr  0xc8
++ *                         ......
++ *                         HBQ(M-1)Put Pointer 0xc0+(M-1)*4
++ *
++ */
++
++	if (phba->sli_rev == 3) {
++		phba->host_gp = &mb_slim->us.s3.host[0];
++		phba->hbq_put = &mb_slim->us.s3.hbq_put[0];
++	} else {
++		phba->host_gp = &mb_slim->us.s2.host[0];
++		phba->hbq_put = NULL;
++	}
+ 
+ 	/* mask off BAR0's flag bits 0 - 3 */
+ 	phba->slim2p->pcb.hgpAddrLow = (bar_low & PCI_BASE_ADDRESS_MEM_MASK) +
+-		(SLIMOFF*sizeof(uint32_t));
++		(void __iomem *) phba->host_gp -
++		(void __iomem *)phba->MBslimaddr;
+ 	if (bar_low & PCI_BASE_ADDRESS_MEM_TYPE_64)
+ 		phba->slim2p->pcb.hgpAddrHigh = bar_high;
+ 	else
+ 		phba->slim2p->pcb.hgpAddrHigh = 0;
+ 	/* write HGP data to SLIM at the required longword offset */
+ 	memset(&hgp, 0, sizeof(struct lpfc_hgp));
+-	to_slim = phba->MBslimaddr + (SLIMOFF*sizeof (uint32_t));
+ 
+ 	for (i=0; i < phba->sli.num_rings; i++) {
+-		lpfc_memcpy_to_slim(to_slim, &hgp, sizeof(struct lpfc_hgp));
+-		to_slim += sizeof (struct lpfc_hgp);
++		lpfc_memcpy_to_slim(phba->host_gp + i, &hgp,
++				    sizeof(*phba->host_gp));
+ 	}
+ 
+ 	/* Setup Port Group ring pointer */
+-	offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++	if (phba->sli_rev == 3)
++		pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s3_pgp.port -
+ 		(uint8_t *)phba->slim2p;
+-	pdma_addr = phba->slim2p_mapping + offset;
++	else
++		pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++			(uint8_t *)phba->slim2p;
++
++	pdma_addr = phba->slim2p_mapping + pgp_offset;
+ 	phba->slim2p->pcb.pgpAddrHigh = putPaddrHigh(pdma_addr);
+ 	phba->slim2p->pcb.pgpAddrLow = putPaddrLow(pdma_addr);
++	phba->hbq_get = &phba->slim2p->mbx.us.s3_pgp.hbq_get[0];
+ 
+ 	/* Use callback routine to setp rings in the pcb */
+ 	lpfc_config_pcb_setup(phba);
+@@ -606,11 +816,7 @@
+ 
+ 	/* Swap PCB if needed */
+ 	lpfc_sli_pcimem_bcopy(&phba->slim2p->pcb, &phba->slim2p->pcb,
+-			      sizeof (PCB_t));
+-
+-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+-		        "%d:0405 Service Level Interface (SLI) 2 selected\n",
+-		        phba->brd_no);
++			      sizeof(PCB_t));
+ }
+ 
+ void
+@@ -644,15 +850,23 @@
+ 	LPFC_MBOXQ_t *mbq = NULL;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 
+-	list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t,
+-			 list);
+-	if (mbq) {
++	list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t, list);
++	if (mbq)
+ 		psli->mboxq_cnt--;
+-	}
+ 
+ 	return mbq;
+ }
+ 
++void
++lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
++{
++	/* This function expects to be called from interrupt context */
++	spin_lock(&phba->hbalock);
++	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
++	spin_unlock(&phba->hbalock);
++	return;
++}
++
+ int
+ lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd)
+ {
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_mem.c	2007-12-21 15:36:12.000000000 -0500
+@@ -1,7 +1,7 @@
+ /*******************************************************************
+  * This file is part of the Emulex Linux Device Driver for         *
+  * Fibre Channel Host Bus Adapters.                                *
+- * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
++ * Copyright (C) 2004-2006 Emulex.  All rights reserved. 
* + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * Portions Copyright (C) 2004-2005 Christoph Hellwig * +@@ -38,10 +38,13 @@ + #define LPFC_MBUF_POOL_SIZE 64 /* max elements in MBUF safety pool */ + #define LPFC_MEM_POOL_SIZE 64 /* max elem in non-DMA safety pool */ + ++ ++ + int + lpfc_mem_alloc(struct lpfc_hba * phba) + { + struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; ++ int longs; + int i; + + phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool", +@@ -80,10 +83,27 @@ + if (!phba->nlp_mem_pool) + goto fail_free_mbox_pool; + ++ phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev, ++ LPFC_BPL_SIZE, 8, 0); ++ if (!phba->lpfc_hbq_pool) ++ goto fail_free_nlp_mem_pool; ++ ++ /* vpi zero is reserved for the physical port so add 1 to max */ ++ longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG; ++ phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL); ++ if (!phba->vpi_bmask) ++ goto fail_free_hbq_pool; ++ + return 0; + ++ fail_free_hbq_pool: ++ lpfc_sli_hbqbuf_free_all(phba); ++ fail_free_nlp_mem_pool: ++ mempool_destroy(phba->nlp_mem_pool); ++ phba->nlp_mem_pool = NULL; + fail_free_mbox_pool: + mempool_destroy(phba->mbox_mem_pool); ++ phba->mbox_mem_pool = NULL; + fail_free_mbuf_pool: + while (i--) + pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt, +@@ -91,8 +111,10 @@ + kfree(pool->elements); + fail_free_lpfc_mbuf_pool: + pci_pool_destroy(phba->lpfc_mbuf_pool); ++ phba->lpfc_mbuf_pool = NULL; + fail_free_dma_buf_pool: + pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); ++ phba->lpfc_scsi_dma_buf_pool = NULL; + fail: + return -ENOMEM; + } +@@ -106,6 +128,9 @@ + struct lpfc_dmabuf *mp; + int i; + ++ kfree(phba->vpi_bmask); ++ lpfc_sli_hbqbuf_free_all(phba); ++ + list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) { + mp = (struct lpfc_dmabuf *) (mbox->context1); + if (mp) { +@@ -115,6 +140,15 @@ + list_del(&mbox->list); + mempool_free(mbox, phba->mbox_mem_pool); + } ++ list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) { ++ mp = (struct lpfc_dmabuf *) (mbox->context1); ++ if (mp) { ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++ } ++ list_del(&mbox->list); ++ mempool_free(mbox, phba->mbox_mem_pool); ++ } + + psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; + if (psli->mbox_active) { +@@ -132,12 +166,20 @@ + pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt, + pool->elements[i].phys); + kfree(pool->elements); ++ ++ pci_pool_destroy(phba->lpfc_hbq_pool); + mempool_destroy(phba->nlp_mem_pool); + mempool_destroy(phba->mbox_mem_pool); + + pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool); + pci_pool_destroy(phba->lpfc_mbuf_pool); + ++ phba->lpfc_hbq_pool = NULL; ++ phba->nlp_mem_pool = NULL; ++ phba->mbox_mem_pool = NULL; ++ phba->lpfc_scsi_dma_buf_pool = NULL; ++ phba->lpfc_mbuf_pool = NULL; ++ + /* Free the iocb lookup array */ + kfree(psli->iocbq_lookup); + psli->iocbq_lookup = NULL; +@@ -148,20 +190,23 @@ + lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) + { + struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; ++ unsigned long iflags; + void *ret; + + ret = pci_pool_alloc(phba->lpfc_mbuf_pool, GFP_KERNEL, handle); + +- if (!ret && ( mem_flags & MEM_PRI) && pool->current_count) { ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ if (!ret && (mem_flags & MEM_PRI) && pool->current_count) { + pool->current_count--; + ret = pool->elements[pool->current_count].virt; + *handle = pool->elements[pool->current_count].phys; + } 
++ spin_unlock_irqrestore(&phba->hbalock, iflags); + return ret; + } + + void +-lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) ++__lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) + { + struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool; + +@@ -174,3 +219,51 @@ + } + return; + } ++ ++void ++lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma) ++{ ++ unsigned long iflags; ++ ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ __lpfc_mbuf_free(phba, virt, dma); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ return; ++} ++ ++void * ++lpfc_hbq_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle) ++{ ++ void *ret; ++ ret = pci_pool_alloc(phba->lpfc_hbq_pool, GFP_ATOMIC, handle); ++ return ret; ++} ++ ++void ++lpfc_hbq_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma) ++{ ++ pci_pool_free(phba->lpfc_hbq_pool, virt, dma); ++ return; ++} ++ ++void ++lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp) ++{ ++ struct hbq_dmabuf *hbq_entry; ++ ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { ++ hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf); ++ if (hbq_entry->tag == -1) { ++ lpfc_hbq_free(phba, hbq_entry->dbuf.virt, ++ hbq_entry->dbuf.phys); ++ kfree(hbq_entry); ++ } else { ++ lpfc_sli_free_hbq(phba, hbq_entry); ++ } ++ } else { ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++ } ++ return; ++} ++ +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_nportdisc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,4 +1,4 @@ +-/******************************************************************* ++ /******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * + * Copyright (C) 2004-2007 Emulex. All rights reserved. * +@@ -35,20 +35,22 @@ + #include "lpfc.h" + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" ++#include "lpfc_debugfs.h" + + + /* Called to verify a rcv'ed ADISC was intended for us. */ + static int +-lpfc_check_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, +- struct lpfc_name * nn, struct lpfc_name * pn) ++lpfc_check_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ struct lpfc_name *nn, struct lpfc_name *pn) + { + /* Compare the ADISC rsp WWNN / WWPN matches our internal node + * table entry for that node. 
+ */ +- if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)) != 0) ++ if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name))) + return 0; + +- if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)) != 0) ++ if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name))) + return 0; + + /* we match, return success */ +@@ -56,11 +58,10 @@ + } + + int +-lpfc_check_sparm(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, struct serv_parm * sp, +- uint32_t class) ++lpfc_check_sparm(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ struct serv_parm * sp, uint32_t class) + { +- volatile struct serv_parm *hsp = &phba->fc_sparam; ++ volatile struct serv_parm *hsp = &vport->fc_sparam; + uint16_t hsp_value, ssp_value = 0; + + /* +@@ -75,12 +76,14 @@ + hsp->cls1.rcvDataSizeLsb; + ssp_value = (sp->cls1.rcvDataSizeMsb << 8) | + sp->cls1.rcvDataSizeLsb; ++ if (!ssp_value) ++ goto bad_service_param; + if (ssp_value > hsp_value) { + sp->cls1.rcvDataSizeLsb = hsp->cls1.rcvDataSizeLsb; + sp->cls1.rcvDataSizeMsb = hsp->cls1.rcvDataSizeMsb; + } + } else if (class == CLASS1) { +- return 0; ++ goto bad_service_param; + } + + if (sp->cls2.classValid) { +@@ -88,12 +91,14 @@ + hsp->cls2.rcvDataSizeLsb; + ssp_value = (sp->cls2.rcvDataSizeMsb << 8) | + sp->cls2.rcvDataSizeLsb; ++ if (!ssp_value) ++ goto bad_service_param; + if (ssp_value > hsp_value) { + sp->cls2.rcvDataSizeLsb = hsp->cls2.rcvDataSizeLsb; + sp->cls2.rcvDataSizeMsb = hsp->cls2.rcvDataSizeMsb; + } + } else if (class == CLASS2) { +- return 0; ++ goto bad_service_param; + } + + if (sp->cls3.classValid) { +@@ -101,12 +106,14 @@ + hsp->cls3.rcvDataSizeLsb; + ssp_value = (sp->cls3.rcvDataSizeMsb << 8) | + sp->cls3.rcvDataSizeLsb; ++ if (!ssp_value) ++ goto bad_service_param; + if (ssp_value > hsp_value) { + sp->cls3.rcvDataSizeLsb = hsp->cls3.rcvDataSizeLsb; + sp->cls3.rcvDataSizeMsb = hsp->cls3.rcvDataSizeMsb; + } + } else if (class == CLASS3) { +- return 0; ++ goto bad_service_param; + } + + /* +@@ -125,11 +132,21 @@ + memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name)); + memcpy(&ndlp->nlp_portname, &sp->portName, sizeof (struct lpfc_name)); + return 1; ++bad_service_param: ++ lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0207 Device %x " ++ "(%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x) sent " ++ "invalid service parameters. Ignoring device.\n", ++ vport->phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID, ++ sp->nodeName.u.wwn[0], sp->nodeName.u.wwn[1], ++ sp->nodeName.u.wwn[2], sp->nodeName.u.wwn[3], ++ sp->nodeName.u.wwn[4], sp->nodeName.u.wwn[5], ++ sp->nodeName.u.wwn[6], sp->nodeName.u.wwn[7]); ++ return 0; + } + + static void * +-lpfc_check_elscmpl_iocb(struct lpfc_hba * phba, +- struct lpfc_iocbq *cmdiocb, ++lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, + struct lpfc_iocbq *rspiocb) + { + struct lpfc_dmabuf *pcmd, *prsp; +@@ -168,32 +185,29 @@ + * routine effectively results in a "software abort". 
+ */
+ int
+-lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ 	LIST_HEAD(completions);
+-	struct lpfc_sli *psli;
+-	struct lpfc_sli_ring *pring;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	struct lpfc_iocbq *iocb, *next_iocb;
+ 	IOCB_t *cmd;
+ 
+ 	/* Abort outstanding I/O on NPort */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0205 Abort outstanding I/O on NPort x%x "
++			"%d (%d):0205 Abort outstanding I/O on NPort x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
+-			ndlp->nlp_state, ndlp->nlp_rpi);
++			phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID,
++			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];
++	lpfc_fabric_abort_nport(ndlp);
+ 
+ 	/* First check the txq */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
+-		/* Check to see if iocb matches the nport we are looking
+-		   for */
++		/* Check to see if iocb matches the nport we are looking for */
+ 		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+-			/* It matches, so deque and call compl with an
+-			   error */
++			/* It matches, so dequeue and call compl with an error */
+ 			list_move_tail(&iocb->list, &completions);
+ 			pring->txq_cnt--;
+ 		}
+@@ -201,37 +215,39 @@
+ 
+ 	/* Next check the txcmplq */
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
+-		/* Check to see if iocb matches the nport we are looking
+-		   for */
+-		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp))
++		/* Check to see if iocb matches the nport we are looking for */
++		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+ 			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	}
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	while (!list_empty(&completions)) {
+ 		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ 		cmd = &iocb->iocb;
+-		list_del(&iocb->list);
++		list_del_init(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ 			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
++		}
+ 	}
+ 
+ 	/* If we are delaying issuing an ELS command, cancel it */
+ 	if (ndlp->nlp_flag & NLP_DELAY_TMO)
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(phba->pport, ndlp);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_rcv_plogi(struct lpfc_hba * phba,
+-	       struct lpfc_nodelist * ndlp,
++lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 	       struct lpfc_iocbq *cmdiocb)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+ 	IOCB_t *icmd;
+@@ -241,14 +257,14 @@
+ 	int rc;
+ 
+ 	memset(&stat, 0, sizeof (struct ls_rjt));
+-	if (phba->hba_state <= LPFC_FLOGI) {
++	if (vport->port_state <= LPFC_FLOGI) {
+ 		/* Before responding to PLOGI, check for pt2pt mode.
+ 		 * If we are pt2pt, with an outstanding FLOGI, abort
+ 		 * the FLOGI and resend it first. 
+ */ +- if (phba->fc_flag & FC_PT2PT) { ++ if (vport->fc_flag & FC_PT2PT) { + lpfc_els_abort_flogi(phba); +- if (!(phba->fc_flag & FC_PT2PT_PLOGI)) { ++ if (!(vport->fc_flag & FC_PT2PT_PLOGI)) { + /* If the other side is supposed to initiate + * the PLOGI anyway, just ACC it now and + * move on with discovery. +@@ -257,45 +273,42 @@ + phba->fc_ratov = FF_DEF_RATOV; + /* Start discovery - this should just do + CLEAR_LA */ +- lpfc_disc_start(phba); +- } else { +- lpfc_initial_flogi(phba); +- } ++ lpfc_disc_start(vport); ++ } else ++ lpfc_initial_flogi(vport); + } else { + stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY; + stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, +- ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ++ ndlp, NULL); + return 0; + } + } + pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + lp = (uint32_t *) pcmd->virt; + sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); +- if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3) == 0)) { ++ if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3) == 0)) { + /* Reject this request because invalid parameters */ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + return 0; + } + icmd = &cmdiocb->iocb; + + /* PLOGI chkparm OK */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_ELS, +- "%d:0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n", +- phba->brd_no, ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, + ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag, + ndlp->nlp_rpi); + +- if ((phba->cfg_fcp_class == 2) && +- (sp->cls2.classValid)) { ++ if (phba->cfg_fcp_class == 2 && sp->cls2.classValid) + ndlp->nlp_fcp_info |= CLASS2; +- } else { ++ else + ndlp->nlp_fcp_info |= CLASS3; +- } ++ + ndlp->nlp_class_sup = 0; + if (sp->cls1.classValid) + ndlp->nlp_class_sup |= FC_COS_CLASS1; +@@ -317,35 +330,37 @@ + case NLP_STE_PRLI_ISSUE: + case NLP_STE_UNMAPPED_NODE: + case NLP_STE_MAPPED_NODE: +- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0); + return 1; + } + +- if ((phba->fc_flag & FC_PT2PT) +- && !(phba->fc_flag & FC_PT2PT_PLOGI)) { ++ if ((vport->fc_flag & FC_PT2PT) && ++ !(vport->fc_flag & FC_PT2PT_PLOGI)) { + /* rcv'ed PLOGI decides what our NPortId will be */ +- phba->fc_myDID = icmd->un.rcvels.parmRo; ++ vport->fc_myDID = icmd->un.rcvels.parmRo; + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (mbox == NULL) + goto out; + lpfc_config_link(phba, mbox); + mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ mbox->vport = vport; + rc = lpfc_sli_issue_mbox + (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB)); + if (rc == MBX_NOT_FINISHED) { +- mempool_free( mbox, phba->mbox_mem_pool); ++ mempool_free(mbox, phba->mbox_mem_pool); + goto out; + } + +- lpfc_can_disctmo(phba); ++ lpfc_can_disctmo(vport); + } + mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (mbox == NULL) ++ if (!mbox) + goto out; + +- if (lpfc_reg_login(phba, icmd->un.rcvels.remoteID, +- (uint8_t *) sp, mbox, 0)) { +- mempool_free( mbox, phba->mbox_mem_pool); ++ rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID, ++ (uint8_t *) sp, mbox, 0); ++ if (rc) { ++ mempool_free(mbox, phba->mbox_mem_pool); + goto out; + } + +@@ -357,7 +372,10 @@ + * 
mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox + * command issued in lpfc_cmpl_els_acc(). + */ ++ mbox->vport = vport; ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI); ++ spin_unlock_irq(shost->host_lock); + + /* + * If there is an outstanding PLOGI issued, abort it before +@@ -373,21 +391,38 @@ + lpfc_els_abort(phba, ndlp); + } + +- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0); ++ if ((vport->port_type == LPFC_NPIV_PORT && ++ phba->cfg_vport_restrict_login)) { ++ ++ /* In order to preserve RPIs, we want to cleanup ++ * the default RPI the firmware created to rcv ++ * this ELS request. The only way to do this is ++ * to register, then unregister the RPI. ++ */ ++ spin_lock_irq(shost->host_lock); ++ ndlp->nlp_flag |= NLP_RM_DFLT_RPI; ++ spin_unlock_irq(shost->host_lock); ++ stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD; ++ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ++ ndlp, mbox); ++ return 1; ++ } ++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0); + return 1; + + out: + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + return 0; + } + + static int +-lpfc_rcv_padisc(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, ++lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct lpfc_iocbq *cmdiocb) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_dmabuf *pcmd; + struct serv_parm *sp; + struct lpfc_name *pnn, *ppn; +@@ -412,12 +447,11 @@ + } + + icmd = &cmdiocb->iocb; +- if ((icmd->ulpStatus == 0) && +- (lpfc_check_adisc(phba, ndlp, pnn, ppn))) { ++ if (icmd->ulpStatus == 0 && lpfc_check_adisc(vport, ndlp, pnn, ppn)) { + if (cmd == ELS_CMD_ADISC) { +- lpfc_els_rsp_adisc_acc(phba, cmdiocb, ndlp); ++ lpfc_els_rsp_adisc_acc(vport, cmdiocb, ndlp); + } else { +- lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, ++ lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, + NULL, 0); + } + return 1; +@@ -427,55 +461,57 @@ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS; + stat.un.b.vendorUnique = 0; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + + /* 1 sec timeout */ + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + return 0; + } + + static int +-lpfc_rcv_logo(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, +- struct lpfc_iocbq *cmdiocb, +- uint32_t els_cmd) ++lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ struct lpfc_iocbq *cmdiocb, uint32_t els_cmd) + { +- /* Put ndlp on NPR list with 1 sec timeout for plogi, ACC logo */ ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ /* Put ndlp in NPR state with 1 sec timeout for plogi, ACC logo */ + /* Only call LOGO ACC for first LOGO, this avoids sending unnecessary + * PLOGIs during LOGO storms from a device. 
+ */ ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_LOGO_ACC; ++ spin_unlock_irq(shost->host_lock); + if (els_cmd == ELS_CMD_PRLO) +- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + else +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + + if (!(ndlp->nlp_type & NLP_FABRIC) || + (ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) { + /* Only try to re-login if this is NOT a Fabric Node */ + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + } else { + ndlp->nlp_prev_state = ndlp->nlp_state; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + } + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + /* The driver has to wait until the ACC completes before it continues + * processing the LOGO. The action will resume in + * lpfc_cmpl_els_logo_acc routine. Since part of processing includes an +@@ -485,8 +521,7 @@ + } + + static void +-lpfc_rcv_prli(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, ++lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct lpfc_iocbq *cmdiocb) + { + struct lpfc_dmabuf *pcmd; +@@ -501,8 +536,7 @@ + + ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR); + ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE; +- if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) && +- (npr->prliType == PRLI_FCP_TYPE)) { ++ if (npr->prliType == PRLI_FCP_TYPE) { + if (npr->initiatorFunc) + ndlp->nlp_type |= NLP_FCP_INITIATOR; + if (npr->targetFunc) +@@ -517,36 +551,42 @@ + roles |= FC_RPORT_ROLE_FCP_INITIATOR; + if (ndlp->nlp_type & NLP_FCP_TARGET) + roles |= FC_RPORT_ROLE_FCP_TARGET; ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT, ++ "rport rolechg: role:x%x did:x%x flg:x%x", ++ roles, ndlp->nlp_DID, ndlp->nlp_flag); ++ + fc_remote_port_rolechg(rport, roles); + } + } + + static uint32_t +-lpfc_disc_set_adisc(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp) ++lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ + /* Check config parameter use-adisc or FCP-2 */ +- if ((phba->cfg_use_adisc == 0) && +- !(phba->fc_flag & FC_RSCN_MODE)) { +- if (!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE)) +- return 0; +- } +- spin_lock_irq(phba->host->host_lock); ++ if ((phba->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || ++ ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + return 1; ++ } ++ ndlp->nlp_flag &= ~NLP_NPR_ADISC; ++ lpfc_unreg_rpi(vport, ndlp); ++ return 0; + } + + static uint32_t +-lpfc_disc_illegal(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, 
++ void *arg, uint32_t evt) + { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0253 Illegal State Transition: node x%x event x%x, " +- "state x%x Data: x%x x%x\n", +- phba->brd_no, ++ lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0253 Illegal State Transition: node x%x " ++ "event x%x, state x%x Data: x%x x%x\n", ++ vport->phba->brd_no, vport->vpi, + ndlp->nlp_DID, evt, ndlp->nlp_state, ndlp->nlp_rpi, + ndlp->nlp_flag); + return ndlp->nlp_state; +@@ -555,150 +595,161 @@ + /* Start of Discovery State Machine routines */ + + static uint32_t +-lpfc_rcv_plogi_unused_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_plogi_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; + +- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { ++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) { + ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + return ndlp->nlp_state; + } +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + + static uint32_t +-lpfc_rcv_els_unused_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_els_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- lpfc_issue_els_logo(phba, ndlp, 0); +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); ++ lpfc_issue_els_logo(vport, ndlp, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_unused_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_LOGO_ACC; +- spin_unlock_irq(phba->host->host_lock); +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_logo_unused_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_cmpl_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + + static uint32_t +-lpfc_device_rm_unused_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_device_rm_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + + static uint32_t +-lpfc_rcv_plogi_plogi_issue(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, ++lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + void *arg, uint32_t evt) + { ++ struct lpfc_hba *phba = 
vport->phba; + struct lpfc_iocbq *cmdiocb = arg; +- struct lpfc_dmabuf *pcmd; +- struct serv_parm *sp; +- uint32_t *lp; ++ struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; ++ uint32_t *lp = (uint32_t *) pcmd->virt; ++ struct serv_parm *sp = (struct serv_parm *) (lp + 1); + struct ls_rjt stat; + int port_cmp; + +- pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; +- lp = (uint32_t *) pcmd->virt; +- sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); +- + memset(&stat, 0, sizeof (struct ls_rjt)); + + /* For a PLOGI, we only accept if our portname is less + * than the remote portname. + */ + phba->fc_stat.elsLogiCol++; +- port_cmp = memcmp(&phba->fc_portname, &sp->portName, +- sizeof (struct lpfc_name)); ++ port_cmp = memcmp(&vport->fc_portname, &sp->portName, ++ sizeof(struct lpfc_name)); + + if (port_cmp >= 0) { + /* Reject this request because the remote node will accept + ours */ + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, ++ NULL); + } else { +- lpfc_rcv_plogi(phba, ndlp, cmdiocb); +- } /* if our portname was less */ ++ lpfc_rcv_plogi(vport, ndlp, cmdiocb); ++ } /* If our portname was less */ + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_plogi_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prli_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; ++ struct ls_rjt stat; + +- cmdiocb = (struct lpfc_iocbq *) arg; ++ memset(&stat, 0, sizeof (struct ls_rjt)); ++ stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY; ++ stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); ++ return ndlp->nlp_state; ++} ++ ++static uint32_t ++lpfc_rcv_logo_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) ++{ ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + + /* software abort outstanding PLOGI */ +- lpfc_els_abort(phba, ndlp); ++ lpfc_els_abort(vport->phba, ndlp); + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_els_plogi_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_els_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + + /* software abort outstanding PLOGI */ + lpfc_els_abort(phba, ndlp); + + if (evt == NLP_EVT_RCV_LOGO) { +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + } else { +- lpfc_issue_els_logo(phba, ndlp, 0); ++ lpfc_issue_els_logo(vport, ndlp, 0); + } + +- /* Put ndlp in npr list set plogi timer for 1 sec */ ++ /* Put ndlp in npr state set plogi timer for 1 sec */ + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; +- 
spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_plogi_plogi_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb, *rspiocb; + struct lpfc_dmabuf *pcmd, *prsp, *mp; + uint32_t *lp; +@@ -721,31 +772,26 @@ + + pcmd = (struct lpfc_dmabuf *) cmdiocb->context2; + +- prsp = list_get_first(&pcmd->list, +- struct lpfc_dmabuf, +- list); +- lp = (uint32_t *) prsp->virt; ++ prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list); + ++ lp = (uint32_t *) prsp->virt; + sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t)); +- if (!lpfc_check_sparm(phba, ndlp, sp, CLASS3)) ++ if (!lpfc_check_sparm(vport, ndlp, sp, CLASS3)) + goto out; + + /* PLOGI chkparm OK */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_ELS, +- "%d:0121 PLOGI chkparm OK " ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (%d):0121 PLOGI chkparm OK " + "Data: x%x x%x x%x x%x\n", +- phba->brd_no, ++ phba->brd_no, vport->vpi, + ndlp->nlp_DID, ndlp->nlp_state, + ndlp->nlp_flag, ndlp->nlp_rpi); + +- if ((phba->cfg_fcp_class == 2) && +- (sp->cls2.classValid)) { ++ if (phba->cfg_fcp_class == 2 && (sp->cls2.classValid)) + ndlp->nlp_fcp_info |= CLASS2; +- } else { ++ else + ndlp->nlp_fcp_info |= CLASS3; +- } ++ + ndlp->nlp_class_sup = 0; + if (sp->cls1.classValid) + ndlp->nlp_class_sup |= FC_COS_CLASS1; +@@ -756,16 +802,23 @@ + if (sp->cls4.classValid) + ndlp->nlp_class_sup |= FC_COS_CLASS4; + ndlp->nlp_maxframe = +- ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | +- sp->cmn.bbRcvSizeLsb; ++ ((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb; + +- if (!(mbox = mempool_alloc(phba->mbox_mem_pool, +- GFP_KERNEL))) ++ mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!mbox) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0133 PLOGI: no memory for reg_login " ++ "Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, ++ ndlp->nlp_DID, ndlp->nlp_state, ++ ndlp->nlp_flag, ndlp->nlp_rpi); + goto out; ++ } ++ ++ lpfc_unreg_rpi(vport, ndlp); + +- lpfc_unreg_rpi(phba, ndlp); +- if (lpfc_reg_login(phba, irsp->un.elsreq64.remoteID, (uint8_t *) sp, +- mbox, 0) == 0) { ++ if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID, ++ (uint8_t *) sp, mbox, 0) == 0) { + switch (ndlp->nlp_DID) { + case NameServer_DID: + mbox->mbox_cmpl = lpfc_mbx_cmpl_ns_reg_login; +@@ -777,68 +830,104 @@ + mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login; + } + mbox->context2 = lpfc_nlp_get(ndlp); ++ mbox->vport = vport; + if (lpfc_sli_issue_mbox(phba, mbox, + (MBX_NOWAIT | MBX_STOP_IOCB)) + != MBX_NOT_FINISHED) { +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE); ++ lpfc_nlp_set_state(vport, ndlp, ++ NLP_STE_REG_LOGIN_ISSUE); + return ndlp->nlp_state; + } + lpfc_nlp_put(ndlp); +- mp = (struct lpfc_dmabuf *)mbox->context1; ++ mp = (struct lpfc_dmabuf *) mbox->context1; + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + mempool_free(mbox, phba->mbox_mem_pool); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0134 PLOGI: cannot issue reg_login " ++ "Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, ++ ndlp->nlp_DID, 
ndlp->nlp_state, ++ ndlp->nlp_flag, ndlp->nlp_rpi); + } else { + mempool_free(mbox, phba->mbox_mem_pool); ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0135 PLOGI: cannot format reg_login " ++ "Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vport->vpi, ++ ndlp->nlp_DID, ndlp->nlp_state, ++ ndlp->nlp_flag, ndlp->nlp_rpi); + } + + +- out: ++out: ++ if (ndlp->nlp_DID == NameServer_DID) { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0261 Cannot Register NameServer login\n", ++ phba->brd_no, vport->vpi); ++ } ++ + /* Free this node since the driver cannot login or has the wrong + sparm */ +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + + static uint32_t +-lpfc_device_rm_plogi_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_device_rm_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NODEV_REMOVE; ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; +- } +- else { ++ } else { + /* software abort outstanding PLOGI */ +- lpfc_els_abort(phba, ndlp); ++ lpfc_els_abort(vport->phba, ndlp); + +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + } + + static uint32_t +-lpfc_device_recov_plogi_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_device_recov_plogi_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ ++ /* Don't do anything that will mess up processing of the ++ * previous RSCN. 
++ */ ++ if (vport->fc_flag & FC_RSCN_DEFERRED) ++ return ndlp->nlp_state; ++ + /* software abort outstanding PLOGI */ + lpfc_els_abort(phba, ndlp); + + ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_plogi_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb; + + /* software abort outstanding ADISC */ +@@ -846,34 +935,31 @@ + + cmdiocb = (struct lpfc_iocbq *) arg; + +- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { ++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) + return ndlp->nlp_state; +- } ++ + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_prli_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); ++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_logo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; +@@ -881,42 +967,43 @@ + /* software abort outstanding ADISC */ + lpfc_els_abort(phba, ndlp); + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_padisc_adisc_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prlo_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_prlo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; + + /* Treat like rcv logo */ +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_adisc_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) 
++lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *cmdiocb, *rspiocb; + IOCB_t *irsp; + ADISC *ap; +@@ -928,101 +1015,112 @@ + irsp = &rspiocb->iocb; + + if ((irsp->ulpStatus) || +- (!lpfc_check_adisc(phba, ndlp, &ap->nodeName, &ap->portName))) { ++ (!lpfc_check_adisc(vport, ndlp, &ap->nodeName, &ap->portName))) { + /* 1 sec timeout */ + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + +- memset(&ndlp->nlp_nodename, 0, sizeof (struct lpfc_name)); +- memset(&ndlp->nlp_portname, 0, sizeof (struct lpfc_name)); ++ memset(&ndlp->nlp_nodename, 0, sizeof(struct lpfc_name)); ++ memset(&ndlp->nlp_portname, 0, sizeof(struct lpfc_name)); + + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ lpfc_unreg_rpi(vport, ndlp); + return ndlp->nlp_state; + } + + if (ndlp->nlp_type & NLP_FCP_TARGET) { + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); + } else { + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_device_rm_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_device_rm_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NODEV_REMOVE; ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; +- } +- else { ++ } else { + /* software abort outstanding ADISC */ +- lpfc_els_abort(phba, ndlp); ++ lpfc_els_abort(vport->phba, ndlp); + +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + } + + static uint32_t +-lpfc_device_recov_adisc_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_device_recov_adisc_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ ++ /* Don't do anything that will mess up processing of the ++ * previous RSCN. 
++ */ ++ if (vport->fc_flag & FC_RSCN_DEFERRED) ++ return ndlp->nlp_state; ++ + /* software abort outstanding ADISC */ + lpfc_els_abort(phba, ndlp); + + ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- ndlp->nlp_flag |= NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); +- ++ spin_unlock_irq(shost->host_lock); ++ lpfc_disc_set_adisc(vport, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_rcv_plogi_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_plogi(phba, ndlp, cmdiocb); ++ lpfc_rcv_plogi(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); ++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + LPFC_MBOXQ_t *mb; + LPFC_MBOXQ_t *nextmb; + struct lpfc_dmabuf *mp; +@@ -1033,12 +1131,13 @@ + if ((mb = phba->sli.mbox_active)) { + if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && + (ndlp == (struct lpfc_nodelist *) mb->context2)) { ++ lpfc_nlp_put(ndlp); + mb->context2 = NULL; + mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + } + } + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) { + if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) && + (ndlp == (struct lpfc_nodelist *) mb->context2)) { +@@ -1047,61 +1146,61 @@ + lpfc_mbuf_free(phba, mp->virt, mp->phys); + kfree(mp); + } ++ lpfc_nlp_put(ndlp); + list_del(&mb->list); + mempool_free(mb, phba->mbox_mem_pool); + } + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_rcv_padisc_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prlo_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * 
ndlp, void *arg, ++lpfc_rcv_prlo_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; +- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, +- void *arg, uint32_t evt) ++lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, ++ uint32_t evt) + { +- LPFC_MBOXQ_t *pmb; +- MAILBOX_t *mb; +- uint32_t did; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; ++ MAILBOX_t *mb = &pmb->mb; ++ uint32_t did = mb->un.varWords[1]; + +- pmb = (LPFC_MBOXQ_t *) arg; +- mb = &pmb->mb; +- did = mb->un.varWords[1]; + if (mb->mbxStatus) { + /* RegLogin failed */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_DISCOVERY, +- "%d:0246 RegLogin failed Data: x%x x%x x%x\n", +- phba->brd_no, +- did, mb->mbxStatus, phba->hba_state); ++ lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY, ++ "%d (%d):0246 RegLogin failed Data: x%x x%x " ++ "x%x\n", ++ phba->brd_no, vport->vpi, ++ did, mb->mbxStatus, vport->port_state); + + /* + * If RegLogin failed due to lack of HBA resources do not +@@ -1109,20 +1208,20 @@ + */ + if (mb->mbxStatus == MBXERR_RPI_FULL) { + ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); + return ndlp->nlp_state; + } + +- /* Put ndlp in npr list set plogi timer for 1 sec */ ++ /* Put ndlp in npr state set plogi timer for 1 sec */ + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + +- lpfc_issue_els_logo(phba, ndlp, 0); ++ lpfc_issue_els_logo(vport, ndlp, 0); + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + return ndlp->nlp_state; + } + +@@ -1131,91 +1230,99 @@ + /* Only if we are not a fabric nport do we issue PRLI */ + if (!(ndlp->nlp_type & NLP_FABRIC)) { + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE); +- lpfc_issue_els_prli(phba, ndlp, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); ++ lpfc_issue_els_prli(vport, ndlp, 0); + } else { + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_device_rm_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_device_rm_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { +- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NODEV_REMOVE; ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; +- } +- else { +- lpfc_drop_node(phba, ndlp); ++ } else { ++ 
lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + } + + static uint32_t +-lpfc_device_recov_reglogin_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ /* Don't do anything that will mess up processing of the ++ * previous RSCN. ++ */ ++ if (vport->fc_flag & FC_RSCN_DEFERRED) ++ return ndlp->nlp_state; ++ + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_disc_set_adisc(vport, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_plogi_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb; + + cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_plogi(phba, ndlp, cmdiocb); ++ lpfc_rcv_plogi(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); ++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_logo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + + /* Software abort outstanding PRLI before sending acc */ +- lpfc_els_abort(phba, ndlp); ++ lpfc_els_abort(vport->phba, ndlp); + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_padisc_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + +@@ -1225,21 +1332,22 @@ + * NEXT STATE = PRLI_ISSUE + */ + static uint32_t +-lpfc_rcv_prlo_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prlo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); ++ 
lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_prli_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_iocbq *cmdiocb, *rspiocb; ++ struct lpfc_hba *phba = vport->phba; + IOCB_t *irsp; + PRLI *npr; + +@@ -1249,8 +1357,12 @@ + + irsp = &rspiocb->iocb; + if (irsp->ulpStatus) { ++ if ((vport->port_type == LPFC_NPIV_PORT) && ++ phba->cfg_vport_restrict_login) { ++ goto out; ++ } + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + return ndlp->nlp_state; + } + +@@ -1266,9 +1378,25 @@ + if (npr->Retry) + ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE; + } ++ if (!(ndlp->nlp_type & NLP_FCP_TARGET) && ++ (vport->port_type == LPFC_NPIV_PORT) && ++ phba->cfg_vport_restrict_login) { ++out: ++ spin_lock_irq(shost->host_lock); ++ ndlp->nlp_flag |= NLP_TARGET_REMOVE; ++ spin_unlock_irq(shost->host_lock); ++ lpfc_issue_els_logo(vport, ndlp, 0); ++ ++ ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE); ++ return ndlp->nlp_state; ++ } + + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE); ++ if (ndlp->nlp_type & NLP_FCP_TARGET) ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE); ++ else ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE); + return ndlp->nlp_state; + } + +@@ -1289,19 +1417,23 @@ + * on plogi list so it can be freed when LOGO completes. + * + */ ++ + static uint32_t +-lpfc_device_rm_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_device_rm_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- if(ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NODEV_REMOVE; ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; +- } +- else { ++ } else { + /* software abort outstanding PLOGI */ +- lpfc_els_abort(phba, ndlp); ++ lpfc_els_abort(vport->phba, ndlp); + +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + } +@@ -1324,261 +1456,251 @@ + * outstanding PRLI command, then free the node entry. + */ + static uint32_t +-lpfc_device_recov_prli_issue(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_device_recov_prli_issue(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, ++ uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_hba *phba = vport->phba; ++ ++ /* Don't do anything that will mess up processing of the ++ * previous RSCN. 
++ */ ++ if (vport->fc_flag & FC_RSCN_DEFERRED) ++ return ndlp->nlp_state; ++ + /* software abort outstanding PRLI */ + lpfc_els_abort(phba, ndlp); + + ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_disc_set_adisc(vport, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_plogi_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_plogi(phba, ndlp, cmdiocb); ++ lpfc_rcv_plogi(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prli_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_rcv_prli(phba, ndlp, cmdiocb); +- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); ++ lpfc_rcv_prli(vport, ndlp, cmdiocb); ++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_logo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_padisc_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prlo_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prlo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_device_recov_unmap_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_device_recov_unmap_node(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, ++ uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + ndlp->nlp_prev_state = 
NLP_STE_UNMAPPED_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- lpfc_disc_set_adisc(phba, ndlp); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_disc_set_adisc(vport, ndlp); + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_plogi_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_rcv_plogi(phba, ndlp, cmdiocb); ++ lpfc_rcv_plogi(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prli_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp); ++ lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_logo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_padisc_mapped_node(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prlo_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + + /* flush the target */ +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring], + ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT); +- spin_unlock_irq(phba->host->host_lock); + + /* Treat like rcv logo */ +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_device_recov_mapped_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, ++lpfc_device_recov_mapped_node(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, + uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + ndlp->nlp_prev_state = NLP_STE_MAPPED_NODE; +- lpfc_nlp_set_state(phba, 
ndlp, NLP_STE_NPR_NODE); +- spin_lock_irq(phba->host->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); +- lpfc_disc_set_adisc(phba, ndlp); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_disc_set_adisc(vport, ndlp); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_plogi_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + + /* Ignore PLOGI if we have an outstanding LOGO */ +- if (ndlp->nlp_flag & NLP_LOGO_SND) { ++ if (ndlp->nlp_flag & (NLP_LOGO_SND | NLP_LOGO_ACC)) { + return ndlp->nlp_state; + } + +- if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) { +- spin_lock_irq(phba->host->host_lock); ++ if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; + } + + /* send PLOGI immediately, move to PLOGI issue state */ + if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } + + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prli_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + struct ls_rjt stat; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- + memset(&stat, 0, sizeof (struct ls_rjt)); + stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC; + stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; +- lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp); ++ lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL); + + if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { + if (ndlp->nlp_flag & NLP_NPR_ADISC) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); +- lpfc_issue_els_adisc(phba, ndlp, 0); ++ spin_unlock_irq(shost->host_lock); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); ++ lpfc_issue_els_adisc(vport, ndlp, 0); + } else { + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_logo_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- 
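/*
 * A minimal sketch, not part of the patch itself: every discovery-state
 * handler in these hunks is being converted from a per-HBA to a per-vport
 * signature. The vport now carries fc_flag and port state, and the host
 * lock comes from the vport's own Scsi_Host instead of phba->host. A
 * handler following the new convention looks roughly like this (the
 * function name below is made up for illustration; lpfc_shost_from_vport
 * and the fields are the ones used in the surrounding hunks):
 */
static uint32_t
example_vport_handler(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
		      void *arg, uint32_t evt)
{
	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
	struct lpfc_hba  *phba  = vport->phba;	/* adapter shared by all vports */

	/* node flags are serialized by the per-host lock, as in the hunks */
	spin_lock_irq(shost->host_lock);
	ndlp->nlp_flag |= NLP_DELAY_TMO;
	spin_unlock_irq(shost->host_lock);

	(void)phba;	/* adapter-wide calls, e.g. lpfc_els_abort(phba, ndlp) */
	return ndlp->nlp_state;
}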
+- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO); ++ lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO); + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_padisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; +- +- cmdiocb = (struct lpfc_iocbq *) arg; ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- lpfc_rcv_padisc(phba, ndlp, cmdiocb); ++ lpfc_rcv_padisc(vport, ndlp, cmdiocb); + + /* + * Do not start discovery if discovery is about to start +@@ -1586,53 +1708,52 @@ + * here will affect the counting of discovery threads. + */ + if (!(ndlp->nlp_flag & NLP_DELAY_TMO) && +- !(ndlp->nlp_flag & NLP_NPR_2B_DISC)){ ++ !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) { + if (ndlp->nlp_flag & NLP_NPR_ADISC) { ++ ndlp->nlp_flag &= ~NLP_NPR_ADISC; + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE); +- lpfc_issue_els_adisc(phba, ndlp, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE); ++ lpfc_issue_els_adisc(vport, ndlp, 0); + } else { + ndlp->nlp_prev_state = NLP_STE_NPR_NODE; +- lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE); +- lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0); ++ lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); ++ lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0); + } + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_rcv_prlo_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_rcv_prlo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- struct lpfc_iocbq *cmdiocb; ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg; + +- cmdiocb = (struct lpfc_iocbq *) arg; +- +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_LOGO_ACC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + +- lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); ++ lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0); + +- if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) { ++ if ((ndlp->nlp_flag & NLP_DELAY_TMO) == 0) { + mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_DELAY_TMO; + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + ndlp->nlp_last_elscmd = ELS_CMD_PLOGI; + } else { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NPR_ADISC; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_plogi_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb, *rspiocb; + IOCB_t *irsp; +@@ -1642,15 +1763,15 @@ + + irsp = &rspiocb->iocb; + if (irsp->ulpStatus) { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + return ndlp->nlp_state; + } + + static uint32_t 
+-lpfc_cmpl_prli_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_cmpl_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb, *rspiocb; + IOCB_t *irsp; +@@ -1660,25 +1781,24 @@ + + irsp = &rspiocb->iocb; + if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_logo_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_cmpl_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- lpfc_unreg_rpi(phba, ndlp); ++ lpfc_unreg_rpi(vport, ndlp); + /* This routine does nothing, just return the current state */ + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_adisc_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_cmpl_adisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { + struct lpfc_iocbq *cmdiocb, *rspiocb; + IOCB_t *irsp; +@@ -1688,28 +1808,25 @@ + + irsp = &rspiocb->iocb; + if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + return ndlp->nlp_state; + } + + static uint32_t +-lpfc_cmpl_reglogin_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_cmpl_reglogin_npr_node(struct lpfc_vport *vport, ++ struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- LPFC_MBOXQ_t *pmb; +- MAILBOX_t *mb; +- +- pmb = (LPFC_MBOXQ_t *) arg; +- mb = &pmb->mb; ++ LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg; ++ MAILBOX_t *mb = &pmb->mb; + + if (!mb->mbxStatus) + ndlp->nlp_rpi = mb->un.varWords[0]; + else { + if (ndlp->nlp_flag & NLP_NODEV_REMOVE) { +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + } +@@ -1717,28 +1834,38 @@ + } + + static uint32_t +-lpfc_device_rm_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_device_rm_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ + if (ndlp->nlp_flag & NLP_NPR_2B_DISC) { ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag |= NLP_NODEV_REMOVE; ++ spin_unlock_irq(shost->host_lock); + return ndlp->nlp_state; + } +- lpfc_drop_node(phba, ndlp); ++ lpfc_drop_node(vport, ndlp); + return NLP_STE_FREED_NODE; + } + + static uint32_t +-lpfc_device_recov_npr_node(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, +- uint32_t evt) ++lpfc_device_recov_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { +- spin_lock_irq(phba->host->host_lock); ++ struct Scsi_Host *shost = lpfc_shost_from_vport(vport); ++ ++ /* Don't do anything that will mess up processing of the ++ * previous RSCN. 
++ */ ++ if (vport->fc_flag & FC_RSCN_DEFERRED) ++ return ndlp->nlp_state; ++ ++ spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC); +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(shost->host_lock); + if (ndlp->nlp_flag & NLP_DELAY_TMO) { +- lpfc_cancel_retry_delay_tmo(phba, ndlp); ++ lpfc_cancel_retry_delay_tmo(vport, ndlp); + } + return ndlp->nlp_state; + } +@@ -1801,7 +1928,7 @@ + */ + + static uint32_t (*lpfc_disc_action[NLP_STE_MAX_STATE * NLP_EVT_MAX_EVENT]) +- (struct lpfc_hba *, struct lpfc_nodelist *, void *, uint32_t) = { ++ (struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t) = { + /* Action routine Event Current State */ + lpfc_rcv_plogi_unused_node, /* RCV_PLOGI UNUSED_NODE */ + lpfc_rcv_els_unused_node, /* RCV_PRLI */ +@@ -1818,7 +1945,7 @@ + lpfc_disc_illegal, /* DEVICE_RECOVERY */ + + lpfc_rcv_plogi_plogi_issue, /* RCV_PLOGI PLOGI_ISSUE */ +- lpfc_rcv_els_plogi_issue, /* RCV_PRLI */ ++ lpfc_rcv_prli_plogi_issue, /* RCV_PRLI */ + lpfc_rcv_logo_plogi_issue, /* RCV_LOGO */ + lpfc_rcv_els_plogi_issue, /* RCV_ADISC */ + lpfc_rcv_els_plogi_issue, /* RCV_PDISC */ +@@ -1917,34 +2044,40 @@ + }; + + int +-lpfc_disc_state_machine(struct lpfc_hba * phba, +- struct lpfc_nodelist * ndlp, void *arg, uint32_t evt) ++lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, ++ void *arg, uint32_t evt) + { ++ struct lpfc_hba *phba = vport->phba; + uint32_t cur_state, rc; +- uint32_t(*func) (struct lpfc_hba *, struct lpfc_nodelist *, void *, ++ uint32_t(*func) (struct lpfc_vport *, struct lpfc_nodelist *, void *, + uint32_t); + + lpfc_nlp_get(ndlp); + cur_state = ndlp->nlp_state; + + /* DSM in event on NPort in state */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0211 DSM in event x%x on NPort x%x in state %d " +- "Data: x%x\n", +- phba->brd_no, ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0211 DSM in event x%x on NPort x%x in " ++ "state %d Data: x%x\n", ++ phba->brd_no, vport->vpi, + evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag); + ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, ++ "DSM in: evt:%d ste:%d did:x%x", ++ evt, cur_state, ndlp->nlp_DID); ++ + func = lpfc_disc_action[(cur_state * NLP_EVT_MAX_EVENT) + evt]; +- rc = (func) (phba, ndlp, arg, evt); ++ rc = (func) (vport, ndlp, arg, evt); + + /* DSM out state on NPort */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_DISCOVERY, +- "%d:0212 DSM out state %d on NPort x%x Data: x%x\n", +- phba->brd_no, ++ lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY, ++ "%d (%d):0212 DSM out state %d on NPort x%x " ++ "Data: x%x\n", ++ phba->brd_no, vport->vpi, ++ rc, ndlp->nlp_DID, ndlp->nlp_flag); ++ ++ lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, ++ "DSM out: ste:%d did:x%x flg:x%x", + rc, ndlp->nlp_DID, ndlp->nlp_flag); + + lpfc_nlp_put(ndlp); +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -37,10 +37,158 @@ + #include "lpfc.h" + #include "lpfc_logmsg.h" + #include "lpfc_crtn.h" ++#include "lpfc_vport.h" + + #define LPFC_RESET_WAIT 2 + #define LPFC_ABORT_WAIT 2 + ++/* ++ * This function is called with no lock held when there is a resource ++ * error in driver or in firmware. 
++ */ ++void ++lpfc_adjust_queue_depth(struct lpfc_hba *phba) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ atomic_inc(&phba->num_rsrc_err); ++ phba->last_rsrc_error_time = jiffies; ++ ++ if ((phba->last_ramp_down_time + QUEUE_RAMP_DOWN_INTERVAL) > jiffies) { ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ return; ++ } ++ ++ phba->last_ramp_down_time = jiffies; ++ ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ ++ spin_lock_irqsave(&phba->pport->work_port_lock, flags); ++ if ((phba->pport->work_port_events & ++ WORKER_RAMP_DOWN_QUEUE) == 0) { ++ phba->pport->work_port_events |= WORKER_RAMP_DOWN_QUEUE; ++ } ++ spin_unlock_irqrestore(&phba->pport->work_port_lock, flags); ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ if (phba->work_wait) ++ wake_up(phba->work_wait); ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ ++ return; ++} ++ ++/* ++ * This function is called with no lock held when there is a successful ++ * SCSI command completion. ++ */ ++static inline void ++lpfc_rampup_queue_depth(struct lpfc_hba *phba, ++ struct scsi_device *sdev) ++{ ++ unsigned long flags; ++ atomic_inc(&phba->num_cmd_success); ++ ++ if (phba->cfg_lun_queue_depth <= sdev->queue_depth) ++ return; ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ if (((phba->last_ramp_up_time + QUEUE_RAMP_UP_INTERVAL) > jiffies) || ++ ((phba->last_rsrc_error_time + QUEUE_RAMP_UP_INTERVAL ) > jiffies)) { ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ return; ++ } ++ ++ phba->last_ramp_up_time = jiffies; ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ ++ spin_lock_irqsave(&phba->pport->work_port_lock, flags); ++ if ((phba->pport->work_port_events & ++ WORKER_RAMP_UP_QUEUE) == 0) { ++ phba->pport->work_port_events |= WORKER_RAMP_UP_QUEUE; ++ } ++ spin_unlock_irqrestore(&phba->pport->work_port_lock, flags); ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ if (phba->work_wait) ++ wake_up(phba->work_wait); ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++} ++ ++void ++lpfc_ramp_down_queue_handler(struct lpfc_hba *phba) ++{ ++ struct lpfc_vport *vport; ++ struct Scsi_Host *host; ++ struct scsi_device *sdev; ++ unsigned long new_queue_depth; ++ unsigned long num_rsrc_err, num_cmd_success; ++ ++ num_rsrc_err = atomic_read(&phba->num_rsrc_err); ++ num_cmd_success = atomic_read(&phba->num_cmd_success); ++ ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ host = lpfc_shost_from_vport(vport); ++ if (!scsi_host_get(host)) ++ continue; ++ ++ spin_unlock_irq(&phba->hbalock); ++ ++ shost_for_each_device(sdev, host) { ++ new_queue_depth = sdev->queue_depth * num_rsrc_err / ++ (num_rsrc_err + num_cmd_success); ++ if (!new_queue_depth) ++ new_queue_depth = sdev->queue_depth - 1; ++ else ++ new_queue_depth = ++ sdev->queue_depth - new_queue_depth; ++ ++ if (sdev->ordered_tags) ++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, ++ new_queue_depth); ++ else ++ scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, ++ new_queue_depth); ++ } ++ spin_lock_irq(&phba->hbalock); ++ scsi_host_put(host); ++ } ++ spin_unlock_irq(&phba->hbalock); ++ atomic_set(&phba->num_rsrc_err, 0); ++ atomic_set(&phba->num_cmd_success, 0); ++} ++ ++void ++lpfc_ramp_up_queue_handler(struct lpfc_hba *phba) ++{ ++ struct lpfc_vport *vport; ++ struct Scsi_Host *host; ++ struct scsi_device *sdev; ++ ++ spin_lock_irq(&phba->hbalock); ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ host = lpfc_shost_from_vport(vport); ++ if (!scsi_host_get(host)) ++ continue; ++ 
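The ramp-down handler above shrinks every LUN queue depth by the recent failure ratio. Isolating just that arithmetic as a standalone helper (the function name is mine, and it assumes rsrc_err is nonzero, which holds in practice because a resource error is what queues this work):

/* Mirrors the new_queue_depth computation in lpfc_ramp_down_queue_handler(). */
static unsigned long ramped_down_depth(unsigned long depth,
                                       unsigned long rsrc_err,
                                       unsigned long cmd_success)
{
        unsigned long cut = depth * rsrc_err / (rsrc_err + cmd_success);

        /* Worked case: depth=32, rsrc_err=8, cmd_success=24
         * gives cut = 32*8/32 = 8, so the new depth is 24. */
        if (!cut)
                return depth - 1;      /* ratio rounded to 0: still back off by one */
        return depth - cut;
}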
++ spin_unlock_irq(&phba->hbalock); ++ shost_for_each_device(sdev, host) { ++ if (sdev->ordered_tags) ++ scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, ++ sdev->queue_depth+1); ++ else ++ scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG, ++ sdev->queue_depth+1); ++ } ++ spin_lock_irq(&phba->hbalock); ++ scsi_host_put(host); ++ } ++ spin_unlock_irq(&phba->hbalock); ++ atomic_set(&phba->num_rsrc_err, 0); ++ atomic_set(&phba->num_cmd_success, 0); ++} + + /* + * This routine allocates a scsi buffer, which contains all the necessary +@@ -51,8 +199,9 @@ + * and the BPL BDE is setup in the IOCB. + */ + static struct lpfc_scsi_buf * +-lpfc_new_scsi_buf(struct lpfc_hba * phba) ++lpfc_new_scsi_buf(struct lpfc_vport *vport) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_scsi_buf *psb; + struct ulp_bde64 *bpl; + IOCB_t *iocb; +@@ -63,7 +212,6 @@ + if (!psb) + return NULL; + memset(psb, 0, sizeof (struct lpfc_scsi_buf)); +- psb->scsi_hba = phba; + + /* + * Get memory from the pci pool to map the virt space to pci bus space +@@ -155,7 +303,7 @@ + } + + static void +-lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb) ++lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb) + { + unsigned long iflag = 0; + +@@ -166,7 +314,7 @@ + } + + static int +-lpfc_scsi_prep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd) ++lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd) + { + struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd; + struct scatterlist *sgel = NULL; +@@ -175,8 +323,7 @@ + IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb; + dma_addr_t physaddr; + uint32_t i, num_bde = 0; +- int datadir = scsi_cmnd->sc_data_direction; +- int dma_error; ++ int nseg, datadir = scsi_cmnd->sc_data_direction; + + /* + * There are three possibilities here - use scatter-gather segment, use +@@ -185,26 +332,22 @@ + * data bde entry. + */ + bpl += 2; +- if (scsi_cmnd->use_sg) { ++ nseg = scsi_dma_map(scsi_cmnd); ++ if (nseg > 0) { + /* + * The driver stores the segment count returned from pci_map_sg + * because this a count of dma-mappings used to map the use_sg + * pages. They are not guaranteed to be the same for those + * architectures that implement an IOMMU. + */ +- sgel = (struct scatterlist *)scsi_cmnd->request_buffer; +- lpfc_cmd->seg_cnt = dma_map_sg(&phba->pcidev->dev, sgel, +- scsi_cmnd->use_sg, datadir); +- if (lpfc_cmd->seg_cnt == 0) +- return 1; + ++ lpfc_cmd->seg_cnt = nseg; + if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) { + printk(KERN_ERR "%s: Too many sg segments from " + "dma_map_sg. Config %d, seg_cnt %d", + __FUNCTION__, phba->cfg_sg_seg_cnt, + lpfc_cmd->seg_cnt); +- dma_unmap_sg(&phba->pcidev->dev, sgel, +- lpfc_cmd->seg_cnt, datadir); ++ scsi_dma_unmap(scsi_cmnd); + return 1; + } + +@@ -214,7 +357,7 @@ + * single scsi command. Just run through the seg_cnt and format + * the bde's. 
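These lpfc_scsi_prep_dma_buf() hunks are part of the tree-wide move from open-coded use_sg / dma_map_sg() handling to the SCSI midlayer accessors. Reduced to its skeleton, under an invented function name and with build_bde() standing in for the inline BDE setup done in the real hunk:

static int example_prep_dma(struct scsi_cmnd *cmd)
{
        struct scatterlist *sg;
        int nseg, i;

        nseg = scsi_dma_map(cmd);      /* maps the whole sg list; negative on failure */
        if (nseg < 0)
                return 1;              /* nonzero tells the caller to retry later */

        scsi_for_each_sg(cmd, sg, nseg, i)
                build_bde(sg_dma_address(sg), sg_dma_len(sg));   /* stand-in helper */

        return 0;
}

The payoff shows in the unprep hunk below: both teardown branches collapse into a single scsi_dma_unmap(cmd), since the midlayer now remembers how the buffer was mapped.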
+ */ +- for (i = 0; i < lpfc_cmd->seg_cnt; i++) { ++ scsi_for_each_sg(scsi_cmnd, sgel, nseg, i) { + physaddr = sg_dma_address(sgel); + bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr)); + bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); +@@ -225,35 +368,10 @@ + bpl->tus.f.bdeFlags = BUFF_USE_RCV; + bpl->tus.w = le32_to_cpu(bpl->tus.w); + bpl++; +- sgel++; + num_bde++; + } +- } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) { +- physaddr = dma_map_single(&phba->pcidev->dev, +- scsi_cmnd->request_buffer, +- scsi_cmnd->request_bufflen, +- datadir); +- dma_error = dma_mapping_error(physaddr); +- if (dma_error) { +- lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0718 Unable to dma_map_single " +- "request_buffer: x%x\n", +- phba->brd_no, dma_error); ++ } else if (nseg < 0) + return 1; +- } +- +- lpfc_cmd->nonsg_phys = physaddr; +- bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr)); +- bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); +- bpl->tus.f.bdeSize = scsi_cmnd->request_bufflen; +- if (datadir == DMA_TO_DEVICE) +- bpl->tus.f.bdeFlags = 0; +- else +- bpl->tus.f.bdeFlags = BUFF_USE_RCV; +- bpl->tus.w = le32_to_cpu(bpl->tus.w); +- num_bde = 1; +- bpl++; +- } + + /* + * Finish initializing those IOCB fields that are dependent on the +@@ -266,7 +384,7 @@ + (num_bde * sizeof (struct ulp_bde64)); + iocb_cmd->ulpBdeCount = 1; + iocb_cmd->ulpLe = 1; +- fcp_cmnd->fcpDl = be32_to_cpu(scsi_cmnd->request_bufflen); ++ fcp_cmnd->fcpDl = be32_to_cpu(scsi_bufflen(scsi_cmnd)); + return 0; + } + +@@ -279,26 +397,20 @@ + * a request buffer, but did not request use_sg. There is a third + * case, but it does not require resource deallocation. + */ +- if ((psb->seg_cnt > 0) && (psb->pCmd->use_sg)) { +- dma_unmap_sg(&phba->pcidev->dev, psb->pCmd->request_buffer, +- psb->seg_cnt, psb->pCmd->sc_data_direction); +- } else { +- if ((psb->nonsg_phys) && (psb->pCmd->request_bufflen)) { +- dma_unmap_single(&phba->pcidev->dev, psb->nonsg_phys, +- psb->pCmd->request_bufflen, +- psb->pCmd->sc_data_direction); +- } +- } ++ if (psb->seg_cnt > 0) ++ scsi_dma_unmap(psb->pCmd); + } + + static void +-lpfc_handle_fcp_err(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb) ++lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, ++ struct lpfc_iocbq *rsp_iocb) + { + struct scsi_cmnd *cmnd = lpfc_cmd->pCmd; + struct fcp_cmnd *fcpcmd = lpfc_cmd->fcp_cmnd; + struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp; +- struct lpfc_hba *phba = lpfc_cmd->scsi_hba; ++ struct lpfc_hba *phba = vport->phba; + uint32_t fcpi_parm = rsp_iocb->iocb.un.fcpi.fcpi_parm; ++ uint32_t vpi = vport->vpi; + uint32_t resp_info = fcprsp->rspStatus2; + uint32_t scsi_status = fcprsp->rspStatus3; + uint32_t *lp; +@@ -331,9 +443,9 @@ + logit = LOG_FCP; + + lpfc_printf_log(phba, KERN_WARNING, logit, +- "%d:0730 FCP command x%x failed: x%x SNS x%x x%x " ++ "%d (%d):0730 FCP command x%x failed: x%x SNS x%x x%x " + "Data: x%x x%x x%x x%x x%x\n", +- phba->brd_no, cmnd->cmnd[0], scsi_status, ++ phba->brd_no, vpi, cmnd->cmnd[0], scsi_status, + be32_to_cpu(*lp), be32_to_cpu(*(lp + 3)), resp_info, + be32_to_cpu(fcprsp->rspResId), + be32_to_cpu(fcprsp->rspSnsLen), +@@ -349,15 +461,16 @@ + } + } + +- cmnd->resid = 0; ++ scsi_set_resid(cmnd, 0); + if (resp_info & RESID_UNDER) { +- cmnd->resid = be32_to_cpu(fcprsp->rspResId); ++ scsi_set_resid(cmnd, be32_to_cpu(fcprsp->rspResId)); + + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0716 FCP Read Underrun, expected %d, " +- "residual %d Data: x%x x%x x%x\n", phba->brd_no, +- 
be32_to_cpu(fcpcmd->fcpDl), cmnd->resid, +- fcpi_parm, cmnd->cmnd[0], cmnd->underflow); ++ "%d (%d):0716 FCP Read Underrun, expected %d, " ++ "residual %d Data: x%x x%x x%x\n", ++ phba->brd_no, vpi, be32_to_cpu(fcpcmd->fcpDl), ++ scsi_get_resid(cmnd), fcpi_parm, cmnd->cmnd[0], ++ cmnd->underflow); + + /* + * If there is an under run check if under run reported by +@@ -366,15 +479,16 @@ + */ + if ((cmnd->sc_data_direction == DMA_FROM_DEVICE) && + fcpi_parm && +- (cmnd->resid != fcpi_parm)) { ++ (scsi_get_resid(cmnd) != fcpi_parm)) { + lpfc_printf_log(phba, KERN_WARNING, + LOG_FCP | LOG_FCP_ERROR, +- "%d:0735 FCP Read Check Error and Underrun " +- "Data: x%x x%x x%x x%x\n", phba->brd_no, ++ "%d (%d):0735 FCP Read Check Error " ++ "and Underrun Data: x%x x%x x%x x%x\n", ++ phba->brd_no, vpi, + be32_to_cpu(fcpcmd->fcpDl), +- cmnd->resid, +- fcpi_parm, cmnd->cmnd[0]); +- cmnd->resid = cmnd->request_bufflen; ++ scsi_get_resid(cmnd), fcpi_parm, ++ cmnd->cmnd[0]); ++ scsi_set_resid(cmnd, scsi_bufflen(cmnd)); + host_status = DID_ERROR; + } + /* +@@ -385,22 +499,23 @@ + */ + if (!(resp_info & SNS_LEN_VALID) && + (scsi_status == SAM_STAT_GOOD) && +- (cmnd->request_bufflen - cmnd->resid) < cmnd->underflow) { ++ (scsi_bufflen(cmnd) - scsi_get_resid(cmnd) ++ < cmnd->underflow)) { + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0717 FCP command x%x residual " ++ "%d (%d):0717 FCP command x%x residual " + "underrun converted to error " +- "Data: x%x x%x x%x\n", phba->brd_no, +- cmnd->cmnd[0], cmnd->request_bufflen, +- cmnd->resid, cmnd->underflow); +- ++ "Data: x%x x%x x%x\n", ++ phba->brd_no, vpi, cmnd->cmnd[0], ++ cmnd->request_bufflen, ++ scsi_get_resid(cmnd), cmnd->underflow); + host_status = DID_ERROR; + } + } else if (resp_info & RESID_OVER) { + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0720 FCP command x%x residual " ++ "%d (%d):0720 FCP command x%x residual " + "overrun error. Data: x%x x%x \n", +- phba->brd_no, cmnd->cmnd[0], +- cmnd->request_bufflen, cmnd->resid); ++ phba->brd_no, vpi, cmnd->cmnd[0], ++ scsi_bufflen(cmnd), scsi_get_resid(cmnd)); + host_status = DID_ERROR; + + /* +@@ -410,13 +525,14 @@ + } else if ((scsi_status == SAM_STAT_GOOD) && fcpi_parm && + (cmnd->sc_data_direction == DMA_FROM_DEVICE)) { + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR, +- "%d:0734 FCP Read Check Error Data: " +- "x%x x%x x%x x%x\n", phba->brd_no, ++ "%d (%d):0734 FCP Read Check Error Data: " ++ "x%x x%x x%x x%x\n", ++ phba->brd_no, vpi, + be32_to_cpu(fcpcmd->fcpDl), + be32_to_cpu(fcprsp->rspResId), + fcpi_parm, cmnd->cmnd[0]); + host_status = DID_ERROR; +- cmnd->resid = cmnd->request_bufflen; ++ scsi_set_resid(cmnd, scsi_bufflen(cmnd)); + } + + out: +@@ -429,9 +545,13 @@ + { + struct lpfc_scsi_buf *lpfc_cmd = + (struct lpfc_scsi_buf *) pIocbIn->context1; ++ struct lpfc_vport *vport = pIocbIn->vport; + struct lpfc_rport_data *rdata = lpfc_cmd->rdata; + struct lpfc_nodelist *pnode = rdata->pnode; + struct scsi_cmnd *cmd = lpfc_cmd->pCmd; ++ uint32_t vpi = (lpfc_cmd->cur_iocbq.vport ++ ? 
lpfc_cmd->cur_iocbq.vport->vpi ++ : 0); + int result; + struct scsi_device *sdev, *tmp_sdev; + int depth = 0; +@@ -447,22 +567,31 @@ + lpfc_cmd->status = IOSTAT_DEFAULT; + + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0729 FCP cmd x%x failed <%d/%d> status: " +- "x%x result: x%x Data: x%x x%x\n", +- phba->brd_no, cmd->cmnd[0], cmd->device->id, +- cmd->device->lun, lpfc_cmd->status, +- lpfc_cmd->result, pIocbOut->iocb.ulpContext, ++ "%d (%d):0729 FCP cmd x%x failed <%d/%d> " ++ "status: x%x result: x%x Data: x%x x%x\n", ++ phba->brd_no, vpi, cmd->cmnd[0], ++ cmd->device ? cmd->device->id : 0xffff, ++ cmd->device ? cmd->device->lun : 0xffff, ++ lpfc_cmd->status, lpfc_cmd->result, ++ pIocbOut->iocb.ulpContext, + lpfc_cmd->cur_iocbq.iocb.ulpIoTag); + + switch (lpfc_cmd->status) { + case IOSTAT_FCP_RSP_ERROR: + /* Call FCP RSP handler to determine result */ +- lpfc_handle_fcp_err(lpfc_cmd,pIocbOut); ++ lpfc_handle_fcp_err(vport, lpfc_cmd, pIocbOut); + break; + case IOSTAT_NPORT_BSY: + case IOSTAT_FABRIC_BSY: + cmd->result = ScsiResult(DID_BUS_BUSY, 0); + break; ++ case IOSTAT_LOCAL_REJECT: ++ if (lpfc_cmd->result == RJT_UNAVAIL_PERM || ++ lpfc_cmd->result == IOERR_NO_RESOURCES || ++ lpfc_cmd->result == RJT_LOGIN_REQUIRED) { ++ cmd->result = ScsiResult(DID_REQUEUE, 0); ++ break; ++ } /* else: fall through */ + default: + cmd->result = ScsiResult(DID_ERROR, 0); + break; +@@ -479,11 +608,12 @@ + uint32_t *lp = (uint32_t *)cmd->sense_buffer; + + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0710 Iodone <%d/%d> cmd %p, error x%x " +- "SNS x%x x%x Data: x%x x%x\n", +- phba->brd_no, cmd->device->id, ++ "%d (%d):0710 Iodone <%d/%d> cmd %p, error " ++ "x%x SNS x%x x%x Data: x%x x%x\n", ++ phba->brd_no, vpi, cmd->device->id, + cmd->device->lun, cmd, cmd->result, +- *lp, *(lp + 3), cmd->retries, cmd->resid); ++ *lp, *(lp + 3), cmd->retries, ++ scsi_get_resid(cmd)); + } + + result = cmd->result; +@@ -496,6 +626,10 @@ + return; + } + ++ ++ if (!result) ++ lpfc_rampup_queue_depth(phba, sdev); ++ + if (!result && pnode != NULL && + ((jiffies - pnode->last_ramp_up_time) > + LPFC_Q_RAMP_UP_INTERVAL * HZ) && +@@ -544,8 +678,9 @@ + + if (depth) { + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0711 detected queue full - lun queue depth " +- " adjusted to %d.\n", phba->brd_no, depth); ++ "%d (%d):0711 detected queue full - " ++ "lun queue depth adjusted to %d.\n", ++ phba->brd_no, vpi, depth); + } + } + +@@ -553,9 +688,10 @@ + } + + static void +-lpfc_scsi_prep_cmnd(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd, ++lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, + struct lpfc_nodelist *pnode) + { ++ struct lpfc_hba *phba = vport->phba; + struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd; + struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd; + IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb; +@@ -592,22 +728,7 @@ + * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first + * data bde entry. 
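The new IOSTAT_LOCAL_REJECT case above requeues transient, resource-flavoured rejects instead of failing the command. Its decision logic, pulled out as a sketch (the constants are the driver's, the helper name is not):

static int local_reject_result(uint32_t reason)
{
        switch (reason) {
        case RJT_UNAVAIL_PERM:
        case IOERR_NO_RESOURCES:
        case RJT_LOGIN_REQUIRED:
                return ScsiResult(DID_REQUEUE, 0);  /* let the midlayer retry */
        default:
                return ScsiResult(DID_ERROR, 0);    /* everything else stays fatal */
        }
}

DID_REQUEUE matters here because a port that is still logging in, or an HBA briefly out of exchange resources, will usually succeed on the retry.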
+ */ +- if (scsi_cmnd->use_sg) { +- if (datadir == DMA_TO_DEVICE) { +- iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR; +- iocb_cmd->un.fcpi.fcpi_parm = 0; +- iocb_cmd->ulpPU = 0; +- fcp_cmnd->fcpCntl3 = WRITE_DATA; +- phba->fc4OutputRequests++; +- } else { +- iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; +- iocb_cmd->ulpPU = PARM_READ_CHECK; +- iocb_cmd->un.fcpi.fcpi_parm = +- scsi_cmnd->request_bufflen; +- fcp_cmnd->fcpCntl3 = READ_DATA; +- phba->fc4InputRequests++; +- } +- } else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) { ++ if (scsi_sg_count(scsi_cmnd)) { + if (datadir == DMA_TO_DEVICE) { + iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR; + iocb_cmd->un.fcpi.fcpi_parm = 0; +@@ -617,8 +738,7 @@ + } else { + iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR; + iocb_cmd->ulpPU = PARM_READ_CHECK; +- iocb_cmd->un.fcpi.fcpi_parm = +- scsi_cmnd->request_bufflen; ++ iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd); + fcp_cmnd->fcpCntl3 = READ_DATA; + phba->fc4InputRequests++; + } +@@ -642,15 +762,15 @@ + piocbq->context1 = lpfc_cmd; + piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl; + piocbq->iocb.ulpTimeout = lpfc_cmd->timeout; ++ piocbq->vport = vport; + } + + static int +-lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_hba *phba, ++lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport, + struct lpfc_scsi_buf *lpfc_cmd, + unsigned int lun, + uint8_t task_mgmt_cmd) + { +- struct lpfc_sli *psli; + struct lpfc_iocbq *piocbq; + IOCB_t *piocb; + struct fcp_cmnd *fcp_cmnd; +@@ -661,8 +781,9 @@ + return 0; + } + +- psli = &phba->sli; + piocbq = &(lpfc_cmd->cur_iocbq); ++ piocbq->vport = vport; ++ + piocb = &piocbq->iocb; + + fcp_cmnd = lpfc_cmd->fcp_cmnd; +@@ -688,7 +809,7 @@ + piocb->ulpTimeout = lpfc_cmd->timeout; + } + +- return (1); ++ return 1; + } + + static void +@@ -704,10 +825,11 @@ + } + + static int +-lpfc_scsi_tgt_reset(struct lpfc_scsi_buf * lpfc_cmd, struct lpfc_hba * phba, ++lpfc_scsi_tgt_reset(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_vport *vport, + unsigned tgt_id, unsigned int lun, + struct lpfc_rport_data *rdata) + { ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_iocbq *iocbq; + struct lpfc_iocbq *iocbqrsp; + int ret; +@@ -716,12 +838,11 @@ + return FAILED; + + lpfc_cmd->rdata = rdata; +- ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, lun, ++ ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun, + FCP_TARGET_RESET); + if (!ret) + return FAILED; + +- lpfc_cmd->scsi_hba = phba; + iocbq = &lpfc_cmd->cur_iocbq; + iocbqrsp = lpfc_sli_get_iocbq(phba); + +@@ -730,10 +851,10 @@ + + /* Issue Target Reset to TGT */ + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0702 Issue Target Reset to TGT %d " ++ "%d (%d):0702 Issue Target Reset to TGT %d " + "Data: x%x x%x\n", +- phba->brd_no, tgt_id, rdata->pnode->nlp_rpi, +- rdata->pnode->nlp_flag); ++ phba->brd_no, vport->vpi, tgt_id, ++ rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag); + + ret = lpfc_sli_issue_iocb_wait(phba, + &phba->sli.ring[phba->sli.fcp_ring], +@@ -758,7 +879,8 @@ + const char * + lpfc_info(struct Scsi_Host *host) + { +- struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata; ++ struct lpfc_hba *phba = vport->phba; + int len; + static char lpfcinfobuf[384]; + +@@ -800,25 +922,21 @@ + + void lpfc_poll_timeout(unsigned long ptr) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)ptr; +- unsigned long iflag; +- +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr; + + if (phba->cfg_poll & 
ENABLE_FCP_RING_POLLING) { + lpfc_sli_poll_fcp_ring (phba); + if (phba->cfg_poll & DISABLE_FCP_RING_INT) + lpfc_poll_rearm_timer(phba); + } +- +- spin_unlock_irqrestore(phba->host->host_lock, iflag); + } + + static int + lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *)) + { +- struct lpfc_hba *phba = +- (struct lpfc_hba *) cmnd->device->host->hostdata; ++ struct Scsi_Host *shost = cmnd->device->host; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_rport_data *rdata = cmnd->device->hostdata; + struct lpfc_nodelist *ndlp = rdata->pnode; +@@ -840,11 +958,14 @@ + cmnd->result = ScsiResult(DID_BUS_BUSY, 0); + goto out_fail_command; + } +- lpfc_cmd = lpfc_get_scsi_buf (phba); ++ lpfc_cmd = lpfc_get_scsi_buf(phba); + if (lpfc_cmd == NULL) { ++ lpfc_adjust_queue_depth(phba); ++ + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0707 driver's buffer pool is empty, " +- "IO busied\n", phba->brd_no); ++ "%d (%d):0707 driver's buffer pool is empty, " ++ "IO busied\n", ++ phba->brd_no, vport->vpi); + goto out_host_busy; + } + +@@ -862,7 +983,7 @@ + if (err) + goto out_host_busy_free_buf; + +- lpfc_scsi_prep_cmnd(phba, lpfc_cmd, ndlp); ++ lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp); + + err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring], + &lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB); +@@ -908,7 +1029,8 @@ + lpfc_abort_handler(struct scsi_cmnd *cmnd) + { + struct Scsi_Host *shost = cmnd->device->host; +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring]; + struct lpfc_iocbq *iocb; + struct lpfc_iocbq *abtsiocb; +@@ -918,8 +1040,6 @@ + int ret = SUCCESS; + + lpfc_block_error_handler(cmnd); +- spin_lock_irq(shost->host_lock); +- + lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble; + BUG_ON(!lpfc_cmd); + +@@ -956,12 +1076,13 @@ + + icmd->ulpLe = 1; + icmd->ulpClass = cmd->ulpClass; +- if (phba->hba_state >= LPFC_LINK_UP) ++ if (lpfc_is_link_up(phba)) + icmd->ulpCommand = CMD_ABORT_XRI_CN; + else + icmd->ulpCommand = CMD_CLOSE_XRI_CN; + + abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl; ++ abtsiocb->vport = vport; + if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) { + lpfc_sli_release_iocbq(phba, abtsiocb); + ret = FAILED; +@@ -977,9 +1098,7 @@ + if (phba->cfg_poll & DISABLE_FCP_RING_INT) + lpfc_sli_poll_fcp_ring (phba); + +- spin_unlock_irq(phba->host->host_lock); +- schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ); +- spin_lock_irq(phba->host->host_lock); ++ schedule_timeout_uninterruptible(LPFC_ABORT_WAIT * HZ); + if (++loop_count + > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT) + break; +@@ -988,22 +1107,21 @@ + if (lpfc_cmd->pCmd == cmnd) { + ret = FAILED; + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0748 abort handler timed out waiting for " +- "abort to complete: ret %#x, ID %d, LUN %d, " +- "snum %#lx\n", +- phba->brd_no, ret, cmnd->device->id, +- cmnd->device->lun, cmnd->serial_number); ++ "%d (%d):0748 abort handler timed out waiting " ++ "for abort to complete: ret %#x, ID %d, " ++ "LUN %d, snum %#lx\n", ++ phba->brd_no, vport->vpi, ret, ++ cmnd->device->id, cmnd->device->lun, ++ cmnd->serial_number); + } + + out: + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0749 SCSI Layer I/O Abort Request " ++ "%d (%d):0749 SCSI Layer I/O Abort 
Request " + "Status x%x ID %d LUN %d snum %#lx\n", +- phba->brd_no, ret, cmnd->device->id, ++ phba->brd_no, vport->vpi, ret, cmnd->device->id, + cmnd->device->lun, cmnd->serial_number); + +- spin_unlock_irq(shost->host_lock); +- + return ret; + } + +@@ -1011,7 +1129,8 @@ + lpfc_device_reset_handler(struct scsi_cmnd *cmnd) + { + struct Scsi_Host *shost = cmnd->device->host; +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_scsi_buf *lpfc_cmd; + struct lpfc_iocbq *iocbq, *iocbqrsp; + struct lpfc_rport_data *rdata = cmnd->device->hostdata; +@@ -1022,28 +1141,26 @@ + int cnt, loopcnt; + + lpfc_block_error_handler(cmnd); +- spin_lock_irq(shost->host_lock); + loopcnt = 0; + /* + * If target is not in a MAPPED state, delay the reset until + * target is rediscovered or devloss timeout expires. + */ +- while ( 1 ) { ++ while (1) { + if (!pnode) + goto out; + + if (pnode->nlp_state != NLP_STE_MAPPED_NODE) { +- spin_unlock_irq(phba->host->host_lock); + schedule_timeout_uninterruptible(msecs_to_jiffies(500)); +- spin_lock_irq(phba->host->host_lock); + loopcnt++; + rdata = cmnd->device->hostdata; + if (!rdata || + (loopcnt > ((phba->cfg_devloss_tmo * 2) + 1))) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0721 LUN Reset rport failure:" +- " cnt x%x rdata x%p\n", +- phba->brd_no, loopcnt, rdata); ++ "%d (%d):0721 LUN Reset rport " ++ "failure: cnt x%x rdata x%p\n", ++ phba->brd_no, vport->vpi, ++ loopcnt, rdata); + goto out; + } + pnode = rdata->pnode; +@@ -1054,15 +1171,14 @@ + break; + } + +- lpfc_cmd = lpfc_get_scsi_buf (phba); ++ lpfc_cmd = lpfc_get_scsi_buf(phba); + if (lpfc_cmd == NULL) + goto out; + + lpfc_cmd->timeout = 60; +- lpfc_cmd->scsi_hba = phba; + lpfc_cmd->rdata = rdata; + +- ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, cmnd->device->lun, ++ ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, cmnd->device->lun, + FCP_TARGET_RESET); + if (!ret) + goto out_free_scsi_buf; +@@ -1075,8 +1191,9 @@ + goto out_free_scsi_buf; + + lpfc_printf_log(phba, KERN_INFO, LOG_FCP, +- "%d:0703 Issue target reset to TGT %d LUN %d rpi x%x " +- "nlp_flag x%x\n", phba->brd_no, cmnd->device->id, ++ "%d (%d):0703 Issue target reset to TGT %d LUN %d " ++ "rpi x%x nlp_flag x%x\n", ++ phba->brd_no, vport->vpi, cmnd->device->id, + cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag); + + iocb_status = lpfc_sli_issue_iocb_wait(phba, +@@ -1111,9 +1228,7 @@ + 0, LPFC_CTX_LUN); + loopcnt = 0; + while(cnt) { +- spin_unlock_irq(phba->host->host_lock); + schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ); +- spin_lock_irq(phba->host->host_lock); + + if (++loopcnt + > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) +@@ -1127,8 +1242,9 @@ + + if (cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0719 device reset I/O flush failure: cnt x%x\n", +- phba->brd_no, cnt); ++ "%d (%d):0719 device reset I/O flush failure: " ++ "cnt x%x\n", ++ phba->brd_no, vport->vpi, cnt); + ret = FAILED; + } + +@@ -1137,13 +1253,12 @@ + lpfc_release_scsi_buf(phba, lpfc_cmd); + } + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0713 SCSI layer issued device reset (%d, %d) " ++ "%d (%d):0713 SCSI layer issued device reset (%d, %d) " + "return x%x status x%x result x%x\n", +- phba->brd_no, cmnd->device->id, cmnd->device->lun, +- ret, cmd_status, cmd_result); ++ phba->brd_no, vport->vpi, cmnd->device->id, ++ cmnd->device->lun, ret, cmd_status, cmd_result); + + out: +- 
spin_unlock_irq(shost->host_lock); + return ret; + } + +@@ -1151,7 +1266,8 @@ + lpfc_bus_reset_handler(struct scsi_cmnd *cmnd) + { + struct Scsi_Host *shost = cmnd->device->host; +- struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = NULL; + int match; + int ret = FAILED, i, err_count = 0; +@@ -1159,7 +1275,6 @@ + struct lpfc_scsi_buf * lpfc_cmd; + + lpfc_block_error_handler(cmnd); +- spin_lock_irq(shost->host_lock); + + lpfc_cmd = lpfc_get_scsi_buf(phba); + if (lpfc_cmd == NULL) +@@ -1167,7 +1282,6 @@ + + /* The lpfc_cmd storage is reused. Set all loop invariants. */ + lpfc_cmd->timeout = 60; +- lpfc_cmd->scsi_hba = phba; + + /* + * Since the driver manages a single bus device, reset all +@@ -1177,7 +1291,8 @@ + for (i = 0; i < LPFC_MAX_TARGET; i++) { + /* Search for mapped node by target ID */ + match = 0; +- list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) { ++ spin_lock_irq(shost->host_lock); ++ list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_state == NLP_STE_MAPPED_NODE && + i == ndlp->nlp_sid && + ndlp->rport) { +@@ -1185,15 +1300,18 @@ + break; + } + } ++ spin_unlock_irq(shost->host_lock); + if (!match) + continue; + +- ret = lpfc_scsi_tgt_reset(lpfc_cmd, phba, i, cmnd->device->lun, ++ ret = lpfc_scsi_tgt_reset(lpfc_cmd, vport, i, ++ cmnd->device->lun, + ndlp->rport->dd_data); + if (ret != SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0700 Bus Reset on target %d failed\n", +- phba->brd_no, i); ++ "%d (%d):0700 Bus Reset on target %d " ++ "failed\n", ++ phba->brd_no, vport->vpi, i); + err_count++; + break; + } +@@ -1219,9 +1337,7 @@ + 0, 0, 0, LPFC_CTX_HOST); + loopcnt = 0; + while(cnt) { +- spin_unlock_irq(phba->host->host_lock); + schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ); +- spin_lock_irq(phba->host->host_lock); + + if (++loopcnt + > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT) +@@ -1234,25 +1350,24 @@ + + if (cnt) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0715 Bus Reset I/O flush failure: cnt x%x left x%x\n", +- phba->brd_no, cnt, i); ++ "%d (%d):0715 Bus Reset I/O flush failure: " ++ "cnt x%x left x%x\n", ++ phba->brd_no, vport->vpi, cnt, i); + ret = FAILED; + } + +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_FCP, +- "%d:0714 SCSI layer issued Bus Reset Data: x%x\n", +- phba->brd_no, ret); ++ lpfc_printf_log(phba, KERN_ERR, LOG_FCP, ++ "%d (%d):0714 SCSI layer issued Bus Reset Data: x%x\n", ++ phba->brd_no, vport->vpi, ret); + out: +- spin_unlock_irq(shost->host_lock); + return ret; + } + + static int + lpfc_slave_alloc(struct scsi_device *sdev) + { +- struct lpfc_hba *phba = (struct lpfc_hba *)sdev->host->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct lpfc_scsi_buf *scsi_buf = NULL; + struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); + uint32_t total = 0, i; +@@ -1273,27 +1388,35 @@ + */ + total = phba->total_scsi_bufs; + num_to_alloc = phba->cfg_lun_queue_depth + 2; +- if (total >= phba->cfg_hba_queue_depth) { ++ ++ /* Allow some exchanges to be available always to complete discovery */ ++ if (total >= phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT ) { + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0704 At limitation of %d preallocated " +- "command buffers\n", phba->brd_no, total); ++ "%d (%d):0704 At limitation of %d " ++ "preallocated command buffers\n", 
++ phba->brd_no, vport->vpi, total); + return 0; +- } else if (total + num_to_alloc > phba->cfg_hba_queue_depth) { ++ ++ /* Allow some exchanges to be available always to complete discovery */ ++ } else if (total + num_to_alloc > ++ phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT ) { + lpfc_printf_log(phba, KERN_WARNING, LOG_FCP, +- "%d:0705 Allocation request of %d command " +- "buffers will exceed max of %d. Reducing " +- "allocation request to %d.\n", phba->brd_no, +- num_to_alloc, phba->cfg_hba_queue_depth, ++ "%d (%d):0705 Allocation request of %d " ++ "command buffers will exceed max of %d. " ++ "Reducing allocation request to %d.\n", ++ phba->brd_no, vport->vpi, num_to_alloc, ++ phba->cfg_hba_queue_depth, + (phba->cfg_hba_queue_depth - total)); + num_to_alloc = phba->cfg_hba_queue_depth - total; + } + + for (i = 0; i < num_to_alloc; i++) { +- scsi_buf = lpfc_new_scsi_buf(phba); ++ scsi_buf = lpfc_new_scsi_buf(vport); + if (!scsi_buf) { + lpfc_printf_log(phba, KERN_ERR, LOG_FCP, +- "%d:0706 Failed to allocate command " +- "buffer\n", phba->brd_no); ++ "%d (%d):0706 Failed to allocate " ++ "command buffer\n", ++ phba->brd_no, vport->vpi); + break; + } + +@@ -1308,7 +1431,8 @@ + static int + lpfc_slave_configure(struct scsi_device *sdev) + { +- struct lpfc_hba *phba = (struct lpfc_hba *) sdev->host->hostdata; ++ struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata; ++ struct lpfc_hba *phba = vport->phba; + struct fc_rport *rport = starget_to_rport(sdev->sdev_target); + + if (sdev->tagged_supported) +@@ -1340,6 +1464,7 @@ + return; + } + ++ + struct scsi_host_template lpfc_template = { + .module = THIS_MODULE, + .name = LPFC_DRIVER_NAME, +@@ -1352,11 +1477,10 @@ + .slave_configure = lpfc_slave_configure, + .slave_destroy = lpfc_slave_destroy, + .scan_finished = lpfc_scan_finished, +- .scan_start = lpfc_scan_start, + .this_id = -1, + .sg_tablesize = LPFC_SG_SEG_CNT, + .cmd_per_lun = LPFC_CMD_PER_LUN, + .use_clustering = ENABLE_CLUSTERING, +- .shost_attrs = lpfc_host_attrs, ++ .shost_attrs = lpfc_hba_attrs, + .max_sectors = 0xFFFF, + }; +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_scsi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,7 +1,7 @@ + /******************************************************************* + * This file is part of the Emulex Linux Device Driver for * + * Fibre Channel Host Bus Adapters. * +- * Copyright (C) 2004-2005 Emulex. All rights reserved. * ++ * Copyright (C) 2004-2006 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * +@@ -110,7 +110,6 @@ + struct lpfc_scsi_buf { + struct list_head list; + struct scsi_cmnd *pCmd; +- struct lpfc_hba *scsi_hba; + struct lpfc_rport_data *rdata; + + uint32_t timeout; +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.c 2007-12-21 15:36:12.000000000 -0500 +@@ -38,23 +38,25 @@ + #include "lpfc_crtn.h" + #include "lpfc_logmsg.h" + #include "lpfc_compat.h" ++#include "lpfc_debugfs.h" + + /* + * Define macro to log: Mailbox command x%x cannot issue Data + * This allows multiple uses of lpfc_msgBlk0311 + * w/o perturbing log msg utility. 
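One convention recurs in nearly every message this patch touches: the log prefix grows from %d: (board number) to %d (%d): (board number, then the originating virtual-port index, or 0 when no vport applies). The shape of the change, as used throughout these hunks and in the macro that follows:

/* Old form: per-HBA only. */
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
                "%d:0311 ...\n", phba->brd_no);

/* New form: HBA plus vport, guarded for commands with no vport attached. */
lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
                "%d (%d):0311 ...\n",
                phba->brd_no, pmbox->vport ? pmbox->vport->vpi : 0);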
+ */ +-#define LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) \ ++#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \ + lpfc_printf_log(phba, \ + KERN_INFO, \ + LOG_MBOX | LOG_SLI, \ +- "%d:0311 Mailbox command x%x cannot issue " \ +- "Data: x%x x%x x%x\n", \ ++ "%d (%d):0311 Mailbox command x%x cannot " \ ++ "issue Data: x%x x%x x%x\n", \ + phba->brd_no, \ +- mb->mbxCommand, \ +- phba->hba_state, \ ++ pmbox->vport ? pmbox->vport->vpi : 0, \ ++ pmbox->mb.mbxCommand, \ ++ phba->pport->port_state, \ + psli->sli_flag, \ +- flag); ++ flag) + + + /* There are only four IOCB completion types. */ +@@ -65,8 +67,26 @@ + LPFC_ABORT_IOCB + } lpfc_iocb_type; + +-struct lpfc_iocbq * +-lpfc_sli_get_iocbq(struct lpfc_hba * phba) ++ /* SLI-2/SLI-3 provide different sized iocbs. Given a pointer ++ * to the start of the ring, and the slot number of the ++ * desired iocb entry, calc a pointer to that entry. ++ */ ++static inline IOCB_t * ++lpfc_cmd_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) ++{ ++ return (IOCB_t *) (((char *) pring->cmdringaddr) + ++ pring->cmdidx * phba->iocb_cmd_size); ++} ++ ++static inline IOCB_t * ++lpfc_resp_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) ++{ ++ return (IOCB_t *) (((char *) pring->rspringaddr) + ++ pring->rspidx * phba->iocb_rsp_size); ++} ++ ++static struct lpfc_iocbq * ++__lpfc_sli_get_iocbq(struct lpfc_hba *phba) + { + struct list_head *lpfc_iocb_list = &phba->lpfc_iocb_list; + struct lpfc_iocbq * iocbq = NULL; +@@ -75,10 +95,22 @@ + return iocbq; + } + ++struct lpfc_iocbq * ++lpfc_sli_get_iocbq(struct lpfc_hba *phba) ++{ ++ struct lpfc_iocbq * iocbq = NULL; ++ unsigned long iflags; ++ ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ iocbq = __lpfc_sli_get_iocbq(phba); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ return iocbq; ++} ++ + void +-lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq) ++__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) + { +- size_t start_clean = (size_t)(&((struct lpfc_iocbq *)NULL)->iocb); ++ size_t start_clean = offsetof(struct lpfc_iocbq, iocb); + + /* + * Clean all volatile data fields, preserve iotag and node struct. +@@ -87,6 +119,19 @@ + list_add_tail(&iocbq->list, &phba->lpfc_iocb_list); + } + ++void ++lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) ++{ ++ unsigned long iflags; ++ ++ /* ++ * Clean all volatile data fields, preserve iotag and node struct. ++ */ ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ __lpfc_sli_release_iocbq(phba, iocbq); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++} ++ + /* + * Translate the iocb command to an iocb command type used to decide the final + * disposition of each completed IOCB. 
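The lpfc_cmd_iocb()/lpfc_resp_iocb() helpers above replace the old fixed-size IOCB_ENTRY indexing because SLI-3 rings carry larger IOCB entries than SLI-2: a slot address is the ring base plus slot number times the per-entry byte size. The same idiom in self-contained form, with an invented entry size:

#include <stddef.h>
#include <stdio.h>

/* Slot idx lives at base + idx * entry_size, whatever that size is. */
static void *ring_entry(void *base, int idx, size_t entry_size)
{
        return (char *)base + (size_t)idx * entry_size;
}

int main(void)
{
        unsigned char ring[8 * 64];            /* 8 slots of 64 bytes each */
        printf("slot 3 offset: %td\n",
               (char *)ring_entry(ring, 3, 64) - (char *)ring);   /* 192 */
        return 0;
}

In the driver the stride comes from phba->iocb_cmd_size and phba->iocb_rsp_size, which is how one code path serves both SLI revisions.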
+@@ -155,6 +200,9 @@ + case CMD_RCV_ELS_REQ_CX: + case CMD_RCV_SEQUENCE64_CX: + case CMD_RCV_ELS_REQ64_CX: ++ case CMD_IOCB_RCV_SEQ64_CX: ++ case CMD_IOCB_RCV_ELS64_CX: ++ case CMD_IOCB_RCV_CONT64_CX: + type = LPFC_UNSOL_IOCB; + break; + default: +@@ -166,73 +214,77 @@ + } + + static int +-lpfc_sli_ring_map(struct lpfc_hba * phba, LPFC_MBOXQ_t *pmb) ++lpfc_sli_ring_map(struct lpfc_hba *phba) + { + struct lpfc_sli *psli = &phba->sli; +- MAILBOX_t *pmbox = &pmb->mb; +- int i, rc; ++ LPFC_MBOXQ_t *pmb; ++ MAILBOX_t *pmbox; ++ int i, rc, ret = 0; + ++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!pmb) ++ return -ENOMEM; ++ pmbox = &pmb->mb; ++ phba->link_state = LPFC_INIT_MBX_CMDS; + for (i = 0; i < psli->num_rings; i++) { +- phba->hba_state = LPFC_INIT_MBX_CMDS; + lpfc_config_ring(phba, i, pmb); + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); + if (rc != MBX_SUCCESS) { +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, +- "%d:0446 Adapter failed to init, " ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, ++ "%d:0446 Adapter failed to init (%d), " + "mbxCmd x%x CFG_RING, mbxStatus x%x, " + "ring %d\n", +- phba->brd_no, ++ phba->brd_no, rc, + pmbox->mbxCommand, + pmbox->mbxStatus, + i); +- phba->hba_state = LPFC_HBA_ERROR; +- return -ENXIO; ++ phba->link_state = LPFC_HBA_ERROR; ++ ret = -ENXIO; ++ break; + } + } +- return 0; ++ mempool_free(pmb, phba->mbox_mem_pool); ++ return ret; + } + + static int +-lpfc_sli_ringtxcmpl_put(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocb) ++lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *piocb) + { + list_add_tail(&piocb->list, &pring->txcmplq); + pring->txcmplq_cnt++; +- if (unlikely(pring->ringno == LPFC_ELS_RING)) +- mod_timer(&phba->els_tmofunc, ++ if ((unlikely(pring->ringno == LPFC_ELS_RING)) && ++ (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && ++ (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) { ++ if (!piocb->vport) ++ BUG(); ++ else ++ mod_timer(&piocb->vport->els_tmofunc, + jiffies + HZ * (phba->fc_ratov << 1)); ++ } + +- return (0); ++ ++ return 0; + } + + static struct lpfc_iocbq * +-lpfc_sli_ringtx_get(struct lpfc_hba * phba, struct lpfc_sli_ring * pring) ++lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { +- struct list_head *dlp; + struct lpfc_iocbq *cmd_iocb; + +- dlp = &pring->txq; +- cmd_iocb = NULL; +- list_remove_head((&pring->txq), cmd_iocb, +- struct lpfc_iocbq, +- list); +- if (cmd_iocb) { +- /* If the first ptr is not equal to the list header, +- * deque the IOCBQ_t and return it. +- */ ++ list_remove_head((&pring->txq), cmd_iocb, struct lpfc_iocbq, list); ++ if (cmd_iocb != NULL) + pring->txq_cnt--; +- } +- return (cmd_iocb); ++ return cmd_iocb; + } + + static IOCB_t * + lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { +- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; ++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? 
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : ++ &phba->slim2p->mbx.us.s2.port[pring->ringno]; + uint32_t max_cmd_idx = pring->numCiocb; +- IOCB_t *iocb = NULL; + + if ((pring->next_cmdidx == pring->cmdidx) && + (++pring->next_cmdidx >= max_cmd_idx)) +@@ -249,15 +301,17 @@ + phba->brd_no, pring->ringno, + pring->local_getidx, max_cmd_idx); + +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + /* + * All error attention handlers are posted to + * worker thread + */ + phba->work_ha |= HA_ERATT; + phba->work_hs = HS_FFER3; ++ ++ /* hbalock should already be held */ + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); + + return NULL; + } +@@ -266,39 +320,34 @@ + return NULL; + } + +- iocb = IOCB_ENTRY(pring->cmdringaddr, pring->cmdidx); +- +- return iocb; ++ return lpfc_cmd_iocb(phba, pring); + } + + uint16_t +-lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq) ++lpfc_sli_next_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq) + { +- struct lpfc_iocbq ** new_arr; +- struct lpfc_iocbq ** old_arr; ++ struct lpfc_iocbq **new_arr; ++ struct lpfc_iocbq **old_arr; + size_t new_len; + struct lpfc_sli *psli = &phba->sli; + uint16_t iotag; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + iotag = psli->last_iotag; + if(++iotag < psli->iocbq_lookup_len) { + psli->last_iotag = iotag; + psli->iocbq_lookup[iotag] = iocbq; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + iocbq->iotag = iotag; + return iotag; +- } +- else if (psli->iocbq_lookup_len < (0xffff ++ } else if (psli->iocbq_lookup_len < (0xffff + - LPFC_IOCBQ_LOOKUP_INCREMENT)) { + new_len = psli->iocbq_lookup_len + LPFC_IOCBQ_LOOKUP_INCREMENT; +- spin_unlock_irq(phba->host->host_lock); +- new_arr = kmalloc(new_len * sizeof (struct lpfc_iocbq *), ++ spin_unlock_irq(&phba->hbalock); ++ new_arr = kzalloc(new_len * sizeof (struct lpfc_iocbq *), + GFP_KERNEL); + if (new_arr) { +- memset((char *)new_arr, 0, +- new_len * sizeof (struct lpfc_iocbq *)); +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + old_arr = psli->iocbq_lookup; + if (new_len <= psli->iocbq_lookup_len) { + /* highly unprobable case */ +@@ -307,11 +356,11 @@ + if(++iotag < psli->iocbq_lookup_len) { + psli->last_iotag = iotag; + psli->iocbq_lookup[iotag] = iocbq; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + iocbq->iotag = iotag; + return iotag; + } +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + return 0; + } + if (psli->iocbq_lookup) +@@ -322,13 +371,13 @@ + psli->iocbq_lookup_len = new_len; + psli->last_iotag = iotag; + psli->iocbq_lookup[iotag] = iocbq; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + iocbq->iotag = iotag; + kfree(old_arr); + return iotag; + } + } else +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + lpfc_printf_log(phba, KERN_ERR,LOG_SLI, + "%d:0318 Failed to allocate IOTAG.last IOTAG is %d\n", +@@ -349,7 +398,7 @@ + /* + * Issue iocb command to adapter + */ +- lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, sizeof (IOCB_t)); ++ lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, phba->iocb_cmd_size); + wmb(); + pring->stats.iocb_cmd++; + +@@ -361,20 +410,18 @@ + if (nextiocb->iocb_cmpl) + lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb); + else +- lpfc_sli_release_iocbq(phba, nextiocb); ++ __lpfc_sli_release_iocbq(phba, nextiocb); + + /* + * Let the HBA know what IOCB slot 
will be the next one the + * driver will put a command into. + */ + pring->cmdidx = pring->next_cmdidx; +- writel(pring->cmdidx, phba->MBslimaddr +- + (SLIMOFF + (pring->ringno * 2)) * 4); ++ writel(pring->cmdidx, &phba->host_gp[pring->ringno].cmdPutInx); + } + + static void +-lpfc_sli_update_full_ring(struct lpfc_hba * phba, +- struct lpfc_sli_ring *pring) ++lpfc_sli_update_full_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { + int ringno = pring->ringno; + +@@ -393,8 +440,7 @@ + } + + static void +-lpfc_sli_update_ring(struct lpfc_hba * phba, +- struct lpfc_sli_ring *pring) ++lpfc_sli_update_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { + int ringno = pring->ringno; + +@@ -407,7 +453,7 @@ + } + + static void +-lpfc_sli_resume_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring) ++lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { + IOCB_t *iocb; + struct lpfc_iocbq *nextiocb; +@@ -420,7 +466,7 @@ + * (d) IOCB processing is not blocked by the outstanding mbox command. + */ + if (pring->txq_cnt && +- (phba->hba_state > LPFC_LINK_DOWN) && ++ lpfc_is_link_up(phba) && + (pring->ringno != phba->sli.fcp_ring || + phba->sli.sli_flag & LPFC_PROCESS_LA) && + !(pring->flag & LPFC_STOP_IOCB_MBX)) { +@@ -440,11 +486,15 @@ + + /* lpfc_sli_turn_on_ring is only called by lpfc_sli_handle_mb_event below */ + static void +-lpfc_sli_turn_on_ring(struct lpfc_hba * phba, int ringno) ++lpfc_sli_turn_on_ring(struct lpfc_hba *phba, int ringno) + { +- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[ringno]; ++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? ++ &phba->slim2p->mbx.us.s3_pgp.port[ringno] : ++ &phba->slim2p->mbx.us.s2.port[ringno]; ++ unsigned long iflags; + + /* If the ring is active, flag it */ ++ spin_lock_irqsave(&phba->hbalock, iflags); + if (phba->sli.ring[ringno].cmdringaddr) { + if (phba->sli.ring[ringno].flag & LPFC_STOP_IOCB_MBX) { + phba->sli.ring[ringno].flag &= ~LPFC_STOP_IOCB_MBX; +@@ -453,11 +503,176 @@ + */ + phba->sli.ring[ringno].local_getidx + = le32_to_cpu(pgp->cmdGetInx); +- spin_lock_irq(phba->host->host_lock); + lpfc_sli_resume_iocb(phba, &phba->sli.ring[ringno]); +- spin_unlock_irq(phba->host->host_lock); + } + } ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++} ++ ++struct lpfc_hbq_entry * ++lpfc_sli_next_hbq_slot(struct lpfc_hba *phba, uint32_t hbqno) ++{ ++ struct hbq_s *hbqp = &phba->hbqs[hbqno]; ++ ++ if (hbqp->next_hbqPutIdx == hbqp->hbqPutIdx && ++ ++hbqp->next_hbqPutIdx >= hbqp->entry_count) ++ hbqp->next_hbqPutIdx = 0; ++ ++ if (unlikely(hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)) { ++ uint32_t raw_index = phba->hbq_get[hbqno]; ++ uint32_t getidx = le32_to_cpu(raw_index); ++ ++ hbqp->local_hbqGetIdx = getidx; ++ ++ if (unlikely(hbqp->local_hbqGetIdx >= hbqp->entry_count)) { ++ lpfc_printf_log(phba, KERN_ERR, ++ LOG_SLI | LOG_VPORT, ++ "%d:1802 HBQ %d: local_hbqGetIdx " ++ "%u is > than hbqp->entry_count %u\n", ++ phba->brd_no, hbqno, ++ hbqp->local_hbqGetIdx, ++ hbqp->entry_count); ++ ++ phba->link_state = LPFC_HBA_ERROR; ++ return NULL; ++ } ++ ++ if (hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx) ++ return NULL; ++ } ++ ++ return (struct lpfc_hbq_entry *) phba->hbqslimp.virt + hbqp->hbqPutIdx; ++} ++ ++void ++lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba) ++{ ++ struct lpfc_dmabuf *dmabuf, *next_dmabuf; ++ struct hbq_dmabuf *hbq_buf; ++ ++ /* Return all memory used by all HBQs */ ++ list_for_each_entry_safe(dmabuf, next_dmabuf, ++ &phba->hbq_buffer_list, list) { ++ hbq_buf = 
container_of(dmabuf, struct hbq_dmabuf, dbuf); ++ list_del(&hbq_buf->dbuf.list); ++ lpfc_hbq_free(phba, hbq_buf->dbuf.virt, hbq_buf->dbuf.phys); ++ kfree(hbq_buf); ++ } ++} ++ ++static void ++lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno, ++ struct hbq_dmabuf *hbq_buf) ++{ ++ struct lpfc_hbq_entry *hbqe; ++ dma_addr_t physaddr = hbq_buf->dbuf.phys; ++ ++ /* Get next HBQ entry slot to use */ ++ hbqe = lpfc_sli_next_hbq_slot(phba, hbqno); ++ if (hbqe) { ++ struct hbq_s *hbqp = &phba->hbqs[hbqno]; ++ ++ hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr)); ++ hbqe->bde.addrLow = le32_to_cpu(putPaddrLow(physaddr)); ++ hbqe->bde.tus.f.bdeSize = FCELSSIZE; ++ hbqe->bde.tus.f.bdeFlags = 0; ++ hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w); ++ hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag); ++ /* Sync SLIM */ ++ hbqp->hbqPutIdx = hbqp->next_hbqPutIdx; ++ writel(hbqp->hbqPutIdx, phba->hbq_put + hbqno); ++ /* flush */ ++ readl(phba->hbq_put + hbqno); ++ list_add_tail(&hbq_buf->dbuf.list, &phba->hbq_buffer_list); ++ } ++} ++ ++static struct lpfc_hbq_init lpfc_els_hbq = { ++ .rn = 1, ++ .entry_count = 200, ++ .mask_count = 0, ++ .profile = 0, ++ .ring_mask = 1 << LPFC_ELS_RING, ++ .buffer_count = 0, ++ .init_count = 20, ++ .add_count = 5, ++}; ++ ++static struct lpfc_hbq_init *lpfc_hbq_defs[] = { ++ &lpfc_els_hbq, ++}; ++ ++int ++lpfc_sli_hbqbuf_fill_hbqs(struct lpfc_hba *phba, uint32_t hbqno, uint32_t count) ++{ ++ uint32_t i, start, end; ++ struct hbq_dmabuf *hbq_buffer; ++ ++ start = lpfc_hbq_defs[hbqno]->buffer_count; ++ end = count + lpfc_hbq_defs[hbqno]->buffer_count; ++ if (end > lpfc_hbq_defs[hbqno]->entry_count) { ++ end = lpfc_hbq_defs[hbqno]->entry_count; ++ } ++ ++ /* Populate HBQ entries */ ++ for (i = start; i < end; i++) { ++ hbq_buffer = kmalloc(sizeof(struct hbq_dmabuf), ++ GFP_KERNEL); ++ if (!hbq_buffer) ++ return 1; ++ hbq_buffer->dbuf.virt = lpfc_hbq_alloc(phba, MEM_PRI, ++ &hbq_buffer->dbuf.phys); ++ if (hbq_buffer->dbuf.virt == NULL) ++ return 1; ++ hbq_buffer->tag = (i | (hbqno << 16)); ++ lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer); ++ lpfc_hbq_defs[hbqno]->buffer_count++; ++ } ++ return 0; ++} ++ ++int ++lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *phba, uint32_t qno) ++{ ++ return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno, ++ lpfc_hbq_defs[qno]->add_count)); ++} ++ ++int ++lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *phba, uint32_t qno) ++{ ++ return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno, ++ lpfc_hbq_defs[qno]->init_count)); ++} ++ ++struct hbq_dmabuf * ++lpfc_sli_hbqbuf_find(struct lpfc_hba *phba, uint32_t tag) ++{ ++ struct lpfc_dmabuf *d_buf; ++ struct hbq_dmabuf *hbq_buf; ++ ++ list_for_each_entry(d_buf, &phba->hbq_buffer_list, list) { ++ hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf); ++ if ((hbq_buf->tag & 0xffff) == tag) { ++ return hbq_buf; ++ } ++ } ++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT, ++ "%d:1803 Bad hbq tag. 
Data: x%x x%x\n", ++ phba->brd_no, tag, ++ lpfc_hbq_defs[tag >> 16]->buffer_count); ++ return NULL; ++} ++ ++void ++lpfc_sli_free_hbq(struct lpfc_hba *phba, struct hbq_dmabuf *sp) ++{ ++ uint32_t hbqno; ++ ++ if (sp) { ++ hbqno = sp->tag >> 16; ++ lpfc_sli_hbq_to_firmware(phba, hbqno, sp); ++ } + } + + static int +@@ -511,32 +726,38 @@ + case MBX_FLASH_WR_ULA: + case MBX_SET_DEBUG: + case MBX_LOAD_EXP_ROM: ++ case MBX_REG_VPI: ++ case MBX_UNREG_VPI: ++ case MBX_HEARTBEAT: + ret = mbxCommand; + break; + default: + ret = MBX_SHUTDOWN; + break; + } +- return (ret); ++ return ret; + } + static void +-lpfc_sli_wake_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq) ++lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) + { + wait_queue_head_t *pdone_q; ++ unsigned long drvr_flag; + + /* + * If pdone_q is empty, the driver thread gave up waiting and + * continued running. + */ + pmboxq->mbox_flag |= LPFC_MBX_WAKE; ++ spin_lock_irqsave(&phba->hbalock, drvr_flag); + pdone_q = (wait_queue_head_t *) pmboxq->context1; + if (pdone_q) + wake_up_interruptible(pdone_q); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + return; + } + + void +-lpfc_sli_def_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb) ++lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) + { + struct lpfc_dmabuf *mp; + uint16_t rpi; +@@ -553,78 +774,63 @@ + * If a REG_LOGIN succeeded after node is destroyed or node + * is in re-discovery driver need to cleanup the RPI. + */ +- if (!(phba->fc_flag & FC_UNLOADING) && +- (pmb->mb.mbxCommand == MBX_REG_LOGIN64) && +- (!pmb->mb.mbxStatus)) { ++ if (!(phba->pport->load_flag & FC_UNLOADING) && ++ pmb->mb.mbxCommand == MBX_REG_LOGIN64 && ++ !pmb->mb.mbxStatus) { + + rpi = pmb->mb.un.varWords[0]; +- lpfc_unreg_login(phba, rpi, pmb); +- pmb->mbox_cmpl=lpfc_sli_def_mbox_cmpl; ++ lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb); ++ pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); + if (rc != MBX_NOT_FINISHED) + return; + } + +- mempool_free( pmb, phba->mbox_mem_pool); ++ mempool_free(pmb, phba->mbox_mem_pool); + return; + } + + int +-lpfc_sli_handle_mb_event(struct lpfc_hba * phba) ++lpfc_sli_handle_mb_event(struct lpfc_hba *phba) + { +- MAILBOX_t *mbox; + MAILBOX_t *pmbox; + LPFC_MBOXQ_t *pmb; +- struct lpfc_sli *psli; +- int i, rc; +- uint32_t process_next; +- +- psli = &phba->sli; +- /* We should only get here if we are in SLI2 mode */ +- if (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE)) { +- return (1); +- } ++ int rc; ++ LIST_HEAD(cmplq); + + phba->sli.slistat.mbox_event++; + ++ /* Get all completed mailboxe buffers into the cmplq */ ++ spin_lock_irq(&phba->hbalock); ++ list_splice_init(&phba->sli.mboxq_cmpl, &cmplq); ++ spin_unlock_irq(&phba->hbalock); ++ + /* Get a Mailbox buffer to setup mailbox commands for callback */ +- if ((pmb = phba->sli.mbox_active)) { +- pmbox = &pmb->mb; +- mbox = &phba->slim2p->mbx; ++ do { ++ list_remove_head(&cmplq, pmb, LPFC_MBOXQ_t, list); ++ if (pmb == NULL) ++ break; + +- /* First check out the status word */ +- lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof (uint32_t)); ++ pmbox = &pmb->mb; + +- /* Sanity check to ensure the host owns the mailbox */ +- if (pmbox->mbxOwner != OWN_HOST) { +- /* Lets try for a while */ +- for (i = 0; i < 10240; i++) { +- /* First copy command data */ +- lpfc_sli_pcimem_bcopy(mbox, pmbox, +- sizeof (uint32_t)); +- if (pmbox->mbxOwner == OWN_HOST) +- goto mbout; ++ if (pmbox->mbxCommand != MBX_HEARTBEAT) { ++ if (pmb->vport) { ++ 
lpfc_debugfs_disc_trc(pmb->vport, ++ LPFC_DISC_TRC_MBOX_VPORT, ++ "MBOX cmpl vport: cmd:x%x mb:x%x x%x", ++ (uint32_t)pmbox->mbxCommand, ++ pmbox->un.varWords[0], ++ pmbox->un.varWords[1]); ++ } ++ else { ++ lpfc_debugfs_disc_trc(phba->pport, ++ LPFC_DISC_TRC_MBOX, ++ "MBOX cmpl: cmd:x%x mb:x%x x%x", ++ (uint32_t)pmbox->mbxCommand, ++ pmbox->un.varWords[0], ++ pmbox->un.varWords[1]); + } +- /* Stray Mailbox Interrupt, mbxCommand mbxStatus +- */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_MBOX | LOG_SLI, +- "%d:0304 Stray Mailbox Interrupt " +- "mbxCommand x%x mbxStatus x%x\n", +- phba->brd_no, +- pmbox->mbxCommand, +- pmbox->mbxStatus); +- +- spin_lock_irq(phba->host->host_lock); +- phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); +- return (1); + } +- +- mbout: +- del_timer_sync(&phba->sli.mbox_tmo); +- phba->work_hba_events &= ~WORKER_MBOX_TMO; + + /* + * It is a fatal error if unknown mbox command completion. +@@ -633,51 +839,50 @@ + MBX_SHUTDOWN) { + + /* Unknow mailbox command compl */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_MBOX | LOG_SLI, +- "%d:0323 Unknown Mailbox command %x Cmpl\n", ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, ++ "%d (%d):0323 Unknown Mailbox command " ++ "%x Cmpl\n", + phba->brd_no, ++ pmb->vport ? pmb->vport->vpi : 0, + pmbox->mbxCommand); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + phba->work_hs = HS_FFER3; + lpfc_handle_eratt(phba); +- return (0); ++ continue; + } + +- phba->sli.mbox_active = NULL; + if (pmbox->mbxStatus) { + phba->sli.slistat.mbox_stat_err++; + if (pmbox->mbxStatus == MBXERR_NO_RESOURCES) { + /* Mbox cmd cmpl error - RETRYing */ +- lpfc_printf_log(phba, +- KERN_INFO, ++ lpfc_printf_log(phba, KERN_INFO, + LOG_MBOX | LOG_SLI, +- "%d:0305 Mbox cmd cmpl error - " +- "RETRYing Data: x%x x%x x%x x%x\n", ++ "%d (%d):0305 Mbox cmd cmpl " ++ "error - RETRYing Data: x%x " ++ "x%x x%x x%x\n", + phba->brd_no, ++ pmb->vport ? pmb->vport->vpi :0, + pmbox->mbxCommand, + pmbox->mbxStatus, + pmbox->un.varWords[0], +- phba->hba_state); ++ pmb->vport->port_state); + pmbox->mbxStatus = 0; + pmbox->mbxOwner = OWN_HOST; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); + if (rc == MBX_SUCCESS) +- return (0); ++ continue; + } + } + + /* Mailbox cmd Cmpl */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_MBOX | LOG_SLI, +- "%d:0307 Mailbox cmd x%x Cmpl x%p " ++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, ++ "%d (%d):0307 Mailbox cmd x%x Cmpl x%p " + "Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n", + phba->brd_no, ++ pmb->vport ? 
pmb->vport->vpi : 0, + pmbox->mbxCommand, + pmb->mbox_cmpl, + *((uint32_t *) pmbox), +@@ -690,39 +895,35 @@ + pmbox->un.varWords[6], + pmbox->un.varWords[7]); + +- if (pmb->mbox_cmpl) { +- lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE); ++ if (pmb->mbox_cmpl) + pmb->mbox_cmpl(phba,pmb); +- } +- } +- ++ } while (1); ++ return 0; ++} + +- do { +- process_next = 0; /* by default don't loop */ +- spin_lock_irq(phba->host->host_lock); +- phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; ++static struct lpfc_dmabuf * ++lpfc_sli_replace_hbqbuff(struct lpfc_hba *phba, uint32_t tag) ++{ ++ struct hbq_dmabuf *hbq_entry, *new_hbq_entry; + +- /* Process next mailbox command if there is one */ +- if ((pmb = lpfc_mbox_get(phba))) { +- spin_unlock_irq(phba->host->host_lock); +- rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); +- if (rc == MBX_NOT_FINISHED) { +- pmb->mb.mbxStatus = MBX_NOT_FINISHED; +- pmb->mbox_cmpl(phba,pmb); +- process_next = 1; +- continue; /* loop back */ +- } +- } else { +- spin_unlock_irq(phba->host->host_lock); +- /* Turn on IOCB processing */ +- for (i = 0; i < phba->sli.num_rings; i++) +- lpfc_sli_turn_on_ring(phba, i); ++ hbq_entry = lpfc_sli_hbqbuf_find(phba, tag); ++ if (hbq_entry == NULL) ++ return NULL; ++ list_del(&hbq_entry->dbuf.list); ++ new_hbq_entry = kmalloc(sizeof(struct hbq_dmabuf), GFP_ATOMIC); ++ if (new_hbq_entry == NULL) ++ return &hbq_entry->dbuf; ++ new_hbq_entry->dbuf = hbq_entry->dbuf; ++ new_hbq_entry->tag = -1; ++ hbq_entry->dbuf.virt = lpfc_hbq_alloc(phba, 0, &hbq_entry->dbuf.phys); ++ if (hbq_entry->dbuf.virt == NULL) { ++ kfree(new_hbq_entry); ++ return &hbq_entry->dbuf; + } +- +- } while (process_next); +- +- return (0); ++ lpfc_sli_free_hbq(phba, hbq_entry); ++ return &new_hbq_entry->dbuf; + } ++ + static int + lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *saveq) +@@ -735,7 +936,9 @@ + match = 0; + irsp = &(saveq->iocb); + if ((irsp->ulpCommand == CMD_RCV_ELS_REQ64_CX) +- || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)) { ++ || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX) ++ || (irsp->ulpCommand == CMD_IOCB_RCV_ELS64_CX) ++ || (irsp->ulpCommand == CMD_IOCB_RCV_CONT64_CX)) { + Rctl = FC_ELS_REQ; + Type = FC_ELS_DATA; + } else { +@@ -747,13 +950,24 @@ + + /* Firmware Workaround */ + if ((Rctl == 0) && (pring->ringno == LPFC_ELS_RING) && +- (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX)) { ++ (irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX || ++ irsp->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) { + Rctl = FC_ELS_REQ; + Type = FC_ELS_DATA; + w5p->hcsw.Rctl = Rctl; + w5p->hcsw.Type = Type; + } + } ++ ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { ++ if (irsp->ulpBdeCount != 0) ++ saveq->context2 = lpfc_sli_replace_hbqbuff(phba, ++ irsp->un.ulpWord[3]); ++ if (irsp->ulpBdeCount == 2) ++ saveq->context3 = lpfc_sli_replace_hbqbuff(phba, ++ irsp->un.ulpWord[15]); ++ } ++ + /* unSolicited Responses */ + if (pring->prt[0].profile) { + if (pring->prt[0].lpfc_sli_rcv_unsol_event) +@@ -781,23 +995,21 @@ + /* Unexpected Rctl / Type received */ + /* Ring handler: unexpected + Rctl Type received */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_SLI, ++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "%d:0313 Ring %d handler: unexpected Rctl x%x " +- "Type x%x received \n", ++ "Type x%x received\n", + phba->brd_no, + pring->ringno, + Rctl, + Type); + } +- return(1); ++ return 1; + } + + static struct lpfc_iocbq * +-lpfc_sli_iocbq_lookup(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, +- struct lpfc_iocbq * prspiocb) 
++lpfc_sli_iocbq_lookup(struct lpfc_hba *phba, ++ struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *prspiocb) + { + struct lpfc_iocbq *cmd_iocb = NULL; + uint16_t iotag; +@@ -806,7 +1018,7 @@ + + if (iotag != 0 && iotag <= phba->sli.last_iotag) { + cmd_iocb = phba->sli.iocbq_lookup[iotag]; +- list_del(&cmd_iocb->list); ++ list_del_init(&cmd_iocb->list); + pring->txcmplq_cnt--; + return cmd_iocb; + } +@@ -821,16 +1033,18 @@ + } + + static int +-lpfc_sli_process_sol_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, ++lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *saveq) + { +- struct lpfc_iocbq * cmdiocbp; ++ struct lpfc_iocbq *cmdiocbp; + int rc = 1; + unsigned long iflag; + + /* Based on the iotag field, get the cmd IOCB from the txcmplq */ +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); ++ + if (cmdiocbp) { + if (cmdiocbp->iocb_cmpl) { + /* +@@ -846,17 +1060,8 @@ + saveq->iocb.un.ulpWord[4] = + IOERR_SLI_ABORTED; + } +- spin_unlock_irqrestore(phba->host->host_lock, +- iflag); +- (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); +- spin_lock_irqsave(phba->host->host_lock, iflag); + } +- else { +- spin_unlock_irqrestore(phba->host->host_lock, +- iflag); + (cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq); +- spin_lock_irqsave(phba->host->host_lock, iflag); +- } + } else + lpfc_sli_release_iocbq(phba, cmdiocbp); + } else { +@@ -870,12 +1075,12 @@ + * Ring handler: unexpected completion IoTag + * + */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_SLI, +- "%d:0322 Ring %d handler: unexpected " +- "completion IoTag x%x Data: x%x x%x x%x x%x\n", ++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, ++ "%d (%d):0322 Ring %d handler: " ++ "unexpected completion IoTag x%x " ++ "Data: x%x x%x x%x x%x\n", + phba->brd_no, ++ cmdiocbp->vport->vpi, + pring->ringno, + saveq->iocb.ulpIoTag, + saveq->iocb.ulpStatus, +@@ -885,14 +1090,15 @@ + } + } + +- spin_unlock_irqrestore(phba->host->host_lock, iflag); + return rc; + } + +-static void lpfc_sli_rsp_pointers_error(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring) ++static void ++lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { +- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; ++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? 
++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : ++ &phba->slim2p->mbx.us.s2.port[pring->ringno]; + /* + * Ring handler: portRspPut is bigger then + * rsp ring +@@ -904,7 +1110,7 @@ + le32_to_cpu(pgp->rspPutInx), + pring->numRiocb); + +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + + /* + * All error attention handlers are posted to +@@ -912,16 +1118,18 @@ + */ + phba->work_ha |= HA_ERATT; + phba->work_hs = HS_FFER3; ++ ++ /* hbalock should already be held */ + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); + + return; + } + +-void lpfc_sli_poll_fcp_ring(struct lpfc_hba * phba) ++void lpfc_sli_poll_fcp_ring(struct lpfc_hba *phba) + { +- struct lpfc_sli * psli = &phba->sli; +- struct lpfc_sli_ring * pring = &psli->ring[LPFC_FCP_RING]; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring = &psli->ring[LPFC_FCP_RING]; + IOCB_t *irsp = NULL; + IOCB_t *entry = NULL; + struct lpfc_iocbq *cmdiocbq = NULL; +@@ -931,13 +1139,15 @@ + uint32_t portRspPut, portRspMax; + int type; + uint32_t rsp_cmpl = 0; +- void __iomem *to_slim; + uint32_t ha_copy; ++ unsigned long iflags; + + pring->stats.iocb_event++; + +- /* The driver assumes SLI-2 mode */ +- pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; ++ pgp = (phba->sli_rev == 3) ? ++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : ++ &phba->slim2p->mbx.us.s2.port[pring->ringno]; ++ + + /* + * The next available response entry should never exceed the maximum +@@ -952,15 +1162,13 @@ + + rmb(); + while (pring->rspidx != portRspPut) { +- +- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); +- ++ entry = lpfc_resp_iocb(phba, pring); + if (++pring->rspidx >= portRspMax) + pring->rspidx = 0; + + lpfc_sli_pcimem_bcopy((uint32_t *) entry, + (uint32_t *) &rspiocbq.iocb, +- sizeof (IOCB_t)); ++ phba->iocb_rsp_size); + irsp = &rspiocbq.iocb; + type = lpfc_sli_iocb_cmd_type(irsp->ulpCommand & CMD_IOCB_MASK); + pring->stats.iocb_rsp++; +@@ -998,8 +1206,10 @@ + break; + } + ++ spin_lock_irqsave(&phba->hbalock, iflags); + cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring, + &rspiocbq); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); + if ((cmdiocbq) && (cmdiocbq->iocb_cmpl)) { + (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, + &rspiocbq); +@@ -1033,9 +1243,7 @@ + * been updated, sync the pgp->rspPutInx and fetch the new port + * response put pointer. + */ +- to_slim = phba->MBslimaddr + +- (SLIMOFF + (pring->ringno * 2) + 1) * 4; +- writeb(pring->rspidx, to_slim); ++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); + + if (pring->rspidx == portRspPut) + portRspPut = le32_to_cpu(pgp->rspPutInx); +@@ -1045,13 +1253,16 @@ + ha_copy >>= (LPFC_FCP_RING * 4); + + if ((rsp_cmpl > 0) && (ha_copy & HA_R0RE_REQ)) { ++ spin_lock_irqsave(&phba->hbalock, iflags); + pring->stats.iocb_rsp_full++; + status = ((CA_R0ATT | CA_R0RE_RSP) << (LPFC_FCP_RING * 4)); + writel(status, phba->CAregaddr); + readl(phba->CAregaddr); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); + } + if ((ha_copy & HA_R0CE_RSP) && + (pring->flag & LPFC_CALL_RING_AVAILABLE)) { ++ spin_lock_irqsave(&phba->hbalock, iflags); + pring->flag &= ~LPFC_CALL_RING_AVAILABLE; + pring->stats.iocb_cmd_empty++; + +@@ -1062,6 +1273,7 @@ + if ((pring->lpfc_sli_cmd_available)) + (pring->lpfc_sli_cmd_available) (phba, pring); + ++ spin_unlock_irqrestore(&phba->hbalock, iflags); + } + + return; +@@ -1072,10 +1284,12 @@ + * to check it explicitly. 
+ */ + static int +-lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, uint32_t mask) ++lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba, ++ struct lpfc_sli_ring *pring, uint32_t mask) + { +- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; ++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? ++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : ++ &phba->slim2p->mbx.us.s2.port[pring->ringno]; + IOCB_t *irsp = NULL; + IOCB_t *entry = NULL; + struct lpfc_iocbq *cmdiocbq = NULL; +@@ -1086,9 +1300,8 @@ + lpfc_iocb_type type; + unsigned long iflag; + uint32_t rsp_cmpl = 0; +- void __iomem *to_slim; + +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + pring->stats.iocb_event++; + + /* +@@ -1099,7 +1312,7 @@ + portRspPut = le32_to_cpu(pgp->rspPutInx); + if (unlikely(portRspPut >= portRspMax)) { + lpfc_sli_rsp_pointers_error(phba, pring); +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + return 1; + } + +@@ -1110,14 +1323,15 @@ + * structure. The copy involves a byte-swap since the + * network byte order and pci byte orders are different. + */ +- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); ++ entry = lpfc_resp_iocb(phba, pring); ++ phba->last_completion_time = jiffies; + + if (++pring->rspidx >= portRspMax) + pring->rspidx = 0; + + lpfc_sli_pcimem_bcopy((uint32_t *) entry, + (uint32_t *) &rspiocbq.iocb, +- sizeof (IOCB_t)); ++ phba->iocb_rsp_size); + INIT_LIST_HEAD(&(rspiocbq.list)); + irsp = &rspiocbq.iocb; + +@@ -1126,14 +1340,28 @@ + rsp_cmpl++; + + if (unlikely(irsp->ulpStatus)) { ++ /* ++ * If resource errors reported from HBA, reduce ++ * queuedepths of the SCSI device. ++ */ ++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && ++ (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) { ++ spin_unlock_irqrestore(&phba->hbalock, iflag); ++ lpfc_adjust_queue_depth(phba); ++ spin_lock_irqsave(&phba->hbalock, iflag); ++ } ++ + /* Rsp ring error: IOCB */ + lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, + "%d:0336 Rsp Ring %d error: IOCB Data: " + "x%x x%x x%x x%x x%x x%x x%x x%x\n", + phba->brd_no, pring->ringno, +- irsp->un.ulpWord[0], irsp->un.ulpWord[1], +- irsp->un.ulpWord[2], irsp->un.ulpWord[3], +- irsp->un.ulpWord[4], irsp->un.ulpWord[5], ++ irsp->un.ulpWord[0], ++ irsp->un.ulpWord[1], ++ irsp->un.ulpWord[2], ++ irsp->un.ulpWord[3], ++ irsp->un.ulpWord[4], ++ irsp->un.ulpWord[5], + *(((uint32_t *) irsp) + 6), + *(((uint32_t *) irsp) + 7)); + } +@@ -1149,7 +1377,8 @@ + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "%d:0333 IOCB cmd 0x%x" + " processed. 
Skipping" +- " completion\n", phba->brd_no, ++ " completion\n", ++ phba->brd_no, + irsp->ulpCommand); + break; + } +@@ -1161,19 +1390,19 @@ + (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, + &rspiocbq); + } else { +- spin_unlock_irqrestore( +- phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, ++ iflag); + (cmdiocbq->iocb_cmpl)(phba, cmdiocbq, + &rspiocbq); +- spin_lock_irqsave(phba->host->host_lock, ++ spin_lock_irqsave(&phba->hbalock, + iflag); + } + } + break; + case LPFC_UNSOL_IOCB: +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + lpfc_sli_process_unsol_iocb(phba, pring, &rspiocbq); +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + break; + default: + if (irsp->ulpCommand == CMD_ADAPTER_MSG) { +@@ -1188,8 +1417,10 @@ + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "%d:0334 Unknown IOCB command " + "Data: x%x, x%x x%x x%x x%x\n", +- phba->brd_no, type, irsp->ulpCommand, +- irsp->ulpStatus, irsp->ulpIoTag, ++ phba->brd_no, type, ++ irsp->ulpCommand, ++ irsp->ulpStatus, ++ irsp->ulpIoTag, + irsp->ulpContext); + } + break; +@@ -1201,9 +1432,7 @@ + * been updated, sync the pgp->rspPutInx and fetch the new port + * response put pointer. + */ +- to_slim = phba->MBslimaddr + +- (SLIMOFF + (pring->ringno * 2) + 1) * 4; +- writel(pring->rspidx, to_slim); ++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); + + if (pring->rspidx == portRspPut) + portRspPut = le32_to_cpu(pgp->rspPutInx); +@@ -1228,31 +1457,31 @@ + + } + +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + return rc; + } + +- + int +-lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, uint32_t mask) ++lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba, ++ struct lpfc_sli_ring *pring, uint32_t mask) + { ++ struct lpfc_pgp *pgp = (phba->sli_rev == 3) ? ++ &phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] : ++ &phba->slim2p->mbx.us.s2.port[pring->ringno]; + IOCB_t *entry; + IOCB_t *irsp = NULL; + struct lpfc_iocbq *rspiocbp = NULL; + struct lpfc_iocbq *next_iocb; + struct lpfc_iocbq *cmdiocbp; + struct lpfc_iocbq *saveq; +- struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno]; + uint8_t iocb_cmd_type; + lpfc_iocb_type type; + uint32_t status, free_saveq; + uint32_t portRspPut, portRspMax; + int rc = 1; + unsigned long iflag; +- void __iomem *to_slim; + +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + pring->stats.iocb_event++; + + /* +@@ -1266,16 +1495,14 @@ + * Ring handler: portRspPut is bigger then + * rsp ring + */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_SLI, ++ lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "%d:0303 Ring %d handler: portRspPut %d " + "is bigger then rsp ring %d\n", +- phba->brd_no, +- pring->ringno, portRspPut, portRspMax); ++ phba->brd_no, pring->ringno, portRspPut, ++ portRspMax); + +- phba->hba_state = LPFC_HBA_ERROR; +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ phba->link_state = LPFC_HBA_ERROR; ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + + phba->work_hs = HS_FFER3; + lpfc_handle_eratt(phba); +@@ -1298,23 +1525,24 @@ + * the ulpLe field is set, the entire Command has been + * received. 
+ */ +- entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx); +- rspiocbp = lpfc_sli_get_iocbq(phba); ++ entry = lpfc_resp_iocb(phba, pring); ++ ++ phba->last_completion_time = jiffies; ++ rspiocbp = __lpfc_sli_get_iocbq(phba); + if (rspiocbp == NULL) { + printk(KERN_ERR "%s: out of buffers! Failing " + "completion.\n", __FUNCTION__); + break; + } + +- lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, sizeof (IOCB_t)); ++ lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, ++ phba->iocb_rsp_size); + irsp = &rspiocbp->iocb; + + if (++pring->rspidx >= portRspMax) + pring->rspidx = 0; + +- to_slim = phba->MBslimaddr + (SLIMOFF + (pring->ringno * 2) +- + 1) * 4; +- writel(pring->rspidx, to_slim); ++ writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx); + + if (list_empty(&(pring->iocb_continueq))) { + list_add(&rspiocbp->list, &(pring->iocb_continueq)); +@@ -1338,13 +1566,26 @@ + + pring->stats.iocb_rsp++; + ++ /* ++ * If resource errors reported from HBA, reduce ++ * queuedepths of the SCSI device. ++ */ ++ if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) && ++ (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) { ++ spin_unlock_irqrestore(&phba->hbalock, iflag); ++ lpfc_adjust_queue_depth(phba); ++ spin_lock_irqsave(&phba->hbalock, iflag); ++ } ++ + if (irsp->ulpStatus) { + /* Rsp ring error: IOCB */ +- lpfc_printf_log(phba, +- KERN_WARNING, +- LOG_SLI, +- "%d:0328 Rsp Ring %d error: IOCB Data: " +- "x%x x%x x%x x%x x%x x%x x%x x%x\n", ++ lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, ++ "%d:0328 Rsp Ring %d error: " ++ "IOCB Data: " ++ "x%x x%x x%x x%x " ++ "x%x x%x x%x x%x " ++ "x%x x%x x%x x%x " ++ "x%x x%x x%x x%x\n", + phba->brd_no, + pring->ringno, + irsp->un.ulpWord[0], +@@ -1354,7 +1595,15 @@ + irsp->un.ulpWord[4], + irsp->un.ulpWord[5], + *(((uint32_t *) irsp) + 6), +- *(((uint32_t *) irsp) + 7)); ++ *(((uint32_t *) irsp) + 7), ++ *(((uint32_t *) irsp) + 8), ++ *(((uint32_t *) irsp) + 9), ++ *(((uint32_t *) irsp) + 10), ++ *(((uint32_t *) irsp) + 11), ++ *(((uint32_t *) irsp) + 12), ++ *(((uint32_t *) irsp) + 13), ++ *(((uint32_t *) irsp) + 14), ++ *(((uint32_t *) irsp) + 15)); + } + + /* +@@ -1366,17 +1615,17 @@ + iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK; + type = lpfc_sli_iocb_cmd_type(iocb_cmd_type); + if (type == LPFC_SOL_IOCB) { +- spin_unlock_irqrestore(phba->host->host_lock, ++ spin_unlock_irqrestore(&phba->hbalock, + iflag); + rc = lpfc_sli_process_sol_iocb(phba, pring, + saveq); +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + } else if (type == LPFC_UNSOL_IOCB) { +- spin_unlock_irqrestore(phba->host->host_lock, ++ spin_unlock_irqrestore(&phba->hbalock, + iflag); + rc = lpfc_sli_process_unsol_iocb(phba, pring, + saveq); +- spin_lock_irqsave(phba->host->host_lock, iflag); ++ spin_lock_irqsave(&phba->hbalock, iflag); + } else if (type == LPFC_ABORT_IOCB) { + if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) && + ((cmdiocbp = +@@ -1386,15 +1635,15 @@ + routine */ + if (cmdiocbp->iocb_cmpl) { + spin_unlock_irqrestore( +- phba->host->host_lock, ++ &phba->hbalock, + iflag); + (cmdiocbp->iocb_cmpl) (phba, + cmdiocbp, saveq); + spin_lock_irqsave( +- phba->host->host_lock, ++ &phba->hbalock, + iflag); + } else +- lpfc_sli_release_iocbq(phba, ++ __lpfc_sli_release_iocbq(phba, + cmdiocbp); + } + } else if (type == LPFC_UNKNOWN_IOCB) { +@@ -1411,11 +1660,10 @@ + phba->brd_no, adaptermsg); + } else { + /* Unknown IOCB command */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_SLI, +- "%d:0335 Unknown IOCB command " +- "Data: x%x x%x x%x x%x\n", ++ 
lpfc_printf_log(phba, KERN_ERR, LOG_SLI, ++ "%d:0335 Unknown IOCB " ++ "command Data: x%x " ++ "x%x x%x x%x\n", + phba->brd_no, + irsp->ulpCommand, + irsp->ulpStatus, +@@ -1425,18 +1673,15 @@ + } + + if (free_saveq) { +- if (!list_empty(&saveq->list)) { +- list_for_each_entry_safe(rspiocbp, +- next_iocb, +- &saveq->list, +- list) { ++ list_for_each_entry_safe(rspiocbp, next_iocb, ++ &saveq->list, list) { + list_del(&rspiocbp->list); +- lpfc_sli_release_iocbq(phba, ++ __lpfc_sli_release_iocbq(phba, + rspiocbp); + } ++ __lpfc_sli_release_iocbq(phba, saveq); + } +- lpfc_sli_release_iocbq(phba, saveq); +- } ++ rspiocbp = NULL; + } + + /* +@@ -1449,7 +1694,7 @@ + } + } /* while (pring->rspidx != portRspPut) */ + +- if ((rspiocbp != 0) && (mask & HA_R0RE_REQ)) { ++ if ((rspiocbp != NULL) && (mask & HA_R0RE_REQ)) { + /* At least one response entry has been freed */ + pring->stats.iocb_rsp_full++; + /* SET RxRE_RSP in Chip Att register */ +@@ -1470,24 +1715,25 @@ + + } + +- spin_unlock_irqrestore(phba->host->host_lock, iflag); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + return rc; + } + +-int ++void + lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring) + { + LIST_HEAD(completions); + struct lpfc_iocbq *iocb, *next_iocb; + IOCB_t *cmd = NULL; +- int errcnt; + +- errcnt = 0; ++ if (pring->ringno == LPFC_ELS_RING) { ++ lpfc_fabric_abort_hba(phba); ++ } + + /* Error everything on txq and txcmplq + * First do the txq. + */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + list_splice_init(&pring->txq, &completions); + pring->txq_cnt = 0; + +@@ -1495,26 +1741,25 @@ + list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) + lpfc_sli_issue_abort_iotag(phba, pring, iocb); + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + while (!list_empty(&completions)) { + iocb = list_get_first(&completions, struct lpfc_iocbq, list); + cmd = &iocb->iocb; +- list_del(&iocb->list); ++ list_del_init(&iocb->list); + +- if (iocb->iocb_cmpl) { ++ if (!iocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, iocb); ++ else { + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_ABORTED; + (iocb->iocb_cmpl) (phba, iocb, iocb); +- } else +- lpfc_sli_release_iocbq(phba, iocb); + } +- +- return errcnt; ++ } + } + + int +-lpfc_sli_brdready(struct lpfc_hba * phba, uint32_t mask) ++lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask) + { + uint32_t status; + int i = 0; +@@ -1541,7 +1786,8 @@ + msleep(2500); + + if (i == 15) { +- phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */ ++ /* Do post */ ++ phba->pport->port_state = LPFC_VPORT_UNKNOWN; + lpfc_sli_brdrestart(phba); + } + /* Read the HBA Host Status Register */ +@@ -1550,7 +1796,7 @@ + + /* Check to see if any errors occurred during init */ + if ((status & HS_FFERM) || (i >= 20)) { +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + retval = 1; + } + +@@ -1559,7 +1805,7 @@ + + #define BARRIER_TEST_PATTERN (0xdeadbeef) + +-void lpfc_reset_barrier(struct lpfc_hba * phba) ++void lpfc_reset_barrier(struct lpfc_hba *phba) + { + uint32_t __iomem *resp_buf; + uint32_t __iomem *mbox_buf; +@@ -1584,12 +1830,12 @@ + hc_copy = readl(phba->HCregaddr); + writel((hc_copy & ~HC_ERINT_ENA), phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- phba->fc_flag |= FC_IGNORE_ERATT; ++ phba->link_flag |= LS_IGNORE_ERATT; + + if (readl(phba->HAregaddr) & HA_ERATT) { + /* Clear Chip error bit */ + writel(HA_ERATT, phba->HAregaddr); +- phba->stopped = 1; 
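/*
 * Editor's note - illustrative sketch only, NOT part of trellis.patch.
 * It models the reset-barrier pattern used by lpfc_reset_barrier() in the
 * hunk above: mask the error-attention interrupt, raise an "ignore ERATT"
 * flag so the handler stays quiet, acknowledge any latched error, and
 * restore the original mask afterwards.  Every name below (fake_mmio,
 * mmio_read32, the bit values) is a stand-in, not an lpfc or kernel API.
 */
#include <stdint.h>
#include <stdio.h>

#define HC_ERINT_ENA    (1u << 0)   /* stand-in for the real bit value */
#define HA_ERATT        (1u << 1)
#define LS_IGNORE_ERATT (1u << 2)

static uint32_t fake_mmio[2];       /* [0] = HC register, [1] = HA register */

static uint32_t mmio_read32(int reg)
{
	return fake_mmio[reg];
}

static void mmio_write32(int reg, uint32_t val)
{
	if (reg == 1)
		fake_mmio[1] &= ~val;   /* HA is write-1-to-clear, like real HW */
	else
		fake_mmio[reg] = val;
}

static void reset_barrier(uint32_t *link_flag)
{
	uint32_t hc_saved = mmio_read32(0);

	/* 1. Mask the error-attention interrupt; read back to flush. */
	mmio_write32(0, hc_saved & ~HC_ERINT_ENA);
	(void)mmio_read32(0);

	/* 2. Tell the rest of the driver to ignore ERATT during the reset. */
	*link_flag |= LS_IGNORE_ERATT;

	/* 3. Acknowledge a latched error attention, if one is pending. */
	if (mmio_read32(1) & HA_ERATT)
		mmio_write32(1, HA_ERATT);

	/* ... the real code issues its mailbox barrier write here ... */

	/* 4. Drop the ignore flag and restore the saved interrupt mask. */
	*link_flag &= ~LS_IGNORE_ERATT;
	mmio_write32(0, hc_saved);
	(void)mmio_read32(0);
}

int main(void)
{
	uint32_t link_flag = 0;

	fake_mmio[0] = HC_ERINT_ENA;    /* interrupts initially enabled */
	fake_mmio[1] = HA_ERATT;        /* pretend an error is latched  */
	reset_barrier(&link_flag);
	printf("HC=0x%x HA=0x%x flag=0x%x\n",
	       fake_mmio[0], fake_mmio[1], link_flag);
	return 0;
}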
++ phba->pport->stopped = 1; + } + + mbox = 0; +@@ -1606,7 +1852,7 @@ + + if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) { + if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE || +- phba->stopped) ++ phba->pport->stopped) + goto restore_hc; + else + goto clear_errat; +@@ -1623,17 +1869,17 @@ + + if (readl(phba->HAregaddr) & HA_ERATT) { + writel(HA_ERATT, phba->HAregaddr); +- phba->stopped = 1; ++ phba->pport->stopped = 1; + } + + restore_hc: +- phba->fc_flag &= ~FC_IGNORE_ERATT; ++ phba->link_flag &= ~LS_IGNORE_ERATT; + writel(hc_copy, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ + } + + int +-lpfc_sli_brdkill(struct lpfc_hba * phba) ++lpfc_sli_brdkill(struct lpfc_hba *phba) + { + struct lpfc_sli *psli; + LPFC_MBOXQ_t *pmb; +@@ -1645,26 +1891,22 @@ + psli = &phba->sli; + + /* Kill HBA */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_SLI, ++ lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "%d:0329 Kill HBA Data: x%x x%x\n", +- phba->brd_no, +- phba->hba_state, +- psli->sli_flag); ++ phba->brd_no, phba->pport->port_state, psli->sli_flag); + + if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL)) == 0) + return 1; + + /* Disable the error attention */ +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + status = readl(phba->HCregaddr); + status &= ~HC_ERINT_ENA; + writel(status, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- phba->fc_flag |= FC_IGNORE_ERATT; +- spin_unlock_irq(phba->host->host_lock); ++ phba->link_flag |= LS_IGNORE_ERATT; ++ spin_unlock_irq(&phba->hbalock); + + lpfc_kill_board(phba, pmb); + pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl; +@@ -1673,9 +1915,9 @@ + if (retval != MBX_SUCCESS) { + if (retval != MBX_BUSY) + mempool_free(pmb, phba->mbox_mem_pool); +- spin_lock_irq(phba->host->host_lock); +- phba->fc_flag &= ~FC_IGNORE_ERATT; +- spin_unlock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); ++ phba->link_flag &= ~LS_IGNORE_ERATT; ++ spin_unlock_irq(&phba->hbalock); + return 1; + } + +@@ -1698,22 +1940,22 @@ + del_timer_sync(&psli->mbox_tmo); + if (ha_copy & HA_ERATT) { + writel(HA_ERATT, phba->HAregaddr); +- phba->stopped = 1; ++ phba->pport->stopped = 1; + } +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +- phba->fc_flag &= ~FC_IGNORE_ERATT; +- spin_unlock_irq(phba->host->host_lock); ++ phba->link_flag &= ~LS_IGNORE_ERATT; ++ spin_unlock_irq(&phba->hbalock); + + psli->mbox_active = NULL; + lpfc_hba_down_post(phba); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + +- return (ha_copy & HA_ERATT ? 0 : 1); ++ return ha_copy & HA_ERATT ? 
0 : 1; + } + + int +-lpfc_sli_brdreset(struct lpfc_hba * phba) ++lpfc_sli_brdreset(struct lpfc_hba *phba) + { + struct lpfc_sli *psli; + struct lpfc_sli_ring *pring; +@@ -1725,12 +1967,12 @@ + /* Reset HBA */ + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "%d:0325 Reset HBA Data: x%x x%x\n", phba->brd_no, +- phba->hba_state, psli->sli_flag); ++ phba->pport->port_state, psli->sli_flag); + + /* perform board reset */ + phba->fc_eventTag = 0; +- phba->fc_myDID = 0; +- phba->fc_prevDID = 0; ++ phba->pport->fc_myDID = 0; ++ phba->pport->fc_prevDID = 0; + + /* Turn off parity checking and serr during the physical reset */ + pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value); +@@ -1760,12 +2002,12 @@ + pring->missbufcnt = 0; + } + +- phba->hba_state = LPFC_WARM_START; ++ phba->link_state = LPFC_WARM_START; + return 0; + } + + int +-lpfc_sli_brdrestart(struct lpfc_hba * phba) ++lpfc_sli_brdrestart(struct lpfc_hba *phba) + { + MAILBOX_t *mb; + struct lpfc_sli *psli; +@@ -1773,14 +2015,14 @@ + volatile uint32_t word0; + void __iomem *to_slim; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + + psli = &phba->sli; + + /* Restart HBA */ + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "%d:0337 Restart HBA Data: x%x x%x\n", phba->brd_no, +- phba->hba_state, psli->sli_flag); ++ phba->pport->port_state, psli->sli_flag); + + word0 = 0; + mb = (MAILBOX_t *) &word0; +@@ -1794,7 +2036,7 @@ + readl(to_slim); /* flush */ + + /* Only skip post after fc_ffinit is completed */ +- if (phba->hba_state) { ++ if (phba->pport->port_state) { + skip_post = 1; + word0 = 1; /* This is really setting up word1 */ + } else { +@@ -1806,10 +2048,10 @@ + readl(to_slim); /* flush */ + + lpfc_sli_brdreset(phba); +- phba->stopped = 0; +- phba->hba_state = LPFC_INIT_START; ++ phba->pport->stopped = 0; ++ phba->link_state = LPFC_INIT_START; + +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets)); + psli->stats_start = get_seconds(); +@@ -1843,14 +2085,11 @@ + if (i++ >= 20) { + /* Adapter failed to init, timeout, status reg + */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0436 Adapter failed to init, " + "timeout, status reg x%x\n", +- phba->brd_no, +- status); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->brd_no, status); ++ phba->link_state = LPFC_HBA_ERROR; + return -ETIMEDOUT; + } + +@@ -1859,14 +2098,12 @@ + /* ERROR: During chipset initialization */ + /* Adapter failed to init, chipset, status reg + */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0437 Adapter failed to init, " + "chipset, status reg x%x\n", + phba->brd_no, + status); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + return -EIO; + } + +@@ -1879,7 +2116,8 @@ + } + + if (i == 15) { +- phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */ ++ /* Do post */ ++ phba->pport->port_state = LPFC_VPORT_UNKNOWN; + lpfc_sli_brdrestart(phba); + } + /* Read the HBA Host Status Register */ +@@ -1890,14 +2128,12 @@ + if (status & HS_FFERM) { + /* ERROR: During chipset initialization */ + /* Adapter failed to init, chipset, status reg */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_INIT, ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0438 Adapter failed to init, chipset, " + "status reg x%x\n", + phba->brd_no, + status); +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + return -EIO; 
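/*
 * Editor's note - illustrative sketch only, NOT part of trellis.patch.
 * It models the readiness-polling loop in the chipset-init hunk above:
 * re-read a status register, fail with -EIO on a latched error bit, give
 * up with -ETIMEDOUT after a bounded number of tries, and back off
 * between polls.  HS_FFRDY/HS_FFERM values and poll_status() are
 * stand-ins; the delays are shortened so the demo runs quickly.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define HS_FFRDY (1u << 0)          /* "function ready" - stand-in value */
#define HS_FFERM (1u << 1)          /* "fatal error"    - stand-in value */

/* Fake device: reports ready on the 4th status read. */
static uint32_t poll_status(void)
{
	static int reads;
	return (++reads >= 4) ? HS_FFRDY : 0;
}

static int wait_for_ready(void)
{
	uint32_t status;
	int i;

	for (i = 0; i < 20; i++) {
		status = poll_status();

		if (status & HS_FFERM)  /* latched hardware error */
			return -EIO;
		if (status & HS_FFRDY)  /* adapter is ready */
			return 0;

		/* Escalating delay, like the 5ms/10ms/2500ms steps above. */
		usleep(i < 5 ? 5000 : i < 10 ? 10000 : 50000);
	}
	return -ETIMEDOUT;              /* gave up, mirroring the hunk above */
}

int main(void)
{
	int rc = wait_for_ready();

	printf("wait_for_ready() = %d\n", rc);
	return rc ? 1 : 0;
}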
+ } + +@@ -1911,68 +2147,239 @@ + return 0; + } + +-int +-lpfc_sli_hba_setup(struct lpfc_hba * phba) ++static int ++lpfc_sli_hbq_count(void) + { +- LPFC_MBOXQ_t *pmb; +- uint32_t resetcount = 0, rc = 0, done = 0; +- +- pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); +- if (!pmb) { +- phba->hba_state = LPFC_HBA_ERROR; +- return -ENOMEM; +- } ++ return ARRAY_SIZE(lpfc_hbq_defs); ++} + ++static int ++lpfc_sli_hbq_entry_count(void) ++{ ++ int hbq_count = lpfc_sli_hbq_count(); ++ int count = 0; ++ int i; ++ ++ for (i = 0; i < hbq_count; ++i) ++ count += lpfc_hbq_defs[i]->entry_count; ++ return count; ++} ++ ++int ++lpfc_sli_hbq_size(void) ++{ ++ return lpfc_sli_hbq_entry_count() * sizeof(struct lpfc_hbq_entry); ++} ++ ++static int ++lpfc_sli_hbq_setup(struct lpfc_hba *phba) ++{ ++ int hbq_count = lpfc_sli_hbq_count(); ++ LPFC_MBOXQ_t *pmb; ++ MAILBOX_t *pmbox; ++ uint32_t hbqno; ++ uint32_t hbq_entry_index; ++ ++ /* Get a Mailbox buffer to setup mailbox ++ * commands for HBA initialization ++ */ ++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ ++ if (!pmb) ++ return -ENOMEM; ++ ++ pmbox = &pmb->mb; ++ ++ /* Initialize the struct lpfc_sli_hbq structure for each hbq */ ++ phba->link_state = LPFC_INIT_MBX_CMDS; ++ ++ hbq_entry_index = 0; ++ for (hbqno = 0; hbqno < hbq_count; ++hbqno) { ++ phba->hbqs[hbqno].next_hbqPutIdx = 0; ++ phba->hbqs[hbqno].hbqPutIdx = 0; ++ phba->hbqs[hbqno].local_hbqGetIdx = 0; ++ phba->hbqs[hbqno].entry_count = ++ lpfc_hbq_defs[hbqno]->entry_count; ++ lpfc_config_hbq(phba, lpfc_hbq_defs[hbqno], hbq_entry_index, ++ pmb); ++ hbq_entry_index += phba->hbqs[hbqno].entry_count; ++ ++ if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) { ++ /* Adapter failed to init, mbxCmd CFG_RING, ++ mbxStatus , ring */ ++ ++ lpfc_printf_log(phba, KERN_ERR, ++ LOG_SLI | LOG_VPORT, ++ "%d:1805 Adapter failed to init. " ++ "Data: x%x x%x x%x\n", ++ phba->brd_no, pmbox->mbxCommand, ++ pmbox->mbxStatus, hbqno); ++ ++ phba->link_state = LPFC_HBA_ERROR; ++ mempool_free(pmb, phba->mbox_mem_pool); ++ return ENXIO; ++ } ++ } ++ phba->hbq_count = hbq_count; ++ ++ mempool_free(pmb, phba->mbox_mem_pool); ++ ++ /* Initially populate or replenish the HBQs */ ++ for (hbqno = 0; hbqno < hbq_count; ++hbqno) { ++ if (lpfc_sli_hbqbuf_init_hbqs(phba, hbqno)) ++ return -ENOMEM; ++ } ++ return 0; ++} ++ ++static int ++lpfc_do_config_port(struct lpfc_hba *phba, int sli_mode) ++{ ++ LPFC_MBOXQ_t *pmb; ++ uint32_t resetcount = 0, rc = 0, done = 0; ++ ++ pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!pmb) { ++ phba->link_state = LPFC_HBA_ERROR; ++ return -ENOMEM; ++ } ++ ++ phba->sli_rev = sli_mode; + while (resetcount < 2 && !done) { +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); +- phba->hba_state = LPFC_STATE_UNKNOWN; ++ spin_unlock_irq(&phba->hbalock); ++ phba->pport->port_state = LPFC_VPORT_UNKNOWN; + lpfc_sli_brdrestart(phba); + msleep(2500); + rc = lpfc_sli_chipset_init(phba); + if (rc) + break; + +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + resetcount++; + +- /* Call pre CONFIG_PORT mailbox command initialization. A value of 0 +- * means the call was successful. 
Any other nonzero value is a failure, +- * but if ERESTART is returned, the driver may reset the HBA and try +- * again. ++ /* Call pre CONFIG_PORT mailbox command initialization. A ++ * value of 0 means the call was successful. Any other ++ * nonzero value is a failure, but if ERESTART is returned, ++ * the driver may reset the HBA and try again. + */ + rc = lpfc_config_port_prep(phba); + if (rc == -ERESTART) { +- phba->hba_state = 0; ++ phba->link_state = LPFC_LINK_UNKNOWN; + continue; + } else if (rc) { + break; + } + +- phba->hba_state = LPFC_INIT_MBX_CMDS; ++ phba->link_state = LPFC_INIT_MBX_CMDS; + lpfc_config_port(phba, pmb); + rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL); +- if (rc == MBX_SUCCESS) +- done = 1; +- else { ++ if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0442 Adapter failed to init, mbxCmd x%x " + "CONFIG_PORT, mbxStatus x%x Data: x%x\n", + phba->brd_no, pmb->mb.mbxCommand, + pmb->mb.mbxStatus, 0); ++ spin_lock_irq(&phba->hbalock); + phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE; ++ spin_unlock_irq(&phba->hbalock); ++ rc = -ENXIO; ++ } else { ++ done = 1; ++ phba->max_vpi = (phba->max_vpi && ++ pmb->mb.un.varCfgPort.gmv) != 0 ++ ? pmb->mb.un.varCfgPort.max_vpi ++ : 0; + } + } +- if (!done) ++ ++ if (!done) { ++ rc = -EINVAL; ++ goto do_prep_failed; ++ } ++ ++ if ((pmb->mb.un.varCfgPort.sli_mode == 3) && ++ (!pmb->mb.un.varCfgPort.cMA)) { ++ rc = -ENXIO; ++ goto do_prep_failed; ++ } ++ return rc; ++ ++do_prep_failed: ++ mempool_free(pmb, phba->mbox_mem_pool); ++ return rc; ++} ++ ++int ++lpfc_sli_hba_setup(struct lpfc_hba *phba) ++{ ++ uint32_t rc; ++ int mode = 3; ++ ++ switch (lpfc_sli_mode) { ++ case 2: ++ if (phba->cfg_npiv_enable) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, ++ "%d:1824 NPIV enabled: Override lpfc_sli_mode " ++ "parameter (%d) to auto (0).\n", ++ phba->brd_no, lpfc_sli_mode); ++ break; ++ } ++ mode = 2; ++ break; ++ case 0: ++ case 3: ++ break; ++ default: ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, ++ "%d:1819 Unrecognized lpfc_sli_mode " ++ "parameter: %d.\n", ++ phba->brd_no, lpfc_sli_mode); ++ ++ break; ++ } ++ ++ rc = lpfc_do_config_port(phba, mode); ++ if (rc && lpfc_sli_mode == 3) ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, ++ "%d:1820 Unable to select SLI-3. " ++ "Not supported by adapter.\n", ++ phba->brd_no); ++ if (rc && mode != 2) ++ rc = lpfc_do_config_port(phba, 2); ++ if (rc) ++ goto lpfc_sli_hba_setup_error; ++ ++ if (phba->sli_rev == 3) { ++ phba->iocb_cmd_size = SLI3_IOCB_CMD_SIZE; ++ phba->iocb_rsp_size = SLI3_IOCB_RSP_SIZE; ++ phba->sli3_options |= LPFC_SLI3_ENABLED; ++ phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED; ++ ++ } else { ++ phba->iocb_cmd_size = SLI2_IOCB_CMD_SIZE; ++ phba->iocb_rsp_size = SLI2_IOCB_RSP_SIZE; ++ phba->sli3_options = 0; ++ } ++ ++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, ++ "%d:0444 Firmware in SLI %x mode. 
Max_vpi %d\n", ++ phba->brd_no, phba->sli_rev, phba->max_vpi); ++ rc = lpfc_sli_ring_map(phba); ++ ++ if (rc) + goto lpfc_sli_hba_setup_error; + +- rc = lpfc_sli_ring_map(phba, pmb); ++ /* Init HBQs */ + ++ if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) { ++ rc = lpfc_sli_hbq_setup(phba); + if (rc) + goto lpfc_sli_hba_setup_error; ++ } + + phba->sli.sli_flag |= LPFC_PROCESS_LA; + +@@ -1980,11 +2387,13 @@ + if (rc) + goto lpfc_sli_hba_setup_error; + +- goto lpfc_sli_hba_setup_exit; ++ return rc; ++ + lpfc_sli_hba_setup_error: +- phba->hba_state = LPFC_HBA_ERROR; +-lpfc_sli_hba_setup_exit: +- mempool_free(pmb, phba->mbox_mem_pool); ++ phba->link_state = LPFC_HBA_ERROR; ++ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, ++ "%d:0445 Firmware initialization failed\n", ++ phba->brd_no); + return rc; + } + +@@ -2004,44 +2413,43 @@ + void + lpfc_mbox_timeout(unsigned long ptr) + { +- struct lpfc_hba *phba; ++ struct lpfc_hba *phba = (struct lpfc_hba *) ptr; + unsigned long iflag; ++ uint32_t tmo_posted; ++ ++ spin_lock_irqsave(&phba->pport->work_port_lock, iflag); ++ tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO; ++ if (!tmo_posted) ++ phba->pport->work_port_events |= WORKER_MBOX_TMO; ++ spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag); + +- phba = (struct lpfc_hba *)ptr; +- spin_lock_irqsave(phba->host->host_lock, iflag); +- if (!(phba->work_hba_events & WORKER_MBOX_TMO)) { +- phba->work_hba_events |= WORKER_MBOX_TMO; ++ if (!tmo_posted) { ++ spin_lock_irqsave(&phba->hbalock, iflag); + if (phba->work_wait) +- wake_up(phba->work_wait); ++ lpfc_worker_wake_up(phba); ++ spin_unlock_irqrestore(&phba->hbalock, iflag); + } +- spin_unlock_irqrestore(phba->host->host_lock, iflag); + } + + void + lpfc_mbox_timeout_handler(struct lpfc_hba *phba) + { +- LPFC_MBOXQ_t *pmbox; +- MAILBOX_t *mb; ++ LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active; ++ MAILBOX_t *mb = &pmbox->mb; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + +- spin_lock_irq(phba->host->host_lock); +- if (!(phba->work_hba_events & WORKER_MBOX_TMO)) { +- spin_unlock_irq(phba->host->host_lock); ++ if (!(phba->pport->work_port_events & WORKER_MBOX_TMO)) { + return; + } + +- pmbox = phba->sli.mbox_active; +- mb = &pmbox->mb; +- + /* Mbox cmd timeout */ +- lpfc_printf_log(phba, +- KERN_ERR, +- LOG_MBOX | LOG_SLI, +- "%d:0310 Mailbox command x%x timeout Data: x%x x%x x%p\n", ++ lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI, ++ "%d:0310 Mailbox command x%x timeout Data: x%x x%x " ++ "x%p\n", + phba->brd_no, + mb->mbxCommand, +- phba->hba_state, ++ phba->pport->port_state, + phba->sli.sli_flag, + phba->sli.mbox_active); + +@@ -2049,11 +2457,14 @@ + * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing + * it to fail all oustanding SCSI IO. 
+ */ +- phba->hba_state = LPFC_STATE_UNKNOWN; +- phba->work_hba_events &= ~WORKER_MBOX_TMO; +- phba->fc_flag |= FC_ESTABLISH_LINK; ++ spin_lock_irq(&phba->pport->work_port_lock); ++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO; ++ spin_unlock_irq(&phba->pport->work_port_lock); ++ spin_lock_irq(&phba->hbalock); ++ phba->link_state = LPFC_LINK_UNKNOWN; ++ phba->pport->fc_flag |= FC_ESTABLISH_LINK; + psli->sli_flag &= ~LPFC_SLI2_ACTIVE; +- spin_unlock_irq(phba->host->host_lock); ++ spin_unlock_irq(&phba->hbalock); + + pring = &psli->ring[psli->fcp_ring]; + lpfc_sli_abort_iocb_ring(phba, pring); +@@ -2075,10 +2486,10 @@ + } + + int +-lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag) ++lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag) + { + MAILBOX_t *mb; +- struct lpfc_sli *psli; ++ struct lpfc_sli *psli = &phba->sli; + uint32_t status, evtctr; + uint32_t ha_copy; + int i; +@@ -2086,31 +2497,44 @@ + volatile uint32_t word0, ldata; + void __iomem *to_slim; + ++ if (pmbox->mbox_cmpl && pmbox->mbox_cmpl != lpfc_sli_def_mbox_cmpl && ++ pmbox->mbox_cmpl != lpfc_sli_wake_mbox_wait) { ++ if(!pmbox->vport) { ++ lpfc_printf_log(phba, KERN_ERR, ++ LOG_MBOX | LOG_VPORT, ++ "%d:1806 Mbox x%x failed. No vport\n", ++ phba->brd_no, ++ pmbox->mb.mbxCommand); ++ dump_stack(); ++ return MBXERR_ERROR; ++ } ++ } ++ ++ + /* If the PCI channel is in offline state, do not post mbox. */ + if (unlikely(pci_channel_offline(phba->pcidev))) + return MBX_NOT_FINISHED; + ++ spin_lock_irqsave(&phba->hbalock, drvr_flag); + psli = &phba->sli; + +- spin_lock_irqsave(phba->host->host_lock, drvr_flag); +- + + mb = &pmbox->mb; + status = MBX_SUCCESS; + +- if (phba->hba_state == LPFC_HBA_ERROR) { +- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); ++ if (phba->link_state == LPFC_HBA_ERROR) { ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + + /* Mbox command cannot issue */ +- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) +- return (MBX_NOT_FINISHED); ++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) ++ return MBX_NOT_FINISHED; + } + + if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT && + !(readl(phba->HCregaddr) & HC_MBINT_ENA)) { +- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); +- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) +- return (MBX_NOT_FINISHED); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); ++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) ++ return MBX_NOT_FINISHED; + } + + if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) { +@@ -2120,20 +2544,18 @@ + */ + + if (flag & MBX_POLL) { +- spin_unlock_irqrestore(phba->host->host_lock, +- drvr_flag); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + + /* Mbox command cannot issue */ +- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) +- return (MBX_NOT_FINISHED); ++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); ++ return MBX_NOT_FINISHED; + } + + if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) { +- spin_unlock_irqrestore(phba->host->host_lock, +- drvr_flag); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + /* Mbox command cannot issue */ +- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) +- return (MBX_NOT_FINISHED); ++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); ++ return MBX_NOT_FINISHED; + } + + /* Handle STOP IOCB processing flag. 
This is only meaningful +@@ -2157,21 +2579,33 @@ + lpfc_mbox_put(phba, pmbox); + + /* Mbox cmd issue - BUSY */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_MBOX | LOG_SLI, +- "%d:0308 Mbox cmd issue - BUSY Data: x%x x%x x%x x%x\n", ++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, ++ "%d (%d):0308 Mbox cmd issue - BUSY Data: " ++ "x%x x%x x%x x%x\n", + phba->brd_no, +- mb->mbxCommand, +- phba->hba_state, +- psli->sli_flag, +- flag); ++ pmbox->vport ? pmbox->vport->vpi : 0xffffff, ++ mb->mbxCommand, phba->pport->port_state, ++ psli->sli_flag, flag); + + psli->slistat.mbox_busy++; +- spin_unlock_irqrestore(phba->host->host_lock, +- drvr_flag); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + +- return (MBX_BUSY); ++ if (pmbox->vport) { ++ lpfc_debugfs_disc_trc(pmbox->vport, ++ LPFC_DISC_TRC_MBOX_VPORT, ++ "MBOX Bsy vport: cmd:x%x mb:x%x x%x", ++ (uint32_t)mb->mbxCommand, ++ mb->un.varWords[0], mb->un.varWords[1]); ++ } ++ else { ++ lpfc_debugfs_disc_trc(phba->pport, ++ LPFC_DISC_TRC_MBOX, ++ "MBOX Bsy: cmd:x%x mb:x%x x%x", ++ (uint32_t)mb->mbxCommand, ++ mb->un.varWords[0], mb->un.varWords[1]); ++ } ++ ++ return MBX_BUSY; + } + + /* Handle STOP IOCB processing flag. This is only meaningful +@@ -2198,11 +2632,10 @@ + if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) && + (mb->mbxCommand != MBX_KILL_BOARD)) { + psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irqrestore(phba->host->host_lock, +- drvr_flag); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); + /* Mbox command cannot issue */ +- LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag); +- return (MBX_NOT_FINISHED); ++ LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag); ++ return MBX_NOT_FINISHED; + } + /* timeout active mbox command */ + mod_timer(&psli->mbox_tmo, (jiffies + +@@ -2210,15 +2643,29 @@ + } + + /* Mailbox cmd issue */ +- lpfc_printf_log(phba, +- KERN_INFO, +- LOG_MBOX | LOG_SLI, +- "%d:0309 Mailbox cmd x%x issue Data: x%x x%x x%x\n", +- phba->brd_no, +- mb->mbxCommand, +- phba->hba_state, +- psli->sli_flag, +- flag); ++ lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI, ++ "%d (%d):0309 Mailbox cmd x%x issue Data: x%x x%x " ++ "x%x\n", ++ phba->brd_no, pmbox->vport ? 
pmbox->vport->vpi : 0, ++ mb->mbxCommand, phba->pport->port_state, ++ psli->sli_flag, flag); ++ ++ if (mb->mbxCommand != MBX_HEARTBEAT) { ++ if (pmbox->vport) { ++ lpfc_debugfs_disc_trc(pmbox->vport, ++ LPFC_DISC_TRC_MBOX_VPORT, ++ "MBOX Send vport: cmd:x%x mb:x%x x%x", ++ (uint32_t)mb->mbxCommand, ++ mb->un.varWords[0], mb->un.varWords[1]); ++ } ++ else { ++ lpfc_debugfs_disc_trc(phba->pport, ++ LPFC_DISC_TRC_MBOX, ++ "MBOX Send: cmd:x%x mb:x%x x%x", ++ (uint32_t)mb->mbxCommand, ++ mb->un.varWords[0], mb->un.varWords[1]); ++ } ++ } + + psli->slistat.mbox_cmd++; + evtctr = psli->slistat.mbox_event; +@@ -2285,12 +2732,12 @@ + /* Wait for command to complete */ + while (((word0 & OWN_CHIP) == OWN_CHIP) || + (!(ha_copy & HA_MBATT) && +- (phba->hba_state > LPFC_WARM_START))) { ++ (phba->link_state > LPFC_WARM_START))) { + if (i-- <= 0) { + psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; +- spin_unlock_irqrestore(phba->host->host_lock, ++ spin_unlock_irqrestore(&phba->hbalock, + drvr_flag); +- return (MBX_NOT_FINISHED); ++ return MBX_NOT_FINISHED; + } + + /* Check if we took a mbox interrupt while we were +@@ -2299,12 +2746,12 @@ + && (evtctr != psli->slistat.mbox_event)) + break; + +- spin_unlock_irqrestore(phba->host->host_lock, ++ spin_unlock_irqrestore(&phba->hbalock, + drvr_flag); + + msleep(1); + +- spin_lock_irqsave(phba->host->host_lock, drvr_flag); ++ spin_lock_irqsave(&phba->hbalock, drvr_flag); + + if (psli->sli_flag & LPFC_SLI2_ACTIVE) { + /* First copy command data */ +@@ -2355,23 +2802,25 @@ + status = mb->mbxStatus; + } + +- spin_unlock_irqrestore(phba->host->host_lock, drvr_flag); +- return (status); ++ spin_unlock_irqrestore(&phba->hbalock, drvr_flag); ++ return status; + } + +-static int +-lpfc_sli_ringtx_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, +- struct lpfc_iocbq * piocb) ++/* ++ * Caller needs to hold lock. ++ */ ++static void ++__lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *piocb) + { + /* Insert the caller's iocb in the txq tail for later processing. */ + list_add_tail(&piocb->list, &pring->txq); + pring->txq_cnt++; +- return (0); + } + + static struct lpfc_iocbq * + lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, +- struct lpfc_iocbq ** piocb) ++ struct lpfc_iocbq **piocb) + { + struct lpfc_iocbq * nextiocb; + +@@ -2384,13 +2833,29 @@ + return nextiocb; + } + ++/* ++ * Lockless version of lpfc_sli_issue_iocb. ++ */ + int +-lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, + struct lpfc_iocbq *piocb, uint32_t flag) + { + struct lpfc_iocbq *nextiocb; + IOCB_t *iocb; + ++ if (piocb->iocb_cmpl && (!piocb->vport) && ++ (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) && ++ (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) { ++ lpfc_printf_log(phba, KERN_ERR, ++ LOG_SLI | LOG_VPORT, ++ "%d:1807 IOCB x%x failed. No vport\n", ++ phba->brd_no, ++ piocb->iocb.ulpCommand); ++ dump_stack(); ++ return IOCB_ERROR; ++ } ++ ++ + /* If the PCI channel is in offline state, do not post iocbs. 
*/ + if (unlikely(pci_channel_offline(phba->pcidev))) + return IOCB_ERROR; +@@ -2398,7 +2863,7 @@ + /* + * We should never get an IOCB if we are in a < LINK_DOWN state + */ +- if (unlikely(phba->hba_state < LPFC_LINK_DOWN)) ++ if (unlikely(phba->link_state < LPFC_LINK_DOWN)) + return IOCB_ERROR; + + /* +@@ -2408,7 +2873,7 @@ + if (unlikely(pring->flag & LPFC_STOP_IOCB_MBX)) + goto iocb_busy; + +- if (unlikely(phba->hba_state == LPFC_LINK_DOWN)) { ++ if (unlikely(phba->link_state == LPFC_LINK_DOWN)) { + /* + * Only CREATE_XRI, CLOSE_XRI, and QUE_RING_BUF + * can be issued if the link is not up. +@@ -2436,8 +2901,9 @@ + * attention events. + */ + } else if (unlikely(pring->ringno == phba->sli.fcp_ring && +- !(phba->sli.sli_flag & LPFC_PROCESS_LA))) ++ !(phba->sli.sli_flag & LPFC_PROCESS_LA))) { + goto iocb_busy; ++ } + + while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) && + (nextiocb = lpfc_sli_next_iocb(phba, pring, &piocb))) +@@ -2459,13 +2925,28 @@ + out_busy: + + if (!(flag & SLI_IOCB_RET_IOCB)) { +- lpfc_sli_ringtx_put(phba, pring, piocb); ++ __lpfc_sli_ringtx_put(phba, pring, piocb); + return IOCB_SUCCESS; + } + + return IOCB_BUSY; + } + ++ ++int ++lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *piocb, uint32_t flag) ++{ ++ unsigned long iflags; ++ int rc; ++ ++ spin_lock_irqsave(&phba->hbalock, iflags); ++ rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); ++ ++ return rc; ++} ++ + static int + lpfc_extra_ring_setup( struct lpfc_hba *phba) + { +@@ -2504,7 +2985,7 @@ + int + lpfc_sli_setup(struct lpfc_hba *phba) + { +- int i, totiocb = 0; ++ int i, totiocbsize = 0; + struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + +@@ -2529,6 +3010,12 @@ + pring->numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES; + pring->numCiocb += SLI2_IOCB_CMD_R3XTRA_ENTRIES; + pring->numRiocb += SLI2_IOCB_RSP_R3XTRA_ENTRIES; ++ pring->sizeCiocb = (phba->sli_rev == 3) ? ++ SLI3_IOCB_CMD_SIZE : ++ SLI2_IOCB_CMD_SIZE; ++ pring->sizeRiocb = (phba->sli_rev == 3) ? ++ SLI3_IOCB_RSP_SIZE : ++ SLI2_IOCB_RSP_SIZE; + pring->iotag_ctr = 0; + pring->iotag_max = + (phba->cfg_hba_queue_depth * 2); +@@ -2539,12 +3026,25 @@ + /* numCiocb and numRiocb are used in config_port */ + pring->numCiocb = SLI2_IOCB_CMD_R1_ENTRIES; + pring->numRiocb = SLI2_IOCB_RSP_R1_ENTRIES; ++ pring->sizeCiocb = (phba->sli_rev == 3) ? ++ SLI3_IOCB_CMD_SIZE : ++ SLI2_IOCB_CMD_SIZE; ++ pring->sizeRiocb = (phba->sli_rev == 3) ? ++ SLI3_IOCB_RSP_SIZE : ++ SLI2_IOCB_RSP_SIZE; ++ pring->iotag_max = phba->cfg_hba_queue_depth; + pring->num_mask = 0; + break; + case LPFC_ELS_RING: /* ring 2 - ELS / CT */ + /* numCiocb and numRiocb are used in config_port */ + pring->numCiocb = SLI2_IOCB_CMD_R2_ENTRIES; + pring->numRiocb = SLI2_IOCB_RSP_R2_ENTRIES; ++ pring->sizeCiocb = (phba->sli_rev == 3) ? ++ SLI3_IOCB_CMD_SIZE : ++ SLI2_IOCB_CMD_SIZE; ++ pring->sizeRiocb = (phba->sli_rev == 3) ? 
++ SLI3_IOCB_RSP_SIZE : ++ SLI2_IOCB_RSP_SIZE; + pring->fast_iotag = 0; + pring->iotag_ctr = 0; + pring->iotag_max = 4096; +@@ -2575,14 +3075,16 @@ + lpfc_ct_unsol_event; + break; + } +- totiocb += (pring->numCiocb + pring->numRiocb); ++ totiocbsize += (pring->numCiocb * pring->sizeCiocb) + ++ (pring->numRiocb * pring->sizeRiocb); + } +- if (totiocb > MAX_SLI2_IOCB) { ++ if (totiocbsize > MAX_SLIM_IOCB_SIZE) { + /* Too many cmd / rsp ring entries in SLI2 SLIM */ + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0462 Too many cmd / rsp ring entries in " +- "SLI2 SLIM Data: x%x x%x\n", +- phba->brd_no, totiocb, MAX_SLI2_IOCB); ++ "SLI2 SLIM Data: x%x x%lx\n", ++ phba->brd_no, totiocbsize, ++ (unsigned long) MAX_SLIM_IOCB_SIZE); + } + if (phba->cfg_multi_ring_support == 2) + lpfc_extra_ring_setup(phba); +@@ -2591,15 +3093,16 @@ + } + + int +-lpfc_sli_queue_setup(struct lpfc_hba * phba) ++lpfc_sli_queue_setup(struct lpfc_hba *phba) + { + struct lpfc_sli *psli; + struct lpfc_sli_ring *pring; + int i; + + psli = &phba->sli; +- spin_lock_irq(phba->host->host_lock); ++ spin_lock_irq(&phba->hbalock); + INIT_LIST_HEAD(&psli->mboxq); ++ INIT_LIST_HEAD(&psli->mboxq_cmpl); + /* Initialize list headers for txq and txcmplq as double linked lists */ + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->ring[i]; +@@ -2612,15 +3115,73 @@ + INIT_LIST_HEAD(&pring->iocb_continueq); + INIT_LIST_HEAD(&pring->postbufq); + } +- spin_unlock_irq(phba->host->host_lock); +- return (1); ++ spin_unlock_irq(&phba->hbalock); ++ return 1; + } + + int +-lpfc_sli_hba_down(struct lpfc_hba * phba) ++lpfc_sli_host_down(struct lpfc_vport *vport) + { + LIST_HEAD(completions); +- struct lpfc_sli *psli; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_sli *psli = &phba->sli; ++ struct lpfc_sli_ring *pring; ++ struct lpfc_iocbq *iocb, *next_iocb; ++ int i; ++ unsigned long flags = 0; ++ uint16_t prev_pring_flag; ++ ++ lpfc_cleanup_discovery_resources(vport); ++ ++ spin_lock_irqsave(&phba->hbalock, flags); ++ for (i = 0; i < psli->num_rings; i++) { ++ pring = &psli->ring[i]; ++ prev_pring_flag = pring->flag; ++ if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */ ++ pring->flag |= LPFC_DEFERRED_RING_EVENT; ++ /* ++ * Error everything on the txq since these iocbs have not been ++ * given to the FW yet. 
++ */ ++ list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) { ++ if (iocb->vport != vport) ++ continue; ++ list_move_tail(&iocb->list, &completions); ++ pring->txq_cnt--; ++ } ++ ++ /* Next issue ABTS for everything on the txcmplq */ ++ list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, ++ list) { ++ if (iocb->vport != vport) ++ continue; ++ lpfc_sli_issue_abort_iotag(phba, pring, iocb); ++ } ++ ++ pring->flag = prev_pring_flag; ++ } ++ ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ ++ while (!list_empty(&completions)) { ++ list_remove_head(&completions, iocb, struct lpfc_iocbq, list); ++ ++ if (!iocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, iocb); ++ else { ++ iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT; ++ iocb->iocb.un.ulpWord[4] = IOERR_SLI_DOWN; ++ (iocb->iocb_cmpl) (phba, iocb, iocb); ++ } ++ } ++ return 1; ++} ++ ++int ++lpfc_sli_hba_down(struct lpfc_hba *phba) ++{ ++ LIST_HEAD(completions); ++ struct lpfc_sli *psli = &phba->sli; + struct lpfc_sli_ring *pring; + LPFC_MBOXQ_t *pmb; + struct lpfc_iocbq *iocb; +@@ -2628,12 +3189,14 @@ + int i; + unsigned long flags = 0; + +- psli = &phba->sli; + lpfc_hba_down_prep(phba); + +- spin_lock_irqsave(phba->host->host_lock, flags); ++ lpfc_fabric_abort_hba(phba); ++ ++ spin_lock_irqsave(&phba->hbalock, flags); + for (i = 0; i < psli->num_rings; i++) { + pring = &psli->ring[i]; ++ if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */ + pring->flag |= LPFC_DEFERRED_RING_EVENT; + + /* +@@ -2644,51 +3207,50 @@ + pring->txq_cnt = 0; + + } +- spin_unlock_irqrestore(phba->host->host_lock, flags); ++ spin_unlock_irqrestore(&phba->hbalock, flags); + + while (!list_empty(&completions)) { +- iocb = list_get_first(&completions, struct lpfc_iocbq, list); ++ list_remove_head(&completions, iocb, struct lpfc_iocbq, list); + cmd = &iocb->iocb; +- list_del(&iocb->list); + +- if (iocb->iocb_cmpl) { ++ if (!iocb->iocb_cmpl) ++ lpfc_sli_release_iocbq(phba, iocb); ++ else { + cmd->ulpStatus = IOSTAT_LOCAL_REJECT; + cmd->un.ulpWord[4] = IOERR_SLI_DOWN; + (iocb->iocb_cmpl) (phba, iocb, iocb); +- } else +- lpfc_sli_release_iocbq(phba, iocb); ++ } + } + + /* Return any active mbox cmds */ + del_timer_sync(&psli->mbox_tmo); +- spin_lock_irqsave(phba->host->host_lock, flags); +- phba->work_hba_events &= ~WORKER_MBOX_TMO; ++ spin_lock_irqsave(&phba->hbalock, flags); ++ ++ spin_lock(&phba->pport->work_port_lock); ++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO; ++ spin_unlock(&phba->pport->work_port_lock); ++ + if (psli->mbox_active) { +- pmb = psli->mbox_active; +- pmb->mb.mbxStatus = MBX_NOT_FINISHED; +- if (pmb->mbox_cmpl) { +- spin_unlock_irqrestore(phba->host->host_lock, flags); +- pmb->mbox_cmpl(phba,pmb); +- spin_lock_irqsave(phba->host->host_lock, flags); +- } +- } +- psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; ++ list_add_tail(&psli->mbox_active->list, &completions); + psli->mbox_active = NULL; ++ psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; ++ } + +- /* Return any pending mbox cmds */ +- while ((pmb = lpfc_mbox_get(phba)) != NULL) { ++ /* Return any pending or completed mbox cmds */ ++ list_splice_init(&phba->sli.mboxq, &completions); ++ list_splice_init(&phba->sli.mboxq_cmpl, &completions); ++ INIT_LIST_HEAD(&psli->mboxq); ++ INIT_LIST_HEAD(&psli->mboxq_cmpl); ++ ++ spin_unlock_irqrestore(&phba->hbalock, flags); ++ ++ while (!list_empty(&completions)) { ++ list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list); + pmb->mb.mbxStatus = MBX_NOT_FINISHED; + if (pmb->mbox_cmpl) { +- spin_unlock_irqrestore(phba->host->host_lock, flags); + 
pmb->mbox_cmpl(phba,pmb); +- spin_lock_irqsave(phba->host->host_lock, flags); + } + } +- +- INIT_LIST_HEAD(&psli->mboxq); +- +- spin_unlock_irqrestore(phba->host->host_lock, flags); +- + return 1; + } + +@@ -2710,14 +3272,15 @@ + } + + int +-lpfc_sli_ringpostbuf_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, +- struct lpfc_dmabuf * mp) ++lpfc_sli_ringpostbuf_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_dmabuf *mp) + { + /* Stick struct lpfc_dmabuf at end of postbufq so driver can look it up + later */ ++ spin_lock_irq(&phba->hbalock); + list_add_tail(&mp->list, &pring->postbufq); +- + pring->postbufq_cnt++; ++ spin_unlock_irq(&phba->hbalock); + return 0; + } + +@@ -2730,14 +3293,17 @@ + struct list_head *slp = &pring->postbufq; + + /* Search postbufq, from the begining, looking for a match on phys */ ++ spin_lock_irq(&phba->hbalock); + list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) { + if (mp->phys == phys) { + list_del_init(&mp->list); + pring->postbufq_cnt--; ++ spin_unlock_irq(&phba->hbalock); + return mp; + } + } + ++ spin_unlock_irq(&phba->hbalock); + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "%d:0410 Cannot find virtual addr for mapped buf on " + "ring %d Data x%llx x%p x%p x%x\n", +@@ -2747,92 +3313,110 @@ + } + + static void +-lpfc_sli_abort_els_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- IOCB_t *irsp; ++ IOCB_t *irsp = &rspiocb->iocb; + uint16_t abort_iotag, abort_context; +- struct lpfc_iocbq *abort_iocb, *rsp_ab_iocb; ++ struct lpfc_iocbq *abort_iocb; + struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING]; + + abort_iocb = NULL; +- irsp = &rspiocb->iocb; +- +- spin_lock_irq(phba->host->host_lock); + + if (irsp->ulpStatus) { + abort_context = cmdiocb->iocb.un.acxri.abortContextTag; + abort_iotag = cmdiocb->iocb.un.acxri.abortIoTag; + ++ spin_lock_irq(&phba->hbalock); + if (abort_iotag != 0 && abort_iotag <= phba->sli.last_iotag) + abort_iocb = phba->sli.iocbq_lookup[abort_iotag]; + +- lpfc_printf_log(phba, KERN_ERR, LOG_SLI, +- "%d:0327 Cannot abort els iocb %p" +- " with tag %x context %x\n", +- phba->brd_no, abort_iocb, +- abort_iotag, abort_context); ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_SLI, ++ "%d:0327 Cannot abort els iocb %p " ++ "with tag %x context %x, abort status %x, " ++ "abort code %x\n", ++ phba->brd_no, abort_iocb, abort_iotag, ++ abort_context, irsp->ulpStatus, ++ irsp->un.ulpWord[4]); + + /* + * make sure we have the right iocbq before taking it + * off the txcmplq and try to call completion routine. 
+ */ +- if (abort_iocb && +- abort_iocb->iocb.ulpContext == abort_context && +- abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) { +- list_del(&abort_iocb->list); ++ if (!abort_iocb || ++ abort_iocb->iocb.ulpContext != abort_context || ++ (abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) == 0) ++ spin_unlock_irq(&phba->hbalock); ++ else { ++ list_del_init(&abort_iocb->list); + pring->txcmplq_cnt--; ++ spin_unlock_irq(&phba->hbalock); + +- rsp_ab_iocb = lpfc_sli_get_iocbq(phba); +- if (rsp_ab_iocb == NULL) +- lpfc_sli_release_iocbq(phba, abort_iocb); +- else { +- abort_iocb->iocb_flag &= +- ~LPFC_DRIVER_ABORTED; +- rsp_ab_iocb->iocb.ulpStatus = +- IOSTAT_LOCAL_REJECT; +- rsp_ab_iocb->iocb.un.ulpWord[4] = +- IOERR_SLI_ABORTED; +- spin_unlock_irq(phba->host->host_lock); +- (abort_iocb->iocb_cmpl) +- (phba, abort_iocb, rsp_ab_iocb); +- spin_lock_irq(phba->host->host_lock); +- lpfc_sli_release_iocbq(phba, rsp_ab_iocb); +- } ++ abort_iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED; ++ abort_iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT; ++ abort_iocb->iocb.un.ulpWord[4] = IOERR_SLI_ABORTED; ++ (abort_iocb->iocb_cmpl)(phba, abort_iocb, abort_iocb); + } + } + + lpfc_sli_release_iocbq(phba, cmdiocb); +- spin_unlock_irq(phba->host->host_lock); ++ return; ++} ++ ++static void ++lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) ++{ ++ IOCB_t *irsp = &rspiocb->iocb; ++ ++ /* ELS cmd tag completes */ ++ lpfc_printf_log(phba, KERN_INFO, LOG_ELS, ++ "%d (X):0133 Ignoring ELS cmd tag x%x completion Data: " ++ "x%x x%x x%x\n", ++ phba->brd_no, irsp->ulpIoTag, irsp->ulpStatus, ++ irsp->un.ulpWord[4], irsp->ulpTimeout); ++ if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) ++ lpfc_ct_free_iocb(phba, cmdiocb); ++ else ++ lpfc_els_free_iocb(phba, cmdiocb); + return; + } + + int +-lpfc_sli_issue_abort_iotag(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, +- struct lpfc_iocbq * cmdiocb) ++lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *cmdiocb) + { ++ struct lpfc_vport *vport = cmdiocb->vport; + struct lpfc_iocbq *abtsiocbp; + IOCB_t *icmd = NULL; + IOCB_t *iabt = NULL; + int retval = IOCB_ERROR; + +- /* There are certain command types we don't want +- * to abort. ++ /* ++ * There are certain command types we don't want to abort. And we ++ * don't want to abort commands that are already in the process of ++ * being aborted. + */ + icmd = &cmdiocb->iocb; +- if ((icmd->ulpCommand == CMD_ABORT_XRI_CN) || +- (icmd->ulpCommand == CMD_CLOSE_XRI_CN)) ++ if (icmd->ulpCommand == CMD_ABORT_XRI_CN || ++ icmd->ulpCommand == CMD_CLOSE_XRI_CN || ++ (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0) + return 0; + +- /* If we're unloading, interrupts are disabled so we +- * need to cleanup the iocb here. ++ /* If we're unloading, don't abort iocb on the ELS ring, but change the ++ * callback so that nothing happens when it finishes. 
+ */ +- if (phba->fc_flag & FC_UNLOADING) ++ if ((vport->load_flag & FC_UNLOADING) && ++ (pring->ringno == LPFC_ELS_RING)) { ++ if (cmdiocb->iocb_flag & LPFC_IO_FABRIC) ++ cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl; ++ else ++ cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl; + goto abort_iotag_exit; ++ } + + /* issue ABTS for this IOCB based on iotag */ +- abtsiocbp = lpfc_sli_get_iocbq(phba); ++ abtsiocbp = __lpfc_sli_get_iocbq(phba); + if (abtsiocbp == NULL) + return 0; + +@@ -2848,7 +3432,7 @@ + iabt->ulpLe = 1; + iabt->ulpClass = icmd->ulpClass; + +- if (phba->hba_state >= LPFC_LINK_UP) ++ if (phba->link_state >= LPFC_LINK_UP) + iabt->ulpCommand = CMD_ABORT_XRI_CN; + else + iabt->ulpCommand = CMD_CLOSE_XRI_CN; +@@ -2856,32 +3440,20 @@ + abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl; + + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, +- "%d:0339 Abort xri x%x, original iotag x%x, abort " +- "cmd iotag x%x\n", +- phba->brd_no, iabt->un.acxri.abortContextTag, ++ "%d (%d):0339 Abort xri x%x, original iotag x%x, " ++ "abort cmd iotag x%x\n", ++ phba->brd_no, vport->vpi, ++ iabt->un.acxri.abortContextTag, + iabt->un.acxri.abortIoTag, abtsiocbp->iotag); +- retval = lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0); ++ retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0); + + abort_iotag_exit: +- +- /* If we could not issue an abort dequeue the iocb and handle +- * the completion here. ++ /* ++ * Caller to this routine should check for IOCB_ERROR ++ * and handle it properly. This routine no longer removes ++ * iocb off txcmplq and call compl in case of IOCB_ERROR. + */ +- if (retval == IOCB_ERROR) { +- list_del(&cmdiocb->list); +- pring->txcmplq_cnt--; +- +- if (cmdiocb->iocb_cmpl) { +- icmd->ulpStatus = IOSTAT_LOCAL_REJECT; +- icmd->un.ulpWord[4] = IOERR_SLI_ABORTED; +- spin_unlock_irq(phba->host->host_lock); +- (cmdiocb->iocb_cmpl) (phba, cmdiocb, cmdiocb); +- spin_lock_irq(phba->host->host_lock); +- } else +- lpfc_sli_release_iocbq(phba, cmdiocb); +- } +- +- return 1; ++ return retval; + } + + static int +@@ -2947,14 +3519,10 @@ + } + + void +-lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb, +- struct lpfc_iocbq * rspiocb) ++lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, ++ struct lpfc_iocbq *rspiocb) + { +- unsigned long iflags; +- +- spin_lock_irqsave(phba->host->host_lock, iflags); + lpfc_sli_release_iocbq(phba, cmdiocb); +- spin_unlock_irqrestore(phba->host->host_lock, iflags); + return; + } + +@@ -2972,8 +3540,8 @@ + for (i = 1; i <= phba->sli.last_iotag; i++) { + iocbq = phba->sli.iocbq_lookup[i]; + +- if (lpfc_sli_validate_fcp_iocb (iocbq, tgt_id, lun_id, +- 0, abort_cmd) != 0) ++ if (lpfc_sli_validate_fcp_iocb(iocbq, tgt_id, lun_id, 0, ++ abort_cmd) != 0) + continue; + + /* issue ABTS for this IOCB based on iotag */ +@@ -2989,8 +3557,9 @@ + abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag; + abtsiocb->iocb.ulpLe = 1; + abtsiocb->iocb.ulpClass = cmd->ulpClass; ++ abtsiocb->vport = phba->pport; + +- if (phba->hba_state >= LPFC_LINK_UP) ++ if (lpfc_is_link_up(phba)) + abtsiocb->iocb.ulpCommand = CMD_ABORT_XRI_CN; + else + abtsiocb->iocb.ulpCommand = CMD_CLOSE_XRI_CN; +@@ -3016,16 +3585,16 @@ + wait_queue_head_t *pdone_q; + unsigned long iflags; + +- spin_lock_irqsave(phba->host->host_lock, iflags); ++ spin_lock_irqsave(&phba->hbalock, iflags); + cmdiocbq->iocb_flag |= LPFC_IO_WAKE; + if (cmdiocbq->context2 && rspiocbq) + memcpy(&((struct lpfc_iocbq *)cmdiocbq->context2)->iocb, + &rspiocbq->iocb, sizeof(IOCB_t)); + + pdone_q = 
cmdiocbq->context_un.wait_queue; +- spin_unlock_irqrestore(phba->host->host_lock, iflags); + if (pdone_q) + wake_up(pdone_q); ++ spin_unlock_irqrestore(&phba->hbalock, iflags); + return; + } + +@@ -3035,11 +3604,12 @@ + * lpfc_sli_issue_call since the wake routine sets a unique value and by + * definition this is a wait function. + */ ++ + int +-lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba, +- struct lpfc_sli_ring * pring, +- struct lpfc_iocbq * piocb, +- struct lpfc_iocbq * prspiocbq, ++lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba, ++ struct lpfc_sli_ring *pring, ++ struct lpfc_iocbq *piocb, ++ struct lpfc_iocbq *prspiocbq, + uint32_t timeout) + { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q); +@@ -3071,11 +3641,9 @@ + retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0); + if (retval == IOCB_SUCCESS) { + timeout_req = timeout * HZ; +- spin_unlock_irq(phba->host->host_lock); + timeleft = wait_event_timeout(done_q, + piocb->iocb_flag & LPFC_IO_WAKE, + timeout_req); +- spin_lock_irq(phba->host->host_lock); + + if (piocb->iocb_flag & LPFC_IO_WAKE) { + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, +@@ -3117,16 +3685,16 @@ + } + + int +-lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq, ++lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq, + uint32_t timeout) + { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q); + int retval; ++ unsigned long flag; + + /* The caller must leave context1 empty. */ +- if (pmboxq->context1 != 0) { +- return (MBX_NOT_FINISHED); +- } ++ if (pmboxq->context1 != 0) ++ return MBX_NOT_FINISHED; + + /* setup wake call as IOCB callback */ + pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait; +@@ -3141,6 +3709,7 @@ + pmboxq->mbox_flag & LPFC_MBX_WAKE, + timeout * HZ); + ++ spin_lock_irqsave(&phba->hbalock, flag); + pmboxq->context1 = NULL; + /* + * if LPFC_MBX_WAKE flag is set the mailbox is completed +@@ -3148,8 +3717,11 @@ + */ + if (pmboxq->mbox_flag & LPFC_MBX_WAKE) + retval = MBX_SUCCESS; +- else ++ else { + retval = MBX_TIMEOUT; ++ pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl; ++ } ++ spin_unlock_irqrestore(&phba->hbalock, flag); + } + + return retval; +@@ -3158,12 +3730,25 @@ + int + lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba) + { ++ struct lpfc_vport *vport = phba->pport; + int i = 0; ++ uint32_t ha_copy; + +- while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !phba->stopped) { ++ while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) { + if (i++ > LPFC_MBOX_TMO * 1000) + return 1; + ++ /* ++ * Call lpfc_sli_handle_mb_event only if a mailbox cmd ++ * did finish. This way we won't get the misleading ++ * "Stray Mailbox Interrupt" message. ++ */ ++ spin_lock_irq(&phba->hbalock); ++ ha_copy = phba->work_ha; ++ phba->work_ha &= ~HA_MBATT; ++ spin_unlock_irq(&phba->hbalock); ++ ++ if (ha_copy & HA_MBATT) + if (lpfc_sli_handle_mb_event(phba) == 0) + i = 0; + +@@ -3183,6 +3768,13 @@ + int i; + uint32_t control; + ++ MAILBOX_t *mbox, *pmbox; ++ struct lpfc_vport *vport; ++ struct lpfc_nodelist *ndlp; ++ struct lpfc_dmabuf *mp; ++ LPFC_MBOXQ_t *pmb; ++ int rc; ++ + /* + * Get the driver's phba structure from the dev_id and + * assume the HBA is not interrupting. +@@ -3204,7 +3796,7 @@ + */ + + /* Ignore all interrupts during initialization. 
*/ +- if (unlikely(phba->hba_state < LPFC_LINK_DOWN)) ++ if (unlikely(phba->link_state < LPFC_LINK_DOWN)) + return IRQ_NONE; + + /* +@@ -3212,16 +3804,16 @@ + * Clear Attention Sources, except Error Attention (to + * preserve status) and Link Attention + */ +- spin_lock(phba->host->host_lock); ++ spin_lock(&phba->hbalock); + ha_copy = readl(phba->HAregaddr); + /* If somebody is waiting to handle an eratt don't process it + * here. The brdkill function will do this. + */ +- if (phba->fc_flag & FC_IGNORE_ERATT) ++ if (phba->link_flag & LS_IGNORE_ERATT) + ha_copy &= ~HA_ERATT; + writel((ha_copy & ~(HA_LATT | HA_ERATT)), phba->HAregaddr); + readl(phba->HAregaddr); /* flush */ +- spin_unlock(phba->host->host_lock); ++ spin_unlock(&phba->hbalock); + + if (unlikely(!ha_copy)) + return IRQ_NONE; +@@ -3235,36 +3827,41 @@ + * Turn off Link Attention interrupts + * until CLEAR_LA done + */ +- spin_lock(phba->host->host_lock); ++ spin_lock(&phba->hbalock); + phba->sli.sli_flag &= ~LPFC_PROCESS_LA; + control = readl(phba->HCregaddr); + control &= ~HC_LAINT_ENA; + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- spin_unlock(phba->host->host_lock); ++ spin_unlock(&phba->hbalock); + } + else + work_ha_copy &= ~HA_LATT; + } + + if (work_ha_copy & ~(HA_ERATT|HA_MBATT|HA_LATT)) { +- for (i = 0; i < phba->sli.num_rings; i++) { +- if (work_ha_copy & (HA_RXATT << (4*i))) { + /* +- * Turn off Slow Rings interrupts ++ * Turn off Slow Rings interrupts, LPFC_ELS_RING is ++ * the only slow ring. + */ +- spin_lock(phba->host->host_lock); ++ status = (work_ha_copy & ++ (HA_RXMASK << (4*LPFC_ELS_RING))); ++ status >>= (4*LPFC_ELS_RING); ++ if (status & HA_RXMASK) { ++ spin_lock(&phba->hbalock); + control = readl(phba->HCregaddr); +- control &= ~(HC_R0INT_ENA << i); ++ if (control & (HC_R0INT_ENA << LPFC_ELS_RING)) { ++ control &= ++ ~(HC_R0INT_ENA << LPFC_ELS_RING); + writel(control, phba->HCregaddr); + readl(phba->HCregaddr); /* flush */ +- spin_unlock(phba->host->host_lock); + } ++ spin_unlock(&phba->hbalock); + } + } + + if (work_ha_copy & HA_ERATT) { +- phba->hba_state = LPFC_HBA_ERROR; ++ phba->link_state = LPFC_HBA_ERROR; + /* + * There was a link/board error. Read the + * status register to retrieve the error event +@@ -3279,14 +3876,108 @@ + /* Clear Chip error bit */ + writel(HA_ERATT, phba->HAregaddr); + readl(phba->HAregaddr); /* flush */ +- phba->stopped = 1; ++ phba->pport->stopped = 1; ++ } ++ ++ if ((work_ha_copy & HA_MBATT) && ++ (phba->sli.mbox_active)) { ++ pmb = phba->sli.mbox_active; ++ pmbox = &pmb->mb; ++ mbox = &phba->slim2p->mbx; ++ vport = pmb->vport; ++ ++ /* First check out the status word */ ++ lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof(uint32_t)); ++ if (pmbox->mbxOwner != OWN_HOST) { ++ /* ++ * Stray Mailbox Interrupt, mbxCommand ++ * mbxStatus ++ */ ++ lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | ++ LOG_SLI, ++ "%d (%d):0304 Stray Mailbox " ++ "Interrupt mbxCommand x%x " ++ "mbxStatus x%x\n", ++ phba->brd_no, ++ (vport ++ ? 
vport->vpi : 0), ++ pmbox->mbxCommand, ++ pmbox->mbxStatus); ++ } ++ phba->last_completion_time = jiffies; ++ del_timer_sync(&phba->sli.mbox_tmo); ++ ++ phba->sli.mbox_active = NULL; ++ if (pmb->mbox_cmpl) { ++ lpfc_sli_pcimem_bcopy(mbox, pmbox, ++ MAILBOX_CMD_SIZE); ++ } ++ if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) { ++ pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG; ++ ++ lpfc_debugfs_disc_trc(vport, ++ LPFC_DISC_TRC_MBOX_VPORT, ++ "MBOX dflt rpi: : status:x%x rpi:x%x", ++ (uint32_t)pmbox->mbxStatus, ++ pmbox->un.varWords[0], 0); ++ ++ if ( !pmbox->mbxStatus) { ++ mp = (struct lpfc_dmabuf *) ++ (pmb->context1); ++ ndlp = (struct lpfc_nodelist *) ++ pmb->context2; ++ ++ /* Reg_LOGIN of dflt RPI was successful. ++ * new lets get rid of the RPI using the ++ * same mbox buffer. ++ */ ++ lpfc_unreg_login(phba, vport->vpi, ++ pmbox->un.varWords[0], pmb); ++ pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi; ++ pmb->context1 = mp; ++ pmb->context2 = ndlp; ++ pmb->vport = vport; ++ spin_lock(&phba->hbalock); ++ phba->sli.sli_flag &= ++ ~LPFC_SLI_MBOX_ACTIVE; ++ spin_unlock(&phba->hbalock); ++ goto send_current_mbox; ++ } ++ } ++ spin_lock(&phba->pport->work_port_lock); ++ phba->pport->work_port_events &= ~WORKER_MBOX_TMO; ++ spin_unlock(&phba->pport->work_port_lock); ++ lpfc_mbox_cmpl_put(phba, pmb); ++ } ++ if ((work_ha_copy & HA_MBATT) && ++ (phba->sli.mbox_active == NULL)) { ++send_next_mbox: ++ spin_lock(&phba->hbalock); ++ phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; ++ pmb = lpfc_mbox_get(phba); ++ spin_unlock(&phba->hbalock); ++send_current_mbox: ++ /* Process next mailbox command if there is one */ ++ if (pmb != NULL) { ++ rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT); ++ if (rc == MBX_NOT_FINISHED) { ++ pmb->mb.mbxStatus = MBX_NOT_FINISHED; ++ lpfc_mbox_cmpl_put(phba, pmb); ++ goto send_next_mbox; ++ } ++ } else { ++ /* Turn on IOCB processing */ ++ for (i = 0; i < phba->sli.num_rings; i++) ++ lpfc_sli_turn_on_ring(phba, i); ++ } ++ + } + +- spin_lock(phba->host->host_lock); ++ spin_lock(&phba->hbalock); + phba->work_ha |= work_ha_copy; + if (phba->work_wait) +- wake_up(phba->work_wait); +- spin_unlock(phba->host->host_lock); ++ lpfc_worker_wake_up(phba); ++ spin_unlock(&phba->hbalock); + } + + ha_copy &= ~(phba->work_ha_mask); +@@ -3298,7 +3989,7 @@ + */ + status = (ha_copy & (HA_RXMASK << (4*LPFC_FCP_RING))); + status >>= (4*LPFC_FCP_RING); +- if (status & HA_RXATT) ++ if (status & HA_RXMASK) + lpfc_sli_handle_fast_ring_event(phba, + &phba->sli.ring[LPFC_FCP_RING], + status); +@@ -3311,7 +4002,7 @@ + */ + status = (ha_copy & (HA_RXMASK << (4*LPFC_EXTRA_RING))); + status >>= (4*LPFC_EXTRA_RING); +- if (status & HA_RXATT) { ++ if (status & HA_RXMASK) { + lpfc_sli_handle_fast_ring_event(phba, + &phba->sli.ring[LPFC_EXTRA_RING], + status); +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_sli.h 2007-12-21 15:36:12.000000000 -0500 +@@ -20,6 +20,7 @@ + + /* forward declaration for LPFC_IOCB_t's use */ + struct lpfc_hba; ++struct lpfc_vport; + + /* Define the context types that SLI handles for abort and sums. 
*/ + typedef enum _lpfc_ctx_cmd { +@@ -43,10 +44,12 @@ + #define LPFC_IO_WAKE 2 /* High Priority Queue signal flag */ + #define LPFC_IO_FCP 4 /* FCP command -- iocbq in scsi_buf */ + #define LPFC_DRIVER_ABORTED 8 /* driver aborted this request */ ++#define LPFC_IO_FABRIC 0x10 /* Iocb send using fabric scheduler */ + + uint8_t abort_count; + uint8_t rsvd2; + uint32_t drvrTimeout; /* driver timeout in seconds */ ++ struct lpfc_vport *vport;/* virtual port pointer */ + void *context1; /* caller context information */ + void *context2; /* caller context information */ + void *context3; /* caller context information */ +@@ -56,6 +59,8 @@ + struct lpfcMboxq *mbox; + } context_un; + ++ void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, ++ struct lpfc_iocbq *); + void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *, + struct lpfc_iocbq *); + +@@ -69,11 +74,13 @@ + #define IOCB_TIMEDOUT 3 + + #define LPFC_MBX_WAKE 1 ++#define LPFC_MBX_IMED_UNREG 2 + + typedef struct lpfcMboxq { + /* MBOXQs are used in single linked lists */ + struct list_head list; /* ptr to next mailbox command */ + MAILBOX_t mb; /* Mailbox cmd */ ++ struct lpfc_vport *vport;/* virutal port pointer */ + void *context1; /* caller context information */ + void *context2; /* caller context information */ + +@@ -135,6 +142,8 @@ + uint8_t ringno; /* ring number */ + uint16_t numCiocb; /* number of command iocb's per ring */ + uint16_t numRiocb; /* number of rsp iocb's per ring */ ++ uint16_t sizeCiocb; /* Size of command iocb's in this ring */ ++ uint16_t sizeRiocb; /* Size of response iocb's in this ring */ + + uint32_t fast_iotag; /* max fastlookup based iotag */ + uint32_t iotag_ctr; /* keeps track of the next iotag to use */ +@@ -165,6 +174,34 @@ + struct lpfc_sli_ring *); + }; + ++/* Structure used for configuring rings to a specific profile or rctl / type */ ++struct lpfc_hbq_init { ++ uint32_t rn; /* Receive buffer notification */ ++ uint32_t entry_count; /* max # of entries in HBQ */ ++ uint32_t headerLen; /* 0 if not profile 4 or 5 */ ++ uint32_t logEntry; /* Set to 1 if this HBQ used for LogEntry */ ++ uint32_t profile; /* Selection profile 0=all, 7=logentry */ ++ uint32_t ring_mask; /* Binds HBQ to a ring e.g. 
Ring0=b0001, ++ * ring2=b0100 */ ++ uint32_t hbq_index; /* index of this hbq in ring .HBQs[] */ ++ ++ uint32_t seqlenoff; ++ uint32_t maxlen; ++ uint32_t seqlenbcnt; ++ uint32_t cmdcodeoff; ++ uint32_t cmdmatch[8]; ++ uint32_t mask_count; /* number of mask entries in prt array */ ++ struct hbq_mask hbqMasks[6]; ++ ++ /* Non-config rings fields to keep track of buffer allocations */ ++ uint32_t buffer_count; /* number of buffers allocated */ ++ uint32_t init_count; /* number to allocate when initialized */ ++ uint32_t add_count; /* number to allocate when starved */ ++} ; ++ ++#define LPFC_MAX_HBQ 16 ++ ++ + /* Structure used to hold SLI statistical counters and info */ + struct lpfc_sli_stat { + uint64_t mbox_stat_err; /* Mbox cmds completed status error */ +@@ -197,6 +234,7 @@ + #define LPFC_SLI_MBOX_ACTIVE 0x100 /* HBA mailbox is currently active */ + #define LPFC_SLI2_ACTIVE 0x200 /* SLI2 overlay in firmware is active */ + #define LPFC_PROCESS_LA 0x400 /* Able to process link attention */ ++#define LPFC_BLOCK_MGMT_IO 0x800 /* Don't allow mgmt mbx or iocb cmds */ + + struct lpfc_sli_ring ring[LPFC_MAX_RING]; + int fcp_ring; /* ring used for FCP initiator commands */ +@@ -209,6 +247,7 @@ + uint16_t mboxq_cnt; /* current length of queue */ + uint16_t mboxq_max; /* max length */ + LPFC_MBOXQ_t *mbox_active; /* active mboxq information */ ++ struct list_head mboxq_cmpl; + + struct timer_list mbox_tmo; /* Hold clk to timeout active mbox + cmd */ +@@ -221,12 +260,6 @@ + struct lpfc_lnk_stat lnk_stat_offsets; + }; + +-/* Given a pointer to the start of the ring, and the slot number of +- * the desired iocb entry, calc a pointer to that entry. +- * (assume iocb entry size is 32 bytes, or 8 words) +- */ +-#define IOCB_ENTRY(ring,slot) ((IOCB_t *)(((char *)(ring)) + ((slot) * 32))) +- + #define LPFC_MBOX_TMO 30 /* Sec tmo for outstanding mbox + command */ + #define LPFC_MBOX_TMO_FLASH_CMD 300 /* Sec tmo for outstanding FLASH write +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_version.h 2007-12-21 15:36:12.000000000 -0500 +@@ -18,7 +18,7 @@ + * included with this package. * + *******************************************************************/ + +-#define LPFC_DRIVER_VERSION "8.1.12" ++#define LPFC_DRIVER_VERSION "8.2.1" + + #define LPFC_DRIVER_NAME "lpfc" + +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,523 @@ ++/******************************************************************* ++ * This file is part of the Emulex Linux Device Driver for * ++ * Fibre Channel Host Bus Adapters. * ++ * Copyright (C) 2004-2006 Emulex. All rights reserved. * ++ * EMULEX and SLI are trademarks of Emulex. * ++ * www.emulex.com * ++ * Portions Copyright (C) 2004-2005 Christoph Hellwig * ++ * * ++ * This program is free software; you can redistribute it and/or * ++ * modify it under the terms of version 2 of the GNU General * ++ * Public License as published by the Free Software Foundation. * ++ * This program is distributed in the hope that it will be useful. 
* ++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * ++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * ++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * ++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * ++ * TO BE LEGALLY INVALID. See the GNU General Public License for * ++ * more details, a copy of which can be found in the file COPYING * ++ * included with this package. * ++ *******************************************************************/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include "lpfc_hw.h" ++#include "lpfc_sli.h" ++#include "lpfc_disc.h" ++#include "lpfc_scsi.h" ++#include "lpfc.h" ++#include "lpfc_logmsg.h" ++#include "lpfc_crtn.h" ++#include "lpfc_version.h" ++#include "lpfc_vport.h" ++ ++inline void lpfc_vport_set_state(struct lpfc_vport *vport, ++ enum fc_vport_state new_state) ++{ ++ struct fc_vport *fc_vport = vport->fc_vport; ++ ++ if (fc_vport) { ++ /* ++ * When the transport defines fc_vport_set state we will replace ++ * this code with the following line ++ */ ++ /* fc_vport_set_state(fc_vport, new_state); */ ++ if (new_state != FC_VPORT_INITIALIZING) ++ fc_vport->vport_last_state = fc_vport->vport_state; ++ fc_vport->vport_state = new_state; ++ } ++ ++ /* for all the error states we will set the invternal state to FAILED */ ++ switch (new_state) { ++ case FC_VPORT_NO_FABRIC_SUPP: ++ case FC_VPORT_NO_FABRIC_RSCS: ++ case FC_VPORT_FABRIC_LOGOUT: ++ case FC_VPORT_FABRIC_REJ_WWN: ++ case FC_VPORT_FAILED: ++ vport->port_state = LPFC_VPORT_FAILED; ++ break; ++ case FC_VPORT_LINKDOWN: ++ vport->port_state = LPFC_VPORT_UNKNOWN; ++ break; ++ default: ++ /* do nothing */ ++ break; ++ } ++} ++ ++static int ++lpfc_alloc_vpi(struct lpfc_hba *phba) ++{ ++ int vpi; ++ ++ spin_lock_irq(&phba->hbalock); ++ /* Start at bit 1 because vpi zero is reserved for the physical port */ ++ vpi = find_next_zero_bit(phba->vpi_bmask, (phba->max_vpi + 1), 1); ++ if (vpi > phba->max_vpi) ++ vpi = 0; ++ else ++ set_bit(vpi, phba->vpi_bmask); ++ spin_unlock_irq(&phba->hbalock); ++ return vpi; ++} ++ ++static void ++lpfc_free_vpi(struct lpfc_hba *phba, int vpi) ++{ ++ spin_lock_irq(&phba->hbalock); ++ clear_bit(vpi, phba->vpi_bmask); ++ spin_unlock_irq(&phba->hbalock); ++} ++ ++static int ++lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport) ++{ ++ LPFC_MBOXQ_t *pmb; ++ MAILBOX_t *mb; ++ struct lpfc_dmabuf *mp; ++ int rc; ++ ++ pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); ++ if (!pmb) { ++ return -ENOMEM; ++ } ++ mb = &pmb->mb; ++ ++ lpfc_read_sparam(phba, pmb, vport->vpi); ++ /* ++ * Grab buffer pointer and clear context1 so we can use ++ * lpfc_sli_issue_box_wait ++ */ ++ mp = (struct lpfc_dmabuf *) pmb->context1; ++ pmb->context1 = NULL; ++ ++ pmb->vport = vport; ++ rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2); ++ if (rc != MBX_SUCCESS) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT, ++ "%d (%d):1818 VPort failed init, mbxCmd x%x " ++ "READ_SPARM mbxStatus x%x, rc = x%x\n", ++ phba->brd_no, vport->vpi, ++ mb->mbxCommand, mb->mbxStatus, rc); ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++ if (rc != MBX_TIMEOUT) ++ mempool_free(pmb, phba->mbox_mem_pool); ++ return -EIO; ++ } ++ ++ memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm)); ++ memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName, ++ sizeof (struct lpfc_name)); ++ memcpy(&vport->fc_portname, 
&vport->fc_sparam.portName, ++ sizeof (struct lpfc_name)); ++ ++ lpfc_mbuf_free(phba, mp->virt, mp->phys); ++ kfree(mp); ++ mempool_free(pmb, phba->mbox_mem_pool); ++ ++ return 0; ++} ++ ++static int ++lpfc_valid_wwn_format(struct lpfc_hba *phba, struct lpfc_name *wwn, ++ const char *name_type) ++{ ++ /* ensure that IEEE format 1 addresses ++ * contain zeros in bits 59-48 ++ */ ++ if (!((wwn->u.wwn[0] >> 4) == 1 && ++ ((wwn->u.wwn[0] & 0xf) != 0 || (wwn->u.wwn[1] & 0xf) != 0))) ++ return 1; ++ ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1822 Invalid %s: %02x:%02x:%02x:%02x:" ++ "%02x:%02x:%02x:%02x\n", ++ phba->brd_no, name_type, ++ wwn->u.wwn[0], wwn->u.wwn[1], ++ wwn->u.wwn[2], wwn->u.wwn[3], ++ wwn->u.wwn[4], wwn->u.wwn[5], ++ wwn->u.wwn[6], wwn->u.wwn[7]); ++ return 0; ++} ++ ++static int ++lpfc_unique_wwpn(struct lpfc_hba *phba, struct lpfc_vport *new_vport) ++{ ++ struct lpfc_vport *vport; ++ ++ list_for_each_entry(vport, &phba->port_list, listentry) { ++ if (vport == new_vport) ++ continue; ++ /* If they match, return not unique */ ++ if (memcmp(&vport->fc_sparam.portName, ++ &new_vport->fc_sparam.portName, ++ sizeof(struct lpfc_name)) == 0) ++ return 0; ++ } ++ return 1; ++} ++ ++int ++lpfc_vport_create(struct fc_vport *fc_vport, bool disable) ++{ ++ struct lpfc_nodelist *ndlp; ++ struct lpfc_vport *pport = ++ (struct lpfc_vport *) fc_vport->shost->hostdata; ++ struct lpfc_hba *phba = pport->phba; ++ struct lpfc_vport *vport = NULL; ++ int instance; ++ int vpi; ++ int rc = VPORT_ERROR; ++ ++ if ((phba->sli_rev < 3) || ++ !(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1808 Create VPORT failed: " ++ "NPIV is not enabled: SLImode:%d\n", ++ phba->brd_no, phba->sli_rev); ++ rc = VPORT_INVAL; ++ goto error_out; ++ } ++ ++ vpi = lpfc_alloc_vpi(phba); ++ if (vpi == 0) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1809 Create VPORT failed: " ++ "Max VPORTs (%d) exceeded\n", ++ phba->brd_no, phba->max_vpi); ++ rc = VPORT_NORESOURCES; ++ goto error_out; ++ } ++ ++ ++ /* Assign an unused board number */ ++ if ((instance = lpfc_get_instance()) < 0) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1810 Create VPORT failed: Cannot get " ++ "instance number\n", phba->brd_no); ++ lpfc_free_vpi(phba, vpi); ++ rc = VPORT_NORESOURCES; ++ goto error_out; ++ } ++ ++ vport = lpfc_create_port(phba, instance, fc_vport); ++ if (!vport) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1811 Create VPORT failed: vpi x%x\n", ++ phba->brd_no, vpi); ++ lpfc_free_vpi(phba, vpi); ++ rc = VPORT_NORESOURCES; ++ goto error_out; ++ } ++ ++ vport->vpi = vpi; ++ lpfc_debugfs_initialize(vport); ++ ++ if (lpfc_vport_sparm(phba, vport)) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1813 Create VPORT failed: vpi:%d " ++ "Cannot get sparam\n", ++ phba->brd_no, vpi); ++ lpfc_free_vpi(phba, vpi); ++ destroy_port(vport); ++ rc = VPORT_NORESOURCES; ++ goto error_out; ++ } ++ ++ memcpy(vport->fc_portname.u.wwn, vport->fc_sparam.portName.u.wwn, 8); ++ memcpy(vport->fc_nodename.u.wwn, vport->fc_sparam.nodeName.u.wwn, 8); ++ ++ if (fc_vport->node_name != 0) ++ u64_to_wwn(fc_vport->node_name, vport->fc_nodename.u.wwn); ++ if (fc_vport->port_name != 0) ++ u64_to_wwn(fc_vport->port_name, vport->fc_portname.u.wwn); ++ ++ memcpy(&vport->fc_sparam.portName, vport->fc_portname.u.wwn, 8); ++ memcpy(&vport->fc_sparam.nodeName, vport->fc_nodename.u.wwn, 8); ++ ++ if (!lpfc_valid_wwn_format(phba, &vport->fc_sparam.nodeName, "WWNN") || ++ 
!lpfc_valid_wwn_format(phba, &vport->fc_sparam.portName, "WWPN")) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1821 Create VPORT failed: vpi:%d " ++ "Invalid WWN format\n", ++ phba->brd_no, vpi); ++ lpfc_free_vpi(phba, vpi); ++ destroy_port(vport); ++ rc = VPORT_INVAL; ++ goto error_out; ++ } ++ ++ if (!lpfc_unique_wwpn(phba, vport)) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1823 Create VPORT failed: vpi:%d " ++ "Duplicate WWN on HBA\n", ++ phba->brd_no, vpi); ++ lpfc_free_vpi(phba, vpi); ++ destroy_port(vport); ++ rc = VPORT_INVAL; ++ goto error_out; ++ } ++ ++ *(struct lpfc_vport **)fc_vport->dd_data = vport; ++ vport->fc_vport = fc_vport; ++ ++ if ((phba->link_state < LPFC_LINK_UP) || ++ (phba->fc_topology == TOPOLOGY_LOOP)) { ++ lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN); ++ rc = VPORT_OK; ++ goto out; ++ } ++ ++ if (disable) { ++ rc = VPORT_OK; ++ goto out; ++ } ++ ++ /* Use the Physical nodes Fabric NDLP to determine if the link is ++ * up and ready to FDISC. ++ */ ++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); ++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { ++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) { ++ lpfc_set_disctmo(vport); ++ lpfc_initial_fdisc(vport); ++ } else { ++ lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0262 No NPIV Fabric " ++ "support\n", ++ phba->brd_no, vport->vpi); ++ } ++ } else { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ } ++ rc = VPORT_OK; ++ ++out: ++ lpfc_host_attrib_init(lpfc_shost_from_vport(vport)); ++error_out: ++ return rc; ++} ++ ++int ++disable_vport(struct fc_vport *fc_vport) ++{ ++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_nodelist *ndlp = NULL, *next_ndlp = NULL; ++ long timeout; ++ ++ ndlp = lpfc_findnode_did(vport, Fabric_DID); ++ if (ndlp && phba->link_state >= LPFC_LINK_UP) { ++ vport->unreg_vpi_cmpl = VPORT_INVAL; ++ timeout = msecs_to_jiffies(phba->fc_ratov * 2000); ++ if (!lpfc_issue_els_npiv_logo(vport, ndlp)) ++ while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout) ++ timeout = schedule_timeout(timeout); ++ } ++ ++ lpfc_sli_host_down(vport); ++ ++ /* Mark all nodes for discovery so we can remove them by ++ * calling lpfc_cleanup_rpis(vport, 1) ++ */ ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { ++ if (ndlp->nlp_state == NLP_STE_UNUSED_NODE) ++ continue; ++ lpfc_disc_state_machine(vport, ndlp, NULL, ++ NLP_EVT_DEVICE_RECOVERY); ++ } ++ lpfc_cleanup_rpis(vport, 1); ++ ++ lpfc_stop_vport_timers(vport); ++ lpfc_unreg_all_rpis(vport); ++ lpfc_unreg_default_rpis(vport); ++ /* ++ * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the ++ * scsi_host_put() to release the vport. ++ */ ++ lpfc_mbx_unreg_vpi(vport); ++ ++ lpfc_vport_set_state(vport, FC_VPORT_DISABLED); ++ return VPORT_OK; ++} ++ ++int ++enable_vport(struct fc_vport *fc_vport) ++{ ++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; ++ struct lpfc_hba *phba = vport->phba; ++ struct lpfc_nodelist *ndlp = NULL; ++ ++ if ((phba->link_state < LPFC_LINK_UP) || ++ (phba->fc_topology == TOPOLOGY_LOOP)) { ++ lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN); ++ return VPORT_OK; ++ } ++ ++ vport->load_flag |= FC_LOADING; ++ vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; ++ ++ /* Use the Physical nodes Fabric NDLP to determine if the link is ++ * up and ready to FDISC. 
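++	 * (an FDISC may only be sent once the physical port has completed
++	 * its fabric login on an NPIV-capable switch)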
++ */ ++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); ++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { ++ if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) { ++ lpfc_set_disctmo(vport); ++ lpfc_initial_fdisc(vport); ++ } else { ++ lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP); ++ lpfc_printf_log(phba, KERN_ERR, LOG_ELS, ++ "%d (%d):0264 No NPIV Fabric " ++ "support\n", ++ phba->brd_no, vport->vpi); ++ } ++ } else { ++ lpfc_vport_set_state(vport, FC_VPORT_FAILED); ++ } ++ ++ return VPORT_OK; ++} ++ ++int ++lpfc_vport_disable(struct fc_vport *fc_vport, bool disable) ++{ ++ if (disable) ++ return disable_vport(fc_vport); ++ else ++ return enable_vport(fc_vport); ++} ++ ++ ++int ++lpfc_vport_delete(struct fc_vport *fc_vport) ++{ ++ struct lpfc_nodelist *ndlp = NULL; ++ struct lpfc_nodelist *next_ndlp; ++ struct Scsi_Host *shost = (struct Scsi_Host *) fc_vport->shost; ++ struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data; ++ struct lpfc_hba *phba = vport->phba; ++ long timeout; ++ int rc = VPORT_ERROR; ++ ++ /* ++ * This is a bit of a mess. We want to ensure the shost doesn't get ++ * torn down until we're done with the embedded lpfc_vport structure. ++ * ++ * Beyond holding a reference for this function, we also need a ++ * reference for outstanding I/O requests we schedule during delete ++ * processing. But once we scsi_remove_host() we can no longer obtain ++ * a reference through scsi_host_get(). ++ * ++ * So we take two references here. We release one reference at the ++ * bottom of the function -- after delinking the vport. And we ++ * release the other at the completion of the unreg_vpi that get's ++ * initiated after we've disposed of all other resources associated ++ * with the port. ++ */ ++ if (!scsi_host_get(shost) || !scsi_host_get(shost)) ++ return VPORT_INVAL; ++ ++ if (vport->port_type == LPFC_PHYSICAL_PORT) { ++ lpfc_printf_log(phba, KERN_ERR, LOG_VPORT, ++ "%d:1812 vport_delete failed: Cannot delete " ++ "physical host\n", phba->brd_no); ++ goto out; ++ } ++ ++ vport->load_flag |= FC_UNLOADING; ++ ++ kfree(vport->vname); ++ lpfc_debugfs_terminate(vport); ++ fc_remove_host(lpfc_shost_from_vport(vport)); ++ scsi_remove_host(lpfc_shost_from_vport(vport)); ++ ++ ndlp = lpfc_findnode_did(phba->pport, Fabric_DID); ++ if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE && ++ phba->link_state >= LPFC_LINK_UP) { ++ ++ /* First look for the Fabric ndlp */ ++ ndlp = lpfc_findnode_did(vport, Fabric_DID); ++ if (!ndlp) { ++ /* Cannot find existing Fabric ndlp, allocate one */ ++ ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); ++ if (!ndlp) ++ goto skip_logo; ++ lpfc_nlp_init(vport, ndlp, Fabric_DID); ++ } else { ++ lpfc_dequeue_node(vport, ndlp); ++ } ++ vport->unreg_vpi_cmpl = VPORT_INVAL; ++ timeout = msecs_to_jiffies(phba->fc_ratov * 2000); ++ if (!lpfc_issue_els_npiv_logo(vport, ndlp)) ++ while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout) ++ timeout = schedule_timeout(timeout); ++ } ++ ++skip_logo: ++ lpfc_sli_host_down(vport); ++ ++ list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) { ++ lpfc_disc_state_machine(vport, ndlp, NULL, ++ NLP_EVT_DEVICE_RECOVERY); ++ lpfc_disc_state_machine(vport, ndlp, NULL, ++ NLP_EVT_DEVICE_RM); ++ } ++ ++ lpfc_stop_vport_timers(vport); ++ lpfc_unreg_all_rpis(vport); ++ lpfc_unreg_default_rpis(vport); ++ /* ++ * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the ++ * scsi_host_put() to release the vport. 
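++	 * (balancing the second of the two scsi_host_get() references taken
++	 * at the top of this function)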
++ */ ++ lpfc_mbx_unreg_vpi(vport); ++ ++ lpfc_free_vpi(phba, vport->vpi); ++ vport->work_port_events = 0; ++ spin_lock_irq(&phba->hbalock); ++ list_del_init(&vport->listentry); ++ spin_unlock_irq(&phba->hbalock); ++ ++ rc = VPORT_OK; ++out: ++ scsi_host_put(shost); ++ return rc; ++} ++ ++ ++EXPORT_SYMBOL(lpfc_vport_create); ++EXPORT_SYMBOL(lpfc_vport_delete); +diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h +--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/lpfc/lpfc_vport.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,113 @@ ++/******************************************************************* ++ * This file is part of the Emulex Linux Device Driver for * ++ * Fibre Channel Host Bus Adapters. * ++ * Copyright (C) 2004-2006 Emulex. All rights reserved. * ++ * EMULEX and SLI are trademarks of Emulex. * ++ * www.emulex.com * ++ * Portions Copyright (C) 2004-2005 Christoph Hellwig * ++ * * ++ * This program is free software; you can redistribute it and/or * ++ * modify it under the terms of version 2 of the GNU General * ++ * Public License as published by the Free Software Foundation. * ++ * This program is distributed in the hope that it will be useful. * ++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * ++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * ++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * ++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * ++ * TO BE LEGALLY INVALID. See the GNU General Public License for * ++ * more details, a copy of which can be found in the file COPYING * ++ * included with this package. * ++ *******************************************************************/ ++ ++#ifndef _H_LPFC_VPORT ++#define _H_LPFC_VPORT ++ ++/* API version values (each will be an individual bit) */ ++#define VPORT_API_VERSION_1 0x01 ++ ++/* Values returned via lpfc_vport_getinfo() */ ++struct vport_info { ++ ++ uint32_t api_versions; ++ uint8_t linktype; ++#define VPORT_TYPE_PHYSICAL 0 ++#define VPORT_TYPE_VIRTUAL 1 ++ ++ uint8_t state; ++#define VPORT_STATE_OFFLINE 0 ++#define VPORT_STATE_ACTIVE 1 ++#define VPORT_STATE_FAILED 2 ++ ++ uint8_t fail_reason; ++ uint8_t prev_fail_reason; ++#define VPORT_FAIL_UNKNOWN 0 ++#define VPORT_FAIL_LINKDOWN 1 ++#define VPORT_FAIL_FAB_UNSUPPORTED 2 ++#define VPORT_FAIL_FAB_NORESOURCES 3 ++#define VPORT_FAIL_FAB_LOGOUT 4 ++#define VPORT_FAIL_ADAP_NORESOURCES 5 ++ ++ uint8_t node_name[8]; /* WWNN */ ++ uint8_t port_name[8]; /* WWPN */ ++ ++ struct Scsi_Host *shost; ++ ++/* Following values are valid only on physical links */ ++ uint32_t vports_max; ++ uint32_t vports_inuse; ++ uint32_t rpi_max; ++ uint32_t rpi_inuse; ++#define VPORT_CNT_INVALID 0xFFFFFFFF ++}; ++ ++/* data used in link creation */ ++struct vport_data { ++ uint32_t api_version; ++ ++ uint32_t options; ++#define VPORT_OPT_AUTORETRY 0x01 ++ ++ uint8_t node_name[8]; /* WWNN */ ++ uint8_t port_name[8]; /* WWPN */ ++ ++/* ++ * Upon successful creation, vport_shost will point to the new Scsi_Host ++ * structure for the new virtual link. 
++ */ ++ struct Scsi_Host *vport_shost; ++}; ++ ++/* API function return codes */ ++#define VPORT_OK 0 ++#define VPORT_ERROR -1 ++#define VPORT_INVAL -2 ++#define VPORT_NOMEM -3 ++#define VPORT_NORESOURCES -4 ++ ++int lpfc_vport_create(struct fc_vport *, bool); ++int lpfc_vport_delete(struct fc_vport *); ++int lpfc_vport_getinfo(struct Scsi_Host *, struct vport_info *); ++int lpfc_vport_tgt_remove(struct Scsi_Host *, uint, uint); ++ ++/* ++ * queuecommand VPORT-specific return codes. Specified in the host byte code. ++ * Returned when the virtual link has failed or is not active. ++ */ ++#define DID_VPORT_ERROR 0x0f ++ ++#define VPORT_INFO 0x1 ++#define VPORT_CREATE 0x2 ++#define VPORT_DELETE 0x4 ++ ++struct vport_cmd_tag { ++ uint32_t cmd; ++ struct vport_data cdata; ++ struct vport_info cinfo; ++ void *vport; ++ int vport_num; ++}; ++ ++void lpfc_vport_set_state(struct lpfc_vport *vport, ++ enum fc_vport_state new_state); ++ ++#endif /* H_LPFC_VPORT */ +diff -Nurb linux-2.6.22-570/drivers/scsi/mac53c94.c linux-2.6.22-591/drivers/scsi/mac53c94.c +--- linux-2.6.22-570/drivers/scsi/mac53c94.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/mac53c94.c 2007-12-21 15:36:12.000000000 -0500 +@@ -77,7 +77,7 @@ + for (i = 0; i < cmd->cmd_len; ++i) + printk(" %.2x", cmd->cmnd[i]); + printk("\n" KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n", +- cmd->use_sg, cmd->request_bufflen, cmd->request_buffer); ++ scsi_sg_count(cmd), scsi_bufflen(cmd), scsi_sglist(cmd)); + } + #endif + +@@ -173,7 +173,6 @@ + writeb(CMD_SELECT, ®s->command); + state->phase = selecting; + +- if (cmd->use_sg > 0 || cmd->request_bufflen != 0) + set_dma_cmds(state, cmd); + } + +@@ -262,7 +261,7 @@ + writeb(CMD_NOP, ®s->command); + /* set DMA controller going if any data to transfer */ + if ((stat & (STAT_MSG|STAT_CD)) == 0 +- && (cmd->use_sg > 0 || cmd->request_bufflen != 0)) { ++ && (scsi_sg_count(cmd) > 0 || scsi_bufflen(cmd))) { + nb = cmd->SCp.this_residual; + if (nb > 0xfff0) + nb = 0xfff0; +@@ -310,14 +309,7 @@ + printk(KERN_DEBUG "intr %x before data xfer complete\n", intr); + } + writel(RUN << 16, &dma->control); /* stop dma */ +- if (cmd->use_sg != 0) { +- pci_unmap_sg(state->pdev, +- (struct scatterlist *)cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- } else { +- pci_unmap_single(state->pdev, state->dma_addr, +- cmd->request_bufflen, cmd->sc_data_direction); +- } ++ scsi_dma_unmap(cmd); + /* should check dma status */ + writeb(CMD_I_COMPLETE, ®s->command); + state->phase = completing; +@@ -365,23 +357,23 @@ + */ + static void set_dma_cmds(struct fsc_state *state, struct scsi_cmnd *cmd) + { +- int i, dma_cmd, total; ++ int i, dma_cmd, total, nseg; + struct scatterlist *scl; + struct dbdma_cmd *dcmds; + dma_addr_t dma_addr; + u32 dma_len; + ++ nseg = scsi_dma_map(cmd); ++ BUG_ON(nseg < 0); ++ if (!nseg) ++ return; ++ + dma_cmd = cmd->sc_data_direction == DMA_TO_DEVICE ? 
+ OUTPUT_MORE : INPUT_MORE; + dcmds = state->dma_cmds; +- if (cmd->use_sg > 0) { +- int nseg; +- + total = 0; +- scl = (struct scatterlist *) cmd->request_buffer; +- nseg = pci_map_sg(state->pdev, scl, cmd->use_sg, +- cmd->sc_data_direction); +- for (i = 0; i < nseg; ++i) { ++ ++ scsi_for_each_sg(cmd, scl, nseg, i) { + dma_addr = sg_dma_address(scl); + dma_len = sg_dma_len(scl); + if (dma_len > 0xffff) +@@ -391,21 +383,9 @@ + st_le16(&dcmds->command, dma_cmd); + st_le32(&dcmds->phy_addr, dma_addr); + dcmds->xfer_status = 0; +- ++scl; +- ++dcmds; +- } +- } else { +- total = cmd->request_bufflen; +- if (total > 0xffff) +- panic("mac53c94: transfer size >= 64k"); +- dma_addr = pci_map_single(state->pdev, cmd->request_buffer, +- total, cmd->sc_data_direction); +- state->dma_addr = dma_addr; +- st_le16(&dcmds->req_count, total); +- st_le32(&dcmds->phy_addr, dma_addr); +- dcmds->xfer_status = 0; + ++dcmds; + } ++ + dma_cmd += OUTPUT_LAST - OUTPUT_MORE; + st_le16(&dcmds[-1].command, dma_cmd); + st_le16(&dcmds->command, DBDMA_STOP); +diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c +--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_mbox.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1378,8 +1378,6 @@ + { + struct scatterlist *sgl; + mbox_ccb_t *ccb; +- struct page *page; +- unsigned long offset; + struct scsi_cmnd *scp; + int sgcnt; + int i; +@@ -1388,48 +1386,16 @@ + scp = scb->scp; + ccb = (mbox_ccb_t *)scb->ccb; + ++ sgcnt = scsi_dma_map(scp); ++ BUG_ON(sgcnt < 0 || sgcnt > adapter->sglen); ++ + // no mapping required if no data to be transferred +- if (!scp->request_buffer || !scp->request_bufflen) ++ if (!sgcnt) + return 0; + +- if (!scp->use_sg) { /* scatter-gather list not used */ +- +- page = virt_to_page(scp->request_buffer); +- +- offset = ((unsigned long)scp->request_buffer & ~PAGE_MASK); +- +- ccb->buf_dma_h = pci_map_page(adapter->pdev, page, offset, +- scp->request_bufflen, +- scb->dma_direction); +- scb->dma_type = MRAID_DMA_WBUF; +- +- /* +- * We need to handle special 64-bit commands that need a +- * minimum of 1 SG +- */ +- sgcnt = 1; +- ccb->sgl64[0].address = ccb->buf_dma_h; +- ccb->sgl64[0].length = scp->request_bufflen; +- +- return sgcnt; +- } +- +- sgl = (struct scatterlist *)scp->request_buffer; +- +- // The number of sg elements returned must not exceed our limit +- sgcnt = pci_map_sg(adapter->pdev, sgl, scp->use_sg, +- scb->dma_direction); +- +- if (sgcnt > adapter->sglen) { +- con_log(CL_ANN, (KERN_CRIT +- "megaraid critical: too many sg elements:%d\n", +- sgcnt)); +- BUG(); +- } +- + scb->dma_type = MRAID_DMA_WSG; + +- for (i = 0; i < sgcnt; i++, sgl++) { ++ scsi_for_each_sg(scp, sgl, sgcnt, i) { + ccb->sgl64[i].address = sg_dma_address(sgl); + ccb->sgl64[i].length = sg_dma_len(sgl); + } +@@ -1489,19 +1455,11 @@ + + adapter->outstanding_cmds++; + +- if (scb->dma_direction == PCI_DMA_TODEVICE) { +- if (!scb->scp->use_sg) { // sg list not used +- pci_dma_sync_single_for_device(adapter->pdev, +- ccb->buf_dma_h, +- scb->scp->request_bufflen, +- PCI_DMA_TODEVICE); +- } +- else { ++ if (scb->dma_direction == PCI_DMA_TODEVICE) + pci_dma_sync_sg_for_device(adapter->pdev, +- scb->scp->request_buffer, +- scb->scp->use_sg, PCI_DMA_TODEVICE); +- } +- } ++ scsi_sglist(scb->scp), ++ scsi_sg_count(scb->scp), ++ PCI_DMA_TODEVICE); + + mbox->busy = 1; // Set busy + mbox->poll = 0; +@@ -1624,11 +1582,11 @@ + return scb; + + 
case MODE_SENSE: +- if (scp->use_sg) { ++ { + struct scatterlist *sgl; + caddr_t vaddr; + +- sgl = (struct scatterlist *)scp->request_buffer; ++ sgl = scsi_sglist(scp); + if (sgl->page) { + vaddr = (caddr_t) + (page_address((&sgl[0])->page) +@@ -1642,9 +1600,6 @@ + __LINE__)); + } + } +- else { +- memset(scp->request_buffer, 0, scp->cmnd[4]); +- } + scp->result = (DID_OK << 16); + return NULL; + +@@ -1716,7 +1671,7 @@ + mbox->cmd = MBOXCMD_PASSTHRU64; + scb->dma_direction = scp->sc_data_direction; + +- pthru->dataxferlen = scp->request_bufflen; ++ pthru->dataxferlen = scsi_bufflen(scp); + pthru->dataxferaddr = ccb->sgl_dma_h; + pthru->numsge = megaraid_mbox_mksgl(adapter, + scb); +@@ -2050,8 +2005,8 @@ + + memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); + +- if (scp->request_bufflen) { +- pthru->dataxferlen = scp->request_bufflen; ++ if (scsi_bufflen(scp)) { ++ pthru->dataxferlen = scsi_bufflen(scp); + pthru->dataxferaddr = ccb->sgl_dma_h; + pthru->numsge = megaraid_mbox_mksgl(adapter, scb); + } +@@ -2099,8 +2054,8 @@ + + memcpy(epthru->cdb, scp->cmnd, scp->cmd_len); + +- if (scp->request_bufflen) { +- epthru->dataxferlen = scp->request_bufflen; ++ if (scsi_bufflen(scp)) { ++ epthru->dataxferlen = scsi_bufflen(scp); + epthru->dataxferaddr = ccb->sgl_dma_h; + epthru->numsge = megaraid_mbox_mksgl(adapter, scb); + } +@@ -2266,37 +2221,13 @@ + + ccb = (mbox_ccb_t *)scb->ccb; + +- switch (scb->dma_type) { +- +- case MRAID_DMA_WBUF: +- if (scb->dma_direction == PCI_DMA_FROMDEVICE) { +- pci_dma_sync_single_for_cpu(adapter->pdev, +- ccb->buf_dma_h, +- scb->scp->request_bufflen, +- PCI_DMA_FROMDEVICE); +- } +- +- pci_unmap_page(adapter->pdev, ccb->buf_dma_h, +- scb->scp->request_bufflen, scb->dma_direction); +- +- break; +- +- case MRAID_DMA_WSG: +- if (scb->dma_direction == PCI_DMA_FROMDEVICE) { ++ if (scb->dma_direction == PCI_DMA_FROMDEVICE) + pci_dma_sync_sg_for_cpu(adapter->pdev, +- scb->scp->request_buffer, +- scb->scp->use_sg, PCI_DMA_FROMDEVICE); +- } +- +- pci_unmap_sg(adapter->pdev, scb->scp->request_buffer, +- scb->scp->use_sg, scb->dma_direction); +- +- break; +- +- default: +- break; +- } ++ scsi_sglist(scb->scp), ++ scsi_sg_count(scb->scp), ++ PCI_DMA_FROMDEVICE); + ++ scsi_dma_unmap(scb->scp); + return; + } + +@@ -2399,25 +2330,17 @@ + if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0 + && IS_RAID_CH(raid_dev, scb->dev_channel)) { + +- if (scp->use_sg) { +- sgl = (struct scatterlist *) +- scp->request_buffer; +- ++ sgl = scsi_sglist(scp); + if (sgl->page) { + c = *(unsigned char *) + (page_address((&sgl[0])->page) + + (&sgl[0])->offset); +- } +- else { ++ } else { + con_log(CL_ANN, (KERN_WARNING + "megaraid mailbox: invalid sg:%d\n", + __LINE__)); + c = 0; + } +- } +- else { +- c = *(uint8_t *)scp->request_buffer; +- } + + if ((c & 0x1F ) == TYPE_DISK) { + pdev_index = (scb->dev_channel * 16) + +diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c +--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/megaraid/megaraid_sas.c 2007-12-21 15:36:12.000000000 -0500 +@@ -433,34 +433,15 @@ + int sge_count; + struct scatterlist *os_sgl; + +- /* +- * Return 0 if there is no data transfer +- */ +- if (!scp->request_buffer || !scp->request_bufflen) +- return 0; ++ sge_count = scsi_dma_map(scp); ++ BUG_ON(sge_count < 0); + +- if (!scp->use_sg) { +- mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev, +- scp-> +- request_buffer, +- scp-> +- 
request_bufflen, +- scp-> +- sc_data_direction); +- mfi_sgl->sge32[0].length = scp->request_bufflen; +- +- return 1; +- } +- +- os_sgl = (struct scatterlist *)scp->request_buffer; +- sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, +- scp->sc_data_direction); +- +- for (i = 0; i < sge_count; i++, os_sgl++) { ++ if (sge_count) { ++ scsi_for_each_sg(scp, os_sgl, sge_count, i) { + mfi_sgl->sge32[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl); + } +- ++ } + return sge_count; + } + +@@ -481,35 +462,15 @@ + int sge_count; + struct scatterlist *os_sgl; + +- /* +- * Return 0 if there is no data transfer +- */ +- if (!scp->request_buffer || !scp->request_bufflen) +- return 0; +- +- if (!scp->use_sg) { +- mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev, +- scp-> +- request_buffer, +- scp-> +- request_bufflen, +- scp-> +- sc_data_direction); ++ sge_count = scsi_dma_map(scp); ++ BUG_ON(sge_count < 0); + +- mfi_sgl->sge64[0].length = scp->request_bufflen; +- +- return 1; +- } +- +- os_sgl = (struct scatterlist *)scp->request_buffer; +- sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg, +- scp->sc_data_direction); +- +- for (i = 0; i < sge_count; i++, os_sgl++) { ++ if (sge_count) { ++ scsi_for_each_sg(scp, os_sgl, sge_count, i) { + mfi_sgl->sge64[i].length = sg_dma_len(os_sgl); + mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl); + } +- ++ } + return sge_count; + } + +@@ -593,7 +554,7 @@ + pthru->cdb_len = scp->cmd_len; + pthru->timeout = 0; + pthru->flags = flags; +- pthru->data_xfer_len = scp->request_bufflen; ++ pthru->data_xfer_len = scsi_bufflen(scp); + + memcpy(pthru->cdb, scp->cmnd, scp->cmd_len); + +@@ -1195,45 +1156,6 @@ + } + + /** +- * megasas_unmap_sgbuf - Unmap SG buffers +- * @instance: Adapter soft state +- * @cmd: Completed command +- */ +-static void +-megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd) +-{ +- dma_addr_t buf_h; +- u8 opcode; +- +- if (cmd->scmd->use_sg) { +- pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer, +- cmd->scmd->use_sg, cmd->scmd->sc_data_direction); +- return; +- } +- +- if (!cmd->scmd->request_bufflen) +- return; +- +- opcode = cmd->frame->hdr.cmd; +- +- if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) { +- if (IS_DMA64) +- buf_h = cmd->frame->io.sgl.sge64[0].phys_addr; +- else +- buf_h = cmd->frame->io.sgl.sge32[0].phys_addr; +- } else { +- if (IS_DMA64) +- buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr; +- else +- buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr; +- } +- +- pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen, +- cmd->scmd->sc_data_direction); +- return; +-} +- +-/** + * megasas_complete_cmd - Completes a command + * @instance: Adapter soft state + * @cmd: Command to be completed +@@ -1281,7 +1203,7 @@ + + atomic_dec(&instance->fw_outstanding); + +- megasas_unmap_sgbuf(instance, cmd); ++ scsi_dma_unmap(cmd->scmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + +@@ -1329,7 +1251,7 @@ + + atomic_dec(&instance->fw_outstanding); + +- megasas_unmap_sgbuf(instance, cmd); ++ scsi_dma_unmap(cmd->scmd); + cmd->scmd->scsi_done(cmd->scmd); + megasas_return_cmd(instance, cmd); + +diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid.c linux-2.6.22-591/drivers/scsi/megaraid.c +--- linux-2.6.22-570/drivers/scsi/megaraid.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/megaraid.c 2007-12-21 15:36:12.000000000 -0500 +@@ -523,10 +523,8 @@ + /* + * filter the internal and 
ioctl commands + */ +- if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) { +- return cmd->request_buffer; +- } +- ++ if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) ++ return (scb_t *)cmd->host_scribble; + + /* + * We know what channels our logical drives are on - mega_find_card() +@@ -657,22 +655,14 @@ + + case MODE_SENSE: { + char *buf; +- +- if (cmd->use_sg) { + struct scatterlist *sg; + +- sg = (struct scatterlist *)cmd->request_buffer; +- buf = kmap_atomic(sg->page, KM_IRQ0) + +- sg->offset; +- } else +- buf = cmd->request_buffer; +- memset(buf, 0, cmd->cmnd[4]); +- if (cmd->use_sg) { +- struct scatterlist *sg; ++ sg = scsi_sglist(cmd); ++ buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset; + +- sg = (struct scatterlist *)cmd->request_buffer; ++ memset(buf, 0, cmd->cmnd[4]); + kunmap_atomic(buf - sg->offset, KM_IRQ0); +- } ++ + cmd->result = (DID_OK << 16); + cmd->scsi_done(cmd); + return NULL; +@@ -1551,24 +1541,16 @@ + islogical = adapter->logdrv_chan[cmd->device->channel]; + if( cmd->cmnd[0] == INQUIRY && !islogical ) { + +- if( cmd->use_sg ) { +- sgl = (struct scatterlist *) +- cmd->request_buffer; +- ++ sgl = scsi_sglist(cmd); + if( sgl->page ) { + c = *(unsigned char *) + page_address((&sgl[0])->page) + + (&sgl[0])->offset; +- } +- else { ++ } else { + printk(KERN_WARNING + "megaraid: invalid sg.\n"); + c = 0; + } +- } +- else { +- c = *(u8 *)cmd->request_buffer; +- } + + if(IS_RAID_CH(adapter, cmd->device->channel) && + ((c & 0x1F ) == TYPE_DISK)) { +@@ -1704,30 +1686,14 @@ + static void + mega_free_scb(adapter_t *adapter, scb_t *scb) + { +- unsigned long length; +- + switch( scb->dma_type ) { + + case MEGA_DMA_TYPE_NONE: + break; + +- case MEGA_BULK_DATA: +- if (scb->cmd->use_sg == 0) +- length = scb->cmd->request_bufflen; +- else { +- struct scatterlist *sgl = +- (struct scatterlist *)scb->cmd->request_buffer; +- length = sgl->length; +- } +- pci_unmap_page(adapter->dev, scb->dma_h_bulkdata, +- length, scb->dma_direction); +- break; +- + case MEGA_SGLIST: +- pci_unmap_sg(adapter->dev, scb->cmd->request_buffer, +- scb->cmd->use_sg, scb->dma_direction); ++ scsi_dma_unmap(scb->cmd); + break; +- + default: + break; + } +@@ -1767,80 +1733,33 @@ + static int + mega_build_sglist(adapter_t *adapter, scb_t *scb, u32 *buf, u32 *len) + { +- struct scatterlist *sgl; +- struct page *page; +- unsigned long offset; +- unsigned int length; ++ struct scatterlist *sg; + Scsi_Cmnd *cmd; + int sgcnt; + int idx; + + cmd = scb->cmd; + +- /* Scatter-gather not used */ +- if( cmd->use_sg == 0 || (cmd->use_sg == 1 && +- !adapter->has_64bit_addr)) { +- +- if (cmd->use_sg == 0) { +- page = virt_to_page(cmd->request_buffer); +- offset = offset_in_page(cmd->request_buffer); +- length = cmd->request_bufflen; +- } else { +- sgl = (struct scatterlist *)cmd->request_buffer; +- page = sgl->page; +- offset = sgl->offset; +- length = sgl->length; +- } +- +- scb->dma_h_bulkdata = pci_map_page(adapter->dev, +- page, offset, +- length, +- scb->dma_direction); +- scb->dma_type = MEGA_BULK_DATA; +- +- /* +- * We need to handle special 64-bit commands that need a +- * minimum of 1 SG +- */ +- if( adapter->has_64bit_addr ) { +- scb->sgl64[0].address = scb->dma_h_bulkdata; +- scb->sgl64[0].length = length; +- *buf = (u32)scb->sgl_dma_addr; +- *len = (u32)length; +- return 1; +- } +- else { +- *buf = (u32)scb->dma_h_bulkdata; +- *len = (u32)length; +- } +- return 0; +- } +- +- sgl = (struct scatterlist *)cmd->request_buffer; +- + /* + * Copy Scatter-Gather list info into controller structure. 
+ * + * The number of sg elements returned must not exceed our limit + */ +- sgcnt = pci_map_sg(adapter->dev, sgl, cmd->use_sg, +- scb->dma_direction); ++ sgcnt = scsi_dma_map(cmd); + + scb->dma_type = MEGA_SGLIST; + +- BUG_ON(sgcnt > adapter->sglen); ++ BUG_ON(sgcnt > adapter->sglen || sgcnt < 0); + + *len = 0; + +- for( idx = 0; idx < sgcnt; idx++, sgl++ ) { +- +- if( adapter->has_64bit_addr ) { +- scb->sgl64[idx].address = sg_dma_address(sgl); +- *len += scb->sgl64[idx].length = sg_dma_len(sgl); +- } +- else { +- scb->sgl[idx].address = sg_dma_address(sgl); +- *len += scb->sgl[idx].length = sg_dma_len(sgl); ++ scsi_for_each_sg(cmd, sg, sgcnt, idx) { ++ if (adapter->has_64bit_addr) { ++ scb->sgl64[idx].address = sg_dma_address(sg); ++ *len += scb->sgl64[idx].length = sg_dma_len(sg); ++ } else { ++ scb->sgl[idx].address = sg_dma_address(sg); ++ *len += scb->sgl[idx].length = sg_dma_len(sg); + } + } + +@@ -3571,7 +3490,7 @@ + /* + * The user passthru structure + */ +- upthru = (mega_passthru __user *)MBOX(uioc)->xferaddr; ++ upthru = (mega_passthru __user *)(unsigned long)MBOX(uioc)->xferaddr; + + /* + * Copy in the user passthru here. +@@ -3623,7 +3542,7 @@ + /* + * Get the user data + */ +- if( copy_from_user(data, (char __user *)uxferaddr, ++ if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr, + pthru->dataxferlen) ) { + rval = (-EFAULT); + goto freemem_and_return; +@@ -3649,7 +3568,7 @@ + * Is data going up-stream + */ + if( pthru->dataxferlen && (uioc.flags & UIOC_RD) ) { +- if( copy_to_user((char __user *)uxferaddr, data, ++ if( copy_to_user((char __user *)(unsigned long) uxferaddr, data, + pthru->dataxferlen) ) { + rval = (-EFAULT); + } +@@ -3702,7 +3621,7 @@ + /* + * Get the user data + */ +- if( copy_from_user(data, (char __user *)uxferaddr, ++ if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr, + uioc.xferlen) ) { + + pci_free_consistent(pdev, +@@ -3742,7 +3661,7 @@ + * Is data going up-stream + */ + if( uioc.xferlen && (uioc.flags & UIOC_RD) ) { +- if( copy_to_user((char __user *)uxferaddr, data, ++ if( copy_to_user((char __user *)(unsigned long) uxferaddr, data, + uioc.xferlen) ) { + + rval = (-EFAULT); +@@ -4494,7 +4413,7 @@ + scmd->device = sdev; + + scmd->device->host = adapter->host; +- scmd->request_buffer = (void *)scb; ++ scmd->host_scribble = (void *)scb; + scmd->cmnd[0] = MEGA_INTERNAL_CMD; + + scb->state |= SCB_ACTIVE; +diff -Nurb linux-2.6.22-570/drivers/scsi/mesh.c linux-2.6.22-591/drivers/scsi/mesh.c +--- linux-2.6.22-570/drivers/scsi/mesh.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/mesh.c 2007-12-21 15:36:12.000000000 -0500 +@@ -421,7 +421,7 @@ + for (i = 0; i < cmd->cmd_len; ++i) + printk(" %x", cmd->cmnd[i]); + printk(" use_sg=%d buffer=%p bufflen=%u\n", +- cmd->use_sg, cmd->request_buffer, cmd->request_bufflen); ++ scsi_sg_count(cmd), scsi_sglist(cmd), scsi_bufflen(cmd)); + } + #endif + if (ms->dma_started) +@@ -602,13 +602,16 @@ + cmd->result += (cmd->SCp.Message << 8); + if (DEBUG_TARGET(cmd)) { + printk(KERN_DEBUG "mesh_done: result = %x, data_ptr=%d, buflen=%d\n", +- cmd->result, ms->data_ptr, cmd->request_bufflen); ++ cmd->result, ms->data_ptr, scsi_bufflen(cmd)); ++#if 0 ++ /* needs to use sg? 
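++	 * (the dump below still dereferences cmd->request_buffer directly)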
*/ + if ((cmd->cmnd[0] == 0 || cmd->cmnd[0] == 0x12 || cmd->cmnd[0] == 3) + && cmd->request_buffer != 0) { + unsigned char *b = cmd->request_buffer; + printk(KERN_DEBUG "buffer = %x %x %x %x %x %x %x %x\n", + b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]); + } ++#endif + } + cmd->SCp.this_residual -= ms->data_ptr; + mesh_completed(ms, cmd); +@@ -1265,15 +1268,18 @@ + dcmds = ms->dma_cmds; + dtot = 0; + if (cmd) { +- cmd->SCp.this_residual = cmd->request_bufflen; +- if (cmd->use_sg > 0) { + int nseg; ++ ++ cmd->SCp.this_residual = scsi_bufflen(cmd); ++ ++ nseg = scsi_dma_map(cmd); ++ BUG_ON(nseg < 0); ++ ++ if (nseg) { + total = 0; +- scl = (struct scatterlist *) cmd->request_buffer; + off = ms->data_ptr; +- nseg = pci_map_sg(ms->pdev, scl, cmd->use_sg, +- cmd->sc_data_direction); +- for (i = 0; i data_ptr < cmd->request_bufflen) { +- dtot = cmd->request_bufflen - ms->data_ptr; +- if (dtot > 0xffff) +- panic("mesh: transfer size >= 64k"); +- st_le16(&dcmds->req_count, dtot); +- /* XXX Use pci DMA API here ... */ +- st_le32(&dcmds->phy_addr, +- virt_to_phys(cmd->request_buffer) + ms->data_ptr); +- dcmds->xfer_status = 0; +- ++dcmds; + } + } + if (dtot == 0) { +@@ -1356,18 +1352,14 @@ + dumplog(ms, ms->conn_tgt); + dumpslog(ms); + #endif /* MESH_DBG */ +- } else if (cmd && cmd->request_bufflen != 0 && +- ms->data_ptr > cmd->request_bufflen) { ++ } else if (cmd && scsi_bufflen(cmd) && ++ ms->data_ptr > scsi_bufflen(cmd)) { + printk(KERN_DEBUG "mesh: target %d overrun, " + "data_ptr=%x total=%x goes_out=%d\n", +- ms->conn_tgt, ms->data_ptr, cmd->request_bufflen, ++ ms->conn_tgt, ms->data_ptr, scsi_bufflen(cmd), + ms->tgts[ms->conn_tgt].data_goes_out); + } +- if (cmd->use_sg != 0) { +- struct scatterlist *sg; +- sg = (struct scatterlist *)cmd->request_buffer; +- pci_unmap_sg(ms->pdev, sg, cmd->use_sg, cmd->sc_data_direction); +- } ++ scsi_dma_unmap(cmd); + ms->dma_started = 0; + } + +diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.c linux-2.6.22-591/drivers/scsi/mvme16x.c +--- linux-2.6.22-570/drivers/scsi/mvme16x.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/mvme16x.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,78 +0,0 @@ +-/* +- * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux. 
+- * +- * Based on work by Alan Hourihane +- */ +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +- +-#include "scsi.h" +-#include +-#include "53c7xx.h" +-#include "mvme16x.h" +- +-#include +- +- +-int mvme16x_scsi_detect(struct scsi_host_template *tpnt) +-{ +- static unsigned char called = 0; +- int clock; +- long long options; +- +- if (!MACH_IS_MVME16x) +- return 0; +- if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) { +- printk ("SCSI detection disabled, SCSI chip not present\n"); +- return 0; +- } +- if (called) +- return 0; +- +- tpnt->proc_name = "MVME16x"; +- +- options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT; +- +- clock = 66000000; /* 66MHz SCSI Clock */ +- +- ncr53c7xx_init(tpnt, 0, 710, (unsigned long)0xfff47000, +- 0, MVME16x_IRQ_SCSI, DMA_NONE, +- options, clock); +- called = 1; +- return 1; +-} +- +-static int mvme16x_scsi_release(struct Scsi_Host *shost) +-{ +- if (shost->irq) +- free_irq(shost->irq, NULL); +- if (shost->dma_channel != 0xff) +- free_dma(shost->dma_channel); +- if (shost->io_port && shost->n_io_port) +- release_region(shost->io_port, shost->n_io_port); +- scsi_unregister(shost); +- return 0; +-} +- +-static struct scsi_host_template driver_template = { +- .name = "MVME16x NCR53c710 SCSI", +- .detect = mvme16x_scsi_detect, +- .release = mvme16x_scsi_release, +- .queuecommand = NCR53c7xx_queue_command, +- .abort = NCR53c7xx_abort, +- .reset = NCR53c7xx_reset, +- .can_queue = 24, +- .this_id = 7, +- .sg_tablesize = 63, +- .cmd_per_lun = 3, +- .use_clustering = DISABLE_CLUSTERING +-}; +- +- +-#include "scsi_module.c" +diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.h linux-2.6.22-591/drivers/scsi/mvme16x.h +--- linux-2.6.22-570/drivers/scsi/mvme16x.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/mvme16x.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,24 +0,0 @@ +-#ifndef MVME16x_SCSI_H +-#define MVME16x_SCSI_H +- +-#include +- +-int mvme16x_scsi_detect(struct scsi_host_template *); +-const char *NCR53c7x0_info(void); +-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *)); +-int NCR53c7xx_abort(Scsi_Cmnd *); +-int NCR53c7x0_release (struct Scsi_Host *); +-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int); +-void NCR53c7x0_intr(int irq, void *dev_id); +- +-#ifndef CMD_PER_LUN +-#define CMD_PER_LUN 3 +-#endif +- +-#ifndef CAN_QUEUE +-#define CAN_QUEUE 24 +-#endif +- +-#include +- +-#endif /* MVME16x_SCSI_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c +--- linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/mvme16x_scsi.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,158 @@ ++/* ++ * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux. 
++ * ++ * Based on work by Alan Hourihane ++ * ++ * Rewritten to use 53c700.c by Kars de Jong ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "53c700.h" ++ ++MODULE_AUTHOR("Kars de Jong "); ++MODULE_DESCRIPTION("MVME16x NCR53C710 driver"); ++MODULE_LICENSE("GPL"); ++ ++static struct scsi_host_template mvme16x_scsi_driver_template = { ++ .name = "MVME16x NCR53c710 SCSI", ++ .proc_name = "MVME16x", ++ .this_id = 7, ++ .module = THIS_MODULE, ++}; ++ ++static struct platform_device *mvme16x_scsi_device; ++ ++static __devinit int ++mvme16x_probe(struct device *dev) ++{ ++ struct Scsi_Host * host = NULL; ++ struct NCR_700_Host_Parameters *hostdata; ++ ++ if (!MACH_IS_MVME16x) ++ goto out; ++ ++ if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) { ++ printk(KERN_INFO "mvme16x-scsi: detection disabled, " ++ "SCSI chip not present\n"); ++ goto out; ++ } ++ ++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); ++ if (hostdata == NULL) { ++ printk(KERN_ERR "mvme16x-scsi: " ++ "Failed to allocate host data\n"); ++ goto out; ++ } ++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); ++ ++ /* Fill in the required pieces of hostdata */ ++ hostdata->base = (void __iomem *)0xfff47000UL; ++ hostdata->clock = 50; /* XXX - depends on the CPU clock! */ ++ hostdata->chip710 = 1; ++ hostdata->dmode_extra = DMODE_FC2; ++ hostdata->dcntl_extra = EA_710; ++ hostdata->ctest7_extra = CTEST7_TT1; ++ ++ /* and register the chip */ ++ host = NCR_700_detect(&mvme16x_scsi_driver_template, hostdata, dev); ++ if (!host) { ++ printk(KERN_ERR "mvme16x-scsi: No host detected; " ++ "board configuration problem?\n"); ++ goto out_free; ++ } ++ host->this_id = 7; ++ host->base = 0xfff47000UL; ++ host->irq = MVME16x_IRQ_SCSI; ++ if (request_irq(host->irq, NCR_700_intr, 0, "mvme16x-scsi", host)) { ++ printk(KERN_ERR "mvme16x-scsi: request_irq failed\n"); ++ goto out_put_host; ++ } ++ ++ /* Enable scsi chip ints */ ++ { ++ volatile unsigned long v; ++ ++ /* Enable scsi interrupts at level 4 in PCCchip2 */ ++ v = in_be32(0xfff4202c); ++ v = (v & ~0xff) | 0x10 | 4; ++ out_be32(0xfff4202c, v); ++ } ++ ++ scsi_scan_host(host); ++ ++ return 0; ++ ++ out_put_host: ++ scsi_host_put(host); ++ out_free: ++ kfree(hostdata); ++ out: ++ return -ENODEV; ++} ++ ++static __devexit int ++mvme16x_device_remove(struct device *dev) ++{ ++ struct Scsi_Host *host = dev_to_shost(dev); ++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host); ++ ++ /* Disable scsi chip ints */ ++ { ++ volatile unsigned long v; ++ ++ v = in_be32(0xfff4202c); ++ v &= ~0x10; ++ out_be32(0xfff4202c, v); ++ } ++ scsi_remove_host(host); ++ NCR_700_release(host); ++ kfree(hostdata); ++ free_irq(host->irq, host); ++ ++ return 0; ++} ++ ++static struct device_driver mvme16x_scsi_driver = { ++ .name = "mvme16x-scsi", ++ .bus = &platform_bus_type, ++ .probe = mvme16x_probe, ++ .remove = __devexit_p(mvme16x_device_remove), ++}; ++ ++static int __init mvme16x_scsi_init(void) ++{ ++ int err; ++ ++ err = driver_register(&mvme16x_scsi_driver); ++ if (err) ++ return err; ++ ++ mvme16x_scsi_device = platform_device_register_simple("mvme16x-scsi", ++ -1, NULL, 0); ++ if (IS_ERR(mvme16x_scsi_device)) { ++ driver_unregister(&mvme16x_scsi_driver); ++ return PTR_ERR(mvme16x_scsi_device); ++ } ++ ++ return 0; ++} ++ ++static void __exit mvme16x_scsi_exit(void) ++{ ++ platform_device_unregister(mvme16x_scsi_device); ++ driver_unregister(&mvme16x_scsi_driver); ++} ++ 
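The kmalloc() + memset() pair in mvme16x_probe() above could be collapsed into kzalloc(), which this tree already provides. A minimal sketch of the equivalent allocation, keeping the existing error path:

    hostdata = kzalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
    if (hostdata == NULL) {
            printk(KERN_ERR "mvme16x-scsi: Failed to allocate host data\n");
            goto out;
    }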
++module_init(mvme16x_scsi_init); ++module_exit(mvme16x_scsi_exit); +diff -Nurb linux-2.6.22-570/drivers/scsi/nsp32.c linux-2.6.22-591/drivers/scsi/nsp32.c +--- linux-2.6.22-570/drivers/scsi/nsp32.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/nsp32.c 2007-12-21 15:36:12.000000000 -0500 +@@ -49,10 +49,6 @@ + #include + #include + +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) +-# include +-#endif +- + #include "nsp32.h" + + +@@ -199,17 +195,9 @@ + static void __exit exit_nsp32 (void); + + /* struct struct scsi_host_template */ +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + static int nsp32_proc_info (struct Scsi_Host *, char *, char **, off_t, int, int); +-#else +-static int nsp32_proc_info (char *, char **, off_t, int, int, int); +-#endif + +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + static int nsp32_detect (struct pci_dev *pdev); +-#else +-static int nsp32_detect (struct scsi_host_template *); +-#endif + static int nsp32_queuecommand(struct scsi_cmnd *, + void (*done)(struct scsi_cmnd *)); + static const char *nsp32_info (struct Scsi_Host *); +@@ -296,15 +284,7 @@ + .eh_abort_handler = nsp32_eh_abort, + .eh_bus_reset_handler = nsp32_eh_bus_reset, + .eh_host_reset_handler = nsp32_eh_host_reset, +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74)) +- .detect = nsp32_detect, +- .release = nsp32_release, +-#endif +-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,2)) +- .use_new_eh_code = 1, +-#else + /* .highmem_io = 1, */ +-#endif + }; + + #include "nsp32_io.h" +@@ -739,7 +719,7 @@ + command = 0; + command |= (TRANSFER_GO | ALL_COUNTER_CLR); + if (data->trans_method & NSP32_TRANSFER_BUSMASTER) { +- if (SCpnt->request_bufflen > 0) { ++ if (scsi_bufflen(SCpnt) > 0) { + command |= BM_START; + } + } else if (data->trans_method & NSP32_TRANSFER_MMIO) { +@@ -888,31 +868,28 @@ + static int nsp32_setup_sg_table(struct scsi_cmnd *SCpnt) + { + nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata; +- struct scatterlist *sgl; ++ struct scatterlist *sg; + nsp32_sgtable *sgt = data->cur_lunt->sglun->sgt; + int num, i; + u32_le l; + +- if (SCpnt->request_bufflen == 0) { +- return TRUE; +- } +- + if (sgt == NULL) { + nsp32_dbg(NSP32_DEBUG_SGLIST, "SGT == null"); + return FALSE; + } + +- if (SCpnt->use_sg) { +- sgl = (struct scatterlist *)SCpnt->request_buffer; +- num = pci_map_sg(data->Pci, sgl, SCpnt->use_sg, +- SCpnt->sc_data_direction); +- for (i = 0; i < num; i++) { ++ num = scsi_dma_map(SCpnt); ++ if (!num) ++ return TRUE; ++ else if (num < 0) ++ return FALSE; ++ else { ++ scsi_for_each_sg(SCpnt, sg, num, i) { + /* + * Build nsp32_sglist, substitute sg dma addresses. 
+ */ +- sgt[i].addr = cpu_to_le32(sg_dma_address(sgl)); +- sgt[i].len = cpu_to_le32(sg_dma_len(sgl)); +- sgl++; ++ sgt[i].addr = cpu_to_le32(sg_dma_address(sg)); ++ sgt[i].len = cpu_to_le32(sg_dma_len(sg)); + + if (le32_to_cpu(sgt[i].len) > 0x10000) { + nsp32_msg(KERN_ERR, +@@ -929,23 +906,6 @@ + /* set end mark */ + l = le32_to_cpu(sgt[num-1].len); + sgt[num-1].len = cpu_to_le32(l | SGTEND); +- +- } else { +- SCpnt->SCp.have_data_in = pci_map_single(data->Pci, +- SCpnt->request_buffer, SCpnt->request_bufflen, +- SCpnt->sc_data_direction); +- +- sgt[0].addr = cpu_to_le32(SCpnt->SCp.have_data_in); +- sgt[0].len = cpu_to_le32(SCpnt->request_bufflen | SGTEND); /* set end mark */ +- +- if (SCpnt->request_bufflen > 0x10000) { +- nsp32_msg(KERN_ERR, +- "can't transfer over 64KB at a time, size=0x%lx", SCpnt->request_bufflen); +- return FALSE; +- } +- nsp32_dbg(NSP32_DEBUG_SGLIST, "single : addr 0x%lx len=0x%lx", +- le32_to_cpu(sgt[0].addr), +- le32_to_cpu(sgt[0].len )); + } + + return TRUE; +@@ -962,7 +922,7 @@ + "enter. target: 0x%x LUN: 0x%x cmnd: 0x%x cmndlen: 0x%x " + "use_sg: 0x%x reqbuf: 0x%lx reqlen: 0x%x", + SCpnt->device->id, SCpnt->device->lun, SCpnt->cmnd[0], SCpnt->cmd_len, +- SCpnt->use_sg, SCpnt->request_buffer, SCpnt->request_bufflen); ++ scsi_sg_count(SCpnt), scsi_sglist(SCpnt), scsi_bufflen(SCpnt)); + + if (data->CurrentSC != NULL) { + nsp32_msg(KERN_ERR, "Currentsc != NULL. Cancel this command request"); +@@ -994,10 +954,10 @@ + data->CurrentSC = SCpnt; + SCpnt->SCp.Status = CHECK_CONDITION; + SCpnt->SCp.Message = 0; +- SCpnt->resid = SCpnt->request_bufflen; ++ scsi_set_resid(SCpnt, scsi_bufflen(SCpnt)); + +- SCpnt->SCp.ptr = (char *) SCpnt->request_buffer; +- SCpnt->SCp.this_residual = SCpnt->request_bufflen; ++ SCpnt->SCp.ptr = (char *)scsi_sglist(SCpnt); ++ SCpnt->SCp.this_residual = scsi_bufflen(SCpnt); + SCpnt->SCp.buffer = NULL; + SCpnt->SCp.buffers_residual = 0; + +@@ -1210,13 +1170,9 @@ + unsigned long flags; + int ret; + int handled = 0; +- +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + struct Scsi_Host *host = data->Host; ++ + spin_lock_irqsave(host->host_lock, flags); +-#else +- spin_lock_irqsave(&io_request_lock, flags); +-#endif + + /* + * IRQ check, then enable IRQ mask +@@ -1312,7 +1268,7 @@ + } + + if ((auto_stat & DATA_IN_PHASE) && +- (SCpnt->resid > 0) && ++ (scsi_get_resid(SCpnt) > 0) && + ((nsp32_read2(base, FIFO_REST_CNT) & FIFO_REST_MASK) != 0)) { + printk( "auto+fifo\n"); + //nsp32_pio_read(SCpnt); +@@ -1333,7 +1289,7 @@ + nsp32_dbg(NSP32_DEBUG_INTR, "SSACK=0x%lx", + nsp32_read4(base, SAVED_SACK_CNT)); + +- SCpnt->resid = 0; /* all data transfered! */ ++ scsi_set_resid(SCpnt, 0); /* all data transfered! 
*/ + } + + /* +@@ -1480,11 +1436,7 @@ + nsp32_write2(base, IRQ_CONTROL, 0); + + out2: +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + spin_unlock_irqrestore(host->host_lock, flags); +-#else +- spin_unlock_irqrestore(&io_request_lock, flags); +-#endif + + nsp32_dbg(NSP32_DEBUG_INTR, "exit"); + +@@ -1499,28 +1451,15 @@ + nsp32_dbg(NSP32_DEBUG_PROC, "buffer=0x%p pos=0x%p length=%d %d\n", buffer, pos, length, length - (pos - buffer));\ + } \ + } while(0) +-static int nsp32_proc_info( +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) +- struct Scsi_Host *host, +-#endif +- char *buffer, +- char **start, +- off_t offset, +- int length, +-#if !(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) +- int hostno, +-#endif +- int inout) ++ ++static int nsp32_proc_info(struct Scsi_Host *host, char *buffer, char **start, ++ off_t offset, int length, int inout) + { + char *pos = buffer; + int thislength; + unsigned long flags; + nsp32_hw_data *data; +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + int hostno; +-#else +- struct Scsi_Host *host; +-#endif + unsigned int base; + unsigned char mode_reg; + int id, speed; +@@ -1531,15 +1470,7 @@ + return -EINVAL; + } + +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + hostno = host->host_no; +-#else +- /* search this HBA host */ +- host = scsi_host_hn_get(hostno); +- if (host == NULL) { +- return -ESRCH; +- } +-#endif + data = (nsp32_hw_data *)host->hostdata; + base = host->io_port; + +@@ -1626,25 +1557,8 @@ + nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata; + unsigned int base = SCpnt->device->host->io_port; + +- /* +- * unmap pci +- */ +- if (SCpnt->request_bufflen == 0) { +- goto skip; +- } +- +- if (SCpnt->use_sg) { +- pci_unmap_sg(data->Pci, +- (struct scatterlist *)SCpnt->request_buffer, +- SCpnt->use_sg, SCpnt->sc_data_direction); +- } else { +- pci_unmap_single(data->Pci, +- (u32)SCpnt->SCp.have_data_in, +- SCpnt->request_bufflen, +- SCpnt->sc_data_direction); +- } ++ scsi_dma_unmap(SCpnt); + +- skip: + /* + * clear TRANSFERCONTROL_BM_START + */ +@@ -1800,7 +1714,7 @@ + SCpnt->SCp.Message = 0; + nsp32_dbg(NSP32_DEBUG_BUSFREE, + "normal end stat=0x%x resid=0x%x\n", +- SCpnt->SCp.Status, SCpnt->resid); ++ SCpnt->SCp.Status, scsi_get_resid(SCpnt)); + SCpnt->result = (DID_OK << 16) | + (SCpnt->SCp.Message << 8) | + (SCpnt->SCp.Status << 0); +@@ -1844,7 +1758,7 @@ + unsigned int restlen, sentlen; + u32_le len, addr; + +- nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", SCpnt->resid); ++ nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", scsi_get_resid(SCpnt)); + + /* adjust saved SACK count with 4 byte start address boundary */ + s_sacklen -= le32_to_cpu(sgt[old_entry].addr) & 3; +@@ -1888,12 +1802,12 @@ + return; + + last: +- if (SCpnt->resid < sentlen) { ++ if (scsi_get_resid(SCpnt) < sentlen) { + nsp32_msg(KERN_ERR, "resid underflow"); + } + +- SCpnt->resid -= sentlen; +- nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", SCpnt->resid); ++ scsi_set_resid(SCpnt, scsi_get_resid(SCpnt) - sentlen); ++ nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", scsi_get_resid(SCpnt)); + + /* update hostdata and lun */ + +@@ -2022,7 +1936,7 @@ + transfer = 0; + transfer |= (TRANSFER_GO | ALL_COUNTER_CLR); + if (data->trans_method & NSP32_TRANSFER_BUSMASTER) { +- if (SCpnt->request_bufflen > 0) { ++ if (scsi_bufflen(SCpnt) > 0) { + transfer |= BM_START; + } + } else if (data->trans_method & NSP32_TRANSFER_MMIO) { +@@ -2674,17 +2588,7 @@ + * 0x900-0xbff: (map same 0x800-0x8ff I/O port image repeatedly) + * 0xc00-0xfff: CardBus status registers + */ 
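The nsp32 hunks above follow the same conversion applied throughout this patch: the old use_sg / request_buffer / pci_map_sg() plumbing gives way to the midlayer data accessors, with scsi_dma_unmap() in the completion path. A minimal sketch of the pattern, where program_sg_entry() is a hypothetical stand-in for the driver-specific descriptor setup:

    #include <scsi/scsi_cmnd.h>

    static int build_sg_table(struct scsi_cmnd *cmd)
    {
            struct scatterlist *sg;
            int nseg, i;

            nseg = scsi_dma_map(cmd);   /* 0: no data, <0: mapping failed */
            if (nseg <= 0)
                    return nseg;
            scsi_for_each_sg(cmd, sg, nseg, i)
                    program_sg_entry(sg_dma_address(sg), sg_dma_len(sg));
            /* scsi_dma_unmap(cmd) runs once the command completes */
            return nseg;
    }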
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) +-#define DETECT_OK 0 +-#define DETECT_NG 1 +-#define PCIDEV pdev + static int nsp32_detect(struct pci_dev *pdev) +-#else +-#define DETECT_OK 1 +-#define DETECT_NG 0 +-#define PCIDEV (data->Pci) +-static int nsp32_detect(struct scsi_host_template *sht) +-#endif + { + struct Scsi_Host *host; /* registered host structure */ + struct resource *res; +@@ -2697,11 +2601,7 @@ + /* + * register this HBA as SCSI device + */ +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + host = scsi_host_alloc(&nsp32_template, sizeof(nsp32_hw_data)); +-#else +- host = scsi_register(sht, sizeof(nsp32_hw_data)); +-#endif + if (host == NULL) { + nsp32_msg (KERN_ERR, "failed to scsi register"); + goto err; +@@ -2719,9 +2619,6 @@ + host->unique_id = data->BaseAddress; + host->n_io_port = data->NumAddress; + host->base = (unsigned long)data->MmioAddress; +-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,63)) +- scsi_set_pci_device(host, PCIDEV); +-#endif + + data->Host = host; + spin_lock_init(&(data->Lock)); +@@ -2776,7 +2673,7 @@ + /* + * setup DMA + */ +- if (pci_set_dma_mask(PCIDEV, DMA_32BIT_MASK) != 0) { ++ if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) { + nsp32_msg (KERN_ERR, "failed to set PCI DMA mask"); + goto scsi_unregister; + } +@@ -2784,7 +2681,7 @@ + /* + * allocate autoparam DMA resource. + */ +- data->autoparam = pci_alloc_consistent(PCIDEV, sizeof(nsp32_autoparam), &(data->auto_paddr)); ++ data->autoparam = pci_alloc_consistent(pdev, sizeof(nsp32_autoparam), &(data->auto_paddr)); + if (data->autoparam == NULL) { + nsp32_msg(KERN_ERR, "failed to allocate DMA memory"); + goto scsi_unregister; +@@ -2793,7 +2690,7 @@ + /* + * allocate scatter-gather DMA resource. + */ +- data->sg_list = pci_alloc_consistent(PCIDEV, NSP32_SG_TABLE_SIZE, ++ data->sg_list = pci_alloc_consistent(pdev, NSP32_SG_TABLE_SIZE, + &(data->sg_paddr)); + if (data->sg_list == NULL) { + nsp32_msg(KERN_ERR, "failed to allocate DMA memory"); +@@ -2883,16 +2780,14 @@ + goto free_irq; + } + +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) +- ret = scsi_add_host(host, &PCIDEV->dev); ++ ret = scsi_add_host(host, &pdev->dev); + if (ret) { + nsp32_msg(KERN_ERR, "failed to add scsi host"); + goto free_region; + } + scsi_scan_host(host); +-#endif +- pci_set_drvdata(PCIDEV, host); +- return DETECT_OK; ++ pci_set_drvdata(pdev, host); ++ return 0; + + free_region: + release_region(host->io_port, host->n_io_port); +@@ -2901,22 +2796,19 @@ + free_irq(host->irq, data); + + free_sg_list: +- pci_free_consistent(PCIDEV, NSP32_SG_TABLE_SIZE, ++ pci_free_consistent(pdev, NSP32_SG_TABLE_SIZE, + data->sg_list, data->sg_paddr); + + free_autoparam: +- pci_free_consistent(PCIDEV, sizeof(nsp32_autoparam), ++ pci_free_consistent(pdev, sizeof(nsp32_autoparam), + data->autoparam, data->auto_paddr); + + scsi_unregister: + scsi_host_put(host); + + err: +- return DETECT_NG; ++ return 1; + } +-#undef DETECT_OK +-#undef DETECT_NG +-#undef PCIDEV + + static int nsp32_release(struct Scsi_Host *host) + { +@@ -3525,11 +3417,7 @@ + + pci_set_master(pdev); + +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + ret = nsp32_detect(pdev); +-#else +- ret = scsi_register_host(&nsp32_template); +-#endif + + nsp32_msg(KERN_INFO, "irq: %i mmio: %p+0x%lx slot: %s model: %s", + pdev->irq, +@@ -3544,25 +3432,17 @@ + + static void __devexit nsp32_remove(struct pci_dev *pdev) + { +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + struct Scsi_Host *host = pci_get_drvdata(pdev); +-#endif + + nsp32_dbg(NSP32_DEBUG_REGISTER, 
"enter"); + +-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) + scsi_remove_host(host); + + nsp32_release(host); + + scsi_host_put(host); +-#else +- scsi_unregister_host(&nsp32_template); +-#endif + } + +- +- + static struct pci_driver nsp32_driver = { + .name = "nsp32", + .id_table = nsp32_pci_table, +diff -Nurb linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c +--- linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/pcmcia/sym53c500_cs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -370,8 +370,6 @@ + DEB(unsigned char seq_reg;) + unsigned char status, int_reg; + unsigned char pio_status; +- struct scatterlist *sglist; +- unsigned int sgcount; + int port_base = dev->io_port; + struct sym53c500_data *data = + (struct sym53c500_data *)dev->hostdata; +@@ -434,20 +432,19 @@ + switch (status & 0x07) { /* scsi phase */ + case 0x00: /* DATA-OUT */ + if (int_reg & 0x10) { /* Target requesting info transfer */ ++ struct scatterlist *sg; ++ int i; ++ + curSC->SCp.phase = data_out; + VDEB(printk("SYM53C500: Data-Out phase\n")); + outb(FLUSH_FIFO, port_base + CMD_REG); +- LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */ ++ LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ + outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG); +- if (!curSC->use_sg) /* Don't use scatter-gather */ +- SYM53C500_pio_write(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen); +- else { /* use scatter-gather */ +- sgcount = curSC->use_sg; +- sglist = curSC->request_buffer; +- while (sgcount--) { +- SYM53C500_pio_write(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length); +- sglist++; +- } ++ ++ scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { ++ SYM53C500_pio_write(fast_pio, port_base, ++ page_address(sg->page) + sg->offset, ++ sg->length); + } + REG0(port_base); + } +@@ -455,20 +452,19 @@ + + case 0x01: /* DATA-IN */ + if (int_reg & 0x10) { /* Target requesting info transfer */ ++ struct scatterlist *sg; ++ int i; ++ + curSC->SCp.phase = data_in; + VDEB(printk("SYM53C500: Data-In phase\n")); + outb(FLUSH_FIFO, port_base + CMD_REG); +- LOAD_DMA_COUNT(port_base, curSC->request_bufflen); /* Max transfer size */ ++ LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC)); /* Max transfer size */ + outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG); +- if (!curSC->use_sg) /* Don't use scatter-gather */ +- SYM53C500_pio_read(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen); +- else { /* Use scatter-gather */ +- sgcount = curSC->use_sg; +- sglist = curSC->request_buffer; +- while (sgcount--) { +- SYM53C500_pio_read(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length); +- sglist++; +- } ++ ++ scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) { ++ SYM53C500_pio_read(fast_pio, port_base, ++ page_address(sg->page) + sg->offset, ++ sg->length); + } + REG0(port_base); + } +@@ -578,7 +574,7 @@ + + DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", + SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->device->id, +- SCpnt->device->lun, SCpnt->request_bufflen)); ++ SCpnt->device->lun, scsi_bufflen(SCpnt))); + + VDEB(for (i = 0; i < SCpnt->cmd_len; i++) + printk("cmd[%d]=%02x ", i, SCpnt->cmnd[i])); +diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c +--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c 2007-07-08 
19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_attr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -11,8 +11,9 @@ + /* SYSFS attributes --------------------------------------------------------- */ + + static ssize_t +-qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_read_fw_dump(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -31,8 +32,9 @@ + } + + static ssize_t +-qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_write_fw_dump(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -73,7 +75,6 @@ + .attr = { + .name = "fw_dump", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 0, + .read = qla2x00_sysfs_read_fw_dump, +@@ -81,8 +82,9 @@ + }; + + static ssize_t +-qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_read_nvram(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -101,8 +103,9 @@ + } + + static ssize_t +-qla2x00_sysfs_write_nvram(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_write_nvram(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -149,7 +152,6 @@ + .attr = { + .name = "nvram", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 512, + .read = qla2x00_sysfs_read_nvram, +@@ -157,8 +159,9 @@ + }; + + static ssize_t +-qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_read_optrom(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -176,8 +179,9 @@ + } + + static ssize_t +-qla2x00_sysfs_write_optrom(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_write_optrom(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -198,7 +202,6 @@ + .attr = { + .name = "optrom", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = OPTROM_SIZE_24XX, + .read = qla2x00_sysfs_read_optrom, +@@ -206,8 +209,9 @@ + }; + + static ssize_t +-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -279,15 +283,15 @@ + .attr = { + .name = "optrom_ctl", + .mode = S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 0, + .write = qla2x00_sysfs_write_optrom_ctl, + }; + + static ssize_t +-qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_read_vpd(struct 
kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -305,8 +309,9 @@ + } + + static ssize_t +-qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_write_vpd(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -327,7 +332,6 @@ + .attr = { + .name = "vpd", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = 0, + .read = qla2x00_sysfs_read_vpd, +@@ -335,8 +339,9 @@ + }; + + static ssize_t +-qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++qla2x00_sysfs_read_sfp(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj, + struct device, kobj))); +@@ -375,7 +380,6 @@ + .attr = { + .name = "sfp", + .mode = S_IRUSR | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = SFP_DEV_SIZE * 2, + .read = qla2x00_sysfs_read_sfp, +diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c +--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_dbg.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1411,9 +1411,9 @@ + printk("0x%02x ", cmd->cmnd[i]); + } + printk("\n seg_cnt=%d, allowed=%d, retries=%d\n", +- cmd->use_sg, cmd->allowed, cmd->retries); ++ scsi_sg_count(cmd), cmd->allowed, cmd->retries); + printk(" request buffer=0x%p, request buffer len=0x%x\n", +- cmd->request_buffer, cmd->request_bufflen); ++ scsi_sglist(cmd), scsi_bufflen(cmd)); + printk(" tag=%d, transfersize=0x%x\n", + cmd->tag, cmd->transfersize); + printk(" serial_number=%lx, SP=%p\n", cmd->serial_number, sp); +diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c +--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_iocb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -155,6 +155,8 @@ + uint32_t *cur_dsd; + scsi_qla_host_t *ha; + struct scsi_cmnd *cmd; ++ struct scatterlist *sg; ++ int i; + + cmd = sp->cmd; + +@@ -163,7 +165,7 @@ + __constant_cpu_to_le32(COMMAND_TYPE); + + /* No data transfer */ +- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { ++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + cmd_pkt->byte_count = __constant_cpu_to_le32(0); + return; + } +@@ -177,13 +179,8 @@ + cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; + + /* Load data segments */ +- if (cmd->use_sg != 0) { +- struct scatterlist *cur_seg; +- struct scatterlist *end_seg; +- +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- end_seg = cur_seg + tot_dsds; +- while (cur_seg < end_seg) { ++ ++ scsi_for_each_sg(cmd, sg, tot_dsds, i) { + cont_entry_t *cont_pkt; + + /* Allocate additional continuation packets? 
*/ +@@ -197,15 +194,9 @@ + avail_dsds = 7; + } + +- *cur_dsd++ = cpu_to_le32(sg_dma_address(cur_seg)); +- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg)); ++ *cur_dsd++ = cpu_to_le32(sg_dma_address(sg)); ++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); + avail_dsds--; +- +- cur_seg++; +- } +- } else { +- *cur_dsd++ = cpu_to_le32(sp->dma_handle); +- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen); + } + } + +@@ -224,6 +215,8 @@ + uint32_t *cur_dsd; + scsi_qla_host_t *ha; + struct scsi_cmnd *cmd; ++ struct scatterlist *sg; ++ int i; + + cmd = sp->cmd; + +@@ -232,7 +225,7 @@ + __constant_cpu_to_le32(COMMAND_A64_TYPE); + + /* No data transfer */ +- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { ++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + cmd_pkt->byte_count = __constant_cpu_to_le32(0); + return; + } +@@ -246,13 +239,7 @@ + cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; + + /* Load data segments */ +- if (cmd->use_sg != 0) { +- struct scatterlist *cur_seg; +- struct scatterlist *end_seg; +- +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- end_seg = cur_seg + tot_dsds; +- while (cur_seg < end_seg) { ++ scsi_for_each_sg(cmd, sg, tot_dsds, i) { + dma_addr_t sle_dma; + cont_a64_entry_t *cont_pkt; + +@@ -267,18 +254,11 @@ + avail_dsds = 5; + } + +- sle_dma = sg_dma_address(cur_seg); ++ sle_dma = sg_dma_address(sg); + *cur_dsd++ = cpu_to_le32(LSD(sle_dma)); + *cur_dsd++ = cpu_to_le32(MSD(sle_dma)); +- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg)); ++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); + avail_dsds--; +- +- cur_seg++; +- } +- } else { +- *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle)); +- *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle)); +- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen); + } + } + +@@ -291,7 +271,7 @@ + int + qla2x00_start_scsi(srb_t *sp) + { +- int ret; ++ int ret, nseg; + unsigned long flags; + scsi_qla_host_t *ha; + struct scsi_cmnd *cmd; +@@ -299,7 +279,6 @@ + uint32_t index; + uint32_t handle; + cmd_entry_t *cmd_pkt; +- struct scatterlist *sg; + uint16_t cnt; + uint16_t req_cnt; + uint16_t tot_dsds; +@@ -337,23 +316,10 @@ + goto queuing_error; + + /* Map the sg table so we have an accurate count of sg entries needed */ +- if (cmd->use_sg) { +- sg = (struct scatterlist *) cmd->request_buffer; +- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- if (tot_dsds == 0) ++ nseg = scsi_dma_map(cmd); ++ if (nseg < 0) + goto queuing_error; +- } else if (cmd->request_bufflen) { +- dma_addr_t req_dma; +- +- req_dma = pci_map_single(ha->pdev, cmd->request_buffer, +- cmd->request_bufflen, cmd->sc_data_direction); +- if (dma_mapping_error(req_dma)) +- goto queuing_error; +- +- sp->dma_handle = req_dma; +- tot_dsds = 1; +- } ++ tot_dsds = nseg; + + /* Calculate the number of request entries needed. */ + req_cnt = ha->isp_ops.calc_req_entries(tot_dsds); +@@ -391,7 +357,7 @@ + + /* Load SCSI command packet. 
*/ + memcpy(cmd_pkt->scsi_cdb, cmd->cmnd, cmd->cmd_len); +- cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen); ++ cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd)); + + /* Build IOCB segments */ + ha->isp_ops.build_iocbs(sp, cmd_pkt, tot_dsds); +@@ -423,14 +389,9 @@ + return (QLA_SUCCESS); + + queuing_error: +- if (cmd->use_sg && tot_dsds) { +- sg = (struct scatterlist *) cmd->request_buffer; +- pci_unmap_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- } else if (tot_dsds) { +- pci_unmap_single(ha->pdev, sp->dma_handle, +- cmd->request_bufflen, cmd->sc_data_direction); +- } ++ if (tot_dsds) ++ scsi_dma_unmap(cmd); ++ + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + return (QLA_FUNCTION_FAILED); +@@ -642,6 +603,8 @@ + uint32_t *cur_dsd; + scsi_qla_host_t *ha; + struct scsi_cmnd *cmd; ++ struct scatterlist *sg; ++ int i; + + cmd = sp->cmd; + +@@ -650,7 +613,7 @@ + __constant_cpu_to_le32(COMMAND_TYPE_7); + + /* No data transfer */ +- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { ++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + cmd_pkt->byte_count = __constant_cpu_to_le32(0); + return; + } +@@ -670,13 +633,8 @@ + cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address; + + /* Load data segments */ +- if (cmd->use_sg != 0) { +- struct scatterlist *cur_seg; +- struct scatterlist *end_seg; +- +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- end_seg = cur_seg + tot_dsds; +- while (cur_seg < end_seg) { ++ ++ scsi_for_each_sg(cmd, sg, tot_dsds, i) { + dma_addr_t sle_dma; + cont_a64_entry_t *cont_pkt; + +@@ -691,18 +649,11 @@ + avail_dsds = 5; + } + +- sle_dma = sg_dma_address(cur_seg); ++ sle_dma = sg_dma_address(sg); + *cur_dsd++ = cpu_to_le32(LSD(sle_dma)); + *cur_dsd++ = cpu_to_le32(MSD(sle_dma)); +- *cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg)); ++ *cur_dsd++ = cpu_to_le32(sg_dma_len(sg)); + avail_dsds--; +- +- cur_seg++; +- } +- } else { +- *cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle)); +- *cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle)); +- *cur_dsd++ = cpu_to_le32(cmd->request_bufflen); + } + } + +@@ -716,7 +667,7 @@ + int + qla24xx_start_scsi(srb_t *sp) + { +- int ret; ++ int ret, nseg; + unsigned long flags; + scsi_qla_host_t *ha; + struct scsi_cmnd *cmd; +@@ -724,7 +675,6 @@ + uint32_t index; + uint32_t handle; + struct cmd_type_7 *cmd_pkt; +- struct scatterlist *sg; + uint16_t cnt; + uint16_t req_cnt; + uint16_t tot_dsds; +@@ -762,23 +712,10 @@ + goto queuing_error; + + /* Map the sg table so we have an accurate count of sg entries needed */ +- if (cmd->use_sg) { +- sg = (struct scatterlist *) cmd->request_buffer; +- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- if (tot_dsds == 0) +- goto queuing_error; +- } else if (cmd->request_bufflen) { +- dma_addr_t req_dma; +- +- req_dma = pci_map_single(ha->pdev, cmd->request_buffer, +- cmd->request_bufflen, cmd->sc_data_direction); +- if (dma_mapping_error(req_dma)) ++ nseg = scsi_dma_map(cmd); ++ if (nseg < 0) + goto queuing_error; +- +- sp->dma_handle = req_dma; +- tot_dsds = 1; +- } ++ tot_dsds = nseg; + + req_cnt = qla24xx_calc_iocbs(tot_dsds); + if (ha->req_q_cnt < (req_cnt + 2)) { +@@ -821,7 +758,7 @@ + memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len); + host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb)); + +- cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen); ++ cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd)); + + /* Build IOCB segments */ + qla24xx_build_scsi_iocbs(sp, 
cmd_pkt, tot_dsds); +@@ -853,14 +790,9 @@ + return QLA_SUCCESS; + + queuing_error: +- if (cmd->use_sg && tot_dsds) { +- sg = (struct scatterlist *) cmd->request_buffer; +- pci_unmap_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- } else if (tot_dsds) { +- pci_unmap_single(ha->pdev, sp->dma_handle, +- cmd->request_bufflen, cmd->sc_data_direction); +- } ++ if (tot_dsds) ++ scsi_dma_unmap(cmd); ++ + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + return QLA_FUNCTION_FAILED; +diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c +--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_isr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -889,11 +889,11 @@ + } + if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) { + resid = resid_len; +- cp->resid = resid; ++ scsi_set_resid(cp, resid); + CMD_RESID_LEN(cp) = resid; + + if (!lscsi_status && +- ((unsigned)(cp->request_bufflen - resid) < ++ ((unsigned)(scsi_bufflen(cp) - resid) < + cp->underflow)) { + qla_printk(KERN_INFO, ha, + "scsi(%ld:%d:%d:%d): Mid-layer underflow " +@@ -901,7 +901,7 @@ + "error status.\n", ha->host_no, + cp->device->channel, cp->device->id, + cp->device->lun, resid, +- cp->request_bufflen); ++ scsi_bufflen(cp)); + + cp->result = DID_ERROR << 16; + break; +@@ -963,7 +963,7 @@ + resid = fw_resid_len; + + if (scsi_status & SS_RESIDUAL_UNDER) { +- cp->resid = resid; ++ scsi_set_resid(cp, resid); + CMD_RESID_LEN(cp) = resid; + } else { + DEBUG2(printk(KERN_INFO +@@ -1046,14 +1046,14 @@ + "retrying command.\n", ha->host_no, + cp->device->channel, cp->device->id, + cp->device->lun, resid, +- cp->request_bufflen)); ++ scsi_bufflen(cp))); + + cp->result = DID_BUS_BUSY << 16; + break; + } + + /* Handle mid-layer underflow */ +- if ((unsigned)(cp->request_bufflen - resid) < ++ if ((unsigned)(scsi_bufflen(cp) - resid) < + cp->underflow) { + qla_printk(KERN_INFO, ha, + "scsi(%ld:%d:%d:%d): Mid-layer underflow " +@@ -1061,7 +1061,7 @@ + "error status.\n", ha->host_no, + cp->device->channel, cp->device->id, + cp->device->lun, resid, +- cp->request_bufflen); ++ scsi_bufflen(cp)); + + cp->result = DID_ERROR << 16; + break; +@@ -1084,7 +1084,7 @@ + DEBUG2(printk(KERN_INFO + "PID=0x%lx req=0x%x xtra=0x%x -- returning DID_ERROR " + "status!\n", +- cp->serial_number, cp->request_bufflen, resid_len)); ++ cp->serial_number, scsi_bufflen(cp), resid_len)); + + cp->result = DID_ERROR << 16; + break; +@@ -1633,7 +1633,7 @@ + uint16_t entry; + uint16_t index; + const char *name; +- irqreturn_t (*handler)(int, void *); ++ irq_handler_t handler; + }; + + static struct qla_init_msix_entry imsix_entries[QLA_MSIX_ENTRIES] = { +diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c +--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla2xxx/qla_os.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2426,13 +2426,7 @@ + struct scsi_cmnd *cmd = sp->cmd; + + if (sp->flags & SRB_DMA_VALID) { +- if (cmd->use_sg) { +- dma_unmap_sg(&ha->pdev->dev, cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- } else if (cmd->request_bufflen) { +- dma_unmap_single(&ha->pdev->dev, sp->dma_handle, +- cmd->request_bufflen, cmd->sc_data_direction); +- } ++ scsi_dma_unmap(cmd); + sp->flags &= ~SRB_DMA_VALID; + } + CMD_SP(cmd) = NULL; +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c 
linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_dbg.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,176 +6,9 @@ + */ + + #include "ql4_def.h" +-#include +- +-#if 0 +- +-static void qla4xxx_print_srb_info(struct srb * srb) +-{ +- printk("%s: srb = 0x%p, flags=0x%02x\n", __func__, srb, srb->flags); +- printk("%s: cmd = 0x%p, saved_dma_handle = 0x%lx\n", +- __func__, srb->cmd, (unsigned long) srb->dma_handle); +- printk("%s: fw_ddb_index = %d, lun = %d\n", +- __func__, srb->fw_ddb_index, srb->cmd->device->lun); +- printk("%s: iocb_tov = %d\n", +- __func__, srb->iocb_tov); +- printk("%s: cc_stat = 0x%x, r_start = 0x%lx, u_start = 0x%lx\n\n", +- __func__, srb->cc_stat, srb->r_start, srb->u_start); +-} +- +-void qla4xxx_print_scsi_cmd(struct scsi_cmnd *cmd) +-{ +- printk("SCSI Command = 0x%p, Handle=0x%p\n", cmd, cmd->host_scribble); +- printk(" b=%d, t=%02xh, l=%02xh, cmd_len = %02xh\n", +- cmd->device->channel, cmd->device->id, cmd->device->lun, +- cmd->cmd_len); +- scsi_print_command(cmd); +- printk(" seg_cnt = %d\n", cmd->use_sg); +- printk(" request buffer = 0x%p, request buffer len = 0x%x\n", +- cmd->request_buffer, cmd->request_bufflen); +- if (cmd->use_sg) { +- struct scatterlist *sg; +- sg = (struct scatterlist *)cmd->request_buffer; +- printk(" SG buffer: \n"); +- qla4xxx_dump_buffer((caddr_t) sg, +- (cmd->use_sg * sizeof(*sg))); +- } +- printk(" tag = %d, transfersize = 0x%x \n", cmd->tag, +- cmd->transfersize); +- printk(" Pid = %d, SP = 0x%p\n", (int)cmd->pid, cmd->SCp.ptr); +- printk(" underflow size = 0x%x, direction=0x%x\n", cmd->underflow, +- cmd->sc_data_direction); +- printk(" Current time (jiffies) = 0x%lx, " +- "timeout expires = 0x%lx\n", jiffies, cmd->eh_timeout.expires); +- qla4xxx_print_srb_info((struct srb *) cmd->SCp.ptr); +-} +- +-void __dump_registers(struct scsi_qla_host *ha) +-{ +- uint8_t i; +- for (i = 0; i < MBOX_REG_COUNT; i++) { +- printk(KERN_INFO "0x%02X mailbox[%d] = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, mailbox[i]), i, +- readw(&ha->reg->mailbox[i])); +- } +- printk(KERN_INFO "0x%02X flash_address = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, flash_address), +- readw(&ha->reg->flash_address)); +- printk(KERN_INFO "0x%02X flash_data = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, flash_data), +- readw(&ha->reg->flash_data)); +- printk(KERN_INFO "0x%02X ctrl_status = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, ctrl_status), +- readw(&ha->reg->ctrl_status)); +- if (is_qla4010(ha)) { +- printk(KERN_INFO "0x%02X nvram = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, u1.isp4010.nvram), +- readw(&ha->reg->u1.isp4010.nvram)); +- } +- +- else if (is_qla4022(ha) | is_qla4032(ha)) { +- printk(KERN_INFO "0x%02X intr_mask = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u1.isp4022.intr_mask), +- readw(&ha->reg->u1.isp4022.intr_mask)); +- printk(KERN_INFO "0x%02X nvram = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, u1.isp4022.nvram), +- readw(&ha->reg->u1.isp4022.nvram)); +- printk(KERN_INFO "0x%02X semaphore = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u1.isp4022.semaphore), +- readw(&ha->reg->u1.isp4022.semaphore)); +- } +- printk(KERN_INFO "0x%02X req_q_in = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, req_q_in), +- readw(&ha->reg->req_q_in)); +- printk(KERN_INFO "0x%02X rsp_q_out = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, rsp_q_out), +- readw(&ha->reg->rsp_q_out)); +- if 
(is_qla4010(ha)) { +- printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4010.ext_hw_conf), +- readw(&ha->reg->u2.isp4010.ext_hw_conf)); +- printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4010.port_ctrl), +- readw(&ha->reg->u2.isp4010.port_ctrl)); +- printk(KERN_INFO "0x%02X port_status = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4010.port_status), +- readw(&ha->reg->u2.isp4010.port_status)); +- printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4010.req_q_out), +- readw(&ha->reg->u2.isp4010.req_q_out)); +- printk(KERN_INFO "0x%02X gp_out = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_out), +- readw(&ha->reg->u2.isp4010.gp_out)); +- printk(KERN_INFO "0x%02X gp_in = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_in), +- readw(&ha->reg->u2.isp4010.gp_in)); +- printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4010.port_err_status), +- readw(&ha->reg->u2.isp4010.port_err_status)); +- } +- +- else if (is_qla4022(ha) | is_qla4032(ha)) { +- printk(KERN_INFO "Page 0 Registers:\n"); +- printk(KERN_INFO "0x%02X ext_hw_conf = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p0.ext_hw_conf), +- readw(&ha->reg->u2.isp4022.p0.ext_hw_conf)); +- printk(KERN_INFO "0x%02X port_ctrl = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p0.port_ctrl), +- readw(&ha->reg->u2.isp4022.p0.port_ctrl)); +- printk(KERN_INFO "0x%02X port_status = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p0.port_status), +- readw(&ha->reg->u2.isp4022.p0.port_status)); +- printk(KERN_INFO "0x%02X gp_out = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p0.gp_out), +- readw(&ha->reg->u2.isp4022.p0.gp_out)); +- printk(KERN_INFO "0x%02X gp_in = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, u2.isp4022.p0.gp_in), +- readw(&ha->reg->u2.isp4022.p0.gp_in)); +- printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p0.port_err_status), +- readw(&ha->reg->u2.isp4022.p0.port_err_status)); +- printk(KERN_INFO "Page 1 Registers:\n"); +- writel(HOST_MEM_CFG_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT), +- &ha->reg->ctrl_status); +- printk(KERN_INFO "0x%02X req_q_out = 0x%08X\n", +- (uint8_t) offsetof(struct isp_reg, +- u2.isp4022.p1.req_q_out), +- readw(&ha->reg->u2.isp4022.p1.req_q_out)); +- writel(PORT_CTRL_STAT_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT), +- &ha->reg->ctrl_status); +- } +-} +- +-void qla4xxx_dump_mbox_registers(struct scsi_qla_host *ha) +-{ +- unsigned long flags = 0; +- int i = 0; +- spin_lock_irqsave(&ha->hardware_lock, flags); +- for (i = 1; i < MBOX_REG_COUNT; i++) +- printk(KERN_INFO " Mailbox[%d] = %08x\n", i, +- readw(&ha->reg->mailbox[i])); +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +-} +- +-void qla4xxx_dump_registers(struct scsi_qla_host *ha) +-{ +- unsigned long flags = 0; +- spin_lock_irqsave(&ha->hardware_lock, flags); +- __dump_registers(ha); +- spin_unlock_irqrestore(&ha->hardware_lock, flags); +-} ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + void qla4xxx_dump_buffer(void *b, uint32_t size) + { +@@ -198,4 +31,3 @@ + printk(KERN_DEBUG "\n"); + } + +-#endif /* 0 */ +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h 2007-07-08 
19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_def.h 2007-12-21 15:36:12.000000000 -0500 +@@ -122,8 +122,7 @@ + + #define ISCSI_IPADDR_SIZE 4 /* IP address size */ + #define ISCSI_ALIAS_SIZE 32 /* ISCSI Alais name size */ +-#define ISCSI_NAME_SIZE 255 /* ISCSI Name size - +- * usually a string */ ++#define ISCSI_NAME_SIZE 0xE0 /* ISCSI Name size */ + + #define LSDW(x) ((u32)((u64)(x))) + #define MSDW(x) ((u32)((((u64)(x)) >> 16) >> 16)) +@@ -187,7 +186,19 @@ + u_long u_start; /* Time when we handed the cmd to F/W */ + }; + +- /* ++/* ++ * Asynchronous Event Queue structure ++ */ ++struct aen { ++ uint32_t mbox_sts[MBOX_AEN_REG_COUNT]; ++}; ++ ++struct ql4_aen_log { ++ int count; ++ struct aen entry[MAX_AEN_ENTRIES]; ++}; ++ ++/* + * Device Database (DDB) structure + */ + struct ddb_entry { +@@ -254,13 +265,6 @@ + #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ + #define DF_FO_MASKED 3 + +-/* +- * Asynchronous Event Queue structure +- */ +-struct aen { +- uint32_t mbox_sts[MBOX_AEN_REG_COUNT]; +-}; +- + + #include "ql4_fw.h" + #include "ql4_nvram.h" +@@ -270,20 +274,17 @@ + */ + struct scsi_qla_host { + /* Linux adapter configuration data */ +- struct Scsi_Host *host; /* pointer to host data */ +- uint32_t tot_ddbs; + unsigned long flags; + + #define AF_ONLINE 0 /* 0x00000001 */ + #define AF_INIT_DONE 1 /* 0x00000002 */ + #define AF_MBOX_COMMAND 2 /* 0x00000004 */ + #define AF_MBOX_COMMAND_DONE 3 /* 0x00000008 */ +-#define AF_INTERRUPTS_ON 6 /* 0x00000040 Not Used */ ++#define AF_INTERRUPTS_ON 6 /* 0x00000040 */ + #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ + #define AF_LINK_UP 8 /* 0x00000100 */ + #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ +-#define AF_ISNS_CMD_IN_PROCESS 12 /* 0x00001000 */ +-#define AF_ISNS_CMD_DONE 13 /* 0x00002000 */ ++#define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ + + unsigned long dpc_flags; + +@@ -296,6 +297,9 @@ + #define DPC_AEN 9 /* 0x00000200 */ + #define DPC_GET_DHCP_IP_ADDR 15 /* 0x00008000 */ + ++ struct Scsi_Host *host; /* pointer to host data */ ++ uint32_t tot_ddbs; ++ + uint16_t iocb_cnt; + uint16_t iocb_hiwat; + +@@ -344,6 +348,7 @@ + uint32_t firmware_version[2]; + uint32_t patch_number; + uint32_t build_number; ++ uint32_t board_id; + + /* --- From Init_FW --- */ + /* init_cb_t *init_cb; */ +@@ -363,7 +368,6 @@ + + /* --- From GetFwState --- */ + uint32_t firmware_state; +- uint32_t board_id; + uint32_t addl_fw_state; + + /* Linux kernel thread */ +@@ -414,6 +418,8 @@ + uint16_t aen_out; + struct aen aen_q[MAX_AEN_ENTRIES]; + ++ struct ql4_aen_log aen_log;/* tracks all aens */ ++ + /* This mutex protects several threads to do mailbox commands + * concurrently. 
+ */ +@@ -585,10 +591,4 @@ + #define FLUSH_DDB_CHANGED_AENS 1 + #define RELOGIN_DDB_CHANGED_AENS 2 + +-#include "ql4_version.h" +-#include "ql4_glbl.h" +-#include "ql4_dbg.h" +-#include "ql4_inline.h" +- +- + #endif /*_QLA4XXX_H */ +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_fw.h 2007-12-21 15:36:12.000000000 -0500 +@@ -20,143 +20,23 @@ + *************************************************************************/ + + struct port_ctrl_stat_regs { +- __le32 ext_hw_conf; /* 80 x50 R/W */ +- __le32 intChipConfiguration; /* 84 x54 */ +- __le32 port_ctrl; /* 88 x58 */ +- __le32 port_status; /* 92 x5c */ +- __le32 HostPrimMACHi; /* 96 x60 */ +- __le32 HostPrimMACLow; /* 100 x64 */ +- __le32 HostSecMACHi; /* 104 x68 */ +- __le32 HostSecMACLow; /* 108 x6c */ +- __le32 EPPrimMACHi; /* 112 x70 */ +- __le32 EPPrimMACLow; /* 116 x74 */ +- __le32 EPSecMACHi; /* 120 x78 */ +- __le32 EPSecMACLow; /* 124 x7c */ +- __le32 HostPrimIPHi; /* 128 x80 */ +- __le32 HostPrimIPMidHi; /* 132 x84 */ +- __le32 HostPrimIPMidLow; /* 136 x88 */ +- __le32 HostPrimIPLow; /* 140 x8c */ +- __le32 HostSecIPHi; /* 144 x90 */ +- __le32 HostSecIPMidHi; /* 148 x94 */ +- __le32 HostSecIPMidLow; /* 152 x98 */ +- __le32 HostSecIPLow; /* 156 x9c */ +- __le32 EPPrimIPHi; /* 160 xa0 */ +- __le32 EPPrimIPMidHi; /* 164 xa4 */ +- __le32 EPPrimIPMidLow; /* 168 xa8 */ +- __le32 EPPrimIPLow; /* 172 xac */ +- __le32 EPSecIPHi; /* 176 xb0 */ +- __le32 EPSecIPMidHi; /* 180 xb4 */ +- __le32 EPSecIPMidLow; /* 184 xb8 */ +- __le32 EPSecIPLow; /* 188 xbc */ +- __le32 IPReassemblyTimeout; /* 192 xc0 */ +- __le32 EthMaxFramePayload; /* 196 xc4 */ +- __le32 TCPMaxWindowSize; /* 200 xc8 */ +- __le32 TCPCurrentTimestampHi; /* 204 xcc */ +- __le32 TCPCurrentTimestampLow; /* 208 xd0 */ +- __le32 LocalRAMAddress; /* 212 xd4 */ +- __le32 LocalRAMData; /* 216 xd8 */ +- __le32 PCSReserved1; /* 220 xdc */ +- __le32 gp_out; /* 224 xe0 */ +- __le32 gp_in; /* 228 xe4 */ +- __le32 ProbeMuxAddr; /* 232 xe8 */ +- __le32 ProbeMuxData; /* 236 xec */ +- __le32 ERMQueueBaseAddr0; /* 240 xf0 */ +- __le32 ERMQueueBaseAddr1; /* 244 xf4 */ +- __le32 MACConfiguration; /* 248 xf8 */ +- __le32 port_err_status; /* 252 xfc COR */ ++ __le32 ext_hw_conf; /* 0x50 R/W */ ++ __le32 rsrvd0; /* 0x54 */ ++ __le32 port_ctrl; /* 0x58 */ ++ __le32 port_status; /* 0x5c */ ++ __le32 rsrvd1[32]; /* 0x60-0xdf */ ++ __le32 gp_out; /* 0xe0 */ ++ __le32 gp_in; /* 0xe4 */ ++ __le32 rsrvd2[5]; /* 0xe8-0xfb */ ++ __le32 port_err_status; /* 0xfc */ + }; + + struct host_mem_cfg_regs { +- __le32 NetRequestQueueOut; /* 80 x50 */ +- __le32 NetRequestQueueOutAddrHi; /* 84 x54 */ +- __le32 NetRequestQueueOutAddrLow; /* 88 x58 */ +- __le32 NetRequestQueueBaseAddrHi; /* 92 x5c */ +- __le32 NetRequestQueueBaseAddrLow; /* 96 x60 */ +- __le32 NetRequestQueueLength; /* 100 x64 */ +- __le32 NetResponseQueueIn; /* 104 x68 */ +- __le32 NetResponseQueueInAddrHi; /* 108 x6c */ +- __le32 NetResponseQueueInAddrLow; /* 112 x70 */ +- __le32 NetResponseQueueBaseAddrHi; /* 116 x74 */ +- __le32 NetResponseQueueBaseAddrLow; /* 120 x78 */ +- __le32 NetResponseQueueLength; /* 124 x7c */ +- __le32 req_q_out; /* 128 x80 */ +- __le32 RequestQueueOutAddrHi; /* 132 x84 */ +- __le32 RequestQueueOutAddrLow; /* 136 x88 */ +- __le32 RequestQueueBaseAddrHi; /* 140 x8c */ +- __le32 RequestQueueBaseAddrLow; /* 144 x90 */ +- __le32 
RequestQueueLength; /* 148 x94 */ +- __le32 ResponseQueueIn; /* 152 x98 */ +- __le32 ResponseQueueInAddrHi; /* 156 x9c */ +- __le32 ResponseQueueInAddrLow; /* 160 xa0 */ +- __le32 ResponseQueueBaseAddrHi; /* 164 xa4 */ +- __le32 ResponseQueueBaseAddrLow; /* 168 xa8 */ +- __le32 ResponseQueueLength; /* 172 xac */ +- __le32 NetRxLargeBufferQueueOut; /* 176 xb0 */ +- __le32 NetRxLargeBufferQueueBaseAddrHi; /* 180 xb4 */ +- __le32 NetRxLargeBufferQueueBaseAddrLow; /* 184 xb8 */ +- __le32 NetRxLargeBufferQueueLength; /* 188 xbc */ +- __le32 NetRxLargeBufferLength; /* 192 xc0 */ +- __le32 NetRxSmallBufferQueueOut; /* 196 xc4 */ +- __le32 NetRxSmallBufferQueueBaseAddrHi; /* 200 xc8 */ +- __le32 NetRxSmallBufferQueueBaseAddrLow; /* 204 xcc */ +- __le32 NetRxSmallBufferQueueLength; /* 208 xd0 */ +- __le32 NetRxSmallBufferLength; /* 212 xd4 */ +- __le32 HMCReserved0[10]; /* 216 xd8 */ +-}; +- +-struct local_ram_cfg_regs { +- __le32 BufletSize; /* 80 x50 */ +- __le32 BufletMaxCount; /* 84 x54 */ +- __le32 BufletCurrCount; /* 88 x58 */ +- __le32 BufletPauseThresholdCount; /* 92 x5c */ +- __le32 BufletTCPWinThresholdHi; /* 96 x60 */ +- __le32 BufletTCPWinThresholdLow; /* 100 x64 */ +- __le32 IPHashTableBaseAddr; /* 104 x68 */ +- __le32 IPHashTableSize; /* 108 x6c */ +- __le32 TCPHashTableBaseAddr; /* 112 x70 */ +- __le32 TCPHashTableSize; /* 116 x74 */ +- __le32 NCBAreaBaseAddr; /* 120 x78 */ +- __le32 NCBMaxCount; /* 124 x7c */ +- __le32 NCBCurrCount; /* 128 x80 */ +- __le32 DRBAreaBaseAddr; /* 132 x84 */ +- __le32 DRBMaxCount; /* 136 x88 */ +- __le32 DRBCurrCount; /* 140 x8c */ +- __le32 LRCReserved[28]; /* 144 x90 */ +-}; +- +-struct prot_stat_regs { +- __le32 MACTxFrameCount; /* 80 x50 R */ +- __le32 MACTxByteCount; /* 84 x54 R */ +- __le32 MACRxFrameCount; /* 88 x58 R */ +- __le32 MACRxByteCount; /* 92 x5c R */ +- __le32 MACCRCErrCount; /* 96 x60 R */ +- __le32 MACEncErrCount; /* 100 x64 R */ +- __le32 MACRxLengthErrCount; /* 104 x68 R */ +- __le32 IPTxPacketCount; /* 108 x6c R */ +- __le32 IPTxByteCount; /* 112 x70 R */ +- __le32 IPTxFragmentCount; /* 116 x74 R */ +- __le32 IPRxPacketCount; /* 120 x78 R */ +- __le32 IPRxByteCount; /* 124 x7c R */ +- __le32 IPRxFragmentCount; /* 128 x80 R */ +- __le32 IPDatagramReassemblyCount; /* 132 x84 R */ +- __le32 IPV6RxPacketCount; /* 136 x88 R */ +- __le32 IPErrPacketCount; /* 140 x8c R */ +- __le32 IPReassemblyErrCount; /* 144 x90 R */ +- __le32 TCPTxSegmentCount; /* 148 x94 R */ +- __le32 TCPTxByteCount; /* 152 x98 R */ +- __le32 TCPRxSegmentCount; /* 156 x9c R */ +- __le32 TCPRxByteCount; /* 160 xa0 R */ +- __le32 TCPTimerExpCount; /* 164 xa4 R */ +- __le32 TCPRxAckCount; /* 168 xa8 R */ +- __le32 TCPTxAckCount; /* 172 xac R */ +- __le32 TCPRxErrOOOCount; /* 176 xb0 R */ +- __le32 PSReserved0; /* 180 xb4 */ +- __le32 TCPRxWindowProbeUpdateCount; /* 184 xb8 R */ +- __le32 ECCErrCorrectionCount; /* 188 xbc R */ +- __le32 PSReserved1[16]; /* 192 xc0 */ ++ __le32 rsrvd0[12]; /* 0x50-0x79 */ ++ __le32 req_q_out; /* 0x80 */ ++ __le32 rsrvd1[31]; /* 0x84-0xFF */ + }; + +- + /* remote register set (access via PCI memory read/write) */ + struct isp_reg { + #define MBOX_REG_COUNT 8 +@@ -207,11 +87,7 @@ + union { + struct port_ctrl_stat_regs p0; + struct host_mem_cfg_regs p1; +- struct local_ram_cfg_regs p2; +- struct prot_stat_regs p3; +- __le32 r_union[44]; + }; +- + } __attribute__ ((packed)) isp4022; + } u2; + }; /* 256 x100 */ +@@ -296,6 +172,7 @@ + /* ISP Semaphore definitions */ + + /* ISP General Purpose Output definitions */ ++#define GPOR_TOPCAT_RESET 
0x00000004 + + /* shadow registers (DMA'd from HA to system memory. read only) */ + struct shadow_regs { +@@ -337,6 +214,7 @@ + + /* Mailbox command definitions */ + #define MBOX_CMD_ABOUT_FW 0x0009 ++#define MBOX_CMD_PING 0x000B + #define MBOX_CMD_LUN_RESET 0x0016 + #define MBOX_CMD_GET_MANAGEMENT_DATA 0x001E + #define MBOX_CMD_GET_FW_STATUS 0x001F +@@ -364,6 +242,17 @@ + #define MBOX_CMD_GET_FW_STATE 0x0069 + #define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS 0x006A + #define MBOX_CMD_RESTORE_FACTORY_DEFAULTS 0x0087 ++#define MBOX_CMD_SET_ACB 0x0088 ++#define MBOX_CMD_GET_ACB 0x0089 ++#define MBOX_CMD_DISABLE_ACB 0x008A ++#define MBOX_CMD_GET_IPV6_NEIGHBOR_CACHE 0x008B ++#define MBOX_CMD_GET_IPV6_DEST_CACHE 0x008C ++#define MBOX_CMD_GET_IPV6_DEF_ROUTER_LIST 0x008D ++#define MBOX_CMD_GET_IPV6_LCL_PREFIX_LIST 0x008E ++#define MBOX_CMD_SET_IPV6_NEIGHBOR_CACHE 0x0090 ++#define MBOX_CMD_GET_IP_ADDR_STATE 0x0091 ++#define MBOX_CMD_SEND_IPV6_ROUTER_SOL 0x0092 ++#define MBOX_CMD_GET_DB_ENTRY_CURRENT_IP_ADDR 0x0093 + + /* Mailbox 1 */ + #define FW_STATE_READY 0x0000 +@@ -409,6 +298,16 @@ + #define MBOX_ASTS_DHCP_LEASE_EXPIRED 0x801D + #define MBOX_ASTS_DHCP_LEASE_ACQUIRED 0x801F + #define MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED 0x8021 ++#define MBOX_ASTS_DUPLICATE_IP 0x8025 ++#define MBOX_ASTS_ARP_COMPLETE 0x8026 ++#define MBOX_ASTS_SUBNET_STATE_CHANGE 0x8027 ++#define MBOX_ASTS_RESPONSE_QUEUE_FULL 0x8028 ++#define MBOX_ASTS_IP_ADDR_STATE_CHANGED 0x8029 ++#define MBOX_ASTS_IPV6_PREFIX_EXPIRED 0x802B ++#define MBOX_ASTS_IPV6_ND_PREFIX_IGNORED 0x802C ++#define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D ++#define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E ++ + #define ISNS_EVENT_DATA_RECEIVED 0x0000 + #define ISNS_EVENT_CONNECTION_OPENED 0x0001 + #define ISNS_EVENT_CONNECTION_FAILED 0x0002 +@@ -418,137 +317,166 @@ + /*************************************************************************/ + + /* Host Adapter Initialization Control Block (from host) */ +-struct init_fw_ctrl_blk { +- uint8_t Version; /* 00 */ +- uint8_t Control; /* 01 */ ++struct addr_ctrl_blk { ++ uint8_t version; /* 00 */ ++ uint8_t control; /* 01 */ + +- uint16_t FwOptions; /* 02-03 */ ++ uint16_t fw_options; /* 02-03 */ + #define FWOPT_HEARTBEAT_ENABLE 0x1000 + #define FWOPT_SESSION_MODE 0x0040 + #define FWOPT_INITIATOR_MODE 0x0020 + #define FWOPT_TARGET_MODE 0x0010 + +- uint16_t ExecThrottle; /* 04-05 */ +- uint8_t RetryCount; /* 06 */ +- uint8_t RetryDelay; /* 07 */ +- uint16_t MaxEthFrPayloadSize; /* 08-09 */ +- uint16_t AddFwOptions; /* 0A-0B */ +- +- uint8_t HeartbeatInterval; /* 0C */ +- uint8_t InstanceNumber; /* 0D */ +- uint16_t RES2; /* 0E-0F */ +- uint16_t ReqQConsumerIndex; /* 10-11 */ +- uint16_t ComplQProducerIndex; /* 12-13 */ +- uint16_t ReqQLen; /* 14-15 */ +- uint16_t ComplQLen; /* 16-17 */ +- uint32_t ReqQAddrLo; /* 18-1B */ +- uint32_t ReqQAddrHi; /* 1C-1F */ +- uint32_t ComplQAddrLo; /* 20-23 */ +- uint32_t ComplQAddrHi; /* 24-27 */ +- uint32_t ShadowRegBufAddrLo; /* 28-2B */ +- uint32_t ShadowRegBufAddrHi; /* 2C-2F */ +- +- uint16_t iSCSIOptions; /* 30-31 */ +- +- uint16_t TCPOptions; /* 32-33 */ +- +- uint16_t IPOptions; /* 34-35 */ +- +- uint16_t MaxPDUSize; /* 36-37 */ +- uint16_t RcvMarkerInt; /* 38-39 */ +- uint16_t SndMarkerInt; /* 3A-3B */ +- uint16_t InitMarkerlessInt; /* 3C-3D */ +- uint16_t FirstBurstSize; /* 3E-3F */ +- uint16_t DefaultTime2Wait; /* 40-41 */ +- uint16_t DefaultTime2Retain; /* 42-43 */ +- uint16_t MaxOutStndngR2T; /* 44-45 */ +- uint16_t KeepAliveTimeout; /* 46-47 */ +- uint16_t 
PortNumber; /* 48-49 */ +- uint16_t MaxBurstSize; /* 4A-4B */ +- uint32_t RES4; /* 4C-4F */ +- uint8_t IPAddr[4]; /* 50-53 */ +- uint8_t RES5[12]; /* 54-5F */ +- uint8_t SubnetMask[4]; /* 60-63 */ +- uint8_t RES6[12]; /* 64-6F */ +- uint8_t GatewayIPAddr[4]; /* 70-73 */ +- uint8_t RES7[12]; /* 74-7F */ +- uint8_t PriDNSIPAddr[4]; /* 80-83 */ +- uint8_t SecDNSIPAddr[4]; /* 84-87 */ +- uint8_t RES8[8]; /* 88-8F */ +- uint8_t Alias[32]; /* 90-AF */ +- uint8_t TargAddr[8]; /* B0-B7 *//* /FIXME: Remove?? */ +- uint8_t CHAPNameSecretsTable[8]; /* B8-BF */ +- uint8_t EthernetMACAddr[6]; /* C0-C5 */ +- uint16_t TargetPortalGroup; /* C6-C7 */ +- uint8_t SendScale; /* C8 */ +- uint8_t RecvScale; /* C9 */ +- uint8_t TypeOfService; /* CA */ +- uint8_t Time2Live; /* CB */ +- uint16_t VLANPriority; /* CC-CD */ +- uint16_t Reserved8; /* CE-CF */ +- uint8_t SecIPAddr[4]; /* D0-D3 */ +- uint8_t Reserved9[12]; /* D4-DF */ +- uint8_t iSNSIPAddr[4]; /* E0-E3 */ +- uint16_t iSNSServerPortNumber; /* E4-E5 */ +- uint8_t Reserved10[10]; /* E6-EF */ +- uint8_t SLPDAIPAddr[4]; /* F0-F3 */ +- uint8_t Reserved11[12]; /* F4-FF */ +- uint8_t iSCSINameString[256]; /* 100-1FF */ ++ uint16_t exec_throttle; /* 04-05 */ ++ uint8_t zio_count; /* 06 */ ++ uint8_t res0; /* 07 */ ++ uint16_t eth_mtu_size; /* 08-09 */ ++ uint16_t add_fw_options; /* 0A-0B */ ++ ++ uint8_t hb_interval; /* 0C */ ++ uint8_t inst_num; /* 0D */ ++ uint16_t res1; /* 0E-0F */ ++ uint16_t rqq_consumer_idx; /* 10-11 */ ++ uint16_t compq_producer_idx; /* 12-13 */ ++ uint16_t rqq_len; /* 14-15 */ ++ uint16_t compq_len; /* 16-17 */ ++ uint32_t rqq_addr_lo; /* 18-1B */ ++ uint32_t rqq_addr_hi; /* 1C-1F */ ++ uint32_t compq_addr_lo; /* 20-23 */ ++ uint32_t compq_addr_hi; /* 24-27 */ ++ uint32_t shdwreg_addr_lo; /* 28-2B */ ++ uint32_t shdwreg_addr_hi; /* 2C-2F */ ++ ++ uint16_t iscsi_opts; /* 30-31 */ ++ uint16_t ipv4_tcp_opts; /* 32-33 */ ++ uint16_t ipv4_ip_opts; /* 34-35 */ ++ ++ uint16_t iscsi_max_pdu_size; /* 36-37 */ ++ uint8_t ipv4_tos; /* 38 */ ++ uint8_t ipv4_ttl; /* 39 */ ++ uint8_t acb_version; /* 3A */ ++ uint8_t res2; /* 3B */ ++ uint16_t def_timeout; /* 3C-3D */ ++ uint16_t iscsi_fburst_len; /* 3E-3F */ ++ uint16_t iscsi_def_time2wait; /* 40-41 */ ++ uint16_t iscsi_def_time2retain; /* 42-43 */ ++ uint16_t iscsi_max_outstnd_r2t; /* 44-45 */ ++ uint16_t conn_ka_timeout; /* 46-47 */ ++ uint16_t ipv4_port; /* 48-49 */ ++ uint16_t iscsi_max_burst_len; /* 4A-4B */ ++ uint32_t res5; /* 4C-4F */ ++ uint8_t ipv4_addr[4]; /* 50-53 */ ++ uint16_t ipv4_vlan_tag; /* 54-55 */ ++ uint8_t ipv4_addr_state; /* 56 */ ++ uint8_t ipv4_cacheid; /* 57 */ ++ uint8_t res6[8]; /* 58-5F */ ++ uint8_t ipv4_subnet[4]; /* 60-63 */ ++ uint8_t res7[12]; /* 64-6F */ ++ uint8_t ipv4_gw_addr[4]; /* 70-73 */ ++ uint8_t res8[0xc]; /* 74-7F */ ++ uint8_t pri_dns_srvr_ip[4];/* 80-83 */ ++ uint8_t sec_dns_srvr_ip[4];/* 84-87 */ ++ uint16_t min_eph_port; /* 88-89 */ ++ uint16_t max_eph_port; /* 8A-8B */ ++ uint8_t res9[4]; /* 8C-8F */ ++ uint8_t iscsi_alias[32];/* 90-AF */ ++ uint8_t res9_1[0x16]; /* B0-C5 */ ++ uint16_t tgt_portal_grp;/* C6-C7 */ ++ uint8_t abort_timer; /* C8 */ ++ uint8_t ipv4_tcp_wsf; /* C9 */ ++ uint8_t res10[6]; /* CA-CF */ ++ uint8_t ipv4_sec_ip_addr[4]; /* D0-D3 */ ++ uint8_t ipv4_dhcp_vid_len; /* D4 */ ++ uint8_t ipv4_dhcp_vid[11]; /* D5-DF */ ++ uint8_t res11[20]; /* E0-F3 */ ++ uint8_t ipv4_dhcp_alt_cid_len; /* F4 */ ++ uint8_t ipv4_dhcp_alt_cid[11]; /* F5-FF */ ++ uint8_t iscsi_name[224]; /* 100-1DF */ ++ uint8_t res12[32]; /* 1E0-1FF */ ++ uint32_t cookie; 
/* 200-203 */ ++ uint16_t ipv6_port; /* 204-205 */ ++ uint16_t ipv6_opts; /* 206-207 */ ++ uint16_t ipv6_addtl_opts; /* 208-209 */ ++ uint16_t ipv6_tcp_opts; /* 20A-20B */ ++ uint8_t ipv6_tcp_wsf; /* 20C */ ++ uint16_t ipv6_flow_lbl; /* 20D-20F */ ++ uint8_t ipv6_gw_addr[16]; /* 210-21F */ ++ uint16_t ipv6_vlan_tag; /* 220-221 */ ++ uint8_t ipv6_lnk_lcl_addr_state;/* 222 */ ++ uint8_t ipv6_addr0_state; /* 223 */ ++ uint8_t ipv6_addr1_state; /* 224 */ ++ uint8_t ipv6_gw_state; /* 225 */ ++ uint8_t ipv6_traffic_class; /* 226 */ ++ uint8_t ipv6_hop_limit; /* 227 */ ++ uint8_t ipv6_if_id[8]; /* 228-22F */ ++ uint8_t ipv6_addr0[16]; /* 230-23F */ ++ uint8_t ipv6_addr1[16]; /* 240-24F */ ++ uint32_t ipv6_nd_reach_time; /* 250-253 */ ++ uint32_t ipv6_nd_rexmit_timer; /* 254-257 */ ++ uint32_t ipv6_nd_stale_timeout; /* 258-25B */ ++ uint8_t ipv6_dup_addr_detect_count; /* 25C */ ++ uint8_t ipv6_cache_id; /* 25D */ ++ uint8_t res13[18]; /* 25E-26F */ ++ uint32_t ipv6_gw_advrt_mtu; /* 270-273 */ ++ uint8_t res14[140]; /* 274-2FF */ ++}; ++ ++struct init_fw_ctrl_blk { ++ struct addr_ctrl_blk pri; ++ struct addr_ctrl_blk sec; + }; + + /*************************************************************************/ + + struct dev_db_entry { +- uint8_t options; /* 00 */ ++ uint16_t options; /* 00-01 */ + #define DDB_OPT_DISC_SESSION 0x10 + #define DDB_OPT_TARGET 0x02 /* device is a target */ + +- uint8_t control; /* 01 */ +- +- uint16_t exeThrottle; /* 02-03 */ +- uint16_t exeCount; /* 04-05 */ +- uint8_t retryCount; /* 06 */ +- uint8_t retryDelay; /* 07 */ +- uint16_t iSCSIOptions; /* 08-09 */ +- +- uint16_t TCPOptions; /* 0A-0B */ +- +- uint16_t IPOptions; /* 0C-0D */ +- +- uint16_t maxPDUSize; /* 0E-0F */ +- uint16_t rcvMarkerInt; /* 10-11 */ +- uint16_t sndMarkerInt; /* 12-13 */ +- uint16_t iSCSIMaxSndDataSegLen; /* 14-15 */ +- uint16_t firstBurstSize; /* 16-17 */ +- uint16_t minTime2Wait; /* 18-19 : RA :default_time2wait */ +- uint16_t maxTime2Retain; /* 1A-1B */ +- uint16_t maxOutstndngR2T; /* 1C-1D */ +- uint16_t keepAliveTimeout; /* 1E-1F */ +- uint8_t ISID[6]; /* 20-25 big-endian, must be converted ++ uint16_t exec_throttle; /* 02-03 */ ++ uint16_t exec_count; /* 04-05 */ ++ uint16_t res0; /* 06-07 */ ++ uint16_t iscsi_options; /* 08-09 */ ++ uint16_t tcp_options; /* 0A-0B */ ++ uint16_t ip_options; /* 0C-0D */ ++ uint16_t iscsi_max_rcv_data_seg_len; /* 0E-0F */ ++ uint32_t res1; /* 10-13 */ ++ uint16_t iscsi_max_snd_data_seg_len; /* 14-15 */ ++ uint16_t iscsi_first_burst_len; /* 16-17 */ ++ uint16_t iscsi_def_time2wait; /* 18-19 */ ++ uint16_t iscsi_def_time2retain; /* 1A-1B */ ++ uint16_t iscsi_max_outsnd_r2t; /* 1C-1D */ ++ uint16_t ka_timeout; /* 1E-1F */ ++ uint8_t isid[6]; /* 20-25 big-endian, must be converted + * to little-endian */ +- uint16_t TSID; /* 26-27 */ +- uint16_t portNumber; /* 28-29 */ +- uint16_t maxBurstSize; /* 2A-2B */ +- uint16_t taskMngmntTimeout; /* 2C-2D */ +- uint16_t reserved1; /* 2E-2F */ +- uint8_t ipAddr[0x10]; /* 30-3F */ +- uint8_t iSCSIAlias[0x20]; /* 40-5F */ +- uint8_t targetAddr[0x20]; /* 60-7F */ +- uint8_t userID[0x20]; /* 80-9F */ +- uint8_t password[0x20]; /* A0-BF */ +- uint8_t iscsiName[0x100]; /* C0-1BF : xxzzy Make this a ++ uint16_t tsid; /* 26-27 */ ++ uint16_t port; /* 28-29 */ ++ uint16_t iscsi_max_burst_len; /* 2A-2B */ ++ uint16_t def_timeout; /* 2C-2D */ ++ uint16_t res2; /* 2E-2F */ ++ uint8_t ip_addr[0x10]; /* 30-3F */ ++ uint8_t iscsi_alias[0x20]; /* 40-5F */ ++ uint8_t tgt_addr[0x20]; /* 60-7F */ ++ uint16_t mss; /* 80-81 */ ++ uint16_t res3; 
/* 82-83 */ ++ uint16_t lcl_port; /* 84-85 */ ++ uint8_t ipv4_tos; /* 86 */ ++ uint16_t ipv6_flow_lbl; /* 87-89 */ ++ uint8_t res4[0x36]; /* 8A-BF */ ++ uint8_t iscsi_name[0xE0]; /* C0-19F : xxzzy Make this a + * pointer to a string so we + * don't have to reserve soooo + * much RAM */ +- uint16_t ddbLink; /* 1C0-1C1 */ +- uint16_t CHAPTableIndex; /* 1C2-1C3 */ +- uint16_t TargetPortalGroup; /* 1C4-1C5 */ +- uint16_t reserved2[2]; /* 1C6-1C7 */ +- uint32_t statSN; /* 1C8-1CB */ +- uint32_t expStatSN; /* 1CC-1CF */ +- uint16_t reserved3[0x2C]; /* 1D0-1FB */ +- uint16_t ddbValidCookie; /* 1FC-1FD */ +- uint16_t ddbValidSize; /* 1FE-1FF */ ++ uint8_t ipv6_addr[0x10];/* 1A0-1AF */ ++ uint8_t res5[0x10]; /* 1B0-1BF */ ++ uint16_t ddb_link; /* 1C0-1C1 */ ++ uint16_t chap_tbl_idx; /* 1C2-1C3 */ ++ uint16_t tgt_portal_grp; /* 1C4-1C5 */ ++ uint8_t tcp_xmt_wsf; /* 1C6 */ ++ uint8_t tcp_rcv_wsf; /* 1C7 */ ++ uint32_t stat_sn; /* 1C8-1CB */ ++ uint32_t exp_stat_sn; /* 1CC-1CF */ ++ uint8_t res6[0x30]; /* 1D0-1FF */ + }; + + /*************************************************************************/ +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_glbl.h 2007-12-21 15:36:12.000000000 -0500 +@@ -8,6 +8,9 @@ + #ifndef __QLA4x_GBL_H + #define __QLA4x_GBL_H + ++struct iscsi_cls_conn; ++ ++void qla4xxx_hw_reset(struct scsi_qla_host *ha); + int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a); + int qla4xxx_send_tgts(struct scsi_qla_host *ha, char *ip, uint16_t port); + int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb); +@@ -58,11 +61,13 @@ + void qla4xxx_interrupt_service_routine(struct scsi_qla_host * ha, + uint32_t intr_status); + int qla4xxx_init_rings(struct scsi_qla_host * ha); +-struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, uint32_t index); ++struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, ++ uint32_t index); + void qla4xxx_srb_compl(struct scsi_qla_host *ha, struct srb *srb); + int qla4xxx_reinitialize_ddb_list(struct scsi_qla_host * ha); + int qla4xxx_process_ddb_changed(struct scsi_qla_host * ha, + uint32_t fw_ddb_index, uint32_t state); ++void qla4xxx_dump_buffer(void *b, uint32_t size); + + extern int ql4xextended_error_logging; + extern int ql4xdiscoverywait; +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_init.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,6 +6,9 @@ + */ + + #include "ql4_def.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + static struct ddb_entry * qla4xxx_alloc_ddb(struct scsi_qla_host *ha, + uint32_t fw_ddb_index); +@@ -300,12 +303,12 @@ + if (!qla4xxx_fw_ready(ha)) + return status; + +- set_bit(AF_ONLINE, &ha->flags); + return qla4xxx_get_firmware_status(ha); + } + + static struct ddb_entry* qla4xxx_get_ddb_entry(struct scsi_qla_host *ha, +- uint32_t fw_ddb_index) ++ uint32_t fw_ddb_index, ++ uint32_t *new_tgt) + { + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; +@@ -313,6 +316,7 @@ + int found = 0; + uint32_t device_state; + ++ *new_tgt = 0; + /* Make sure the dma buffer is valid */ + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, + sizeof(*fw_ddb_entry), 
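/*
 * Illustrative sketch, not part of the patch: the offset comments in
 * struct addr_ctrl_blk above describe a fixed hardware layout, and the
 * key offsets can be pinned down at compile time with the kernel's
 * BUILD_BUG_ON (from <linux/kernel.h>; offsetof comes from
 * <linux/stddef.h>). The asserted values are taken directly from the
 * comments in the structure definition.
 */
static inline void acb_layout_check(void)
{
        BUILD_BUG_ON(offsetof(struct addr_ctrl_blk, ipv4_port) != 0x48);
        BUILD_BUG_ON(offsetof(struct addr_ctrl_blk, cookie) != 0x200);
        BUILD_BUG_ON(sizeof(struct addr_ctrl_blk) != 0x300);
        BUILD_BUG_ON(sizeof(struct init_fw_ctrl_blk) !=
                     2 * sizeof(struct addr_ctrl_blk));
}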
+@@ -337,7 +341,7 @@ + DEBUG2(printk("scsi%ld: %s: Looking for ddb[%d]\n", ha->host_no, + __func__, fw_ddb_index)); + list_for_each_entry(ddb_entry, &ha->ddb_list, list) { +- if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsiName, ++ if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name, + ISCSI_NAME_SIZE) == 0) { + found++; + break; +@@ -348,6 +352,7 @@ + DEBUG2(printk("scsi%ld: %s: ddb[%d] not found - allocating " + "new ddb\n", ha->host_no, __func__, + fw_ddb_index)); ++ *new_tgt = 1; + ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index); + } + +@@ -409,26 +414,26 @@ + } + + status = QLA_SUCCESS; +- ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->TSID); ++ ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->tsid); + ddb_entry->task_mgmt_timeout = +- le16_to_cpu(fw_ddb_entry->taskMngmntTimeout); ++ le16_to_cpu(fw_ddb_entry->def_timeout); + ddb_entry->CmdSn = 0; +- ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exeThrottle); ++ ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exec_throttle); + ddb_entry->default_relogin_timeout = +- le16_to_cpu(fw_ddb_entry->taskMngmntTimeout); +- ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->minTime2Wait); ++ le16_to_cpu(fw_ddb_entry->def_timeout); ++ ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait); + + /* Update index in case it changed */ + ddb_entry->fw_ddb_index = fw_ddb_index; + ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry; + +- ddb_entry->port = le16_to_cpu(fw_ddb_entry->portNumber); +- ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->TargetPortalGroup); +- memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsiName[0], ++ ddb_entry->port = le16_to_cpu(fw_ddb_entry->port); ++ ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp); ++ memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0], + min(sizeof(ddb_entry->iscsi_name), +- sizeof(fw_ddb_entry->iscsiName))); +- memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ipAddr[0], +- min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ipAddr))); ++ sizeof(fw_ddb_entry->iscsi_name))); ++ memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0], ++ min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr))); + + DEBUG2(printk("scsi%ld: %s: ddb[%d] - State= %x status= %d.\n", + ha->host_no, __func__, fw_ddb_index, +@@ -495,6 +500,7 @@ + uint32_t ddb_state; + uint32_t conn_err, err_code; + struct ddb_entry *ddb_entry; ++ uint32_t new_tgt; + + dev_info(&ha->pdev->dev, "Initializing DDBs ...\n"); + for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES; +@@ -526,8 +532,19 @@ + "completed " + "or access denied failure\n", + ha->host_no, __func__)); +- } else ++ } else { + qla4xxx_set_ddb_entry(ha, fw_ddb_index, 0); ++ if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index, ++ NULL, 0, NULL, &next_fw_ddb_index, ++ &ddb_state, &conn_err, NULL, NULL) ++ == QLA_ERROR) { ++ DEBUG2(printk("scsi%ld: %s:" ++ "get_ddb_entry %d failed\n", ++ ha->host_no, ++ __func__, fw_ddb_index)); ++ return QLA_ERROR; ++ } ++ } + } + + if (ddb_state != DDB_DS_SESSION_ACTIVE) +@@ -540,7 +557,7 @@ + ha->host_no, __func__, fw_ddb_index)); + + /* Add DDB to internal our ddb list. 
*/ +- ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index); ++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt); + if (ddb_entry == NULL) { + DEBUG2(printk("scsi%ld: %s: Unable to allocate memory " + "for device at fw_ddb_index %d\n", +@@ -865,21 +882,20 @@ + + static void qla4x00_pci_config(struct scsi_qla_host *ha) + { +- uint16_t w, mwi; ++ uint16_t w; ++ int status; + + dev_info(&ha->pdev->dev, "Configuring PCI space...\n"); + + pci_set_master(ha->pdev); +- mwi = 0; +- if (pci_set_mwi(ha->pdev)) +- mwi = PCI_COMMAND_INVALIDATE; ++ status = pci_set_mwi(ha->pdev); + /* + * We want to respect framework's setting of PCI configuration space + * command register and also want to make sure that all bits of + * interest to us are properly set in command register. + */ + pci_read_config_word(ha->pdev, PCI_COMMAND, &w); +- w |= mwi | (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); ++ w |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR; + w &= ~PCI_COMMAND_INTX_DISABLE; + pci_write_config_word(ha->pdev, PCI_COMMAND, w); + } +@@ -911,6 +927,9 @@ + writel(set_rmask(NVR_WRITE_ENABLE), + &ha->reg->u1.isp4022.nvram); + ++ writel(2, &ha->reg->mailbox[6]); ++ readl(&ha->reg->mailbox[6]); ++ + writel(set_rmask(CSR_BOOT_ENABLE), &ha->reg->ctrl_status); + readl(&ha->reg->ctrl_status); + spin_unlock_irqrestore(&ha->hardware_lock, flags); +@@ -958,25 +977,25 @@ + return status; + } + +-int ql4xxx_lock_drvr_wait(struct scsi_qla_host *ha) ++int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a) + { +-#define QL4_LOCK_DRVR_WAIT 30 ++#define QL4_LOCK_DRVR_WAIT 60 + #define QL4_LOCK_DRVR_SLEEP 1 + + int drvr_wait = QL4_LOCK_DRVR_WAIT; + while (drvr_wait) { +- if (ql4xxx_lock_drvr(ha) == 0) { ++ if (ql4xxx_lock_drvr(a) == 0) { + ssleep(QL4_LOCK_DRVR_SLEEP); + if (drvr_wait) { + DEBUG2(printk("scsi%ld: %s: Waiting for " +- "Global Init Semaphore(%d)...n", +- ha->host_no, ++ "Global Init Semaphore(%d)...\n", ++ a->host_no, + __func__, drvr_wait)); + } + drvr_wait -= QL4_LOCK_DRVR_SLEEP; + } else { + DEBUG2(printk("scsi%ld: %s: Global Init Semaphore " +- "acquired.n", ha->host_no, __func__)); ++ "acquired\n", a->host_no, __func__)); + return QLA_SUCCESS; + } + } +@@ -1125,17 +1144,17 @@ + + /* Initialize the Host adapter request/response queues and firmware */ + if (qla4xxx_start_firmware(ha) == QLA_ERROR) +- return status; ++ goto exit_init_hba; + + if (qla4xxx_validate_mac_address(ha) == QLA_ERROR) +- return status; ++ goto exit_init_hba; + + if (qla4xxx_init_local_data(ha) == QLA_ERROR) +- return status; ++ goto exit_init_hba; + + status = qla4xxx_init_firmware(ha); + if (status == QLA_ERROR) +- return status; ++ goto exit_init_hba; + + /* + * FW is waiting to get an IP address from DHCP server: Skip building +@@ -1143,12 +1162,12 @@ + * followed by 0x8014 aen" to trigger the tgt discovery process. 
+ */ + if (ha->firmware_state & FW_STATE_DHCP_IN_PROGRESS) +- return status; ++ goto exit_init_online; + + /* Skip device discovery if ip and subnet is zero */ + if (memcmp(ha->ip_address, ip_address, IP_ADDR_LEN) == 0 || + memcmp(ha->subnet_mask, ip_address, IP_ADDR_LEN) == 0) +- return status; ++ goto exit_init_online; + + if (renew_ddb_list == PRESERVE_DDB_LIST) { + /* +@@ -1177,9 +1196,10 @@ + ha->host_no)); + } + +- exit_init_hba: ++exit_init_online: ++ set_bit(AF_ONLINE, &ha->flags); ++exit_init_hba: + return status; +- + } + + /** +@@ -1193,9 +1213,10 @@ + uint32_t fw_ddb_index) + { + struct ddb_entry * ddb_entry; ++ uint32_t new_tgt; + + /* First allocate a device structure */ +- ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index); ++ ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt); + if (ddb_entry == NULL) { + DEBUG2(printk(KERN_WARNING + "scsi%ld: Unable to allocate memory to add " +@@ -1203,6 +1224,18 @@ + return; + } + ++ if (!new_tgt && (ddb_entry->fw_ddb_index != fw_ddb_index)) { ++ /* Target has been bound to a new fw_ddb_index */ ++ qla4xxx_free_ddb(ha, ddb_entry); ++ ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index); ++ if (ddb_entry == NULL) { ++ DEBUG2(printk(KERN_WARNING ++ "scsi%ld: Unable to allocate memory" ++ " to add fw_ddb_index %d\n", ++ ha->host_no, fw_ddb_index)); ++ return; ++ } ++ } + if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) == + QLA_ERROR) { + ha->fw_ddb_index_map[fw_ddb_index] = +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_iocb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,6 +6,10 @@ + */ + + #include "ql4_def.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" ++ + + #include + +@@ -141,11 +145,13 @@ + uint16_t avail_dsds; + struct data_seg_a64 *cur_dsd; + struct scsi_cmnd *cmd; ++ struct scatterlist *sg; ++ int i; + + cmd = srb->cmd; + ha = srb->ha; + +- if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) { ++ if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + /* No data being transferred */ + cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0); + return; +@@ -154,14 +160,7 @@ + avail_dsds = COMMAND_SEG; + cur_dsd = (struct data_seg_a64 *) & (cmd_entry->dataseg[0]); + +- /* Load data segments */ +- if (cmd->use_sg) { +- struct scatterlist *cur_seg; +- struct scatterlist *end_seg; +- +- cur_seg = (struct scatterlist *)cmd->request_buffer; +- end_seg = cur_seg + tot_dsds; +- while (cur_seg < end_seg) { ++ scsi_for_each_sg(cmd, sg, tot_dsds, i) { + dma_addr_t sle_dma; + + /* Allocate additional continuation packets? 
*/ +@@ -175,19 +174,13 @@ + avail_dsds = CONTINUE_SEG; + } + +- sle_dma = sg_dma_address(cur_seg); ++ sle_dma = sg_dma_address(sg); + cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma)); + cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma)); +- cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg)); ++ cur_dsd->count = cpu_to_le32(sg_dma_len(sg)); + avail_dsds--; + + cur_dsd++; +- cur_seg++; +- } +- } else { +- cur_dsd->base.addrLow = cpu_to_le32(LSDW(srb->dma_handle)); +- cur_dsd->base.addrHigh = cpu_to_le32(MSDW(srb->dma_handle)); +- cur_dsd->count = cpu_to_le32(cmd->request_bufflen); + } + } + +@@ -204,8 +197,8 @@ + struct scsi_cmnd *cmd = srb->cmd; + struct ddb_entry *ddb_entry; + struct command_t3_entry *cmd_entry; +- struct scatterlist *sg = NULL; + ++ int nseg; + uint16_t tot_dsds; + uint16_t req_cnt; + +@@ -233,24 +226,11 @@ + index = (uint32_t)cmd->request->tag; + + /* Calculate the number of request entries needed. */ +- if (cmd->use_sg) { +- sg = (struct scatterlist *)cmd->request_buffer; +- tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- if (tot_dsds == 0) ++ nseg = scsi_dma_map(cmd); ++ if (nseg < 0) + goto queuing_error; +- } else if (cmd->request_bufflen) { +- dma_addr_t req_dma; ++ tot_dsds = nseg; + +- req_dma = pci_map_single(ha->pdev, cmd->request_buffer, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- if (dma_mapping_error(req_dma)) +- goto queuing_error; +- +- srb->dma_handle = req_dma; +- tot_dsds = 1; +- } + req_cnt = qla4xxx_calc_request_entries(tot_dsds); + + if (ha->req_q_count < (req_cnt + 2)) { +@@ -279,7 +259,7 @@ + + int_to_scsilun(cmd->device->lun, &cmd_entry->lun); + cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn); +- cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen); ++ cmd_entry->ttlByteCnt = cpu_to_le32(scsi_bufflen(cmd)); + memcpy(cmd_entry->cdb, cmd->cmnd, cmd->cmd_len); + cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds); + cmd_entry->hdr.entryCount = req_cnt; +@@ -289,13 +269,13 @@ + * transferred, as the data direction bit is sometimed filled + * in when there is no data to be transferred */ + cmd_entry->control_flags = CF_NO_DATA; +- if (cmd->request_bufflen) { ++ if (scsi_bufflen(cmd)) { + if (cmd->sc_data_direction == DMA_TO_DEVICE) + cmd_entry->control_flags = CF_WRITE; + else if (cmd->sc_data_direction == DMA_FROM_DEVICE) + cmd_entry->control_flags = CF_READ; + +- ha->bytes_xfered += cmd->request_bufflen; ++ ha->bytes_xfered += scsi_bufflen(cmd); + if (ha->bytes_xfered & ~0xFFFFF){ + ha->total_mbytes_xferred += ha->bytes_xfered >> 20; + ha->bytes_xfered &= 0xFFFFF; +@@ -359,14 +339,9 @@ + return QLA_SUCCESS; + + queuing_error: ++ if (tot_dsds) ++ scsi_dma_unmap(cmd); + +- if (cmd->use_sg && tot_dsds) { +- sg = (struct scatterlist *) cmd->request_buffer; +- pci_unmap_sg(ha->pdev, sg, cmd->use_sg, +- cmd->sc_data_direction); +- } else if (tot_dsds) +- pci_unmap_single(ha->pdev, srb->dma_handle, +- cmd->request_bufflen, cmd->sc_data_direction); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + + return QLA_ERROR; +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_isr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,6 +6,9 @@ + */ + + #include "ql4_def.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + /** + * qla2x00_process_completed_request() - Process a Fast Post response. 
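/*
 * Illustrative sketch, not part of the patch: the ql4_iocb.c hunks above
 * replace the old use_sg/request_buffer handling with the generic
 * scsi_dma_map()/scsi_for_each_sg() accessors (scsi_dma_map() itself is
 * added to scsi_lib.c later in this patch, returning the number of
 * mapped sg entries, zero for no data, or -ENOMEM on failure). The
 * resulting mapping pattern looks roughly like this; build_dsd() is a
 * hypothetical helper standing in for the DSD-filling loop body:
 */
static int sketch_map_command(struct scsi_qla_host *ha, struct scsi_cmnd *cmd)
{
        struct scatterlist *sg;
        int i, nseg;

        nseg = scsi_dma_map(cmd);       /* 0: no data; < 0: mapping failed */
        if (nseg < 0)
                return QLA_ERROR;
        scsi_for_each_sg(cmd, sg, nseg, i)
                build_dsd(ha, sg_dma_address(sg), sg_dma_len(sg));
        return QLA_SUCCESS;
}
/* The matching teardown is a single scsi_dma_unmap(cmd). */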
+@@ -92,7 +95,7 @@ + + if (sts_entry->iscsiFlags & + (ISCSI_FLAG_RESIDUAL_OVER|ISCSI_FLAG_RESIDUAL_UNDER)) +- cmd->resid = residual; ++ scsi_set_resid(cmd, residual); + + cmd->result = DID_OK << 16 | scsi_status; + +@@ -176,14 +179,14 @@ + * Firmware detected a SCSI transport underrun + * condition + */ +- cmd->resid = residual; ++ scsi_set_resid(cmd, residual); + DEBUG2(printk("scsi%ld:%d:%d:%d: %s: UNDERRUN status " + "detected, xferlen = 0x%x, residual = " + "0x%x\n", + ha->host_no, cmd->device->channel, + cmd->device->id, + cmd->device->lun, __func__, +- cmd->request_bufflen, ++ scsi_bufflen(cmd), + residual)); + } + +@@ -227,7 +230,7 @@ + if ((sts_entry->iscsiFlags & + ISCSI_FLAG_RESIDUAL_UNDER) == 0) { + cmd->result = DID_BUS_BUSY << 16; +- } else if ((cmd->request_bufflen - residual) < ++ } else if ((scsi_bufflen(cmd) - residual) < + cmd->underflow) { + /* + * Handle mid-layer underflow??? +@@ -248,7 +251,7 @@ + cmd->device->channel, + cmd->device->id, + cmd->device->lun, __func__, +- cmd->request_bufflen, residual)); ++ scsi_bufflen(cmd), residual)); + + cmd->result = DID_ERROR << 16; + } else { +@@ -417,6 +420,7 @@ + uint32_t mbox_status) + { + int i; ++ uint32_t mbox_stat2, mbox_stat3; + + if ((mbox_status == MBOX_STS_BUSY) || + (mbox_status == MBOX_STS_INTERMEDIATE_COMPLETION) || +@@ -437,6 +441,12 @@ + } else if (mbox_status >> 12 == MBOX_ASYNC_EVENT_STATUS) { + /* Immediately process the AENs that don't require much work. + * Only queue the database_changed AENs */ ++ if (ha->aen_log.count < MAX_AEN_ENTRIES) { ++ for (i = 0; i < MBOX_AEN_REG_COUNT; i++) ++ ha->aen_log.entry[ha->aen_log.count].mbox_sts[i] = ++ readl(&ha->reg->mailbox[i]); ++ ha->aen_log.count++; ++ } + switch (mbox_status) { + case MBOX_ASTS_SYSTEM_ERROR: + /* Log Mailbox registers */ +@@ -493,6 +503,16 @@ + mbox_status)); + break; + ++ case MBOX_ASTS_IP_ADDR_STATE_CHANGED: ++ mbox_stat2 = readl(&ha->reg->mailbox[2]); ++ mbox_stat3 = readl(&ha->reg->mailbox[3]); ++ ++ if ((mbox_stat3 == 5) && (mbox_stat2 == 3)) ++ set_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags); ++ else if ((mbox_stat3 == 2) && (mbox_stat2 == 5)) ++ set_bit(DPC_RESET_HA, &ha->dpc_flags); ++ break; ++ + case MBOX_ASTS_MAC_ADDRESS_CHANGED: + case MBOX_ASTS_DNS: + /* No action */ +@@ -518,11 +538,6 @@ + /* Queue AEN information and process it in the DPC + * routine */ + if (ha->aen_q_count > 0) { +- /* advance pointer */ +- if (ha->aen_in == (MAX_AEN_ENTRIES - 1)) +- ha->aen_in = 0; +- else +- ha->aen_in++; + + /* decrement available counter */ + ha->aen_q_count--; +@@ -542,6 +557,10 @@ + ha->aen_q[ha->aen_in].mbox_sts[2], + ha->aen_q[ha->aen_in].mbox_sts[3], + ha->aen_q[ha->aen_in]. 
mbox_sts[4])); ++ /* advance pointer */ ++ ha->aen_in++; ++ if (ha->aen_in == MAX_AEN_ENTRIES) ++ ha->aen_in = 0; + + /* The DPC routine will process the aen */ + set_bit(DPC_AEN, &ha->dpc_flags); +@@ -724,25 +743,24 @@ + + spin_lock_irqsave(&ha->hardware_lock, flags); + while (ha->aen_out != ha->aen_in) { +- /* Advance pointers for next entry */ +- if (ha->aen_out == (MAX_AEN_ENTRIES - 1)) +- ha->aen_out = 0; +- else +- ha->aen_out++; +- +- ha->aen_q_count++; + aen = &ha->aen_q[ha->aen_out]; +- + /* copy aen information to local structure */ + for (i = 0; i < MBOX_AEN_REG_COUNT; i++) + mbox_sts[i] = aen->mbox_sts[i]; + ++ ha->aen_q_count++; ++ ha->aen_out++; ++ ++ if (ha->aen_out == MAX_AEN_ENTRIES) ++ ha->aen_out = 0; ++ + spin_unlock_irqrestore(&ha->hardware_lock, flags); + +- DEBUG(printk("scsi%ld: AEN[%d] %04x, index [%d] state=%04x " +- "mod=%x conerr=%08x \n", ha->host_no, ha->aen_out, +- mbox_sts[0], mbox_sts[2], mbox_sts[3], +- mbox_sts[1], mbox_sts[4])); ++ DEBUG2(printk("qla4xxx(%ld): AEN[%d]=0x%08x, mbx1=0x%08x mbx2=0x%08x" ++ " mbx3=0x%08x mbx4=0x%08x\n", ha->host_no, ++ (ha->aen_out ? (ha->aen_out-1): (MAX_AEN_ENTRIES-1)), ++ mbox_sts[0], mbox_sts[1], mbox_sts[2], ++ mbox_sts[3], mbox_sts[4])); + + switch (mbox_sts[0]) { + case MBOX_ASTS_DATABASE_CHANGED: +@@ -792,6 +810,5 @@ + spin_lock_irqsave(&ha->hardware_lock, flags); + } + spin_unlock_irqrestore(&ha->hardware_lock, flags); +- + } + +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_mbx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,6 +6,9 @@ + */ + + #include "ql4_def.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + + /** +@@ -169,84 +172,6 @@ + return status; + } + +- +-#if 0 +- +-/** +- * qla4xxx_issue_iocb - issue mailbox iocb command +- * @ha: adapter state pointer. +- * @buffer: buffer pointer. +- * @phys_addr: physical address of buffer. +- * @size: size of buffer. +- * +- * Issues iocbs via mailbox commands. +- * TARGET_QUEUE_LOCK must be released. +- * ADAPTER_STATE_LOCK must be released. 
+- **/ +-int +-qla4xxx_issue_iocb(struct scsi_qla_host * ha, void *buffer, +- dma_addr_t phys_addr, size_t size) +-{ +- uint32_t mbox_cmd[MBOX_REG_COUNT]; +- uint32_t mbox_sts[MBOX_REG_COUNT]; +- int status; +- +- memset(&mbox_cmd, 0, sizeof(mbox_cmd)); +- memset(&mbox_sts, 0, sizeof(mbox_sts)); +- mbox_cmd[0] = MBOX_CMD_EXECUTE_IOCB_A64; +- mbox_cmd[1] = 0; +- mbox_cmd[2] = LSDW(phys_addr); +- mbox_cmd[3] = MSDW(phys_addr); +- status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]); +- return status; +-} +- +-int qla4xxx_conn_close_sess_logout(struct scsi_qla_host * ha, +- uint16_t fw_ddb_index, +- uint16_t connection_id, +- uint16_t option) +-{ +- uint32_t mbox_cmd[MBOX_REG_COUNT]; +- uint32_t mbox_sts[MBOX_REG_COUNT]; +- +- memset(&mbox_cmd, 0, sizeof(mbox_cmd)); +- memset(&mbox_sts, 0, sizeof(mbox_sts)); +- mbox_cmd[0] = MBOX_CMD_CONN_CLOSE_SESS_LOGOUT; +- mbox_cmd[1] = fw_ddb_index; +- mbox_cmd[2] = connection_id; +- mbox_cmd[3] = LOGOUT_OPTION_RELOGIN; +- if (qla4xxx_mailbox_command(ha, 4, 2, &mbox_cmd[0], &mbox_sts[0]) != +- QLA_SUCCESS) { +- DEBUG2(printk("scsi%ld: %s: MBOX_CMD_CONN_CLOSE_SESS_LOGOUT " +- "option %04x failed sts %04X %04X", +- ha->host_no, __func__, +- option, mbox_sts[0], mbox_sts[1])); +- if (mbox_sts[0] == 0x4005) +- DEBUG2(printk("%s reason %04X\n", __func__, +- mbox_sts[1])); +- } +- return QLA_SUCCESS; +-} +- +-int qla4xxx_clear_database_entry(struct scsi_qla_host * ha, +- uint16_t fw_ddb_index) +-{ +- uint32_t mbox_cmd[MBOX_REG_COUNT]; +- uint32_t mbox_sts[MBOX_REG_COUNT]; +- +- memset(&mbox_cmd, 0, sizeof(mbox_cmd)); +- memset(&mbox_sts, 0, sizeof(mbox_sts)); +- mbox_cmd[0] = MBOX_CMD_CLEAR_DATABASE_ENTRY; +- mbox_cmd[1] = fw_ddb_index; +- if (qla4xxx_mailbox_command(ha, 2, 5, &mbox_cmd[0], &mbox_sts[0]) != +- QLA_SUCCESS) +- return QLA_ERROR; +- +- return QLA_SUCCESS; +-} +- +-#endif /* 0 */ +- + /** + * qla4xxx_initialize_fw_cb - initializes firmware control block. + * @ha: Pointer to host adapter structure. +@@ -272,10 +197,13 @@ + /* Get Initialize Firmware Control Block. */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK; + mbox_cmd[2] = LSDW(init_fw_cb_dma); + mbox_cmd[3] = MSDW(init_fw_cb_dma); +- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != ++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + dma_free_coherent(&ha->pdev->dev, + sizeof(struct init_fw_ctrl_blk), +@@ -287,51 +215,56 @@ + qla4xxx_init_rings(ha); + + /* Fill in the request and response queue information. 
*/ +- init_fw_cb->ReqQConsumerIndex = cpu_to_le16(ha->request_out); +- init_fw_cb->ComplQProducerIndex = cpu_to_le16(ha->response_in); +- init_fw_cb->ReqQLen = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH); +- init_fw_cb->ComplQLen = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH); +- init_fw_cb->ReqQAddrLo = cpu_to_le32(LSDW(ha->request_dma)); +- init_fw_cb->ReqQAddrHi = cpu_to_le32(MSDW(ha->request_dma)); +- init_fw_cb->ComplQAddrLo = cpu_to_le32(LSDW(ha->response_dma)); +- init_fw_cb->ComplQAddrHi = cpu_to_le32(MSDW(ha->response_dma)); +- init_fw_cb->ShadowRegBufAddrLo = ++ init_fw_cb->pri.rqq_consumer_idx = cpu_to_le16(ha->request_out); ++ init_fw_cb->pri.compq_producer_idx = cpu_to_le16(ha->response_in); ++ init_fw_cb->pri.rqq_len = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH); ++ init_fw_cb->pri.compq_len = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH); ++ init_fw_cb->pri.rqq_addr_lo = cpu_to_le32(LSDW(ha->request_dma)); ++ init_fw_cb->pri.rqq_addr_hi = cpu_to_le32(MSDW(ha->request_dma)); ++ init_fw_cb->pri.compq_addr_lo = cpu_to_le32(LSDW(ha->response_dma)); ++ init_fw_cb->pri.compq_addr_hi = cpu_to_le32(MSDW(ha->response_dma)); ++ init_fw_cb->pri.shdwreg_addr_lo = + cpu_to_le32(LSDW(ha->shadow_regs_dma)); +- init_fw_cb->ShadowRegBufAddrHi = ++ init_fw_cb->pri.shdwreg_addr_hi = + cpu_to_le32(MSDW(ha->shadow_regs_dma)); + + /* Set up required options. */ +- init_fw_cb->FwOptions |= ++ init_fw_cb->pri.fw_options |= + __constant_cpu_to_le16(FWOPT_SESSION_MODE | + FWOPT_INITIATOR_MODE); +- init_fw_cb->FwOptions &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE); ++ init_fw_cb->pri.fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE); + + /* Save some info in adapter structure. */ +- ha->firmware_options = le16_to_cpu(init_fw_cb->FwOptions); +- ha->tcp_options = le16_to_cpu(init_fw_cb->TCPOptions); +- ha->heartbeat_interval = init_fw_cb->HeartbeatInterval; +- memcpy(ha->ip_address, init_fw_cb->IPAddr, +- min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr))); +- memcpy(ha->subnet_mask, init_fw_cb->SubnetMask, +- min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask))); +- memcpy(ha->gateway, init_fw_cb->GatewayIPAddr, +- min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr))); +- memcpy(ha->name_string, init_fw_cb->iSCSINameString, ++ ha->firmware_options = le16_to_cpu(init_fw_cb->pri.fw_options); ++ ha->tcp_options = le16_to_cpu(init_fw_cb->pri.ipv4_tcp_opts); ++ ha->heartbeat_interval = init_fw_cb->pri.hb_interval; ++ memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr, ++ min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr))); ++ memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet, ++ min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet))); ++ memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr, ++ min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr))); ++ memcpy(ha->name_string, init_fw_cb->pri.iscsi_name, + min(sizeof(ha->name_string), +- sizeof(init_fw_cb->iSCSINameString))); +- memcpy(ha->alias, init_fw_cb->Alias, +- min(sizeof(ha->alias), sizeof(init_fw_cb->Alias))); ++ sizeof(init_fw_cb->pri.iscsi_name))); ++ /*memcpy(ha->alias, init_fw_cb->Alias, ++ min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/ + + /* Save Command Line Paramater info */ +- ha->port_down_retry_count = le16_to_cpu(init_fw_cb->KeepAliveTimeout); ++ ha->port_down_retry_count = le16_to_cpu(init_fw_cb->pri.conn_ka_timeout); + ha->discovery_wait = ql4xdiscoverywait; + + /* Send Initialize Firmware Control Block. 
*/ ++ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); ++ memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE; + mbox_cmd[1] = 0; + mbox_cmd[2] = LSDW(init_fw_cb_dma); + mbox_cmd[3] = MSDW(init_fw_cb_dma); +- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) == ++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) == + QLA_SUCCESS) + status = QLA_SUCCESS; + else { +@@ -368,12 +301,14 @@ + /* Get Initialize Firmware Control Block. */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + memset(init_fw_cb, 0, sizeof(struct init_fw_ctrl_blk)); + mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK; + mbox_cmd[2] = LSDW(init_fw_cb_dma); + mbox_cmd[3] = MSDW(init_fw_cb_dma); ++ mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk); + +- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: Failed to get init_fw_ctrl_blk\n", + ha->host_no, __func__)); +@@ -384,12 +319,12 @@ + } + + /* Save IP Address. */ +- memcpy(ha->ip_address, init_fw_cb->IPAddr, +- min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr))); +- memcpy(ha->subnet_mask, init_fw_cb->SubnetMask, +- min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask))); +- memcpy(ha->gateway, init_fw_cb->GatewayIPAddr, +- min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr))); ++ memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr, ++ min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr))); ++ memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet, ++ min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet))); ++ memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr, ++ min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr))); + + dma_free_coherent(&ha->pdev->dev, sizeof(struct init_fw_ctrl_blk), + init_fw_cb, init_fw_cb_dma); +@@ -409,8 +344,10 @@ + /* Get firmware version */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_GET_FW_STATE; +- if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 4, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATE failed w/ " + "status %04X\n", ha->host_no, __func__, +@@ -438,8 +375,10 @@ + /* Get firmware version */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_GET_FW_STATUS; +- if (qla4xxx_mailbox_command(ha, 1, 3, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATUS failed w/ " + "status %04X\n", ha->host_no, __func__, +@@ -491,11 +430,14 @@ + } + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY; + mbox_cmd[1] = (uint32_t) fw_ddb_index; + mbox_cmd[2] = LSDW(fw_ddb_entry_dma); + mbox_cmd[3] = MSDW(fw_ddb_entry_dma); +- if (qla4xxx_mailbox_command(ha, 4, 7, &mbox_cmd[0], &mbox_sts[0]) == ++ mbox_cmd[4] = sizeof(struct dev_db_entry); ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 7, &mbox_cmd[0], &mbox_sts[0]) == + QLA_ERROR) { + DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_DATABASE_ENTRY failed" + " with status 0x%04X\n", ha->host_no, __func__, 
+@@ -512,11 +454,11 @@ + dev_info(&ha->pdev->dev, "DDB[%d] MB0 %04x Tot %d Next %d " + "State %04x ConnErr %08x %d.%d.%d.%d:%04d \"%s\"\n", + fw_ddb_index, mbox_sts[0], mbox_sts[2], mbox_sts[3], +- mbox_sts[4], mbox_sts[5], fw_ddb_entry->ipAddr[0], +- fw_ddb_entry->ipAddr[1], fw_ddb_entry->ipAddr[2], +- fw_ddb_entry->ipAddr[3], +- le16_to_cpu(fw_ddb_entry->portNumber), +- fw_ddb_entry->iscsiName); ++ mbox_sts[4], mbox_sts[5], fw_ddb_entry->ip_addr[0], ++ fw_ddb_entry->ip_addr[1], fw_ddb_entry->ip_addr[2], ++ fw_ddb_entry->ip_addr[3], ++ le16_to_cpu(fw_ddb_entry->port), ++ fw_ddb_entry->iscsi_name); + } + if (num_valid_ddb_entries) + *num_valid_ddb_entries = mbox_sts[2]; +@@ -571,35 +513,10 @@ + mbox_cmd[1] = (uint32_t) fw_ddb_index; + mbox_cmd[2] = LSDW(fw_ddb_entry_dma); + mbox_cmd[3] = MSDW(fw_ddb_entry_dma); +- return qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]); +-} +- +-#if 0 +-int qla4xxx_conn_open_session_login(struct scsi_qla_host * ha, +- uint16_t fw_ddb_index) +-{ +- int status = QLA_ERROR; +- uint32_t mbox_cmd[MBOX_REG_COUNT]; +- uint32_t mbox_sts[MBOX_REG_COUNT]; +- +- /* Do not wait for completion. The firmware will send us an +- * ASTS_DATABASE_CHANGED (0x8014) to notify us of the login status. +- */ +- memset(&mbox_cmd, 0, sizeof(mbox_cmd)); +- memset(&mbox_sts, 0, sizeof(mbox_sts)); +- mbox_cmd[0] = MBOX_CMD_CONN_OPEN_SESS_LOGIN; +- mbox_cmd[1] = (uint32_t) fw_ddb_index; +- mbox_cmd[2] = 0; +- mbox_cmd[3] = 0; +- mbox_cmd[4] = 0; +- status = qla4xxx_mailbox_command(ha, 4, 0, &mbox_cmd[0], &mbox_sts[0]); +- DEBUG2(printk("%s fw_ddb_index=%d status=%d mbx0_1=0x%x :0x%x\n", +- __func__, fw_ddb_index, status, mbox_sts[0], +- mbox_sts[1]);) ++ mbox_cmd[4] = sizeof(struct dev_db_entry); + +- return status; ++ return qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]); + } +-#endif /* 0 */ + + /** + * qla4xxx_get_crash_record - retrieves crash record. +@@ -614,12 +531,14 @@ + struct crash_record *crash_record = NULL; + dma_addr_t crash_record_dma = 0; + uint32_t crash_record_size = 0; ++ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_cmd)); + + /* Get size of crash record. */ + mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD; +- if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve size!\n", + ha->host_no, __func__)); +@@ -639,11 +558,15 @@ + goto exit_get_crash_record; + + /* Get Crash Record. */ ++ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); ++ memset(&mbox_sts, 0, sizeof(mbox_cmd)); ++ + mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD; + mbox_cmd[2] = LSDW(crash_record_dma); + mbox_cmd[3] = MSDW(crash_record_dma); + mbox_cmd[4] = crash_record_size; +- if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) + goto exit_get_crash_record; + +@@ -655,7 +578,6 @@ + crash_record, crash_record_dma); + } + +-#if 0 + /** + * qla4xxx_get_conn_event_log - retrieves connection event log + * @ha: Pointer to host adapter structure. +@@ -678,7 +600,8 @@ + + /* Get size of crash record. 
*/ + mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG; +- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) + goto exit_get_event_log; + +@@ -693,10 +616,14 @@ + goto exit_get_event_log; + + /* Get Crash Record. */ ++ memset(&mbox_cmd, 0, sizeof(mbox_cmd)); ++ memset(&mbox_sts, 0, sizeof(mbox_cmd)); ++ + mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG; + mbox_cmd[2] = LSDW(event_log_dma); + mbox_cmd[3] = MSDW(event_log_dma); +- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve event " + "log!\n", ha->host_no, __func__)); +@@ -745,7 +672,6 @@ + dma_free_coherent(&ha->pdev->dev, event_log_size, event_log, + event_log_dma); + } +-#endif /* 0 */ + + /** + * qla4xxx_reset_lun - issues LUN Reset +@@ -773,11 +699,13 @@ + */ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_LUN_RESET; + mbox_cmd[1] = ddb_entry->fw_ddb_index; + mbox_cmd[2] = lun << 8; + mbox_cmd[5] = 0x01; /* Immediate Command Enable */ +- qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]); ++ ++ qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]); + if (mbox_sts[0] != MBOX_STS_COMMAND_COMPLETE && + mbox_sts[0] != MBOX_STS_COMMAND_ERROR) + status = QLA_ERROR; +@@ -794,12 +722,14 @@ + + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_READ_FLASH; + mbox_cmd[1] = LSDW(dma_addr); + mbox_cmd[2] = MSDW(dma_addr); + mbox_cmd[3] = offset; + mbox_cmd[4] = len; +- if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 2, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: MBOX_CMD_READ_FLASH, failed w/ " + "status %04X %04X, offset %08x, len %08x\n", ha->host_no, +@@ -825,8 +755,10 @@ + /* Get firmware version. 
*/ + memset(&mbox_cmd, 0, sizeof(mbox_cmd)); + memset(&mbox_sts, 0, sizeof(mbox_sts)); ++ + mbox_cmd[0] = MBOX_CMD_ABOUT_FW; +- if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) != ++ ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: MBOX_CMD_ABOUT_FW failed w/ " + "status %04X\n", ha->host_no, __func__, mbox_sts[0])); +@@ -855,7 +787,7 @@ + mbox_cmd[2] = LSDW(dma_addr); + mbox_cmd[3] = MSDW(dma_addr); + +- if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) != ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + DEBUG2(printk("scsi%ld: %s: failed status %04X\n", + ha->host_no, __func__, mbox_sts[0])); +@@ -875,7 +807,7 @@ + mbox_cmd[0] = MBOX_CMD_REQUEST_DATABASE_ENTRY; + mbox_cmd[1] = MAX_PRST_DEV_DB_ENTRIES; + +- if (qla4xxx_mailbox_command(ha, 2, 3, &mbox_cmd[0], &mbox_sts[0]) != ++ if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) != + QLA_SUCCESS) { + if (mbox_sts[0] == MBOX_STS_COMMAND_ERROR) { + *ddb_index = mbox_sts[2]; +@@ -918,23 +850,23 @@ + if (ret_val != QLA_SUCCESS) + goto qla4xxx_send_tgts_exit; + +- memset((void *)fw_ddb_entry->iSCSIAlias, 0, +- sizeof(fw_ddb_entry->iSCSIAlias)); ++ memset(fw_ddb_entry->iscsi_alias, 0, ++ sizeof(fw_ddb_entry->iscsi_alias)); + +- memset((void *)fw_ddb_entry->iscsiName, 0, +- sizeof(fw_ddb_entry->iscsiName)); ++ memset(fw_ddb_entry->iscsi_name, 0, ++ sizeof(fw_ddb_entry->iscsi_name)); + +- memset((void *)fw_ddb_entry->ipAddr, 0, sizeof(fw_ddb_entry->ipAddr)); +- memset((void *)fw_ddb_entry->targetAddr, 0, +- sizeof(fw_ddb_entry->targetAddr)); ++ memset(fw_ddb_entry->ip_addr, 0, sizeof(fw_ddb_entry->ip_addr)); ++ memset(fw_ddb_entry->tgt_addr, 0, ++ sizeof(fw_ddb_entry->tgt_addr)); + + fw_ddb_entry->options = (DDB_OPT_DISC_SESSION | DDB_OPT_TARGET); +- fw_ddb_entry->portNumber = cpu_to_le16(ntohs(port)); ++ fw_ddb_entry->port = cpu_to_le16(ntohs(port)); + +- fw_ddb_entry->ipAddr[0] = *ip; +- fw_ddb_entry->ipAddr[1] = *(ip + 1); +- fw_ddb_entry->ipAddr[2] = *(ip + 2); +- fw_ddb_entry->ipAddr[3] = *(ip + 3); ++ fw_ddb_entry->ip_addr[0] = *ip; ++ fw_ddb_entry->ip_addr[1] = *(ip + 1); ++ fw_ddb_entry->ip_addr[2] = *(ip + 2); ++ fw_ddb_entry->ip_addr[3] = *(ip + 3); + + ret_val = qla4xxx_set_ddb_entry(ha, ddb_index, fw_ddb_entry_dma); + +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_nvram.c 2007-12-21 15:36:12.000000000 -0500 +@@ -6,6 +6,9 @@ + */ + + #include "ql4_def.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + static inline void eeprom_cmd(uint32_t cmd, struct scsi_qla_host *ha) + { +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_os.c 2007-12-21 15:36:12.000000000 -0500 +@@ -10,6 +10,10 @@ + #include + + #include "ql4_def.h" ++#include "ql4_version.h" ++#include "ql4_glbl.h" ++#include "ql4_dbg.h" ++#include "ql4_inline.h" + + /* + * Driver version +@@ -50,12 +54,15 @@ + /* + * iSCSI template entry points + */ +-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no, +- uint32_t enable, struct sockaddr *dst_addr); 
++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost, ++ enum iscsi_tgt_dscvr type, uint32_t enable, ++ struct sockaddr *dst_addr); + static int qla4xxx_conn_get_param(struct iscsi_cls_conn *conn, + enum iscsi_param param, char *buf); + static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, + enum iscsi_param param, char *buf); ++static int qla4xxx_host_get_param(struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf); + static void qla4xxx_conn_stop(struct iscsi_cls_conn *conn, int flag); + static int qla4xxx_conn_start(struct iscsi_cls_conn *conn); + static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session); +@@ -95,16 +102,20 @@ + static struct iscsi_transport qla4xxx_iscsi_transport = { + .owner = THIS_MODULE, + .name = DRIVER_NAME, +- .param_mask = ISCSI_CONN_PORT | +- ISCSI_CONN_ADDRESS | +- ISCSI_TARGET_NAME | +- ISCSI_TPGT, ++ .caps = CAP_FW_DB | CAP_SENDTARGETS_OFFLOAD | ++ CAP_DATA_PATH_OFFLOAD, ++ .param_mask = ISCSI_CONN_PORT | ISCSI_CONN_ADDRESS | ++ ISCSI_TARGET_NAME | ISCSI_TPGT, ++ .host_param_mask = ISCSI_HOST_HWADDRESS | ++ ISCSI_HOST_IPADDRESS | ++ ISCSI_HOST_INITIATOR_NAME, + .sessiondata_size = sizeof(struct ddb_entry), + .host_template = &qla4xxx_driver_template, + + .tgt_dscvr = qla4xxx_tgt_dscvr, + .get_conn_param = qla4xxx_conn_get_param, + .get_session_param = qla4xxx_sess_get_param, ++ .get_host_param = qla4xxx_host_get_param, + .start_conn = qla4xxx_conn_start, + .stop_conn = qla4xxx_conn_stop, + .session_recovery_timedout = qla4xxx_recovery_timedout, +@@ -161,6 +172,43 @@ + printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag); + } + ++static ssize_t format_addr(char *buf, const unsigned char *addr, int len) ++{ ++ int i; ++ char *cp = buf; ++ ++ for (i = 0; i < len; i++) ++ cp += sprintf(cp, "%02x%c", addr[i], ++ i == (len - 1) ? 
'\n' : ':'); ++ return cp - buf; ++} ++ ++ ++static int qla4xxx_host_get_param(struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf) ++{ ++ struct scsi_qla_host *ha = to_qla_host(shost); ++ int len; ++ ++ switch (param) { ++ case ISCSI_HOST_PARAM_HWADDRESS: ++ len = format_addr(buf, ha->my_mac, MAC_ADDR_LEN); ++ break; ++ case ISCSI_HOST_PARAM_IPADDRESS: ++ len = sprintf(buf, "%d.%d.%d.%d\n", ha->ip_address[0], ++ ha->ip_address[1], ha->ip_address[2], ++ ha->ip_address[3]); ++ break; ++ case ISCSI_HOST_PARAM_INITIATOR_NAME: ++ len = sprintf(buf, "%s\n", ha->name_string); ++ break; ++ default: ++ return -ENOSYS; ++ } ++ ++ return len; ++} ++ + static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess, + enum iscsi_param param, char *buf) + { +@@ -208,21 +256,15 @@ + return len; + } + +-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no, +- uint32_t enable, struct sockaddr *dst_addr) ++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost, ++ enum iscsi_tgt_dscvr type, uint32_t enable, ++ struct sockaddr *dst_addr) + { + struct scsi_qla_host *ha; +- struct Scsi_Host *shost; + struct sockaddr_in *addr; + struct sockaddr_in6 *addr6; + int ret = 0; + +- shost = scsi_host_lookup(host_no); +- if (IS_ERR(shost)) { +- printk(KERN_ERR "Could not find host no %u\n", host_no); +- return -ENODEV; +- } +- + ha = (struct scsi_qla_host *) shost->hostdata; + + switch (type) { +@@ -246,8 +288,6 @@ + default: + ret = -ENOSYS; + } +- +- scsi_host_put(shost); + return ret; + } + +@@ -369,14 +409,7 @@ + struct scsi_cmnd *cmd = srb->cmd; + + if (srb->flags & SRB_DMA_VALID) { +- if (cmd->use_sg) { +- pci_unmap_sg(ha->pdev, cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- } else if (cmd->request_bufflen) { +- pci_unmap_single(ha->pdev, srb->dma_handle, +- cmd->request_bufflen, +- cmd->sc_data_direction); +- } ++ scsi_dma_unmap(cmd); + srb->flags &= ~SRB_DMA_VALID; + } + cmd->SCp.ptr = NULL; +@@ -711,7 +744,7 @@ + return stat; + } + +-static void qla4xxx_hw_reset(struct scsi_qla_host *ha) ++void qla4xxx_hw_reset(struct scsi_qla_host *ha) + { + uint32_t ctrl_status; + unsigned long flags = 0; +@@ -1081,13 +1114,13 @@ + if (ha->timer_active) + qla4xxx_stop_timer(ha); + +- /* free extra memory */ +- qla4xxx_mem_free(ha); +- + /* Detach interrupts */ + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) + free_irq(ha->pdev->irq, ha); + ++ /* free extra memory */ ++ qla4xxx_mem_free(ha); ++ + pci_disable_device(ha->pdev); + + } +@@ -1332,6 +1365,11 @@ + + ha = pci_get_drvdata(pdev); + ++ qla4xxx_disable_intrs(ha); ++ ++ while (test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags)) ++ ssleep(1); ++ + /* remove devs from iscsi_sessions to scsi_devices */ + qla4xxx_free_ddb_list(ha); + +diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h +--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qla4xxx/ql4_version.h 2007-12-21 15:36:12.000000000 -0500 +@@ -5,4 +5,5 @@ + * See LICENSE.qla4xxx for copyright and licensing details. 
+ */ + +-#define QLA4XXX_DRIVER_VERSION "5.00.07-k1" ++#define QLA4XXX_DRIVER_VERSION "5.01.00-k7" ++ +diff -Nurb linux-2.6.22-570/drivers/scsi/qlogicfas408.c linux-2.6.22-591/drivers/scsi/qlogicfas408.c +--- linux-2.6.22-570/drivers/scsi/qlogicfas408.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/qlogicfas408.c 2007-12-21 15:36:12.000000000 -0500 +@@ -265,8 +265,6 @@ + unsigned int message; /* scsi returned message */ + unsigned int phase; /* recorded scsi phase */ + unsigned int reqlen; /* total length of transfer */ +- struct scatterlist *sglist; /* scatter-gather list pointer */ +- unsigned int sgcount; /* sg counter */ + char *buf; + struct qlogicfas408_priv *priv = get_priv_by_cmd(cmd); + int qbase = priv->qbase; +@@ -301,9 +299,10 @@ + if (inb(qbase + 7) & 0x1f) /* if some bytes in fifo */ + outb(1, qbase + 3); /* clear fifo */ + /* note that request_bufflen is the total xfer size when sg is used */ +- reqlen = cmd->request_bufflen; ++ reqlen = scsi_bufflen(cmd); + /* note that it won't work if transfers > 16M are requested */ + if (reqlen && !((phase = inb(qbase + 4)) & 6)) { /* data phase */ ++ struct scatterlist *sg; + rtrc(2) + outb(reqlen, qbase); /* low-mid xfer cnt */ + outb(reqlen >> 8, qbase + 1); /* low-mid xfer cnt */ +@@ -311,23 +310,16 @@ + outb(0x90, qbase + 3); /* command do xfer */ + /* PIO pseudo DMA to buffer or sglist */ + REG1; +- if (!cmd->use_sg) +- ql_pdma(priv, phase, cmd->request_buffer, +- cmd->request_bufflen); +- else { +- sgcount = cmd->use_sg; +- sglist = cmd->request_buffer; +- while (sgcount--) { ++ ++ scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) { + if (priv->qabort) { + REG0; + return ((priv->qabort == 1 ? + DID_ABORT : DID_RESET) << 16); + } +- buf = page_address(sglist->page) + sglist->offset; +- if (ql_pdma(priv, phase, buf, sglist->length)) ++ buf = page_address(sg->page) + sg->offset; ++ if (ql_pdma(priv, phase, buf, sg->length)) + break; +- sglist++; +- } + } + REG0; + rtrc(2) +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_debug.c linux-2.6.22-591/drivers/scsi/scsi_debug.c +--- linux-2.6.22-570/drivers/scsi/scsi_debug.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_debug.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2405,7 +2405,7 @@ + MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)"); + MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)"); + MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)"); +-MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)"); ++MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)"); + MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)"); + MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)"); + MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)"); +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_error.c linux-2.6.22-591/drivers/scsi/scsi_error.c +--- linux-2.6.22-570/drivers/scsi/scsi_error.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_error.c 2007-12-21 15:36:12.000000000 -0500 +@@ -18,12 +18,13 @@ + #include + #include + #include +-#include + #include ++#include + #include + #include + #include + #include ++#include + + #include + #include +@@ -640,16 +641,8 @@ + memcpy(scmd->cmnd, cmnd, cmnd_size); + + if (copy_sense) { +- gfp_t gfp_mask = GFP_ATOMIC; +- +- if (shost->hostt->unchecked_isa_dma) +- gfp_mask |= __GFP_DMA; +- +- sgl.page = alloc_page(gfp_mask); +- if (!sgl.page) +- return FAILED; +- 
sgl.offset = 0; +- sgl.length = 252; ++ sg_init_one(&sgl, scmd->sense_buffer, ++ sizeof(scmd->sense_buffer)); + + scmd->sc_data_direction = DMA_FROM_DEVICE; + scmd->request_bufflen = sgl.length; +@@ -720,18 +713,6 @@ + + + /* +- * Last chance to have valid sense data. +- */ +- if (copy_sense) { +- if (!SCSI_SENSE_VALID(scmd)) { +- memcpy(scmd->sense_buffer, page_address(sgl.page), +- sizeof(scmd->sense_buffer)); +- } +- __free_page(sgl.page); +- } +- +- +- /* + * Restore original data + */ + scmd->request_buffer = old_buffer; +@@ -1536,8 +1517,6 @@ + { + struct Scsi_Host *shost = data; + +- current->flags |= PF_NOFREEZE; +- + /* + * We use TASK_INTERRUPTIBLE so that the thread is not + * counted against the load average as a running process. +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_lib.c linux-2.6.22-591/drivers/scsi/scsi_lib.c +--- linux-2.6.22-570/drivers/scsi/scsi_lib.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_lib.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2290,3 +2290,41 @@ + kunmap_atomic(virt, KM_BIO_SRC_IRQ); + } + EXPORT_SYMBOL(scsi_kunmap_atomic_sg); ++ ++/** ++ * scsi_dma_map - perform DMA mapping against command's sg lists ++ * @cmd: scsi command ++ * ++ * Returns the number of sg lists actually used, zero if the sg lists ++ * is NULL, or -ENOMEM if the mapping failed. ++ */ ++int scsi_dma_map(struct scsi_cmnd *cmd) ++{ ++ int nseg = 0; ++ ++ if (scsi_sg_count(cmd)) { ++ struct device *dev = cmd->device->host->shost_gendev.parent; ++ ++ nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), ++ cmd->sc_data_direction); ++ if (unlikely(!nseg)) ++ return -ENOMEM; ++ } ++ return nseg; ++} ++EXPORT_SYMBOL(scsi_dma_map); ++ ++/** ++ * scsi_dma_unmap - unmap command's sg lists mapped by scsi_dma_map ++ * @cmd: scsi command ++ */ ++void scsi_dma_unmap(struct scsi_cmnd *cmd) ++{ ++ if (scsi_sg_count(cmd)) { ++ struct device *dev = cmd->device->host->shost_gendev.parent; ++ ++ dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd), ++ cmd->sc_data_direction); ++ } ++} ++EXPORT_SYMBOL(scsi_dma_unmap); +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_netlink.c linux-2.6.22-591/drivers/scsi/scsi_netlink.c +--- linux-2.6.22-570/drivers/scsi/scsi_netlink.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_netlink.c 2007-12-21 15:36:14.000000000 -0500 +@@ -167,7 +167,7 @@ + return; + } + +- scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT, ++ scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT, + SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL, + THIS_MODULE); + if (!scsi_nl_sock) { +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_scan.c linux-2.6.22-591/drivers/scsi/scsi_scan.c +--- linux-2.6.22-570/drivers/scsi/scsi_scan.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_scan.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1213,7 +1213,7 @@ + * Given a struct scsi_lun of: 0a 04 0b 03 00 00 00 00, this function returns + * the integer: 0x0b030a04 + **/ +-static int scsilun_to_int(struct scsi_lun *scsilun) ++int scsilun_to_int(struct scsi_lun *scsilun) + { + int i; + unsigned int lun; +@@ -1224,6 +1224,7 @@ + scsilun->scsi_lun[i + 1]) << (i * 8)); + return lun; + } ++EXPORT_SYMBOL(scsilun_to_int); + + /** + * int_to_scsilun: reverts an int into a scsi_lun +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_sysfs.c linux-2.6.22-591/drivers/scsi/scsi_sysfs.c +--- linux-2.6.22-570/drivers/scsi/scsi_sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_sysfs.c 
2007-12-21 15:36:12.000000000 -0500 +@@ -293,30 +293,18 @@ + { + struct device_driver *drv = dev->driver; + struct scsi_device *sdev = to_scsi_device(dev); +- struct scsi_host_template *sht = sdev->host->hostt; + int err; + + err = scsi_device_quiesce(sdev); + if (err) + return err; + +- /* call HLD suspend first */ + if (drv && drv->suspend) { + err = drv->suspend(dev, state); + if (err) + return err; + } + +- /* then, call host suspend */ +- if (sht->suspend) { +- err = sht->suspend(sdev, state); +- if (err) { +- if (drv && drv->resume) +- drv->resume(dev); +- return err; +- } +- } +- + return 0; + } + +@@ -324,21 +312,14 @@ + { + struct device_driver *drv = dev->driver; + struct scsi_device *sdev = to_scsi_device(dev); +- struct scsi_host_template *sht = sdev->host->hostt; +- int err = 0, err2 = 0; ++ int err = 0; + +- /* call host resume first */ +- if (sht->resume) +- err = sht->resume(sdev); +- +- /* then, call HLD resume */ + if (drv && drv->resume) +- err2 = drv->resume(dev); ++ err = drv->resume(dev); + + scsi_device_resume(sdev); + +- /* favor LLD failure */ +- return err ? err : err2;; ++ return err; + } + + struct bus_type scsi_bus_type = { +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c +--- linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_transport_fc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -19,9 +19,10 @@ + * + * ======== + * +- * Copyright (C) 2004-2005 James Smart, Emulex Corporation ++ * Copyright (C) 2004-2007 James Smart, Emulex Corporation + * Rewrite for host, target, device, and remote port attributes, + * statistics, and service functions... ++ * Add vports, etc + * + */ + #include +@@ -37,6 +38,34 @@ + #include "scsi_priv.h" + + static int fc_queue_work(struct Scsi_Host *, struct work_struct *); ++static void fc_vport_sched_delete(struct work_struct *work); ++ ++/* ++ * This is a temporary carrier for creating a vport. It will eventually ++ * be replaced by a real message definition for sgio or netlink. ++ * ++ * fc_vport_identifiers: This set of data contains all elements ++ * to uniquely identify and instantiate a FC virtual port. ++ * ++ * Notes: ++ * symbolic_name: The driver is to append the symbolic_name string data ++ * to the symbolic_node_name data that it generates by default. ++ * the resulting combination should then be registered with the switch. ++ * It is expected that things like Xen may stuff a VM title into ++ * this field. 
++ */ ++struct fc_vport_identifiers { ++ u64 node_name; ++ u64 port_name; ++ u32 roles; ++ bool disable; ++ enum fc_port_type vport_type; /* only FC_PORTTYPE_NPIV allowed */ ++ char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN]; ++}; ++ ++static int fc_vport_create(struct Scsi_Host *shost, int channel, ++ struct device *pdev, struct fc_vport_identifiers *ids, ++ struct fc_vport **vport); + + /* + * Redefine so that we can have same named attributes in the +@@ -90,10 +119,14 @@ + { FC_PORTTYPE_NLPORT, "NLPort (fabric via loop)" }, + { FC_PORTTYPE_LPORT, "LPort (private loop)" }, + { FC_PORTTYPE_PTP, "Point-To-Point (direct nport connection" }, ++ { FC_PORTTYPE_NPIV, "NPIV VPORT" }, + }; + fc_enum_name_search(port_type, fc_port_type, fc_port_type_names) + #define FC_PORTTYPE_MAX_NAMELEN 50 + ++/* Reuse fc_port_type enum function for vport_type */ ++#define get_fc_vport_type_name get_fc_port_type_name ++ + + /* Convert fc_host_event_code values to ascii string name */ + static const struct { +@@ -139,6 +172,29 @@ + #define FC_PORTSTATE_MAX_NAMELEN 20 + + ++/* Convert fc_vport_state values to ascii string name */ ++static struct { ++ enum fc_vport_state value; ++ char *name; ++} fc_vport_state_names[] = { ++ { FC_VPORT_UNKNOWN, "Unknown" }, ++ { FC_VPORT_ACTIVE, "Active" }, ++ { FC_VPORT_DISABLED, "Disabled" }, ++ { FC_VPORT_LINKDOWN, "Linkdown" }, ++ { FC_VPORT_INITIALIZING, "Initializing" }, ++ { FC_VPORT_NO_FABRIC_SUPP, "No Fabric Support" }, ++ { FC_VPORT_NO_FABRIC_RSCS, "No Fabric Resources" }, ++ { FC_VPORT_FABRIC_LOGOUT, "Fabric Logout" }, ++ { FC_VPORT_FABRIC_REJ_WWN, "Fabric Rejected WWN" }, ++ { FC_VPORT_FAILED, "VPort Failed" }, ++}; ++fc_enum_name_search(vport_state, fc_vport_state, fc_vport_state_names) ++#define FC_VPORTSTATE_MAX_NAMELEN 24 ++ ++/* Reuse fc_vport_state enum function for vport_last_state */ ++#define get_fc_vport_last_state_name get_fc_vport_state_name ++ ++ + /* Convert fc_tgtid_binding_type values to ascii string name */ + static const struct { + enum fc_tgtid_binding_type value; +@@ -219,16 +275,16 @@ + } + + +-/* Convert FC_RPORT_ROLE bit values to ascii string name */ ++/* Convert FC_PORT_ROLE bit values to ascii string name */ + static const struct { + u32 value; + char *name; +-} fc_remote_port_role_names[] = { +- { FC_RPORT_ROLE_FCP_TARGET, "FCP Target" }, +- { FC_RPORT_ROLE_FCP_INITIATOR, "FCP Initiator" }, +- { FC_RPORT_ROLE_IP_PORT, "IP Port" }, ++} fc_port_role_names[] = { ++ { FC_PORT_ROLE_FCP_TARGET, "FCP Target" }, ++ { FC_PORT_ROLE_FCP_INITIATOR, "FCP Initiator" }, ++ { FC_PORT_ROLE_IP_PORT, "IP Port" }, + }; +-fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names) ++fc_bitfield_name_search(port_roles, fc_port_role_names) + + /* + * Define roles that are specific to port_id. Values are relative to ROLE_MASK. 
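The rename above, from FC_RPORT_ROLE_* to FC_PORT_ROLE_*, lets remote ports and the new virtual ports share one set of role names. For orientation, here is a minimal stand-alone sketch of the helper that fc_bitfield_name_search(port_roles, fc_port_role_names) generates; the bit values are assumptions made for the example, and the behavior mirrors the generated get_fc_port_roles_names():

/*
 * Illustrative sketch only, not part of the patch: a user-space
 * approximation of the generated bitfield-to-names helper.
 * The role bit values below are assumed for the example.
 */
#include <stdio.h>

#define FC_PORT_ROLE_FCP_TARGET		0x01
#define FC_PORT_ROLE_FCP_INITIATOR	0x02
#define FC_PORT_ROLE_IP_PORT		0x04

static const struct {
	unsigned int value;
	const char *name;
} port_role_names[] = {
	{ FC_PORT_ROLE_FCP_TARGET,	"FCP Target" },
	{ FC_PORT_ROLE_FCP_INITIATOR,	"FCP Initiator" },
	{ FC_PORT_ROLE_IP_PORT,		"IP Port" },
};

/* Emit a comma-separated name for each role bit set in @roles. */
static int get_port_roles_names(unsigned int roles, char *buf)
{
	const char *prefix = "";
	unsigned int i;
	int len = 0;

	for (i = 0; i < sizeof(port_role_names)/sizeof(port_role_names[0]); i++) {
		if (roles & port_role_names[i].value) {
			len += sprintf(buf + len, "%s%s", prefix,
				       port_role_names[i].name);
			prefix = ", ";
		}
	}
	len += sprintf(buf + len, "\n");
	return len;
}

int main(void)
{
	char buf[64];

	get_port_roles_names(FC_PORT_ROLE_FCP_TARGET |
			     FC_PORT_ROLE_FCP_INITIATOR, buf);
	fputs(buf, stdout);	/* prints: FCP Target, FCP Initiator */
	return 0;
}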
+@@ -252,7 +308,8 @@ + */ + #define FC_STARGET_NUM_ATTRS 3 + #define FC_RPORT_NUM_ATTRS 10 +-#define FC_HOST_NUM_ATTRS 17 ++#define FC_VPORT_NUM_ATTRS 9 ++#define FC_HOST_NUM_ATTRS 21 + + struct fc_internal { + struct scsi_transport_template t; +@@ -278,6 +335,10 @@ + struct transport_container rport_attr_cont; + struct class_device_attribute private_rport_attrs[FC_RPORT_NUM_ATTRS]; + struct class_device_attribute *rport_attrs[FC_RPORT_NUM_ATTRS + 1]; ++ ++ struct transport_container vport_attr_cont; ++ struct class_device_attribute private_vport_attrs[FC_VPORT_NUM_ATTRS]; ++ struct class_device_attribute *vport_attrs[FC_VPORT_NUM_ATTRS + 1]; + }; + + #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t) +@@ -331,6 +392,7 @@ + sizeof(fc_host->supported_fc4s)); + fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN; + fc_host->maxframe_size = -1; ++ fc_host->max_npiv_vports = 0; + memset(fc_host->serial_number, 0, + sizeof(fc_host->serial_number)); + +@@ -348,8 +410,11 @@ + + INIT_LIST_HEAD(&fc_host->rports); + INIT_LIST_HEAD(&fc_host->rport_bindings); ++ INIT_LIST_HEAD(&fc_host->vports); + fc_host->next_rport_number = 0; + fc_host->next_target_id = 0; ++ fc_host->next_vport_number = 0; ++ fc_host->npiv_vports_inuse = 0; + + snprintf(fc_host->work_q_name, KOBJ_NAME_LEN, "fc_wq_%d", + shost->host_no); +@@ -388,6 +453,16 @@ + NULL); + + /* ++ * Setup and Remove actions for virtual ports are handled ++ * in the service functions below. ++ */ ++static DECLARE_TRANSPORT_CLASS(fc_vport_class, ++ "fc_vports", ++ NULL, ++ NULL, ++ NULL); ++ ++/* + * Module Parameters + */ + +@@ -585,6 +660,9 @@ + error = transport_class_register(&fc_host_class); + if (error) + return error; ++ error = transport_class_register(&fc_vport_class); ++ if (error) ++ return error; + error = transport_class_register(&fc_rport_class); + if (error) + return error; +@@ -596,6 +674,7 @@ + transport_class_unregister(&fc_transport_class); + transport_class_unregister(&fc_rport_class); + transport_class_unregister(&fc_host_class); ++ transport_class_unregister(&fc_vport_class); + } + + /* +@@ -800,9 +879,9 @@ + return snprintf(buf, 30, "Unknown Fabric Entity\n"); + } + } else { +- if (rport->roles == FC_RPORT_ROLE_UNKNOWN) ++ if (rport->roles == FC_PORT_ROLE_UNKNOWN) + return snprintf(buf, 20, "unknown\n"); +- return get_fc_remote_port_roles_names(rport->roles, buf); ++ return get_fc_port_roles_names(rport->roles, buf); + } + } + static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO, +@@ -857,7 +936,7 @@ + + /* + * Note: in the target show function we recognize when the remote +- * port is in the hierarchy and do not allow the driver to get ++ * port is in the heirarchy and do not allow the driver to get + * involved in sysfs functions. The driver only gets involved if + * it's the "old" style that doesn't use rports. 
+ */ +@@ -912,6 +991,257 @@ + + + /* ++ * FC Virtual Port Attribute Management ++ */ ++ ++#define fc_vport_show_function(field, format_string, sz, cast) \ ++static ssize_t \ ++show_fc_vport_##field (struct class_device *cdev, char *buf) \ ++{ \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ struct Scsi_Host *shost = vport_to_shost(vport); \ ++ struct fc_internal *i = to_fc_internal(shost->transportt); \ ++ if ((i->f->get_vport_##field) && \ ++ !(vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))) \ ++ i->f->get_vport_##field(vport); \ ++ return snprintf(buf, sz, format_string, cast vport->field); \ ++} ++ ++#define fc_vport_store_function(field) \ ++static ssize_t \ ++store_fc_vport_##field(struct class_device *cdev, const char *buf, \ ++ size_t count) \ ++{ \ ++ int val; \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ struct Scsi_Host *shost = vport_to_shost(vport); \ ++ struct fc_internal *i = to_fc_internal(shost->transportt); \ ++ char *cp; \ ++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \ ++ return -EBUSY; \ ++ val = simple_strtoul(buf, &cp, 0); \ ++ if (*cp && (*cp != '\n')) \ ++ return -EINVAL; \ ++ i->f->set_vport_##field(vport, val); \ ++ return count; \ ++} ++ ++#define fc_vport_store_str_function(field, slen) \ ++static ssize_t \ ++store_fc_vport_##field(struct class_device *cdev, const char *buf, \ ++ size_t count) \ ++{ \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ struct Scsi_Host *shost = vport_to_shost(vport); \ ++ struct fc_internal *i = to_fc_internal(shost->transportt); \ ++ unsigned int cnt=count; \ ++ \ ++ /* count may include a LF at end of string */ \ ++ if (buf[cnt-1] == '\n') \ ++ cnt--; \ ++ if (cnt > ((slen) - 1)) \ ++ return -EINVAL; \ ++ memcpy(vport->field, buf, cnt); \ ++ i->f->set_vport_##field(vport); \ ++ return count; \ ++} ++ ++#define fc_vport_rd_attr(field, format_string, sz) \ ++ fc_vport_show_function(field, format_string, sz, ) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ ++ show_fc_vport_##field, NULL) ++ ++#define fc_vport_rd_attr_cast(field, format_string, sz, cast) \ ++ fc_vport_show_function(field, format_string, sz, (cast)) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ ++ show_fc_vport_##field, NULL) ++ ++#define fc_vport_rw_attr(field, format_string, sz) \ ++ fc_vport_show_function(field, format_string, sz, ) \ ++ fc_vport_store_function(field) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \ ++ show_fc_vport_##field, \ ++ store_fc_vport_##field) ++ ++#define fc_private_vport_show_function(field, format_string, sz, cast) \ ++static ssize_t \ ++show_fc_vport_##field (struct class_device *cdev, char *buf) \ ++{ \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ return snprintf(buf, sz, format_string, cast vport->field); \ ++} ++ ++#define fc_private_vport_store_u32_function(field) \ ++static ssize_t \ ++store_fc_vport_##field(struct class_device *cdev, const char *buf, \ ++ size_t count) \ ++{ \ ++ u32 val; \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ char *cp; \ ++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) \ ++ return -EBUSY; \ ++ val = simple_strtoul(buf, &cp, 0); \ ++ if (*cp && (*cp != '\n')) \ ++ return -EINVAL; \ ++ vport->field = val; \ ++ return count; \ ++} ++ ++ ++#define fc_private_vport_rd_attr(field, format_string, sz) \ ++ fc_private_vport_show_function(field, format_string, sz, ) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ ++ show_fc_vport_##field, NULL) ++ 
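Since every attribute body in this block is produced by token pasting, it can help to see one written out by hand. Assuming only the macro text above, fc_private_vport_show_function(node_name, "0x%llx\n", 20, (unsigned long long)), which backs the node_name attribute stamped out below, expands to roughly:

/* Hand expansion for illustration only; whitespace differs from cpp output. */
static ssize_t
show_fc_vport_node_name(struct class_device *cdev, char *buf)
{
	struct fc_vport *vport = transport_class_to_vport(cdev);
	return snprintf(buf, 20, "0x%llx\n",
			(unsigned long long) vport->node_name);
}

The private variants read the transport's cached copy of the field directly; the non-private show macro additionally calls the LLD's get_vport_<field>() hook first, unless the vport is still being created or is being deleted.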
++#define fc_private_vport_rd_attr_cast(field, format_string, sz, cast) \ ++ fc_private_vport_show_function(field, format_string, sz, (cast)) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO, \ ++ show_fc_vport_##field, NULL) ++ ++#define fc_private_vport_rw_u32_attr(field, format_string, sz) \ ++ fc_private_vport_show_function(field, format_string, sz, ) \ ++ fc_private_vport_store_u32_function(field) \ ++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR, \ ++ show_fc_vport_##field, \ ++ store_fc_vport_##field) ++ ++ ++#define fc_private_vport_rd_enum_attr(title, maxlen) \ ++static ssize_t \ ++show_fc_vport_##title (struct class_device *cdev, char *buf) \ ++{ \ ++ struct fc_vport *vport = transport_class_to_vport(cdev); \ ++ const char *name; \ ++ name = get_fc_##title##_name(vport->title); \ ++ if (!name) \ ++ return -EINVAL; \ ++ return snprintf(buf, maxlen, "%s\n", name); \ ++} \ ++static FC_CLASS_DEVICE_ATTR(vport, title, S_IRUGO, \ ++ show_fc_vport_##title, NULL) ++ ++ ++#define SETUP_VPORT_ATTRIBUTE_RD(field) \ ++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \ ++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \ ++ i->private_vport_attrs[count].store = NULL; \ ++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \ ++ if (i->f->get_##field) \ ++ count++ ++ /* NOTE: Above MACRO differs: checks function not show bit */ ++ ++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(field) \ ++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \ ++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \ ++ i->private_vport_attrs[count].store = NULL; \ ++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \ ++ count++ ++ ++#define SETUP_VPORT_ATTRIBUTE_WR(field) \ ++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \ ++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \ ++ if (i->f->field) \ ++ count++ ++ /* NOTE: Above MACRO differs: checks function */ ++ ++#define SETUP_VPORT_ATTRIBUTE_RW(field) \ ++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \ ++ if (!i->f->set_vport_##field) { \ ++ i->private_vport_attrs[count].attr.mode = S_IRUGO; \ ++ i->private_vport_attrs[count].store = NULL; \ ++ } \ ++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \ ++ count++ ++ /* NOTE: Above MACRO differs: does not check show bit */ ++ ++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RW(field) \ ++{ \ ++ i->private_vport_attrs[count] = class_device_attr_vport_##field; \ ++ i->vport_attrs[count] = &i->private_vport_attrs[count]; \ ++ count++; \ ++} ++ ++ ++/* The FC Transport Virtual Port Attributes: */ ++ ++/* Fixed Virtual Port Attributes */ ++ ++/* Dynamic Virtual Port Attributes */ ++ ++/* Private Virtual Port Attributes */ ++ ++fc_private_vport_rd_enum_attr(vport_state, FC_VPORTSTATE_MAX_NAMELEN); ++fc_private_vport_rd_enum_attr(vport_last_state, FC_VPORTSTATE_MAX_NAMELEN); ++fc_private_vport_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long); ++fc_private_vport_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long); ++ ++static ssize_t ++show_fc_vport_roles (struct class_device *cdev, char *buf) ++{ ++ struct fc_vport *vport = transport_class_to_vport(cdev); ++ ++ if (vport->roles == FC_PORT_ROLE_UNKNOWN) ++ return snprintf(buf, 20, "unknown\n"); ++ return get_fc_port_roles_names(vport->roles, buf); ++} ++static FC_CLASS_DEVICE_ATTR(vport, roles, S_IRUGO, show_fc_vport_roles, NULL); ++ ++fc_private_vport_rd_enum_attr(vport_type, FC_PORTTYPE_MAX_NAMELEN); ++ 
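Each private attribute above becomes a read-only file under the vport's class device. Below is a minimal user-space sketch of reading one back; the path follows the "vport-%d:%d-%d" naming used by fc_vport_create() later in this patch, and the host, channel, and instance numbers are invented for the example:

/* Illustrative only: read a vport attribute through sysfs. */
#include <stdio.h>

int main(void)
{
	char state[32];
	FILE *f = fopen("/sys/class/fc_vports/vport-5:0-0/vport_state", "r");

	if (!f)
		return 1;
	if (fgets(state, sizeof(state), f))
		printf("vport state: %s", state);	/* e.g. "Active\n" */
	fclose(f);
	return 0;
}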
++fc_private_vport_show_function(symbolic_name, "%s\n", ++ FC_VPORT_SYMBOLIC_NAMELEN + 1, ) ++fc_vport_store_str_function(symbolic_name, FC_VPORT_SYMBOLIC_NAMELEN) ++static FC_CLASS_DEVICE_ATTR(vport, symbolic_name, S_IRUGO | S_IWUSR, ++ show_fc_vport_symbolic_name, store_fc_vport_symbolic_name); ++ ++static ssize_t ++store_fc_vport_delete(struct class_device *cdev, const char *buf, ++ size_t count) ++{ ++ struct fc_vport *vport = transport_class_to_vport(cdev); ++ struct Scsi_Host *shost = vport_to_shost(vport); ++ ++ fc_queue_work(shost, &vport->vport_delete_work); ++ return count; ++} ++static FC_CLASS_DEVICE_ATTR(vport, vport_delete, S_IWUSR, ++ NULL, store_fc_vport_delete); ++ ++ ++/* ++ * Enable/Disable vport ++ * Write "1" to disable, write "0" to enable ++ */ ++static ssize_t ++store_fc_vport_disable(struct class_device *cdev, const char *buf, ++ size_t count) ++{ ++ struct fc_vport *vport = transport_class_to_vport(cdev); ++ struct Scsi_Host *shost = vport_to_shost(vport); ++ struct fc_internal *i = to_fc_internal(shost->transportt); ++ int stat; ++ ++ if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)) ++ return -EBUSY; ++ ++ if (*buf == '0') { ++ if (vport->vport_state != FC_VPORT_DISABLED) ++ return -EALREADY; ++ } else if (*buf == '1') { ++ if (vport->vport_state == FC_VPORT_DISABLED) ++ return -EALREADY; ++ } else ++ return -EINVAL; ++ ++ stat = i->f->vport_disable(vport, ((*buf == '0') ? false : true)); ++ return stat ? stat : count; ++} ++static FC_CLASS_DEVICE_ATTR(vport, vport_disable, S_IWUSR, ++ NULL, store_fc_vport_disable); ++ ++ ++/* + * Host Attribute Management + */ + +@@ -1003,6 +1333,13 @@ + if (i->f->show_host_##field) \ + count++ + ++#define SETUP_HOST_ATTRIBUTE_RD_NS(field) \ ++ i->private_host_attrs[count] = class_device_attr_host_##field; \ ++ i->private_host_attrs[count].attr.mode = S_IRUGO; \ ++ i->private_host_attrs[count].store = NULL; \ ++ i->host_attrs[count] = &i->private_host_attrs[count]; \ ++ count++ ++ + #define SETUP_HOST_ATTRIBUTE_RW(field) \ + i->private_host_attrs[count] = class_device_attr_host_##field; \ + if (!i->f->set_host_##field) { \ +@@ -1090,6 +1427,7 @@ + fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20, + unsigned long long); + fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20); ++fc_private_host_rd_attr(max_npiv_vports, "%u\n", 20); + fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1)); + + +@@ -1210,6 +1548,9 @@ + static FC_CLASS_DEVICE_ATTR(host, issue_lip, S_IWUSR, NULL, + store_fc_private_host_issue_lip); + ++fc_private_host_rd_attr(npiv_vports_inuse, "%u\n", 20); ++ ++ + /* + * Host Statistics Management + */ +@@ -1285,7 +1626,6 @@ + static FC_CLASS_DEVICE_ATTR(host, reset_statistics, S_IWUSR, NULL, + fc_reset_statistics); + +- + static struct attribute *fc_statistics_attrs[] = { + &class_device_attr_host_seconds_since_last_reset.attr, + &class_device_attr_host_tx_frames.attr, +@@ -1316,6 +1656,142 @@ + .attrs = fc_statistics_attrs, + }; + ++ ++/* Host Vport Attributes */ ++ ++static int ++fc_parse_wwn(const char *ns, u64 *nm) ++{ ++ unsigned int i, j; ++ u8 wwn[8]; ++ ++ memset(wwn, 0, sizeof(wwn)); ++ ++ /* Validate and store the new name */ ++ for (i=0, j=0; i < 16; i++) { ++ if ((*ns >= 'a') && (*ns <= 'f')) ++ j = ((j << 4) | ((*ns++ -'a') + 10)); ++ else if ((*ns >= 'A') && (*ns <= 'F')) ++ j = ((j << 4) | ((*ns++ -'A') + 10)); ++ else if ((*ns >= '0') && (*ns <= '9')) ++ j = ((j << 4) | (*ns++ -'0')); ++ else ++ return -EINVAL; ++ if (i % 2) { ++ wwn[i/2] = j & 0xff; ++ j 
= 0;
++ }
++ }
++
++ *nm = wwn_to_u64(wwn);
++
++ return 0;
++}
++
++
++/*
++ * "Short-cut" sysfs variable to create a new vport on a FC Host.
++ * Input is a string of the form "<WWPN>:<WWNN>". Other attributes
++ * will default to a NPIV-based FCP_Initiator; The WWNs are specified
++ * as hex characters, and may *not* contain any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_create(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct fc_vport_identifiers vid;
++ struct fc_vport *vport;
++ unsigned int cnt=count;
++ int stat;
++
++ memset(&vid, 0, sizeof(vid));
++
++ /* count may include a LF at end of string */
++ if (buf[cnt-1] == '\n')
++ cnt--;
++
++ /* validate we have enough characters for WWPN */
++ if ((cnt != (16+1+16)) || (buf[16] != ':'))
++ return -EINVAL;
++
++ stat = fc_parse_wwn(&buf[0], &vid.port_name);
++ if (stat)
++ return stat;
++
++ stat = fc_parse_wwn(&buf[17], &vid.node_name);
++ if (stat)
++ return stat;
++
++ vid.roles = FC_PORT_ROLE_FCP_INITIATOR;
++ vid.vport_type = FC_PORTTYPE_NPIV;
++ /* vid.symbolic_name is already zero/NULL's */
++ vid.disable = false; /* always enabled */
++
++ /* we only allow support on Channel 0 !!! */
++ stat = fc_vport_create(shost, 0, &shost->shost_gendev, &vid, &vport);
++ return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(host, vport_create, S_IWUSR, NULL,
++ store_fc_host_vport_create);
++
++
++/*
++ * "Short-cut" sysfs variable to delete a vport on a FC Host.
++ * Vport is identified by a string containing "<WWPN>:<WWNN>".
++ * The WWNs are specified as hex characters, and may *not* contain
++ * any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_delete(struct class_device *cdev, const char *buf,
++ size_t count)
++{
++ struct Scsi_Host *shost = transport_class_to_shost(cdev);
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_vport *vport;
++ u64 wwpn, wwnn;
++ unsigned long flags;
++ unsigned int cnt=count;
++ int stat, match;
++
++ /* count may include a LF at end of string */
++ if (buf[cnt-1] == '\n')
++ cnt--;
++
++ /* validate we have enough characters for WWPN */
++ if ((cnt != (16+1+16)) || (buf[16] != ':'))
++ return -EINVAL;
++
++ stat = fc_parse_wwn(&buf[0], &wwpn);
++ if (stat)
++ return stat;
++
++ stat = fc_parse_wwn(&buf[17], &wwnn);
++ if (stat)
++ return stat;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ match = 0;
++ /* we only allow support on Channel 0 !!! */
++ list_for_each_entry(vport, &fc_host->vports, peers) {
++ if ((vport->channel == 0) &&
++ (vport->port_name == wwpn) && (vport->node_name == wwnn)) {
++ match = 1;
++ break;
++ }
++ }
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (!match)
++ return -ENODEV;
++
++ stat = fc_vport_terminate(vport);
++ return stat ?
stat : count; ++} ++static FC_CLASS_DEVICE_ATTR(host, vport_delete, S_IWUSR, NULL, ++ store_fc_host_vport_delete); ++ ++ + static int fc_host_match(struct attribute_container *cont, + struct device *dev) + { +@@ -1387,6 +1863,40 @@ + } + + ++static void fc_vport_dev_release(struct device *dev) ++{ ++ struct fc_vport *vport = dev_to_vport(dev); ++ put_device(dev->parent); /* release kobj parent */ ++ kfree(vport); ++} ++ ++int scsi_is_fc_vport(const struct device *dev) ++{ ++ return dev->release == fc_vport_dev_release; ++} ++EXPORT_SYMBOL(scsi_is_fc_vport); ++ ++static int fc_vport_match(struct attribute_container *cont, ++ struct device *dev) ++{ ++ struct fc_vport *vport; ++ struct Scsi_Host *shost; ++ struct fc_internal *i; ++ ++ if (!scsi_is_fc_vport(dev)) ++ return 0; ++ vport = dev_to_vport(dev); ++ ++ shost = vport_to_shost(vport); ++ if (!shost->transportt || shost->transportt->host_attrs.ac.class ++ != &fc_host_class.class) ++ return 0; ++ ++ i = to_fc_internal(shost->transportt); ++ return &i->vport_attr_cont.ac == cont; ++} ++ ++ + /** + * fc_timed_out - FC Transport I/O timeout intercept handler + * +@@ -1433,6 +1943,9 @@ + if (rport->scsi_target_id == -1) + continue; + ++ if (rport->port_state != FC_PORTSTATE_ONLINE) ++ continue; ++ + if ((channel == SCAN_WILD_CARD || channel == rport->channel) && + (id == SCAN_WILD_CARD || id == rport->scsi_target_id)) { + scsi_scan_target(&rport->dev, rport->channel, +@@ -1472,6 +1985,11 @@ + i->rport_attr_cont.ac.match = fc_rport_match; + transport_container_register(&i->rport_attr_cont); + ++ i->vport_attr_cont.ac.attrs = &i->vport_attrs[0]; ++ i->vport_attr_cont.ac.class = &fc_vport_class.class; ++ i->vport_attr_cont.ac.match = fc_vport_match; ++ transport_container_register(&i->vport_attr_cont); ++ + i->f = ft; + + /* Transport uses the shost workq for scsi scanning */ +@@ -1505,6 +2023,10 @@ + SETUP_HOST_ATTRIBUTE_RD(supported_fc4s); + SETUP_HOST_ATTRIBUTE_RD(supported_speeds); + SETUP_HOST_ATTRIBUTE_RD(maxframe_size); ++ if (ft->vport_create) { ++ SETUP_HOST_ATTRIBUTE_RD_NS(max_npiv_vports); ++ SETUP_HOST_ATTRIBUTE_RD_NS(npiv_vports_inuse); ++ } + SETUP_HOST_ATTRIBUTE_RD(serial_number); + + SETUP_HOST_ATTRIBUTE_RD(port_id); +@@ -1520,6 +2042,10 @@ + SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type); + if (ft->issue_fc_host_lip) + SETUP_PRIVATE_HOST_ATTRIBUTE_RW(issue_lip); ++ if (ft->vport_create) ++ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_create); ++ if (ft->vport_delete) ++ SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_delete); + + BUG_ON(count > FC_HOST_NUM_ATTRS); + +@@ -1545,6 +2071,24 @@ + + i->rport_attrs[count] = NULL; + ++ /* ++ * Setup Virtual Port Attributes. 
++ */ ++ count=0; ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_state); ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_last_state); ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(node_name); ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(port_name); ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(roles); ++ SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_type); ++ SETUP_VPORT_ATTRIBUTE_RW(symbolic_name); ++ SETUP_VPORT_ATTRIBUTE_WR(vport_delete); ++ SETUP_VPORT_ATTRIBUTE_WR(vport_disable); ++ ++ BUG_ON(count > FC_VPORT_NUM_ATTRS); ++ ++ i->vport_attrs[count] = NULL; ++ + return &i->t; + } + EXPORT_SYMBOL(fc_attach_transport); +@@ -1556,6 +2100,7 @@ + transport_container_unregister(&i->t.target_attrs); + transport_container_unregister(&i->t.host_attrs); + transport_container_unregister(&i->rport_attr_cont); ++ transport_container_unregister(&i->vport_attr_cont); + + kfree(i); + } +@@ -1667,9 +2212,17 @@ + void + fc_remove_host(struct Scsi_Host *shost) + { +- struct fc_rport *rport, *next_rport; ++ struct fc_vport *vport = NULL, *next_vport = NULL; ++ struct fc_rport *rport = NULL, *next_rport = NULL; + struct workqueue_struct *work_q; + struct fc_host_attrs *fc_host = shost_to_fc_host(shost); ++ unsigned long flags; ++ ++ spin_lock_irqsave(shost->host_lock, flags); ++ ++ /* Remove any vports */ ++ list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers) ++ fc_queue_work(shost, &vport->vport_delete_work); + + /* Remove any remote ports */ + list_for_each_entry_safe(rport, next_rport, +@@ -1686,6 +2239,8 @@ + fc_queue_work(shost, &rport->rport_delete_work); + } + ++ spin_unlock_irqrestore(shost->host_lock, flags); ++ + /* flush all scan work items */ + scsi_flush_work(shost); + +@@ -1844,7 +2399,7 @@ + spin_lock_irqsave(shost->host_lock, flags); + + rport->number = fc_host->next_rport_number++; +- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) ++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) + rport->scsi_target_id = fc_host->next_target_id++; + else + rport->scsi_target_id = -1; +@@ -1869,7 +2424,7 @@ + transport_add_device(dev); + transport_configure_device(dev); + +- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) { ++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) { + /* initiate a scan of the target */ + rport->flags |= FC_RPORT_SCAN_PENDING; + scsi_queue_work(shost, &rport->scan_work); +@@ -2003,7 +2558,7 @@ + + /* was a target, not in roles */ + if ((rport->scsi_target_id != -1) && +- (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET))) ++ (!(ids->roles & FC_PORT_ROLE_FCP_TARGET))) + return rport; + + /* +@@ -2086,7 +2641,7 @@ + memset(rport->dd_data, 0, + fci->f->dd_fcrport_size); + +- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) { ++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) { + /* initiate a scan of the target */ + rport->flags |= FC_RPORT_SCAN_PENDING; + scsi_queue_work(shost, &rport->scan_work); +@@ -2243,11 +2798,11 @@ + int create = 0; + + spin_lock_irqsave(shost->host_lock, flags); +- if (roles & FC_RPORT_ROLE_FCP_TARGET) { ++ if (roles & FC_PORT_ROLE_FCP_TARGET) { + if (rport->scsi_target_id == -1) { + rport->scsi_target_id = fc_host->next_target_id++; + create = 1; +- } else if (!(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) ++ } else if (!(rport->roles & FC_PORT_ROLE_FCP_TARGET)) + create = 1; + } + +@@ -2317,7 +2872,7 @@ + */ + if ((rport->port_state == FC_PORTSTATE_ONLINE) && + (rport->scsi_target_id != -1) && +- !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) { ++ !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) { + dev_printk(KERN_ERR, &rport->dev, + "blocked FC remote port time out: no longer" + " a FCP target, removing starget\n"); +@@ 
-2367,7 +2922,7 @@
+ */
+ rport->maxframe_size = -1;
+ rport->supported_classes = FC_COS_UNSPECIFIED;
+- rport->roles = FC_RPORT_ROLE_UNKNOWN;
++ rport->roles = FC_PORT_ROLE_UNKNOWN;
+ rport->port_state = FC_PORTSTATE_NOTPRESENT;
+
+ /* remove the identifiers that aren't used in the consisting binding */
+@@ -2436,7 +2991,7 @@
+ unsigned long flags;
+
+ if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+- (rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
++ (rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
+ scsi_scan_target(&rport->dev, rport->channel,
+ rport->scsi_target_id, SCAN_WILD_CARD, 1);
+ }
+@@ -2447,7 +3002,227 @@
+ }
+
+
+-MODULE_AUTHOR("Martin Hicks");
++/**
++ * fc_vport_create - allocates and creates a FC virtual port.
++ * @shost: scsi host the virtual port is connected to.
++ * @channel: Channel on shost port connected to.
++ * @pdev: parent device for vport
++ * @ids: The world wide names, FC4 port roles, etc for
++ * the virtual port.
++ * @ret_vport: The pointer to the created vport.
++ *
++ * Allocates and creates the vport structure, calls the parent host
++ * to instantiate the vport, then completes with class and sysfs creation.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++static int
++fc_vport_create(struct Scsi_Host *shost, int channel, struct device *pdev,
++ struct fc_vport_identifiers *ids, struct fc_vport **ret_vport)
++{
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_internal *fci = to_fc_internal(shost->transportt);
++ struct fc_vport *vport;
++ struct device *dev;
++ unsigned long flags;
++ size_t size;
++ int error;
++
++ *ret_vport = NULL;
++
++ if ( ! fci->f->vport_create)
++ return -ENOENT;
++
++ size = (sizeof(struct fc_vport) + fci->f->dd_fcvport_size);
++ vport = kzalloc(size, GFP_KERNEL);
++ if (unlikely(!vport)) {
++ printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__);
++ return -ENOMEM;
++ }
++
++ vport->vport_state = FC_VPORT_UNKNOWN;
++ vport->vport_last_state = FC_VPORT_UNKNOWN;
++ vport->node_name = ids->node_name;
++ vport->port_name = ids->port_name;
++ vport->roles = ids->roles;
++ vport->vport_type = ids->vport_type;
++ if (fci->f->dd_fcvport_size)
++ vport->dd_data = &vport[1];
++ vport->shost = shost;
++ vport->channel = channel;
++ vport->flags = FC_VPORT_CREATING;
++ INIT_WORK(&vport->vport_delete_work, fc_vport_sched_delete);
++
++ spin_lock_irqsave(shost->host_lock, flags);
++
++ if (fc_host->npiv_vports_inuse >= fc_host->max_npiv_vports) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ kfree(vport);
++ return -ENOSPC;
++ }
++ fc_host->npiv_vports_inuse++;
++ vport->number = fc_host->next_vport_number++;
++ list_add_tail(&vport->peers, &fc_host->vports);
++ get_device(&shost->shost_gendev); /* for fc_host->vport list */
++
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ dev = &vport->dev;
++ device_initialize(dev); /* takes self reference */
++ dev->parent = get_device(pdev); /* takes parent reference */
++ dev->release = fc_vport_dev_release;
++ sprintf(dev->bus_id, "vport-%d:%d-%d",
++ shost->host_no, channel, vport->number);
++ transport_setup_device(dev);
++
++ error = device_add(dev);
++ if (error) {
++ printk(KERN_ERR "FC Virtual Port device_add failed\n");
++ goto delete_vport;
++ }
++ transport_add_device(dev);
++ transport_configure_device(dev);
++
++ error = fci->f->vport_create(vport, ids->disable);
++ if (error) {
++ printk(KERN_ERR "FC Virtual Port LLDD Create failed\n");
++ goto delete_vport_all;
++ }
++
++ /*
++ * if the parent isn't the physical adapter's Scsi_Host, ensure
++ * the Scsi_Host at least contains a symlink to the vport.
++ */
++ if (pdev != &shost->shost_gendev) {
++ error = sysfs_create_link(&shost->shost_gendev.kobj,
++ &dev->kobj, dev->bus_id);
++ if (error)
++ printk(KERN_ERR
++ "%s: Cannot create vport symlinks for "
++ "%s, err=%d\n",
++ __FUNCTION__, dev->bus_id, error);
++ }
++ spin_lock_irqsave(shost->host_lock, flags);
++ vport->flags &= ~FC_VPORT_CREATING;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ dev_printk(KERN_NOTICE, pdev,
++ "%s created via shost%d channel %d\n", dev->bus_id,
++ shost->host_no, channel);
++
++ *ret_vport = vport;
++
++ return 0;
++
++delete_vport_all:
++ transport_remove_device(dev);
++ device_del(dev);
++delete_vport:
++ transport_destroy_device(dev);
++ spin_lock_irqsave(shost->host_lock, flags);
++ list_del(&vport->peers);
++ put_device(&shost->shost_gendev); /* for fc_host->vport list */
++ fc_host->npiv_vports_inuse--;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ put_device(dev->parent);
++ kfree(vport);
++
++ return error;
++}
++
++
++/**
++ * fc_vport_terminate - Admin App or LLDD requests termination of a vport
++ * @vport: fc_vport to be terminated
++ *
++ * Calls the LLDD vport_delete() function, then deallocates and removes
++ * the vport from the shost and object tree.
++ *
++ * Notes:
++ * This routine assumes no locks are held on entry.
++ **/
++int
++fc_vport_terminate(struct fc_vport *vport)
++{
++ struct Scsi_Host *shost = vport_to_shost(vport);
++ struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++ struct fc_internal *i = to_fc_internal(shost->transportt);
++ struct device *dev = &vport->dev;
++ unsigned long flags;
++ int stat;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ if (vport->flags & FC_VPORT_CREATING) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ return -EBUSY;
++ }
++ if (vport->flags & (FC_VPORT_DEL)) {
++ spin_unlock_irqrestore(shost->host_lock, flags);
++ return -EALREADY;
++ }
++ vport->flags |= FC_VPORT_DELETING;
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (i->f->vport_delete)
++ stat = i->f->vport_delete(vport);
++ else
++ stat = -ENOENT;
++
++ spin_lock_irqsave(shost->host_lock, flags);
++ vport->flags &= ~FC_VPORT_DELETING;
++ if (!stat) {
++ vport->flags |= FC_VPORT_DELETED;
++ list_del(&vport->peers);
++ fc_host->npiv_vports_inuse--;
++ put_device(&shost->shost_gendev); /* for fc_host->vport list */
++ }
++ spin_unlock_irqrestore(shost->host_lock, flags);
++
++ if (stat)
++ return stat;
++
++ if (dev->parent != &shost->shost_gendev)
++ sysfs_remove_link(&shost->shost_gendev.kobj, dev->bus_id);
++ transport_remove_device(dev);
++ device_del(dev);
++ transport_destroy_device(dev);
++
++ /*
++ * Removing our self-reference should mean our
++ * release function gets called, which will drop the remaining
++ * parent reference and free the data structure.
++ */
++ put_device(dev); /* for self-reference */
++
++ return 0; /* SUCCESS */
++}
++EXPORT_SYMBOL(fc_vport_terminate);
++
++/**
++ * fc_vport_sched_delete - workq-based delete request for a vport
++ *
++ * @work: vport to be deleted.
++ **/ ++static void ++fc_vport_sched_delete(struct work_struct *work) ++{ ++ struct fc_vport *vport = ++ container_of(work, struct fc_vport, vport_delete_work); ++ int stat; ++ ++ stat = fc_vport_terminate(vport); ++ if (stat) ++ dev_printk(KERN_ERR, vport->dev.parent, ++ "%s: %s could not be deleted created via " ++ "shost%d channel %d - error %d\n", __FUNCTION__, ++ vport->dev.bus_id, vport->shost->host_no, ++ vport->channel, stat); ++} ++ ++ ++/* Original Author: Martin Hicks */ ++MODULE_AUTHOR("James Smart"); + MODULE_DESCRIPTION("FC Transport Attributes"); + MODULE_LICENSE("GPL"); + +diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c +--- linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/scsi_transport_iscsi.c 2007-12-21 15:36:14.000000000 -0500 +@@ -30,9 +30,9 @@ + #include + #include + +-#define ISCSI_SESSION_ATTRS 11 ++#define ISCSI_SESSION_ATTRS 15 + #define ISCSI_CONN_ATTRS 11 +-#define ISCSI_HOST_ATTRS 0 ++#define ISCSI_HOST_ATTRS 4 + #define ISCSI_TRANSPORT_VERSION "2.0-724" + + struct iscsi_internal { +@@ -609,12 +609,10 @@ + int t = done ? NLMSG_DONE : type; + + skb = alloc_skb(len, GFP_ATOMIC); +- /* +- * FIXME: +- * user is supposed to react on iferror == -ENOMEM; +- * see iscsi_if_rx(). +- */ +- BUG_ON(!skb); ++ if (!skb) { ++ printk(KERN_ERR "Could not allocate skb to send reply.\n"); ++ return -ENOMEM; ++ } + + nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0); + nlh->nlmsg_flags = flags; +@@ -816,6 +814,8 @@ + uint32_t hostno; + + session = transport->create_session(transport, &priv->t, ++ ev->u.c_session.cmds_max, ++ ev->u.c_session.queue_depth, + ev->u.c_session.initial_cmdsn, + &hostno); + if (!session) +@@ -947,15 +947,50 @@ + iscsi_tgt_dscvr(struct iscsi_transport *transport, + struct iscsi_uevent *ev) + { ++ struct Scsi_Host *shost; + struct sockaddr *dst_addr; ++ int err; + + if (!transport->tgt_dscvr) + return -EINVAL; + ++ shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no); ++ if (IS_ERR(shost)) { ++ printk(KERN_ERR "target discovery could not find host no %u\n", ++ ev->u.tgt_dscvr.host_no); ++ return -ENODEV; ++ } ++ ++ + dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev)); +- return transport->tgt_dscvr(ev->u.tgt_dscvr.type, +- ev->u.tgt_dscvr.host_no, ++ err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type, + ev->u.tgt_dscvr.enable, dst_addr); ++ scsi_host_put(shost); ++ return err; ++} ++ ++static int ++iscsi_set_host_param(struct iscsi_transport *transport, ++ struct iscsi_uevent *ev) ++{ ++ char *data = (char*)ev + sizeof(*ev); ++ struct Scsi_Host *shost; ++ int err; ++ ++ if (!transport->set_host_param) ++ return -ENOSYS; ++ ++ shost = scsi_host_lookup(ev->u.set_host_param.host_no); ++ if (IS_ERR(shost)) { ++ printk(KERN_ERR "set_host_param could not find host no %u\n", ++ ev->u.set_host_param.host_no); ++ return -ENODEV; ++ } ++ ++ err = transport->set_host_param(shost, ev->u.set_host_param.param, ++ data, ev->u.set_host_param.len); ++ scsi_host_put(shost); ++ return err; + } + + static int +@@ -1049,8 +1084,11 @@ + case ISCSI_UEVENT_TGT_DSCVR: + err = iscsi_tgt_dscvr(transport, ev); + break; ++ case ISCSI_UEVENT_SET_HOST_PARAM: ++ err = iscsi_set_host_param(transport, ev); ++ break; + default: +- err = -EINVAL; ++ err = -ENOSYS; + break; + } + +@@ -1160,30 +1198,37 @@ + /* + * iSCSI session attrs + */ +-#define iscsi_session_attr_show(param) \ ++#define iscsi_session_attr_show(param, perm) \ + 
static ssize_t \ + show_session_param_##param(struct class_device *cdev, char *buf) \ + { \ + struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \ + struct iscsi_transport *t = session->transport; \ ++ \ ++ if (perm && !capable(CAP_SYS_ADMIN)) \ ++ return -EACCES; \ + return t->get_session_param(session, param, buf); \ + } + +-#define iscsi_session_attr(field, param) \ +- iscsi_session_attr_show(param) \ ++#define iscsi_session_attr(field, param, perm) \ ++ iscsi_session_attr_show(param, perm) \ + static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \ + NULL); + +-iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME); +-iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN); +-iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T); +-iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN); +-iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST); +-iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST); +-iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN); +-iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN); +-iscsi_session_attr(erl, ISCSI_PARAM_ERL); +-iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT); ++iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0); ++iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0); ++iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0); ++iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0); ++iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0); ++iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0); ++iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0); ++iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0); ++iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0); ++iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0); ++iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1); ++iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1); ++iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1); ++iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1); + + #define iscsi_priv_session_attr_show(field, format) \ + static ssize_t \ +@@ -1199,6 +1244,28 @@ + NULL) + iscsi_priv_session_attr(recovery_tmo, "%d"); + ++/* ++ * iSCSI host attrs ++ */ ++#define iscsi_host_attr_show(param) \ ++static ssize_t \ ++show_host_param_##param(struct class_device *cdev, char *buf) \ ++{ \ ++ struct Scsi_Host *shost = transport_class_to_shost(cdev); \ ++ struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \ ++ return priv->iscsi_transport->get_host_param(shost, param, buf); \ ++} ++ ++#define iscsi_host_attr(field, param) \ ++ iscsi_host_attr_show(param) \ ++static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param, \ ++ NULL); ++ ++iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME); ++iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS); ++iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS); ++iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME); ++ + #define SETUP_PRIV_SESSION_RD_ATTR(field) \ + do { \ + priv->session_attrs[count] = &class_device_attr_priv_sess_##field; \ +@@ -1222,6 +1289,14 @@ + } \ + } while (0) + ++#define SETUP_HOST_RD_ATTR(field, param_flag) \ ++do { \ ++ if (tt->host_param_mask & param_flag) { \ ++ priv->host_attrs[count] = &class_device_attr_host_##field; \ ++ count++; \ ++ } \ ++} while (0) ++ + static int iscsi_session_match(struct attribute_container *cont, + struct device *dev) + { +@@ -1323,9 +1398,16 @@ + priv->t.host_attrs.ac.class = 
&iscsi_host_class.class;
+ priv->t.host_attrs.ac.match = iscsi_host_match;
+ priv->t.host_size = sizeof(struct iscsi_host);
+- priv->host_attrs[0] = NULL;
+ transport_container_register(&priv->t.host_attrs);
+
++ SETUP_HOST_RD_ATTR(netdev, ISCSI_HOST_NETDEV_NAME);
++ SETUP_HOST_RD_ATTR(ipaddress, ISCSI_HOST_IPADDRESS);
++ SETUP_HOST_RD_ATTR(hwaddress, ISCSI_HOST_HWADDRESS);
++ SETUP_HOST_RD_ATTR(initiatorname, ISCSI_HOST_INITIATOR_NAME);
++ BUG_ON(count > ISCSI_HOST_ATTRS);
++ priv->host_attrs[count] = NULL;
++ count = 0;
++
+ /* connection parameters */
+ priv->conn_cont.ac.attrs = &priv->conn_attrs[0];
+ priv->conn_cont.ac.class = &iscsi_connection_class.class;
+@@ -1364,6 +1446,10 @@
+ SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL);
+ SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME);
+ SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT);
++ SETUP_SESSION_RD_ATTR(username, ISCSI_USERNAME);
++ SETUP_SESSION_RD_ATTR(username_in, ISCSI_USERNAME_IN);
++ SETUP_SESSION_RD_ATTR(password, ISCSI_PASSWORD);
++ SETUP_SESSION_RD_ATTR(password_in, ISCSI_PASSWORD_IN);
+ SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo);
+
+ BUG_ON(count > ISCSI_SESSION_ATTRS);
+@@ -1437,7 +1523,7 @@
+ if (err)
+ goto unregister_conn_class;
+
+- nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
++ nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
+ THIS_MODULE);
+ if (!nls) {
+ err = -ENOBUFS;
+diff -Nurb linux-2.6.22-570/drivers/scsi/sd.c linux-2.6.22-591/drivers/scsi/sd.c
+--- linux-2.6.22-570/drivers/scsi/sd.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sd.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1515,7 +1515,7 @@
+ if (!scsi_device_online(sdp))
+ goto out;
+
+- buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);
++ buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL);
+ if (!buffer) {
+ sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "
+ "allocation failure.\n");
+diff -Nurb linux-2.6.22-570/drivers/scsi/sg.c linux-2.6.22-591/drivers/scsi/sg.c
+--- linux-2.6.22-570/drivers/scsi/sg.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/sg.c 2007-12-21 15:36:12.000000000 -0500
+@@ -1842,7 +1842,7 @@
+ int blk_size = buff_size;
+ struct page *p = NULL;
+
+- if ((blk_size < 0) || (!sfp))
++ if (blk_size < 0)
+ return -EFAULT;
+ if (0 == blk_size)
+ ++blk_size; /* don't know why */
+diff -Nurb linux-2.6.22-570/drivers/scsi/stex.c linux-2.6.22-591/drivers/scsi/stex.c
+--- linux-2.6.22-570/drivers/scsi/stex.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/scsi/stex.c 2007-12-21 15:36:12.000000000 -0500
+@@ -395,53 +395,34 @@
+ static int stex_map_sg(struct st_hba *hba,
+ struct req_msg *req, struct st_ccb *ccb)
+ {
+- struct pci_dev *pdev = hba->pdev;
+ struct scsi_cmnd *cmd;
+- dma_addr_t dma_handle;
+- struct scatterlist *src;
++ struct scatterlist *sg;
+ struct st_sgtable *dst;
+- int i;
++ int i, nseg;
+
+ cmd = ccb->cmd;
+ dst = (struct st_sgtable *)req->variable;
+ dst->max_sg_count = cpu_to_le16(ST_MAX_SG);
+- dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen);
+-
+- if (cmd->use_sg) {
+- int n_elem;
++ dst->sz_in_byte = cpu_to_le32(scsi_bufflen(cmd));
+
+- src = (struct scatterlist *) cmd->request_buffer;
+- n_elem = pci_map_sg(pdev, src,
+- cmd->use_sg, cmd->sc_data_direction);
+- if (n_elem <= 0)
++ nseg = scsi_dma_map(cmd);
++ if (nseg < 0)
+ return -EIO;
++ if (nseg) {
++ ccb->sg_count = nseg;
++ dst->sg_count = cpu_to_le16((u16)nseg);
+
+- ccb->sg_count = n_elem;
+- dst->sg_count = cpu_to_le16((u16)n_elem);
+-
+- for (i = 0; i
< n_elem; i++, src++) { +- dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src)); ++ scsi_for_each_sg(cmd, sg, nseg, i) { ++ dst->table[i].count = cpu_to_le32((u32)sg_dma_len(sg)); + dst->table[i].addr = +- cpu_to_le32(sg_dma_address(src) & 0xffffffff); ++ cpu_to_le32(sg_dma_address(sg) & 0xffffffff); + dst->table[i].addr_hi = +- cpu_to_le32((sg_dma_address(src) >> 16) >> 16); ++ cpu_to_le32((sg_dma_address(sg) >> 16) >> 16); + dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST; + } + dst->table[--i].ctrl |= SG_CF_EOT; +- return 0; + } + +- dma_handle = pci_map_single(pdev, cmd->request_buffer, +- cmd->request_bufflen, cmd->sc_data_direction); +- cmd->SCp.dma_handle = dma_handle; +- +- ccb->sg_count = 1; +- dst->sg_count = cpu_to_le16(1); +- dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff); +- dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16); +- dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen); +- dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST; +- + return 0; + } + +@@ -451,24 +432,24 @@ + size_t lcount; + size_t len; + void *s, *d, *base = NULL; +- if (*count > cmd->request_bufflen) +- *count = cmd->request_bufflen; ++ size_t offset; ++ ++ if (*count > scsi_bufflen(cmd)) ++ *count = scsi_bufflen(cmd); + lcount = *count; + while (lcount) { + len = lcount; + s = (void *)src; +- if (cmd->use_sg) { +- size_t offset = *count - lcount; ++ ++ offset = *count - lcount; + s += offset; +- base = scsi_kmap_atomic_sg(cmd->request_buffer, ++ base = scsi_kmap_atomic_sg(scsi_sglist(cmd), + sg_count, &offset, &len); +- if (base == NULL) { ++ if (!base) { + *count -= lcount; + return; + } + d = base + offset; +- } else +- d = cmd->request_buffer; + + if (direction == ST_TO_CMD) + memcpy(d, s, len); +@@ -476,7 +457,6 @@ + memcpy(s, d, len); + + lcount -= len; +- if (cmd->use_sg) + scsi_kunmap_atomic_sg(base); + } + } +@@ -484,22 +464,17 @@ + static int stex_direct_copy(struct scsi_cmnd *cmd, + const void *src, size_t count) + { +- struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0]; + size_t cp_len = count; + int n_elem = 0; + +- if (cmd->use_sg) { +- n_elem = pci_map_sg(hba->pdev, cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- if (n_elem <= 0) ++ n_elem = scsi_dma_map(cmd); ++ if (n_elem < 0) + return 0; +- } + + stex_internal_copy(cmd, src, &cp_len, n_elem, ST_TO_CMD); + +- if (cmd->use_sg) +- pci_unmap_sg(hba->pdev, cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); ++ scsi_dma_unmap(cmd); ++ + return cp_len == count; + } + +@@ -678,18 +653,6 @@ + return 0; + } + +-static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd) +-{ +- if (cmd->sc_data_direction != DMA_NONE) { +- if (cmd->use_sg) +- pci_unmap_sg(hba->pdev, cmd->request_buffer, +- cmd->use_sg, cmd->sc_data_direction); +- else +- pci_unmap_single(hba->pdev, cmd->SCp.dma_handle, +- cmd->request_bufflen, cmd->sc_data_direction); +- } +-} +- + static void stex_scsi_done(struct st_ccb *ccb) + { + struct scsi_cmnd *cmd = ccb->cmd; +@@ -756,7 +719,7 @@ + + if (ccb->cmd->cmnd[0] == MGT_CMD && + resp->scsi_status != SAM_STAT_CHECK_CONDITION) { +- ccb->cmd->request_bufflen = ++ scsi_bufflen(ccb->cmd) = + le32_to_cpu(*(__le32 *)&resp->variable[0]); + return; + } +@@ -855,7 +818,7 @@ + ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER)) + stex_controller_info(hba, ccb); + +- stex_unmap_sg(hba, ccb->cmd); ++ scsi_dma_unmap(ccb->cmd); + stex_scsi_done(ccb); + hba->out_req_cnt--; + } else if (ccb->req_type & PASSTHRU_REQ_TYPE) { +@@ -1028,7 +991,7 @@ + } + + fail_out: +- 
stex_unmap_sg(hba, cmd); ++ scsi_dma_unmap(cmd); + hba->wait_ccb->req = NULL; /* nullify the req's future return */ + hba->wait_ccb = NULL; + result = FAILED; +diff -Nurb linux-2.6.22-570/drivers/scsi/sun_esp.c linux-2.6.22-591/drivers/scsi/sun_esp.c +--- linux-2.6.22-570/drivers/scsi/sun_esp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/sun_esp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -493,7 +493,7 @@ + goto fail; + + host->max_id = (hme ? 16 : 8); +- esp = host_to_esp(host); ++ esp = shost_priv(host); + + esp->host = host; + esp->dev = esp_dev; +diff -Nurb linux-2.6.22-570/drivers/scsi/sym53c416.c linux-2.6.22-591/drivers/scsi/sym53c416.c +--- linux-2.6.22-570/drivers/scsi/sym53c416.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/sym53c416.c 2007-12-21 15:36:12.000000000 -0500 +@@ -332,8 +332,7 @@ + int i; + unsigned long flags = 0; + unsigned char status_reg, pio_int_reg, int_reg; +- struct scatterlist *sglist; +- unsigned int sgcount; ++ struct scatterlist *sg; + unsigned int tot_trans = 0; + + /* We search the base address of the host adapter which caused the interrupt */ +@@ -429,19 +428,15 @@ + { + current_command->SCp.phase = data_out; + outb(FLUSH_FIFO, base + COMMAND_REG); +- sym53c416_set_transfer_counter(base, current_command->request_bufflen); ++ sym53c416_set_transfer_counter(base, ++ scsi_bufflen(current_command)); + outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG); +- if(!current_command->use_sg) +- tot_trans = sym53c416_write(base, current_command->request_buffer, current_command->request_bufflen); +- else +- { +- sgcount = current_command->use_sg; +- sglist = current_command->request_buffer; +- while(sgcount--) +- { +- tot_trans += sym53c416_write(base, SG_ADDRESS(sglist), sglist->length); +- sglist++; +- } ++ ++ scsi_for_each_sg(current_command, ++ sg, scsi_sg_count(current_command), i) { ++ tot_trans += sym53c416_write(base, ++ SG_ADDRESS(sg), ++ sg->length); + } + if(tot_trans < current_command->underflow) + printk(KERN_WARNING "sym53c416: Underflow, wrote %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow); +@@ -455,19 +450,16 @@ + { + current_command->SCp.phase = data_in; + outb(FLUSH_FIFO, base + COMMAND_REG); +- sym53c416_set_transfer_counter(base, current_command->request_bufflen); ++ sym53c416_set_transfer_counter(base, ++ scsi_bufflen(current_command)); ++ + outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG); +- if(!current_command->use_sg) +- tot_trans = sym53c416_read(base, current_command->request_buffer, current_command->request_bufflen); +- else +- { +- sgcount = current_command->use_sg; +- sglist = current_command->request_buffer; +- while(sgcount--) +- { +- tot_trans += sym53c416_read(base, SG_ADDRESS(sglist), sglist->length); +- sglist++; +- } ++ ++ scsi_for_each_sg(current_command, ++ sg, scsi_sg_count(current_command), i) { ++ tot_trans += sym53c416_read(base, ++ SG_ADDRESS(sg), ++ sg->length); + } + if(tot_trans < current_command->underflow) + printk(KERN_WARNING "sym53c416: Underflow, read %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow); +diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.c linux-2.6.22-591/drivers/scsi/tmscsim.c +--- linux-2.6.22-570/drivers/scsi/tmscsim.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/tmscsim.c 2007-12-21 15:36:12.000000000 -0500 +@@ -457,27 +457,20 @@ + error = 1; + DEBUG1(printk("%s(): Mapped sense buffer %p at %x\n", __FUNCTION__, pcmd->sense_buffer, 
cmdp->saved_dma_handle)); + /* Map SG list */ +- } else if (pcmd->use_sg) { +- pSRB->pSegmentList = (struct scatterlist *) pcmd->request_buffer; +- pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, pcmd->use_sg, +- pcmd->sc_data_direction); ++ } else if (scsi_sg_count(pcmd)) { ++ int nseg; ++ ++ nseg = scsi_dma_map(pcmd); ++ ++ pSRB->pSegmentList = scsi_sglist(pcmd); ++ pSRB->SGcount = nseg; ++ + /* TODO: error handling */ +- if (!pSRB->SGcount) ++ if (nseg < 0) + error = 1; + DEBUG1(printk("%s(): Mapped SG %p with %d (%d) elements\n",\ +- __FUNCTION__, pcmd->request_buffer, pSRB->SGcount, pcmd->use_sg)); ++ __FUNCTION__, scsi_sglist(pcmd), nseg, scsi_sg_count(pcmd))); + /* Map single segment */ +- } else if (pcmd->request_buffer && pcmd->request_bufflen) { +- pSRB->pSegmentList = dc390_sg_build_single(&pSRB->Segmentx, pcmd->request_buffer, pcmd->request_bufflen); +- pSRB->SGcount = pci_map_sg(pdev, pSRB->pSegmentList, 1, +- pcmd->sc_data_direction); +- cmdp->saved_dma_handle = sg_dma_address(pSRB->pSegmentList); +- +- /* TODO: error handling */ +- if (pSRB->SGcount != 1) +- error = 1; +- DEBUG1(printk("%s(): Mapped request buffer %p at %x\n", __FUNCTION__, pcmd->request_buffer, cmdp->saved_dma_handle)); +- /* No mapping !? */ + } else + pSRB->SGcount = 0; + +@@ -494,12 +487,10 @@ + if (pSRB->SRBFlag) { + pci_unmap_sg(pdev, &pSRB->Segmentx, 1, DMA_FROM_DEVICE); + DEBUG1(printk("%s(): Unmapped sense buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle)); +- } else if (pcmd->use_sg) { +- pci_unmap_sg(pdev, pcmd->request_buffer, pcmd->use_sg, pcmd->sc_data_direction); +- DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", __FUNCTION__, pcmd->request_buffer, pcmd->use_sg)); +- } else if (pcmd->request_buffer && pcmd->request_bufflen) { +- pci_unmap_sg(pdev, &pSRB->Segmentx, 1, pcmd->sc_data_direction); +- DEBUG1(printk("%s(): Unmapped request buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle)); ++ } else { ++ scsi_dma_unmap(pcmd); ++ DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", ++ __FUNCTION__, scsi_sglist(pcmd), scsi_sg_count(pcmd))); + } + } + +@@ -1153,9 +1144,9 @@ + struct scatterlist *psgl; + pSRB->TotalXferredLen = 0; + pSRB->SGIndex = 0; +- if (pcmd->use_sg) { ++ if (scsi_sg_count(pcmd)) { + size_t saved; +- pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer; ++ pSRB->pSegmentList = scsi_sglist(pcmd); + psgl = pSRB->pSegmentList; + //dc390_pci_sync(pSRB); + +@@ -1179,12 +1170,6 @@ + printk (KERN_INFO "DC390: Pointer restored. 
Segment %i, Total %li, Bus %08lx\n", + pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr); + +- } else if(pcmd->request_buffer) { +- //dc390_pci_sync(pSRB); +- +- sg_dma_len(&pSRB->Segmentx) = pcmd->request_bufflen - pSRB->Saved_Ptr; +- pSRB->SGcount = 1; +- pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx; + } else { + pSRB->SGcount = 0; + printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n"); +@@ -1579,7 +1564,8 @@ + if( (pSRB->SRBState & (SRB_START_+SRB_MSGOUT)) || + !(pSRB->SRBState & (SRB_DISCONNECT+SRB_COMPLETED)) ) + { /* Selection time out */ +- pSRB->TargetStatus = SCSI_STAT_SEL_TIMEOUT; ++ pSRB->AdaptStatus = H_SEL_TIMEOUT; ++ pSRB->TargetStatus = 0; + goto disc1; + } + else if (!(pSRB->SRBState & SRB_DISCONNECT) && (pSRB->SRBState & SRB_COMPLETED)) +@@ -1612,7 +1598,7 @@ + if( !( pACB->scan_devices ) ) + { + struct scsi_cmnd *pcmd = pSRB->pcmd; +- pcmd->resid = pcmd->request_bufflen; ++ scsi_set_resid(pcmd, scsi_bufflen(pcmd)); + SET_RES_DID(pcmd->result, DID_SOFT_ERROR); + dc390_Going_remove(pDCB, pSRB); + dc390_Free_insert(pACB, pSRB); +@@ -1695,7 +1681,7 @@ + pcmd->cmnd[0], pDCB->TargetID, pDCB->TargetLUN)); + + pSRB->SRBFlag |= AUTO_REQSENSE; +- pSRB->SavedSGCount = pcmd->use_sg; ++ pSRB->SavedSGCount = scsi_sg_count(pcmd); + pSRB->SavedTotXLen = pSRB->TotalXferredLen; + pSRB->AdaptStatus = 0; + pSRB->TargetStatus = 0; /* CHECK_CONDITION<<1; */ +@@ -1728,22 +1714,22 @@ + { /* Last command was a Request Sense */ + pSRB->SRBFlag &= ~AUTO_REQSENSE; + pSRB->AdaptStatus = 0; +- pSRB->TargetStatus = CHECK_CONDITION << 1; ++ pSRB->TargetStatus = SAM_STAT_CHECK_CONDITION; + + //pcmd->result = MK_RES(DRIVER_SENSE,DID_OK,0,status); +- if (status == (CHECK_CONDITION << 1)) ++ if (status == SAM_STAT_CHECK_CONDITION) + pcmd->result = MK_RES_LNX(0, DID_BAD_TARGET, 0, /*CHECK_CONDITION*/0); + else /* Retry */ + { + if( pSRB->pcmd->cmnd[0] == TEST_UNIT_READY /* || pSRB->pcmd->cmnd[0] == START_STOP */) + { + /* Don't retry on TEST_UNIT_READY */ +- pcmd->result = MK_RES_LNX(DRIVER_SENSE,DID_OK,0,CHECK_CONDITION); ++ pcmd->result = MK_RES_LNX(DRIVER_SENSE, DID_OK, 0, SAM_STAT_CHECK_CONDITION); + REMOVABLEDEBUG(printk(KERN_INFO "Cmd=%02x, Result=%08x, XferL=%08x\n",pSRB->pcmd->cmnd[0],\ + (u32) pcmd->result, (u32) pSRB->TotalXferredLen)); + } else { + SET_RES_DRV(pcmd->result, DRIVER_SENSE); +- pcmd->use_sg = pSRB->SavedSGCount; ++ scsi_sg_count(pcmd) = pSRB->SavedSGCount; + //pSRB->ScsiCmdLen = (u8) (pSRB->Segment1[0] >> 8); + DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun)); + pSRB->TotalXferredLen = 0; +@@ -1754,7 +1740,7 @@ + } + if( status ) + { +- if( status_byte(status) == CHECK_CONDITION ) ++ if (status == SAM_STAT_CHECK_CONDITION) + { + if (dc390_RequestSense(pACB, pDCB, pSRB)) { + SET_RES_DID(pcmd->result, DID_ERROR); +@@ -1762,22 +1748,15 @@ + } + return; + } +- else if( status_byte(status) == QUEUE_FULL ) ++ else if (status == SAM_STAT_TASK_SET_FULL) + { + scsi_track_queue_full(pcmd->device, pDCB->GoingSRBCnt - 1); +- pcmd->use_sg = pSRB->SavedSGCount; ++ scsi_sg_count(pcmd) = pSRB->SavedSGCount; + DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun)); + pSRB->TotalXferredLen = 0; + SET_RES_DID(pcmd->result, DID_SOFT_ERROR); + } +- else if(status == SCSI_STAT_SEL_TIMEOUT) +- { +- pSRB->AdaptStatus = H_SEL_TIMEOUT; +- pSRB->TargetStatus = 0; +- pcmd->result = 
MK_RES(0,DID_NO_CONNECT,0,0); +- /* Devices are removed below ... */ +- } +- else if (status_byte(status) == BUSY && ++ else if (status == SAM_STAT_BUSY && + (pcmd->cmnd[0] == TEST_UNIT_READY || pcmd->cmnd[0] == INQUIRY) && + pACB->scan_devices) + { +@@ -1795,12 +1774,17 @@ + else + { /* Target status == 0 */ + status = pSRB->AdaptStatus; +- if(status & H_OVER_UNDER_RUN) ++ if (status == H_OVER_UNDER_RUN) + { + pSRB->TargetStatus = 0; + SET_RES_DID(pcmd->result,DID_OK); + SET_RES_MSG(pcmd->result,pSRB->EndMessage); + } ++ else if (status == H_SEL_TIMEOUT) ++ { ++ pcmd->result = MK_RES(0, DID_NO_CONNECT, 0, 0); ++ /* Devices are removed below ... */ ++ } + else if( pSRB->SRBStatus & PARITY_ERROR) + { + //pcmd->result = MK_RES(0,DID_PARITY,pSRB->EndMessage,0); +@@ -1816,7 +1800,7 @@ + } + + cmd_done: +- pcmd->resid = pcmd->request_bufflen - pSRB->TotalXferredLen; ++ scsi_set_resid(pcmd, scsi_bufflen(pcmd) - pSRB->TotalXferredLen); + + dc390_Going_remove (pDCB, pSRB); + /* Add to free list */ +diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.h linux-2.6.22-591/drivers/scsi/tmscsim.h +--- linux-2.6.22-570/drivers/scsi/tmscsim.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/tmscsim.h 2007-12-21 15:36:12.000000000 -0500 +@@ -258,13 +258,6 @@ + #define H_BAD_CCB_OR_SG 0x1A + #define H_ABORT 0x0FF + +-/*; SCSI Status byte codes*/ +-/* The values defined in include/scsi/scsi.h, to be shifted << 1 */ +- +-#define SCSI_STAT_UNEXP_BUS_F 0xFD /*; Unexpect Bus Free */ +-#define SCSI_STAT_BUS_RST_DETECT 0xFE /*; Scsi Bus Reset detected */ +-#define SCSI_STAT_SEL_TIMEOUT 0xFF /*; Selection Time out */ +- + /* cmd->result */ + #define RES_TARGET 0x000000FF /* Target State */ + #define RES_TARGET_LNX STATUS_MASK /* Only official ... */ +@@ -273,7 +266,7 @@ + #define RES_DRV 0xFF000000 /* DRIVER_ codes */ + + #define MK_RES(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)) +-#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)<<1) ++#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)) + + #define SET_RES_TARGET(who, tgt) do { who &= ~RES_TARGET; who |= (int)(tgt); } while (0) + #define SET_RES_TARGET_LNX(who, tgt) do { who &= ~RES_TARGET_LNX; who |= (int)(tgt) << 1; } while (0) +diff -Nurb linux-2.6.22-570/drivers/scsi/u14-34f.c linux-2.6.22-591/drivers/scsi/u14-34f.c +--- linux-2.6.22-570/drivers/scsi/u14-34f.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/u14-34f.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1111,7 +1111,7 @@ + static void map_dma(unsigned int i, unsigned int j) { + unsigned int data_len = 0; + unsigned int k, count, pci_dir; +- struct scatterlist *sgpnt; ++ struct scatterlist *sg; + struct mscp *cpp; + struct scsi_cmnd *SCpnt; + +@@ -1124,33 +1124,28 @@ + + cpp->sense_len = sizeof SCpnt->sense_buffer; + +- if (!SCpnt->use_sg) { +- +- /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */ +- if (!SCpnt->request_bufflen) pci_dir = PCI_DMA_BIDIRECTIONAL; +- +- if (SCpnt->request_buffer) +- cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, +- SCpnt->request_buffer, SCpnt->request_bufflen, pci_dir)); +- +- cpp->data_len = H2DEV(SCpnt->request_bufflen); +- return; +- } +- +- sgpnt = (struct scatterlist *) SCpnt->request_buffer; +- count = pci_map_sg(HD(j)->pdev, sgpnt, SCpnt->use_sg, pci_dir); +- +- for (k = 0; k < count; k++) { +- cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k])); +- 
cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k])); +- data_len += sgpnt[k].length; ++ if (scsi_bufflen(SCpnt)) { ++ count = scsi_dma_map(SCpnt); ++ BUG_ON(count < 0); ++ ++ scsi_for_each_sg(SCpnt, sg, count, k) { ++ cpp->sglist[k].address = H2DEV(sg_dma_address(sg)); ++ cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg)); ++ data_len += sg->length; + } + + cpp->sg = TRUE; +- cpp->use_sg = SCpnt->use_sg; +- cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist, +- SCpnt->use_sg * sizeof(struct sg_list), pci_dir)); ++ cpp->use_sg = scsi_sg_count(SCpnt); ++ cpp->data_address = ++ H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist, ++ cpp->use_sg * sizeof(struct sg_list), ++ pci_dir)); + cpp->data_len = H2DEV(data_len); ++ ++ } else { ++ pci_dir = PCI_DMA_BIDIRECTIONAL; ++ cpp->data_len = H2DEV(scsi_bufflen(SCpnt)); ++ } + } + + static void unmap_dma(unsigned int i, unsigned int j) { +@@ -1165,8 +1160,7 @@ + pci_unmap_single(HD(j)->pdev, DEV2H(cpp->sense_addr), + DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); + +- if (SCpnt->use_sg) +- pci_unmap_sg(HD(j)->pdev, SCpnt->request_buffer, SCpnt->use_sg, pci_dir); ++ scsi_dma_unmap(SCpnt); + + if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; + +@@ -1187,9 +1181,9 @@ + pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr), + DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE); + +- if (SCpnt->use_sg) +- pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer, +- SCpnt->use_sg, pci_dir); ++ if (scsi_sg_count(SCpnt)) ++ pci_dma_sync_sg_for_cpu(HD(j)->pdev, scsi_sglist(SCpnt), ++ scsi_sg_count(SCpnt), pci_dir); + + if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL; + +diff -Nurb linux-2.6.22-570/drivers/scsi/ultrastor.c linux-2.6.22-591/drivers/scsi/ultrastor.c +--- linux-2.6.22-570/drivers/scsi/ultrastor.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/ultrastor.c 2007-12-21 15:36:12.000000000 -0500 +@@ -675,16 +675,15 @@ + + static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt) + { +- struct scatterlist *sl; ++ struct scatterlist *sg; + long transfer_length = 0; + int i, max; + +- sl = (struct scatterlist *) SCpnt->request_buffer; +- max = SCpnt->use_sg; +- for (i = 0; i < max; i++) { +- mscp->sglist[i].address = isa_page_to_bus(sl[i].page) + sl[i].offset; +- mscp->sglist[i].num_bytes = sl[i].length; +- transfer_length += sl[i].length; ++ max = scsi_sg_count(SCpnt); ++ scsi_for_each_sg(SCpnt, sg, max, i) { ++ mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset; ++ mscp->sglist[i].num_bytes = sg->length; ++ transfer_length += sg->length; + } + mscp->number_of_sg_list = max; + mscp->transfer_data = isa_virt_to_bus(mscp->sglist); +@@ -730,15 +729,15 @@ + my_mscp->target_id = SCpnt->device->id; + my_mscp->ch_no = 0; + my_mscp->lun = SCpnt->device->lun; +- if (SCpnt->use_sg) { ++ if (scsi_sg_count(SCpnt)) { + /* Set scatter/gather flag in SCSI command packet */ + my_mscp->sg = TRUE; + build_sg_list(my_mscp, SCpnt); + } else { + /* Unset scatter/gather flag in SCSI command packet */ + my_mscp->sg = FALSE; +- my_mscp->transfer_data = isa_virt_to_bus(SCpnt->request_buffer); +- my_mscp->transfer_data_length = SCpnt->request_bufflen; ++ my_mscp->transfer_data = isa_virt_to_bus(scsi_sglist(SCpnt)); ++ my_mscp->transfer_data_length = scsi_bufflen(SCpnt); + } + my_mscp->command_link = 0; /*???*/ + my_mscp->scsi_command_link_id = 0; /*???*/ +diff -Nurb linux-2.6.22-570/drivers/scsi/wd7000.c linux-2.6.22-591/drivers/scsi/wd7000.c +--- linux-2.6.22-570/drivers/scsi/wd7000.c 
2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/scsi/wd7000.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1091,6 +1091,7 @@ + unchar *cdb = (unchar *) SCpnt->cmnd; + unchar idlun; + short cdblen; ++ int nseg; + Adapter *host = (Adapter *) SCpnt->device->host->hostdata; + + cdblen = SCpnt->cmd_len; +@@ -1106,28 +1107,29 @@ + SCpnt->host_scribble = (unchar *) scb; + scb->host = host; + +- if (SCpnt->use_sg) { +- struct scatterlist *sg = (struct scatterlist *) SCpnt->request_buffer; ++ nseg = scsi_sg_count(SCpnt); ++ if (nseg) { ++ struct scatterlist *sg; + unsigned i; + + if (SCpnt->device->host->sg_tablesize == SG_NONE) { + panic("wd7000_queuecommand: scatter/gather not supported.\n"); + } +- dprintk("Using scatter/gather with %d elements.\n", SCpnt->use_sg); ++ dprintk("Using scatter/gather with %d elements.\n", nseg); + + sgb = scb->sgb; + scb->op = 1; + any2scsi(scb->dataptr, (int) sgb); +- any2scsi(scb->maxlen, SCpnt->use_sg * sizeof(Sgb)); ++ any2scsi(scb->maxlen, nseg * sizeof(Sgb)); + +- for (i = 0; i < SCpnt->use_sg; i++) { +- any2scsi(sgb[i].ptr, isa_page_to_bus(sg[i].page) + sg[i].offset); +- any2scsi(sgb[i].len, sg[i].length); ++ scsi_for_each_sg(SCpnt, sg, nseg, i) { ++ any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset); ++ any2scsi(sgb[i].len, sg->length); + } + } else { + scb->op = 0; +- any2scsi(scb->dataptr, isa_virt_to_bus(SCpnt->request_buffer)); +- any2scsi(scb->maxlen, SCpnt->request_bufflen); ++ any2scsi(scb->dataptr, isa_virt_to_bus(scsi_sglist(SCpnt))); ++ any2scsi(scb->maxlen, scsi_bufflen(SCpnt)); + } + + /* FIXME: drop lock and yield here ? */ +diff -Nurb linux-2.6.22-570/drivers/scsi/zorro7xx.c linux-2.6.22-591/drivers/scsi/zorro7xx.c +--- linux-2.6.22-570/drivers/scsi/zorro7xx.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/scsi/zorro7xx.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,180 @@ ++/* ++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux. ++ * Amiga MacroSystemUS WarpEngine SCSI controller. ++ * Amiga Technologies/DKB A4091 SCSI controller. ++ * ++ * Written 1997 by Alan Hourihane ++ * plus modifications of the 53c7xx.c driver to support the Amiga. 
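The four driver conversions above (tmscsim, u14-34f, ultrastor, wd7000) all follow the same pattern: drop cmd->use_sg / request_buffer / request_bufflen and the open-coded pci_map_sg() calls in favour of the SCSI mid-layer DMA accessors, which also fold the old single-buffer special case into the scatter-gather path. A minimal sketch of the resulting shape, using the real scsi_dma_map() / scsi_dma_unmap() / scsi_for_each_sg() interfaces from this kernel series; build_hw_sgl() is a hypothetical stand-in for each driver's hardware SG-list writer, not part of the patch:

#include <scsi/scsi_cmnd.h>

/* hypothetical: writes one hardware SG descriptor; per-driver in reality */
static void build_hw_sgl(int idx, dma_addr_t addr, unsigned int len)
{
}

static int example_map_command(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(cmd);	/* maps scsi_sglist(cmd); 0 means no data */
	if (nseg < 0)
		return -EIO;		/* mapping failure */

	scsi_for_each_sg(cmd, sg, nseg, i)
		build_hw_sgl(i, sg_dma_address(sg), sg_dma_len(sg));

	return 0;
}

static void example_unmap_command(struct scsi_cmnd *cmd)
{
	scsi_dma_unmap(cmd);	/* safe no-op when nothing was mapped */
}

Note that scsi_dma_map() returns 0 for commands carrying no data and a negative value on failure, which is why the tmscsim hunk above replaces the old "if (!pSRB->SGcount)" test with "if (nseg < 0)".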
++ * ++ * Rewritten to use 53c700.c by Kars de Jong ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "53c700.h" ++ ++MODULE_AUTHOR("Alan Hourihane / Kars de Jong "); ++MODULE_DESCRIPTION("Amiga Zorro NCR53C710 driver"); ++MODULE_LICENSE("GPL"); ++ ++ ++static struct scsi_host_template zorro7xx_scsi_driver_template = { ++ .proc_name = "zorro7xx", ++ .this_id = 7, ++ .module = THIS_MODULE, ++}; ++ ++static struct zorro_driver_data { ++ const char *name; ++ unsigned long offset; ++ int absolute; /* offset is absolute address */ ++} zorro7xx_driver_data[] __devinitdata = { ++ { .name = "PowerUP 603e+", .offset = 0xf40000, .absolute = 1 }, ++ { .name = "WarpEngine 40xx", .offset = 0x40000 }, ++ { .name = "A4091", .offset = 0x800000 }, ++ { .name = "GForce 040/060", .offset = 0x40000 }, ++ { 0 } ++}; ++ ++static struct zorro_device_id zorro7xx_zorro_tbl[] __devinitdata = { ++ { ++ .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, ++ .driver_data = (unsigned long)&zorro7xx_driver_data[0], ++ }, ++ { ++ .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, ++ .driver_data = (unsigned long)&zorro7xx_driver_data[1], ++ }, ++ { ++ .id = ZORRO_PROD_CBM_A4091_1, ++ .driver_data = (unsigned long)&zorro7xx_driver_data[2], ++ }, ++ { ++ .id = ZORRO_PROD_CBM_A4091_2, ++ .driver_data = (unsigned long)&zorro7xx_driver_data[2], ++ }, ++ { ++ .id = ZORRO_PROD_GVP_GFORCE_040_060, ++ .driver_data = (unsigned long)&zorro7xx_driver_data[3], ++ }, ++ { 0 } ++}; ++ ++static int __devinit zorro7xx_init_one(struct zorro_dev *z, ++ const struct zorro_device_id *ent) ++{ ++ struct Scsi_Host * host = NULL; ++ struct NCR_700_Host_Parameters *hostdata; ++ struct zorro_driver_data *zdd; ++ unsigned long board, ioaddr; ++ ++ board = zorro_resource_start(z); ++ zdd = (struct zorro_driver_data *)ent->driver_data; ++ ++ if (zdd->absolute) { ++ ioaddr = zdd->offset; ++ } else { ++ ioaddr = board + zdd->offset; ++ } ++ ++ if (!zorro_request_device(z, zdd->name)) { ++ printk(KERN_ERR "zorro7xx: cannot reserve region 0x%lx, abort\n", ++ board); ++ return -EBUSY; ++ } ++ ++ hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL); ++ if (hostdata == NULL) { ++ printk(KERN_ERR "zorro7xx: Failed to allocate host data\n"); ++ goto out_release; ++ } ++ ++ memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters)); ++ ++ /* Fill in the required pieces of hostdata */ ++ if (ioaddr > 0x01000000) ++ hostdata->base = ioremap(ioaddr, zorro_resource_len(z)); ++ else ++ hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr); ++ ++ hostdata->clock = 50; ++ hostdata->chip710 = 1; ++ ++ /* Settings for at least WarpEngine 40xx */ ++ hostdata->ctest7_extra = CTEST7_TT1; ++ ++ zorro7xx_scsi_driver_template.name = zdd->name; ++ ++ /* and register the chip */ ++ host = NCR_700_detect(&zorro7xx_scsi_driver_template, hostdata, ++ &z->dev); ++ if (!host) { ++ printk(KERN_ERR "zorro7xx: No host detected; " ++ "board configuration problem?\n"); ++ goto out_free; ++ } ++ ++ host->this_id = 7; ++ host->base = ioaddr; ++ host->irq = IRQ_AMIGA_PORTS; ++ ++ if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "zorro7xx-scsi", ++ host)) { ++ printk(KERN_ERR "zorro7xx: request_irq failed\n"); ++ goto out_put_host; ++ } ++ ++ scsi_scan_host(host); ++ ++ return 0; ++ ++ out_put_host: ++ scsi_host_put(host); ++ out_free: ++ if (ioaddr > 0x01000000) ++ iounmap(hostdata->base); ++ kfree(hostdata); ++ out_release: ++ zorro_release_device(z); ++ ++ return -ENODEV; ++} ++ ++static __devexit void 
zorro7xx_remove_one(struct zorro_dev *z) ++{ ++ struct Scsi_Host *host = dev_to_shost(&z->dev); ++ struct NCR_700_Host_Parameters *hostdata = shost_priv(host); ++ ++ scsi_remove_host(host); ++ ++ NCR_700_release(host); ++ kfree(hostdata); ++ free_irq(host->irq, host); ++ zorro_release_device(z); ++} ++ ++static struct zorro_driver zorro7xx_driver = { ++ .name = "zorro7xx-scsi", ++ .id_table = zorro7xx_zorro_tbl, ++ .probe = zorro7xx_init_one, ++ .remove = __devexit_p(zorro7xx_remove_one), ++}; ++ ++static int __init zorro7xx_scsi_init(void) ++{ ++ return zorro_register_driver(&zorro7xx_driver); ++} ++ ++static void __exit zorro7xx_scsi_exit(void) ++{ ++ zorro_unregister_driver(&zorro7xx_driver); ++} ++ ++module_init(zorro7xx_scsi_init); ++module_exit(zorro7xx_scsi_exit); +diff -Nurb linux-2.6.22-570/drivers/serial/8250.c linux-2.6.22-591/drivers/serial/8250.c +--- linux-2.6.22-570/drivers/serial/8250.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/8250.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2845,6 +2845,25 @@ + } + EXPORT_SYMBOL(serial8250_unregister_port); + ++/** ++ * serial8250_unregister_by_port - remove a 16x50 serial port ++ * at runtime. ++ * @port: A &struct uart_port that describes the port to remove. ++ * ++ * Remove one serial port. This may not be called from interrupt ++ * context. We hand the port back to the our control. ++ */ ++void serial8250_unregister_by_port(struct uart_port *port) ++{ ++ struct uart_8250_port *uart; ++ ++ uart = serial8250_find_match_or_unused(port); ++ ++ if (uart) ++ serial8250_unregister_port(uart->port.line); ++} ++EXPORT_SYMBOL(serial8250_unregister_by_port); ++ + static int __init serial8250_init(void) + { + int ret, i; +diff -Nurb linux-2.6.22-570/drivers/serial/8250_kgdb.c linux-2.6.22-591/drivers/serial/8250_kgdb.c +--- linux-2.6.22-570/drivers/serial/8250_kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/8250_kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,515 @@ ++/* ++ * 8250 interface for kgdb. ++ * ++ * This is a merging of many different drivers, and all of the people have ++ * had an impact in some form or another: ++ * ++ * 2004-2005 (c) MontaVista Software, Inc. ++ * 2005-2006 (c) Wind River Systems, Inc. ++ * ++ * Amit Kale , David Grothe , ++ * Scott Foehner , George Anzinger , ++ * Robert Walsh , wangdi , ++ * San Mehat, Tom Rini , ++ * Jason Wessel ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include /* For BASE_BAUD and SERIAL_PORT_DFNS */ ++ ++#include "8250.h" ++ ++#define GDB_BUF_SIZE 512 /* power of 2, please */ ++ ++MODULE_DESCRIPTION("KGDB driver for the 8250"); ++MODULE_LICENSE("GPL"); ++/* These will conflict with early_param otherwise. */ ++#ifdef CONFIG_KGDB_8250_MODULE ++static char config[256]; ++module_param_string(kgdb8250, config, 256, 0); ++MODULE_PARM_DESC(kgdb8250, ++ " kgdb8250=,
,,\n"); ++static struct kgdb_io local_kgdb_io_ops; ++#endif /* CONFIG_KGDB_8250_MODULE */ ++ ++/* Speed of the UART. */ ++static int kgdb8250_baud; ++ ++/* Flag for if we need to call request_mem_region */ ++static int kgdb8250_needs_request_mem_region; ++ ++static char kgdb8250_buf[GDB_BUF_SIZE]; ++static atomic_t kgdb8250_buf_in_cnt; ++static int kgdb8250_buf_out_inx; ++ ++/* Old-style serial definitions, if existant, and a counter. */ ++#ifdef CONFIG_KGDB_SIMPLE_SERIAL ++static int __initdata should_copy_rs_table = 1; ++static struct serial_state old_rs_table[] __initdata = { ++#ifdef SERIAL_PORT_DFNS ++ SERIAL_PORT_DFNS ++#endif ++}; ++#endif ++ ++/* Our internal table of UARTS. */ ++#define UART_NR CONFIG_SERIAL_8250_NR_UARTS ++static struct uart_port kgdb8250_ports[UART_NR]; ++ ++static struct uart_port *current_port; ++ ++/* Base of the UART. */ ++static void *kgdb8250_addr; ++ ++/* Forward declarations. */ ++static int kgdb8250_uart_init(void); ++static int __init kgdb_init_io(void); ++static int __init kgdb8250_opt(char *str); ++ ++/* These are much shorter calls to ioread8/iowrite8 that take into ++ * account our shifts, etc. */ ++static inline unsigned int kgdb_ioread(u8 mask) ++{ ++ return ioread8(kgdb8250_addr + (mask << current_port->regshift)); ++} ++ ++static inline void kgdb_iowrite(u8 val, u8 mask) ++{ ++ iowrite8(val, kgdb8250_addr + (mask << current_port->regshift)); ++} ++ ++/* ++ * Wait until the interface can accept a char, then write it. ++ */ ++static void kgdb_put_debug_char(u8 chr) ++{ ++ while (!(kgdb_ioread(UART_LSR) & UART_LSR_THRE)) ; ++ ++ kgdb_iowrite(chr, UART_TX); ++} ++ ++/* ++ * Get a byte from the hardware data buffer and return it ++ */ ++static int read_data_bfr(void) ++{ ++ char it = kgdb_ioread(UART_LSR); ++ ++ if (it & UART_LSR_DR) ++ return kgdb_ioread(UART_RX); ++ ++ /* ++ * If we have a framing error assume somebody messed with ++ * our uart. Reprogram it and send '-' both ways... ++ */ ++ if (it & 0xc) { ++ kgdb8250_uart_init(); ++ kgdb_put_debug_char('-'); ++ return '-'; ++ } ++ ++ return -1; ++} ++ ++/* ++ * Get a char if available, return -1 if nothing available. ++ * Empty the receive buffer first, then look at the interface hardware. ++ */ ++static int kgdb_get_debug_char(void) ++{ ++ int retchr; ++ ++ /* intr routine has q'd chars */ ++ if (atomic_read(&kgdb8250_buf_in_cnt) != 0) { ++ retchr = kgdb8250_buf[kgdb8250_buf_out_inx++]; ++ kgdb8250_buf_out_inx &= (GDB_BUF_SIZE - 1); ++ atomic_dec(&kgdb8250_buf_in_cnt); ++ return retchr; ++ } ++ ++ do { ++ retchr = read_data_bfr(); ++ } while (retchr < 0); ++ ++ return retchr; ++} ++ ++/* ++ * This is the receiver interrupt routine for the GDB stub. ++ * All that we need to do is verify that the interrupt happened on the ++ * line we're in charge of. If this is true, schedule a breakpoint and ++ * return. ++ */ ++static irqreturn_t ++kgdb8250_interrupt(int irq, void *dev_id) ++{ ++ if (kgdb_ioread(UART_IIR) & UART_IIR_RDI) { ++ /* Throw away the data if another I/O routine is active. */ ++ if (kgdb_io_ops.read_char != kgdb_get_debug_char && ++ (kgdb_ioread(UART_LSR) & UART_LSR_DR)) ++ kgdb_ioread(UART_RX); ++ else ++ breakpoint(); ++ } ++ ++ return IRQ_HANDLED; ++} ++ ++/* ++ * Initializes the UART. ++ * Returns: ++ * 0 on success, 1 on failure. ++ */ ++static int ++kgdb8250_uart_init (void) ++{ ++ unsigned int ier, base_baud = current_port->uartclk ? 
++ current_port->uartclk / 16 : BASE_BAUD; ++ ++ /* test uart existance */ ++ if(kgdb_ioread(UART_LSR) == 0xff) ++ return -1; ++ ++ /* disable interrupts */ ++ kgdb_iowrite(0, UART_IER); ++ ++#if defined(CONFIG_ARCH_OMAP1510) ++ /* Workaround to enable 115200 baud on OMAP1510 internal ports */ ++ if (cpu_is_omap1510() && is_omap_port((void *)kgdb8250_addr)) { ++ if (kgdb8250_baud == 115200) { ++ base_baud = 1; ++ kgdb8250_baud = 1; ++ kgdb_iowrite(1, UART_OMAP_OSC_12M_SEL); ++ } else ++ kgdb_iowrite(0, UART_OMAP_OSC_12M_SEL); ++ } ++#endif ++ /* set DLAB */ ++ kgdb_iowrite(UART_LCR_DLAB, UART_LCR); ++ ++ /* set baud */ ++ kgdb_iowrite((base_baud / kgdb8250_baud) & 0xff, UART_DLL); ++ kgdb_iowrite((base_baud / kgdb8250_baud) >> 8, UART_DLM); ++ ++ /* reset DLAB, set LCR */ ++ kgdb_iowrite(UART_LCR_WLEN8, UART_LCR); ++ ++ /* set DTR and RTS */ ++ kgdb_iowrite(UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS, UART_MCR); ++ ++ /* setup fifo */ ++ kgdb_iowrite(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR ++ | UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_8, ++ UART_FCR); ++ ++ /* clear pending interrupts */ ++ kgdb_ioread(UART_IIR); ++ kgdb_ioread(UART_RX); ++ kgdb_ioread(UART_LSR); ++ kgdb_ioread(UART_MSR); ++ ++ /* turn on RX interrupt only */ ++ kgdb_iowrite(UART_IER_RDI, UART_IER); ++ ++ /* ++ * Borrowed from the main 8250 driver. ++ * Try writing and reading the UART_IER_UUE bit (b6). ++ * If it works, this is probably one of the Xscale platform's ++ * internal UARTs. ++ * We're going to explicitly set the UUE bit to 0 before ++ * trying to write and read a 1 just to make sure it's not ++ * already a 1 and maybe locked there before we even start start. ++ */ ++ ier = kgdb_ioread(UART_IER); ++ kgdb_iowrite(ier & ~UART_IER_UUE, UART_IER); ++ if (!(kgdb_ioread(UART_IER) & UART_IER_UUE)) { ++ /* ++ * OK it's in a known zero state, try writing and reading ++ * without disturbing the current state of the other bits. ++ */ ++ kgdb_iowrite(ier | UART_IER_UUE, UART_IER); ++ if (kgdb_ioread(UART_IER) & UART_IER_UUE) ++ /* ++ * It's an Xscale. ++ */ ++ ier |= UART_IER_UUE | UART_IER_RTOIE; ++ } ++ kgdb_iowrite(ier, UART_IER); ++ return 0; ++} ++ ++/* ++ * Copy the old serial_state table to our uart_port table if we haven't ++ * had values specifically configured in. We need to make sure this only ++ * happens once. ++ */ ++static void __init kgdb8250_copy_rs_table(void) ++{ ++#ifdef CONFIG_KGDB_SIMPLE_SERIAL ++ int i; ++ ++ if (!should_copy_rs_table) ++ return; ++ ++ for (i = 0; i < ARRAY_SIZE(old_rs_table); i++) { ++ kgdb8250_ports[i].iobase = old_rs_table[i].port; ++ kgdb8250_ports[i].irq = irq_canonicalize(old_rs_table[i].irq); ++ kgdb8250_ports[i].uartclk = old_rs_table[i].baud_base * 16; ++ kgdb8250_ports[i].membase = old_rs_table[i].iomem_base; ++ kgdb8250_ports[i].iotype = old_rs_table[i].io_type; ++ kgdb8250_ports[i].regshift = old_rs_table[i].iomem_reg_shift; ++ kgdb8250_ports[i].line = i; ++ } ++ ++ should_copy_rs_table = 0; ++#endif ++} ++ ++/* ++ * Hookup our IRQ line now that it is safe to do so, after we grab any ++ * memory regions we might need to. If we haven't been initialized yet, ++ * go ahead and copy the old_rs_table in. ++ */ ++static void __init kgdb8250_late_init(void) ++{ ++ /* Try and copy the old_rs_table. */ ++ kgdb8250_copy_rs_table(); ++ ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE) ++ /* Take the port away from the main driver. */ ++ serial8250_unregister_by_port(current_port); ++ ++ /* Now reinit the port as the above has disabled things. 
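For reference, the register programming in kgdb8250_uart_init() above reduces to the classic 16x divisor formula. A sketch of just that arithmetic, reusing the file's kgdb_iowrite() helper and the UART_* constants from linux/serial_reg.h; the 1.8432 MHz clock mentioned in the comment is the conventional PC value, assumed here for illustration only:

static void example_set_divisor(unsigned int uartclk, unsigned int baud)
{
	unsigned int base_baud = uartclk ? uartclk / 16 : BASE_BAUD;
	unsigned int divisor = base_baud / baud;

	/*
	 * With a 1.8432 MHz clock, base_baud = 1843200 / 16 = 115200,
	 * so 9600 baud yields divisor 12 (DLL = 0x0c, DLM = 0x00).
	 */
	kgdb_iowrite(UART_LCR_DLAB, UART_LCR);	/* expose the divisor latch */
	kgdb_iowrite(divisor & 0xff, UART_DLL);	/* low byte */
	kgdb_iowrite(divisor >> 8, UART_DLM);	/* high byte */
	kgdb_iowrite(UART_LCR_WLEN8, UART_LCR);	/* back to data regs, 8n1 */
}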
*/ ++ kgdb8250_uart_init(); ++#endif ++ /* We may need to call request_mem_region() first. */ ++ if (kgdb8250_needs_request_mem_region) ++ request_mem_region(current_port->mapbase, ++ 8 << current_port->regshift, "kgdb"); ++ if (request_irq(current_port->irq, kgdb8250_interrupt, SA_SHIRQ, ++ "GDB-stub", current_port) < 0) ++ printk(KERN_ERR "KGDB failed to request the serial IRQ (%d)\n", ++ current_port->irq); ++} ++ ++static __init int kgdb_init_io(void) ++{ ++ /* Give us the basic table of uarts. */ ++ kgdb8250_copy_rs_table(); ++ ++ /* We're either a module and parse a config string, or we have a ++ * semi-static config. */ ++#ifdef CONFIG_KGDB_8250_MODULE ++ if (strlen(config)) { ++ if (kgdb8250_opt(config)) ++ return -EINVAL; ++ } else { ++ printk(KERN_ERR "kgdb8250: argument error, usage: " ++ "kgdb8250=,
,,\n"); ++ return -EINVAL; ++ } ++#elif defined(CONFIG_KGDB_SIMPLE_SERIAL) ++ kgdb8250_baud = CONFIG_KGDB_BAUDRATE; ++ ++ /* Setup our pointer to the serial port now. */ ++ current_port = &kgdb8250_ports[CONFIG_KGDB_PORT_NUM]; ++#else ++ if (kgdb8250_opt(CONFIG_KGDB_8250_CONF_STRING)) ++ return -EINVAL; ++#endif ++ ++ ++ /* Internal driver setup. */ ++ switch (current_port->iotype) { ++ case UPIO_MEM: ++ if (current_port->mapbase) ++ kgdb8250_needs_request_mem_region = 1; ++ if (current_port->flags & UPF_IOREMAP) { ++ current_port->membase = ioremap(current_port->mapbase, ++ 8 << current_port->regshift); ++ if (!current_port->membase) ++ return -EIO; /* Failed. */ ++ } ++ kgdb8250_addr = current_port->membase; ++ break; ++ case UPIO_PORT: ++ default: ++ kgdb8250_addr = ioport_map(current_port->iobase, ++ 8 << current_port->regshift); ++ if (!kgdb8250_addr) ++ return -EIO; /* Failed. */ ++ } ++ ++ if (kgdb8250_uart_init() == -1) { ++ printk(KERN_ERR "kgdb8250: init failed\n"); ++ return -EIO; ++ } ++#ifdef CONFIG_KGDB_8250_MODULE ++ /* Attach the kgdb irq. When this is built into the kernel, it ++ * is called as a part of late_init sequence. ++ */ ++ kgdb8250_late_init(); ++ if (kgdb_register_io_module(&local_kgdb_io_ops)) ++ return -EINVAL; ++ ++ printk(KERN_INFO "kgdb8250: debugging enabled\n"); ++#endif /* CONFIG_KGD_8250_MODULE */ ++ ++ return 0; ++} ++ ++#ifdef CONFIG_KGDB_8250_MODULE ++/* If it is a module the kgdb_io_ops should be a static which ++ * is passed to the KGDB I/O initialization ++ */ ++static struct kgdb_io local_kgdb_io_ops = { ++#else /* ! CONFIG_KGDB_8250_MODULE */ ++struct kgdb_io kgdb_io_ops = { ++#endif /* ! CONFIG_KGD_8250_MODULE */ ++ .read_char = kgdb_get_debug_char, ++ .write_char = kgdb_put_debug_char, ++ .init = kgdb_init_io, ++ .late_init = kgdb8250_late_init, ++}; ++ ++/** ++ * kgdb8250_add_port - Define a serial port for use with KGDB ++ * @i: The index of the port being added ++ * @serial_req: The &struct uart_port describing the port ++ * ++ * On platforms where we must register the serial device ++ * dynamically, this is the best option if a platform also normally ++ * calls early_serial_setup(). ++ */ ++void __init kgdb8250_add_port(int i, struct uart_port *serial_req) ++{ ++ /* Make sure we've got the built-in data before we override. */ ++ kgdb8250_copy_rs_table(); ++ ++ /* Copy the whole thing over. */ ++ if (current_port != &kgdb8250_ports[i]) ++ memcpy(&kgdb8250_ports[i], serial_req, sizeof(struct uart_port)); ++} ++ ++/** ++ * kgdb8250_add_platform_port - Define a serial port for use with KGDB ++ * @i: The index of the port being added ++ * @p: The &struct plat_serial8250_port describing the port ++ * ++ * On platforms where we must register the serial device ++ * dynamically, this is the best option if a platform normally ++ * handles uart setup with an array of &struct plat_serial8250_port. ++ */ ++void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *p) ++{ ++ /* Make sure we've got the built-in data before we override. */ ++ kgdb8250_copy_rs_table(); ++ ++ kgdb8250_ports[i].iobase = p->iobase; ++ kgdb8250_ports[i].membase = p->membase; ++ kgdb8250_ports[i].irq = p->irq; ++ kgdb8250_ports[i].uartclk = p->uartclk; ++ kgdb8250_ports[i].regshift = p->regshift; ++ kgdb8250_ports[i].iotype = p->iotype; ++ kgdb8250_ports[i].flags = p->flags; ++ kgdb8250_ports[i].mapbase = p->mapbase; ++} ++ ++/* ++ * Syntax for this cmdline option is: ++ * kgdb8250=,
,," ++ */ ++static int __init kgdb8250_opt(char *str) ++{ ++ /* We'll fill out and use the first slot. */ ++ current_port = &kgdb8250_ports[0]; ++ ++ if (!strncmp(str, "io", 2)) { ++ current_port->iotype = UPIO_PORT; ++ str += 2; ++ } else if (!strncmp(str, "mmap", 4)) { ++ current_port->iotype = UPIO_MEM; ++ current_port->flags |= UPF_IOREMAP; ++ str += 4; ++ } else if (!strncmp(str, "mmio", 4)) { ++ current_port->iotype = UPIO_MEM; ++ current_port->flags &= ~UPF_IOREMAP; ++ str += 4; ++ } else ++ goto errout; ++ ++ if (*str != ',') ++ goto errout; ++ str++; ++ ++ if (current_port->iotype == UPIO_PORT) ++ current_port->iobase = simple_strtoul(str, &str, 16); ++ else { ++ if (current_port->flags & UPF_IOREMAP) ++ current_port->mapbase = ++ (unsigned long) simple_strtoul(str, &str, 16); ++ else ++ current_port->membase = ++ (void *) simple_strtoul(str, &str, 16); ++ } ++ ++ if (*str != ',') ++ goto errout; ++ str++; ++ ++ kgdb8250_baud = simple_strtoul(str, &str, 10); ++ if (!kgdb8250_baud) ++ goto errout; ++ ++ if (*str != ',') ++ goto errout; ++ str++; ++ ++ current_port->irq = simple_strtoul(str, &str, 10); ++ ++#ifdef CONFIG_KGDB_SIMPLE_SERIAL ++ should_copy_rs_table = 0; ++#endif ++ ++ return 0; ++ ++ errout: ++ printk(KERN_ERR "Invalid syntax for option kgdb8250=\n"); ++ return 1; ++} ++ ++#ifdef CONFIG_KGDB_8250_MODULE ++static void cleanup_kgdb8250(void) ++{ ++ kgdb_unregister_io_module(&local_kgdb_io_ops); ++ ++ /* Clean up the irq and memory */ ++ free_irq(current_port->irq, current_port); ++ ++ if (kgdb8250_needs_request_mem_region) ++ release_mem_region(current_port->mapbase, ++ 8 << current_port->regshift); ++ /* Hook up the serial port back to what it was previously ++ * hooked up to. ++ */ ++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE) ++ /* Give the port back to the 8250 driver. */ ++ serial8250_register_port(current_port); ++#endif ++} ++ ++module_init(kgdb_init_io); ++module_exit(cleanup_kgdb8250); ++#else /* ! CONFIG_KGDB_8250_MODULE */ ++early_param("kgdb8250", kgdb8250_opt); ++#endif /* ! 
CONFIG_KGDB_8250_MODULE */ +diff -Nurb linux-2.6.22-570/drivers/serial/Kconfig linux-2.6.22-591/drivers/serial/Kconfig +--- linux-2.6.22-570/drivers/serial/Kconfig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -107,7 +107,7 @@ + + config SERIAL_8250_NR_UARTS + int "Maximum number of 8250/16550 serial ports" +- depends on SERIAL_8250 ++ depends on SERIAL_8250 || KGDB_8250 + default "4" + help + Set this to the number of serial ports you want the driver +diff -Nurb linux-2.6.22-570/drivers/serial/Makefile linux-2.6.22-591/drivers/serial/Makefile +--- linux-2.6.22-570/drivers/serial/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -23,6 +23,7 @@ + obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o + obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o + obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o ++obj-$(CONFIG_KGDB_AMBA_PL011) += pl011_kgdb.o + obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o + obj-$(CONFIG_SERIAL_PXA) += pxa.o + obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o +@@ -50,10 +51,12 @@ + obj-$(CONFIG_SERIAL_MPC52xx) += mpc52xx_uart.o + obj-$(CONFIG_SERIAL_ICOM) += icom.o + obj-$(CONFIG_SERIAL_M32R_SIO) += m32r_sio.o ++obj-$(CONFIG_KGDB_MPSC) += mpsc_kgdb.o + obj-$(CONFIG_SERIAL_MPSC) += mpsc.o + obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o + obj-$(CONFIG_SERIAL_JSM) += jsm/ + obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o ++obj-$(CONFIG_KGDB_TXX9) += serial_txx9_kgdb.o + obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o + obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o + obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o +@@ -62,3 +65,4 @@ + obj-$(CONFIG_SERIAL_NETX) += netx-serial.o + obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o + obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o ++obj-$(CONFIG_KGDB_8250) += 8250_kgdb.o +diff -Nurb linux-2.6.22-570/drivers/serial/amba-pl011.c linux-2.6.22-591/drivers/serial/amba-pl011.c +--- linux-2.6.22-570/drivers/serial/amba-pl011.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/amba-pl011.c 2007-12-21 15:36:12.000000000 -0500 +@@ -332,7 +332,7 @@ + /* + * Allocate the IRQ + */ +- retval = request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap); ++ retval = request_irq(uap->port.irq, pl011_int, SA_SHIRQ, "uart-pl011", uap); + if (retval) + goto clk_dis; + +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/Makefile linux-2.6.22-591/drivers/serial/cpm_uart/Makefile +--- linux-2.6.22-570/drivers/serial/cpm_uart/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -7,5 +7,6 @@ + # Select the correct platform objects. + cpm_uart-objs-$(CONFIG_CPM2) += cpm_uart_cpm2.o + cpm_uart-objs-$(CONFIG_8xx) += cpm_uart_cpm1.o ++cpm_uart-objs-$(CONFIG_KGDB_CPM_UART) += cpm_uart_kgdb.o + + cpm_uart-objs := cpm_uart_core.o $(cpm_uart-objs-y) +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h +--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart.h 2007-12-21 15:36:12.000000000 -0500 +@@ -50,6 +50,41 @@ + + #define SCC_WAIT_CLOSING 100 + ++#ifdef CONFIG_KGDB_CPM_UART ++ ++/* Speed of the debug UART. 
*/ ++#if CONFIG_KGDB_BAUDRATE == 9600 ++#define KGDB_BAUD B9600 ++#elif CONFIG_KGDB_BAUDRATE == 19200 ++#define KGDB_BAUD B19200 ++#elif CONFIG_KGDB_BAUDRATE == 38400 ++#define KGDB_BAUD B38400 ++#elif CONFIG_KGDB_BAUDRATE == 57600 ++#define KGDB_BAUD B57600 ++#elif CONFIG_KGDB_BAUDRATE == 115200 ++#define KGDB_BAUD B115200 /* Start with this if not given */ ++#else ++#error Unsupported baud rate! ++#endif ++ ++#if defined(CONFIG_KGDB_CPM_UART_SCC1) ++#define KGDB_PINFO_INDEX UART_SCC1 ++#elif defined(CONFIG_KGDB_CPM_UART_SCC2) ++#define KGDB_PINFO_INDEX UART_SCC2 ++#elif defined(CONFIG_KGDB_CPM_UART_SCC3) ++#define KGDB_PINFO_INDEX UART_SCC3 ++#elif defined(CONFIG_KGDB_CPM_UART_SCC4) ++#define KGDB_PINFO_INDEX UART_SCC4 ++#elif defined(CONFIG_KGDB_CPM_UART_SMC1) ++#define KGDB_PINFO_INDEX UART_SMC1 ++#elif defined(CONFIG_KGDB_CPM_UART_SMC2) ++#define KGDB_PINFO_INDEX UART_SMC2 ++#else ++#error The port for KGDB is undefined! ++#endif ++ ++#endif /* CONFIG_KGDB_CPM_UART */ ++ + struct uart_cpm_port { + struct uart_port port; + u16 rx_nrfifos; +@@ -86,6 +121,9 @@ + extern int cpm_uart_nr; + extern struct uart_cpm_port cpm_uart_ports[UART_NR]; + ++void cpm_uart_early_write(int index, const char *s, u_int count); ++int cpm_uart_early_setup(int index,int early); ++ + /* these are located in their respective files */ + void cpm_line_cr_cmd(int line, int cmd); + int cpm_uart_init_portdesc(void); +@@ -132,5 +170,4 @@ + return 0; + } + +- + #endif /* CPM_UART_H */ +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c +--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1073,22 +1073,17 @@ + return 0; + } + +-#ifdef CONFIG_SERIAL_CPM_CONSOLE +-/* +- * Print a string to the serial port trying not to disturb +- * any possible real use of the port... +- * +- * Note that this is called with interrupts already disabled +- */ +-static void cpm_uart_console_write(struct console *co, const char *s, ++void cpm_uart_early_write(int index, const char *s, + u_int count) + { +- struct uart_cpm_port *pinfo = +- &cpm_uart_ports[cpm_uart_port_map[co->index]]; ++ struct uart_cpm_port *pinfo; + unsigned int i; + volatile cbd_t *bdp, *bdbase; + volatile unsigned char *cp; + ++ BUG_ON(index>UART_NR); ++ pinfo = &cpm_uart_ports[index]; ++ + /* Get the address of the host memory buffer. 
+ */ + bdp = pinfo->tx_cur; +@@ -1152,19 +1147,14 @@ + pinfo->tx_cur = (volatile cbd_t *) bdp; + } + +- +-static int __init cpm_uart_console_setup(struct console *co, char *options) ++int cpm_uart_early_setup(int index, int early) + { ++ int ret; + struct uart_port *port; + struct uart_cpm_port *pinfo; +- int baud = 38400; +- int bits = 8; +- int parity = 'n'; +- int flow = 'n'; +- int ret; + + struct fs_uart_platform_info *pdata; +- struct platform_device* pdev = early_uart_get_pdev(co->index); ++ struct platform_device* pdev = early_uart_get_pdev(index); + + if (!pdev) { + pr_info("cpm_uart: console: compat mode\n"); +@@ -1172,8 +1162,9 @@ + cpm_uart_init_portdesc(); + } + ++ BUG_ON(index>UART_NR); + port = +- (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]]; ++ (struct uart_port *)&cpm_uart_ports[index]; + pinfo = (struct uart_cpm_port *)port; + if (!pdev) { + if (pinfo->set_lineif) +@@ -1187,15 +1178,6 @@ + cpm_uart_drv_get_platform_data(pdev, 1); + } + +- pinfo->flags |= FLAG_CONSOLE; +- +- if (options) { +- uart_parse_options(options, &baud, &parity, &bits, &flow); +- } else { +- if ((baud = uart_baudrate()) == -1) +- baud = 9600; +- } +- + if (IS_SMC(pinfo)) { + pinfo->smcp->smc_smcm &= ~(SMCM_RX | SMCM_TX); + pinfo->smcp->smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); +@@ -1203,8 +1185,7 @@ + pinfo->sccp->scc_sccm &= ~(UART_SCCM_TX | UART_SCCM_RX); + pinfo->sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT); + } +- +- ret = cpm_uart_allocbuf(pinfo, 1); ++ ret = cpm_uart_allocbuf(pinfo, early); + + if (ret) + return ret; +@@ -1216,6 +1197,62 @@ + else + cpm_uart_init_scc(pinfo); + ++ return 0; ++} ++ ++#ifdef CONFIG_SERIAL_CPM_CONSOLE ++/* ++ * Print a string to the serial port trying not to disturb ++ * any possible real use of the port... ++ * ++ * Note that this is called with interrupts already disabled ++ */ ++ ++static void cpm_uart_console_write(struct console *co, const char *s, ++ u_int count) ++{ ++ cpm_uart_early_write(cpm_uart_port_map[co->index],s,count); ++} ++ ++/* ++ * Setup console. Be careful is called early ! 
++ */ ++static int __init cpm_uart_console_setup(struct console *co, char *options) ++{ ++ struct uart_port *port; ++ struct uart_cpm_port *pinfo; ++ int baud = 115200; ++ int bits = 8; ++ int parity = 'n'; ++ int flow = 'n'; ++ int ret; ++ ++#ifdef CONFIG_KGDB_CPM_UART ++ /* We are not interested in ports yet utilized by kgdb */ ++ if (co->index == KGDB_PINFO_INDEX) ++ return 0; ++#endif ++ ++ port = ++ (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]]; ++ pinfo = (struct uart_cpm_port *)port; ++ ++ pinfo->flags |= FLAG_CONSOLE; ++ ++ if (options) { ++ uart_parse_options(options, &baud, &parity, &bits, &flow); ++ } else { ++ bd_t *bd = (bd_t *) __res; ++ ++ if (bd->bi_baudrate) ++ baud = bd->bi_baudrate; ++ else ++ baud = 9600; ++ } ++ ++ ret = cpm_uart_early_setup(cpm_uart_port_map[co->index], 1); ++ if(ret) ++ return ret; + uart_set_options(port, co, baud, parity, bits, flow); + + return 0; +@@ -1266,6 +1303,12 @@ + + pdata = pdev->dev.platform_data; + ++#ifdef CONFIG_KGDB_CPM_UART ++ /* We are not interested in ports yet utilized by kgdb */ ++ if (cpm_uart_id2nr(fs_uart_get_id(pdata)) == KGDB_PINFO_INDEX) ++ return ret; ++#endif ++ + if ((ret = cpm_uart_drv_get_platform_data(pdev, 0))) + return ret; + +@@ -1363,6 +1406,12 @@ + + for (i = 0; i < cpm_uart_nr; i++) { + int con = cpm_uart_port_map[i]; ++ ++#ifdef CONFIG_KGDB_CPM_UART ++ /* We are not interested in ports yet utilized by kgdb */ ++ if (con == KGDB_PINFO_INDEX) ++ continue; ++#endif + cpm_uart_ports[con].port.line = i; + cpm_uart_ports[con].port.flags = UPF_BOOT_AUTOCONF; + if (cpm_uart_ports[con].set_lineif) +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c +--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm1.c 2007-12-21 15:36:12.000000000 -0500 +@@ -53,6 +53,7 @@ + { + ushort val; + volatile cpm8xx_t *cp = cpmp; ++ unsigned *bcsr_io; + + switch (line) { + case UART_SMC1: +@@ -95,12 +96,35 @@ + { + /* XXX SCC1: insert port configuration here */ + pinfo->brg = 1; ++ ++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS) ++ bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); ++ ++ if (bcsr_io == NULL) { ++ printk(KERN_CRIT "Could not remap BCSR\n"); ++ return; ++ } ++ out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_1); ++ iounmap(bcsr_io); ++#endif + } + + void scc2_lineif(struct uart_cpm_port *pinfo) + { + /* XXX SCC2: insert port configuration here */ + pinfo->brg = 2; ++ unsigned *bcsr_io; ++ ++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS) ++ bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); ++ ++ if (bcsr_io == NULL) { ++ printk(KERN_CRIT "Could not remap BCSR\n"); ++ return; ++ } ++ out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_2); ++ iounmap(bcsr_io); ++#endif + } + + void scc3_lineif(struct uart_cpm_port *pinfo) +@@ -189,6 +213,10 @@ + { + pr_debug("CPM uart[-]:init portdesc\n"); + ++ /* Check if we have called this yet. 
This may happen if early kgdb ++ breakpoint is on */ ++ if(cpm_uart_nr) ++ return 0; + cpm_uart_nr = 0; + #ifdef CONFIG_SERIAL_CPM_SMC1 + cpm_uart_ports[UART_SMC1].smcp = &cpmp->cp_smc[0]; +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c +--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_cpm2.c 2007-12-21 15:36:12.000000000 -0500 +@@ -289,6 +289,10 @@ + #endif + pr_debug("CPM uart[-]:init portdesc\n"); + ++ /* Check if we have called this yet. This may happen if early kgdb ++ breakpoint is on */ ++ if(cpm_uart_nr) ++ return 0; + cpm_uart_nr = 0; + #ifdef CONFIG_SERIAL_CPM_SMC1 + cpm_uart_ports[UART_SMC1].smcp = (smc_t *) cpm2_map(im_smc[0]); +diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c +--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/cpm_uart/cpm_uart_kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,185 @@ ++/* ++ * drivers/serial/cpm_uart/cpm_uart_kgdb.c ++ * ++ * CPM UART interface for kgdb. ++ * ++ * Author: Vitaly Bordug ++ * ++ * Used some bits from drivers/serial/kgdb_8250.c as a template ++ * ++ * 2005-2007 (c) MontaVista Software, Inc. This file is licensed under ++ * the terms of the GNU General Public License version 2. This program ++ * is licensed "as is" without any warranty of any kind, whether express ++ * or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include /* For BASE_BAUD and SERIAL_PORT_DFNS */ ++ ++#include "cpm_uart.h" ++ ++#define GDB_BUF_SIZE 512 /* power of 2, please */ ++ ++ ++static char kgdb_buf[GDB_BUF_SIZE], *kgdbp; ++static int kgdb_chars; ++ ++/* Forward declarations. */ ++ ++/* ++ * Receive character from the serial port. This only works well ++ * before the port is initialize for real use. ++ */ ++static int kgdb_wait_key(char *obuf) ++{ ++ struct uart_cpm_port *pinfo; ++ u_char c, *cp; ++ volatile cbd_t *bdp; ++ int i; ++ ++ pinfo = &cpm_uart_ports[KGDB_PINFO_INDEX]; ++ ++ /* Get the address of the host memory buffer. ++ */ ++ bdp = pinfo->rx_cur; ++ while (bdp->cbd_sc & BD_SC_EMPTY); ++ ++ /* If the buffer address is in the CPM DPRAM, don't ++ * convert it. ++ */ ++ cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo); ++ ++ if (obuf) { ++ i = c = bdp->cbd_datlen; ++ while (i-- > 0) ++ *obuf++ = *cp++; ++ } else ++ c = *cp; ++ bdp->cbd_sc |= BD_SC_EMPTY; ++ ++ if (bdp->cbd_sc & BD_SC_WRAP) ++ bdp = pinfo->rx_bd_base; ++ else ++ bdp++; ++ pinfo->rx_cur = (cbd_t *)bdp; ++ ++ return (int)c; ++} ++ ++ ++/* ++ * Wait until the interface can accept a char, then write it. ++ */ ++static void kgdb_put_debug_char(u8 chr) ++{ ++ static char ch[2]; ++ ++ ch[0] = (char)chr; ++ cpm_uart_early_write(KGDB_PINFO_INDEX, ch, 1); ++} ++ ++ ++/* ++ * Get a char if available, return -1 if nothing available. ++ * Empty the receive buffer first, then look at the interface hardware. 
++ */ ++static int kgdb_get_debug_char(void) ++{ ++ if (kgdb_chars <= 0) { ++ kgdb_chars = kgdb_wait_key(kgdb_buf); ++ kgdbp = kgdb_buf; ++ } ++ kgdb_chars--; ++ ++ return (*kgdbp++); ++} ++ ++static void termios_set_options(int index, ++ int baud, int parity, int bits, int flow) ++{ ++ struct ktermios termios; ++ struct uart_port *port; ++ struct uart_cpm_port *pinfo; ++ ++ BUG_ON(index>UART_NR); ++ ++ port = (struct uart_port *)&cpm_uart_ports[index]; ++ pinfo = (struct uart_cpm_port *)port; ++ ++ /* ++ * Ensure that the serial console lock is initialised ++ * early. ++ */ ++ spin_lock_init(&port->lock); ++ ++ memset(&termios, 0, sizeof(struct termios)); ++ ++ termios.c_cflag = CREAD | HUPCL | CLOCAL; ++ ++ termios.c_cflag |= baud; ++ ++ if (bits == 7) ++ termios.c_cflag |= CS7; ++ else ++ termios.c_cflag |= CS8; ++ ++ switch (parity) { ++ case 'o': case 'O': ++ termios.c_cflag |= PARODD; ++ /*fall through*/ ++ case 'e': case 'E': ++ termios.c_cflag |= PARENB; ++ break; ++ } ++ ++ if (flow == 'r') ++ termios.c_cflag |= CRTSCTS; ++ ++ port->ops->set_termios(port, &termios, NULL); ++} ++ ++/* ++ * Returns: ++ * 0 on success, 1 on failure. ++ */ ++static int kgdb_init(void) ++{ ++ struct uart_port *port; ++ struct uart_cpm_port *pinfo; ++ int use_bootmem = 0; /* use dma by default */ ++ ++ if (!cpm_uart_nr) { ++ use_bootmem = 1; ++ cpm_uart_init_portdesc(); ++ } ++ port = (struct uart_port *)&cpm_uart_ports[KGDB_PINFO_INDEX]; ++ pinfo = (struct uart_cpm_port *)port; ++ ++ if (cpm_uart_early_setup(KGDB_PINFO_INDEX, use_bootmem)) ++ return 1; ++ ++ termios_set_options(KGDB_PINFO_INDEX, KGDB_BAUD,'n',8,'n'); ++ if (IS_SMC(pinfo)) ++ pinfo->smcp->smc_smcm |= SMCM_TX; ++ else ++ pinfo->sccp->scc_sccm |= UART_SCCM_TX; ++ ++ return 0; ++} ++ ++ ++struct kgdb_io kgdb_io_ops = { ++ .read_char = kgdb_get_debug_char, ++ .write_char = kgdb_put_debug_char, ++ .init = kgdb_init, ++}; ++ +diff -Nurb linux-2.6.22-570/drivers/serial/mpsc_kgdb.c linux-2.6.22-591/drivers/serial/mpsc_kgdb.c +--- linux-2.6.22-570/drivers/serial/mpsc_kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/mpsc_kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,345 @@ ++/* ++ * drivers/serial/mpsc_kgdb.c ++ * ++ * KGDB driver for the Marvell MultiProtocol Serial Controller (MPCS) ++ * ++ * Based on the polled boot loader driver by Ajit Prem (ajit.prem@motorola.com) ++ * ++ * Author: Randy Vinson ++ * ++ * Copyright (C) 2005-2006 MontaVista Software, Inc. ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. 
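Taken together, cpm_uart_kgdb.c here and the 8250, MPSC, PL011 and TXX9 backends elsewhere in this patch all fill in the same small polled contract. A skeletal backend under that assumption; rx_ready(), rx_byte(), tx_ready() and tx_byte() are hypothetical register helpers standing in for the hardware-specific parts:

static int rx_ready(void);		/* hypothetical */
static int rx_byte(void);		/* hypothetical */
static int tx_ready(void);		/* hypothetical */
static void tx_byte(u8 c);		/* hypothetical */

static int example_read_char(void)
{
	while (!rx_ready())		/* KGDB polls with interrupts off */
		cpu_relax();
	return rx_byte();
}

static void example_write_char(u8 c)
{
	while (!tx_ready())
		cpu_relax();
	tx_byte(c);
}

static int example_init(void)
{
	/* program the UART for polled 8n1 operation; return 0 on success */
	return 0;
}

static struct kgdb_io example_kgdb_io = {
	.read_char	= example_read_char,
	.write_char	= example_write_char,
	.init		= example_init,
	/* .late_init typically requests the RX interrupt once that is safe */
};

Built-in backends export this structure as the global kgdb_io_ops, as the file above does; the modular 8250 variant instead hands a static copy to kgdb_register_io_module().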
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* Main MPSC Configuration Register Offsets */ ++#define MPSC_MMCRL 0x0000 ++#define MPSC_MMCRH 0x0004 ++#define MPSC_MPCR 0x0008 ++#define MPSC_CHR_1 0x000c ++#define MPSC_CHR_2 0x0010 ++#define MPSC_CHR_3 0x0014 ++#define MPSC_CHR_4 0x0018 ++#define MPSC_CHR_5 0x001c ++#define MPSC_CHR_6 0x0020 ++#define MPSC_CHR_7 0x0024 ++#define MPSC_CHR_8 0x0028 ++#define MPSC_CHR_9 0x002c ++#define MPSC_CHR_10 0x0030 ++#define MPSC_CHR_11 0x0034 ++ ++#define MPSC_MPCR_FRZ (1 << 9) ++#define MPSC_MPCR_CL_5 0 ++#define MPSC_MPCR_CL_6 1 ++#define MPSC_MPCR_CL_7 2 ++#define MPSC_MPCR_CL_8 3 ++#define MPSC_MPCR_SBL_1 0 ++#define MPSC_MPCR_SBL_2 1 ++ ++#define MPSC_CHR_2_TEV (1<<1) ++#define MPSC_CHR_2_TA (1<<7) ++#define MPSC_CHR_2_TTCS (1<<9) ++#define MPSC_CHR_2_REV (1<<17) ++#define MPSC_CHR_2_RA (1<<23) ++#define MPSC_CHR_2_CRD (1<<25) ++#define MPSC_CHR_2_EH (1<<31) ++#define MPSC_CHR_2_PAR_ODD 0 ++#define MPSC_CHR_2_PAR_SPACE 1 ++#define MPSC_CHR_2_PAR_EVEN 2 ++#define MPSC_CHR_2_PAR_MARK 3 ++ ++/* MPSC Signal Routing */ ++#define MPSC_MRR 0x0000 ++#define MPSC_RCRR 0x0004 ++#define MPSC_TCRR 0x0008 ++ ++/* MPSC Interrupt registers (offset from MV64x60_SDMA_INTR_OFFSET) */ ++#define MPSC_INTR_CAUSE 0x0004 ++#define MPSC_INTR_MASK 0x0084 ++#define MPSC_INTR_CAUSE_RCC (1<<6) ++ ++/* Baud Rate Generator Interface Registers */ ++#define BRG_BCR 0x0000 ++#define BRG_BTR 0x0004 ++ ++/* Speed of the UART. */ ++static int kgdbmpsc_baud = CONFIG_KGDB_BAUDRATE; ++ ++/* Index of the UART, matches ttyMX naming. */ ++static int kgdbmpsc_ttyMM = CONFIG_KGDB_PORT_NUM; ++ ++#define MPSC_INTR_REG_SELECT(x) ((x) + (8 * kgdbmpsc_ttyMM)) ++ ++static int kgdbmpsc_init(void); ++ ++static struct platform_device mpsc_dev, shared_dev; ++ ++static void __iomem *mpsc_base; ++static void __iomem *brg_base; ++static void __iomem *routing_base; ++static void __iomem *sdma_base; ++ ++static unsigned int mpsc_irq; ++ ++static void kgdb_write_debug_char(u8 c) ++{ ++ u32 data; ++ ++ data = readl(mpsc_base + MPSC_MPCR); ++ writeb(c, mpsc_base + MPSC_CHR_1); ++ mb(); ++ data = readl(mpsc_base + MPSC_CHR_2); ++ data |= MPSC_CHR_2_TTCS; ++ writel(data, mpsc_base + MPSC_CHR_2); ++ mb(); ++ ++ while (readl(mpsc_base + MPSC_CHR_2) & MPSC_CHR_2_TTCS) ; ++} ++ ++static int kgdb_get_debug_char(void) ++{ ++ unsigned char c; ++ ++ while (!(readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) & ++ MPSC_INTR_CAUSE_RCC)) ; ++ ++ c = readb(mpsc_base + MPSC_CHR_10 + (1 << 1)); ++ mb(); ++ writeb(c, mpsc_base + MPSC_CHR_10 + (1 << 1)); ++ mb(); ++ writel(~MPSC_INTR_CAUSE_RCC, sdma_base + ++ MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)); ++ return (c); ++} ++ ++/* ++ * This is the receiver interrupt routine for the GDB stub. ++ * All that we need to do is verify that the interrupt happened on the ++ * line we're in charge of. If this is true, schedule a breakpoint and ++ * return. ++ */ ++static irqreturn_t kgdbmpsc_interrupt(int irq, void *dev_id) ++{ ++ if (irq != mpsc_irq) ++ return IRQ_NONE; ++ /* ++ * If there is some other CPU in KGDB then this is a ++ * spurious interrupt. 
so return without even checking a byte ++ */ ++ if (atomic_read(&debugger_active)) ++ return IRQ_NONE; ++ ++ if (readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) & ++ MPSC_INTR_CAUSE_RCC) ++ breakpoint(); ++ ++ return IRQ_HANDLED; ++} ++ ++static int __init kgdbmpsc_init(void) ++{ ++ struct mpsc_pdata *pdata; ++ u32 cdv; ++ ++ if (!brg_base || !mpsc_base || !routing_base || !sdma_base) ++ return -1; ++ ++ /* Set MPSC Routing to enable both ports */ ++ writel(0x0, routing_base + MPSC_MRR); ++ ++ /* MPSC 0/1 Rx & Tx get clocks BRG0/1 */ ++ writel(0x00000100, routing_base + MPSC_RCRR); ++ writel(0x00000100, routing_base + MPSC_TCRR); ++ ++ /* Disable all MPSC interrupts and clear any pending interrupts */ ++ writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); ++ writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)); ++ ++ pdata = (struct mpsc_pdata *)mpsc_dev.dev.platform_data; ++ ++ /* cdv = (clock/(2*16*baud rate)) for 16X mode. */ ++ cdv = ((pdata->brg_clk_freq / (32 * kgdbmpsc_baud)) - 1); ++ writel((pdata->brg_clk_src << 18) | (1 << 16) | cdv, ++ brg_base + BRG_BCR); ++ ++ /* Put MPSC into UART mode, no null modem, 16x clock mode */ ++ writel(0x000004c4, mpsc_base + MPSC_MMCRL); ++ writel(0x04400400, mpsc_base + MPSC_MMCRH); ++ ++ writel(0, mpsc_base + MPSC_CHR_1); ++ writel(0, mpsc_base + MPSC_CHR_9); ++ writel(0, mpsc_base + MPSC_CHR_10); ++ writel(4, mpsc_base + MPSC_CHR_3); ++ writel(0x20000000, mpsc_base + MPSC_CHR_4); ++ writel(0x9000, mpsc_base + MPSC_CHR_5); ++ writel(0, mpsc_base + MPSC_CHR_6); ++ writel(0, mpsc_base + MPSC_CHR_7); ++ writel(0, mpsc_base + MPSC_CHR_8); ++ ++ /* 8 data bits, 1 stop bit */ ++ writel((3 << 12), mpsc_base + MPSC_MPCR); ++ ++ /* Enter "hunt" mode */ ++ writel((1 << 31), mpsc_base + MPSC_CHR_2); ++ ++ udelay(100); ++ return 0; ++} ++ ++static void __iomem *__init ++kgdbmpsc_map_resource(struct platform_device *pd, int type, int num) ++{ ++ void __iomem *base = NULL; ++ struct resource *r; ++ ++ if ((r = platform_get_resource(pd, IORESOURCE_MEM, num))) ++ base = ioremap(r->start, r->end - r->start + 1); ++ return base; ++} ++ ++static void __iomem *__init ++kgdbmpsc_unmap_resource(struct platform_device *pd, int type, int num, ++ void __iomem * base) ++{ ++ if (base) ++ iounmap(base); ++ return NULL; ++} ++ ++static void __init ++kgdbmpsc_reserve_resource(struct platform_device *pd, int type, int num) ++{ ++ struct resource *r; ++ ++ if ((r = platform_get_resource(pd, IORESOURCE_MEM, num))) ++ request_mem_region(r->start, r->end - r->start + 1, "kgdb"); ++} ++ ++static int __init kgdbmpsc_local_init(void) ++{ ++ if (!mpsc_dev.num_resources || !shared_dev.num_resources) ++ return 1; /* failure */ ++ ++ mpsc_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BASE_ORDER); ++ brg_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BRG_BASE_ORDER); ++ ++ /* get the platform data for the shared registers and get them mapped */ ++ routing_base = kgdbmpsc_map_resource(&shared_dev, ++ IORESOURCE_MEM, ++ MPSC_ROUTING_BASE_ORDER); ++ sdma_base = ++ kgdbmpsc_map_resource(&shared_dev, IORESOURCE_MEM, ++ MPSC_SDMA_INTR_BASE_ORDER); ++ ++ mpsc_irq = platform_get_irq(&mpsc_dev, 1); ++ ++ if (mpsc_base && brg_base && routing_base && sdma_base) ++ return 0; /* success */ ++ ++ return 1; /* failure */ ++} ++ ++static void __init kgdbmpsc_local_exit(void) ++{ ++ if (sdma_base) ++ sdma_base = kgdbmpsc_unmap_resource(&shared_dev, IORESOURCE_MEM, ++ MPSC_SDMA_INTR_BASE_ORDER, ++ sdma_base); ++ if (routing_base) ++ routing_base 
= kgdbmpsc_unmap_resource(&shared_dev, ++ IORESOURCE_MEM, ++ MPSC_ROUTING_BASE_ORDER, ++ routing_base); ++ if (brg_base) ++ brg_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BRG_BASE_ORDER, ++ brg_base); ++ if (mpsc_base) ++ mpsc_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BASE_ORDER, mpsc_base); ++} ++ ++static void __init kgdbmpsc_update_pdata(struct platform_device *pdev) ++{ ++ ++ snprintf(pdev->dev.bus_id, BUS_ID_SIZE, "%s.%u", pdev->name, pdev->id); ++} ++ ++static int __init kgdbmpsc_pdev_init(void) ++{ ++ struct platform_device *pdev; ++ ++ /* get the platform data for the specified port. */ ++ pdev = mv64x60_early_get_pdev_data(MPSC_CTLR_NAME, kgdbmpsc_ttyMM, 1); ++ if (pdev) { ++ memcpy(&mpsc_dev, pdev, sizeof(struct platform_device)); ++ if (platform_notify) { ++ kgdbmpsc_update_pdata(&mpsc_dev); ++ platform_notify(&mpsc_dev.dev); ++ } ++ ++ /* get the platform data for the shared registers. */ ++ pdev = mv64x60_early_get_pdev_data(MPSC_SHARED_NAME, 0, 0); ++ if (pdev) { ++ memcpy(&shared_dev, pdev, ++ sizeof(struct platform_device)); ++ if (platform_notify) { ++ kgdbmpsc_update_pdata(&shared_dev); ++ platform_notify(&shared_dev.dev); ++ } ++ } ++ } ++ return 0; ++} ++ ++postcore_initcall(kgdbmpsc_pdev_init); ++ ++static int __init kgdbmpsc_init_io(void) ++{ ++ ++ kgdbmpsc_pdev_init(); ++ ++ if (kgdbmpsc_local_init()) { ++ kgdbmpsc_local_exit(); ++ return -1; ++ } ++ ++ if (kgdbmpsc_init() == -1) ++ return -1; ++ return 0; ++} ++ ++static void __init kgdbmpsc_hookup_irq(void) ++{ ++ unsigned int msk; ++ if (!request_irq(mpsc_irq, kgdbmpsc_interrupt, 0, "kgdb mpsc", NULL)) { ++ /* Enable interrupt */ ++ msk = readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); ++ msk |= MPSC_INTR_CAUSE_RCC; ++ writel(msk, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK)); ++ ++ kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BASE_ORDER); ++ kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM, ++ MPSC_BRG_BASE_ORDER); ++ } ++} ++ ++struct kgdb_io kgdb_io_ops = { ++ .read_char = kgdb_get_debug_char, ++ .write_char = kgdb_write_debug_char, ++ .init = kgdbmpsc_init_io, ++ .late_init = kgdbmpsc_hookup_irq, ++}; +diff -Nurb linux-2.6.22-570/drivers/serial/pl011_kgdb.c linux-2.6.22-591/drivers/serial/pl011_kgdb.c +--- linux-2.6.22-570/drivers/serial/pl011_kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/pl011_kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,111 @@ ++/* ++ * driver/serial/pl011_kgdb.c ++ * ++ * Support for KGDB on ARM AMBA PL011 UARTs ++ * ++ * Authors: Manish Lachwani ++ * Deepak Saxena ++ * ++ * Copyright (c) 2005-2007 MontaVista Software, Inc. ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether expressor implied. ++ * ++ */ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++static int kgdb_irq = CONFIG_KGDB_AMBA_IRQ; ++ ++#define UART_DIVISOR (CONFIG_KGDB_AMBA_UARTCLK * 4 / CONFIG_KGDB_BAUDRATE) ++/* ++ * Todo: IO_ADDRESS is not very generic across ARM... ++ */ ++static volatile unsigned char *kgdb_port = ++ (unsigned char*)IO_ADDRESS(CONFIG_KGDB_AMBA_BASE); ++ ++/* ++ * Init code taken from amba-pl011.c. 
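Before the PL011 init code below, the divisor encoding it relies on is worth spelling out: the PL011 baud divisor is uartclk / (16 * baud) carried with six fractional bits, and uartclk * 4 / baud is exactly that value in 26.6 fixed point, which is why UART_DIVISOR can be split with "& 0x3f" and ">> 6". A worked example; the 24 MHz clock is an assumed figure for illustration, the real value comes from CONFIG_KGDB_AMBA_UARTCLK:

static unsigned int example_pl011_divisor(unsigned int uartclk,
					  unsigned int baud)
{
	/* uartclk / (16 * baud) expressed in 26.6 fixed point */
	unsigned int div = uartclk * 4 / baud;

	/*
	 * Assumed 24 MHz clock at 115200 baud: div = 833, so
	 * UART011_FBRD gets 833 & 0x3f = 1 and UART011_IBRD gets
	 * 833 >> 6 = 13, i.e. 13 + 1/64 = 13.016 against the
	 * exact 24000000 / (16 * 115200) = 13.021.
	 */
	return div;
}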
++ */ ++static int kgdb_serial_init(void) ++{ ++ writew(0, kgdb_port + UART010_CR); ++ ++ /* Set baud rate */ ++ writew(UART_DIVISOR & 0x3f, kgdb_port + UART011_FBRD); ++ writew(UART_DIVISOR >> 6, kgdb_port + UART011_IBRD); ++ ++ writew(UART01x_LCRH_WLEN_8 | UART01x_LCRH_FEN, kgdb_port + UART010_LCRH); ++ writew(UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_RXE, ++ kgdb_port + UART010_CR); ++ ++ writew(UART011_RXIM, kgdb_port + UART011_IMSC); ++ ++ return 0; ++} ++ ++static void kgdb_serial_putchar(u8 ch) ++{ ++ unsigned int status; ++ ++ do { ++ status = readw(kgdb_port + UART01x_FR); ++ } while (status & UART01x_FR_TXFF); ++ ++ writew(ch, kgdb_port + UART01x_DR); ++} ++ ++static int kgdb_serial_getchar(void) ++{ ++ unsigned int status; ++ int ch; ++ ++#ifdef CONFIG_DEBUG_LL ++ printascii("Entering serial_getchar loop"); ++#endif ++ do { ++ status = readw(kgdb_port + UART01x_FR); ++ } while (status & UART01x_FR_RXFE); ++ ch = readw(kgdb_port + UART01x_DR); ++#ifdef CONFIG_DEBUG_LL ++ printascii("Exited serial_getchar loop"); ++ printascii("Read char: "); ++ printch(ch); ++ printascii("\n"); ++#endif ++ return ch; ++} ++ ++static irqreturn_t kgdb_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++{ ++ int status = readw(kgdb_port + UART011_MIS); ++ ++#ifdef CONFIG_DEBUG_LL ++ printascii("KGDB irq\n"); ++#endif ++ if (irq != kgdb_irq) ++ return IRQ_NONE; ++ ++ if (status & 0x40) ++ breakpoint(); ++ ++ return IRQ_HANDLED; ++} ++ ++static void __init kgdb_hookup_irq(void) ++{ ++ request_irq(kgdb_irq, kgdb_interrupt, SA_SHIRQ, "KGDB-serial", kgdb_port); ++} ++ ++struct kgdb_io kgdb_io_ops = { ++ .init = kgdb_serial_init, ++ .write_char = kgdb_serial_putchar, ++ .read_char = kgdb_serial_getchar, ++ .late_init = kgdb_hookup_irq, ++}; +diff -Nurb linux-2.6.22-570/drivers/serial/pxa.c linux-2.6.22-591/drivers/serial/pxa.c +--- linux-2.6.22-570/drivers/serial/pxa.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/pxa.c 2007-12-21 15:36:12.000000000 -0500 +@@ -42,6 +42,9 @@ + #include + #include + #include ++#ifdef CONFIG_KGDB_CONSOLE ++#include ++#endif + + #include + #include +@@ -690,6 +693,8 @@ + console_initcall(serial_pxa_console_init); + + #define PXA_CONSOLE &serial_pxa_console ++#elif defined(CONFIG_KGDB_CONSOLE) ++#define PXA_CONSOLE &kgdbcons + #else + #define PXA_CONSOLE NULL + #endif +diff -Nurb linux-2.6.22-570/drivers/serial/serial_core.c linux-2.6.22-591/drivers/serial/serial_core.c +--- linux-2.6.22-570/drivers/serial/serial_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/serial_core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -33,6 +33,7 @@ + #include /* for serial_state and serial_icounter_struct */ + #include + #include ++#include + + #include + #include +@@ -58,6 +59,12 @@ + #define uart_console(port) (0) + #endif + ++#ifdef CONFIG_KGDB_CONSOLE ++#define uart_kgdb(port) (port->cons && !strcmp(port->cons->name, "kgdb")) ++#else ++#define uart_kgdb(port) (0) ++#endif ++ + static void uart_change_speed(struct uart_state *state, struct ktermios *old_termios); + static void uart_wait_until_sent(struct tty_struct *tty, int timeout); + static void uart_change_pm(struct uart_state *state, int pm_state); +@@ -1671,6 +1678,9 @@ + mmio ? "mmio:0x" : "port:", + mmio ? 
port->mapbase : (unsigned long) port->iobase, + port->irq); ++ if (port->iotype == UPIO_MEM) ++ ret += sprintf(buf+ret, " membase 0x%08lX", ++ (unsigned long) port->membase); + + if (port->type == PORT_UNKNOWN) { + strcat(buf, "\n"); +@@ -2063,7 +2073,8 @@ + case UPIO_TSI: + case UPIO_DWAPB: + snprintf(address, sizeof(address), +- "MMIO 0x%lx", port->mapbase); ++ "MMIO map 0x%lx mem 0x%lx", port->mapbase, ++ (unsigned long) port->membase); + break; + default: + strlcpy(address, "*unknown*", sizeof(address)); +@@ -2118,9 +2129,9 @@ + + /* + * Power down all ports by default, except the +- * console if we have one. ++ * console (real or kgdb) if we have one. + */ +- if (!uart_console(port)) ++ if (!uart_console(port) && !uart_kgdb(port)) + uart_change_pm(state, 3); + } + } +@@ -2311,6 +2322,12 @@ + */ + port->flags &= ~UPF_DEAD; + ++#if defined(CONFIG_KGDB_8250) ++ /* Add any 8250-like ports we find later. */ ++ if (port->type <= PORT_MAX_8250) ++ kgdb8250_add_port(port->line, port); ++#endif ++ + out: + mutex_unlock(&state->mutex); + mutex_unlock(&port_mutex); +diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9.c linux-2.6.22-591/drivers/serial/serial_txx9.c +--- linux-2.6.22-570/drivers/serial/serial_txx9.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/serial/serial_txx9.c 2007-12-21 15:36:12.000000000 -0500 +@@ -40,6 +40,10 @@ + static char *serial_version = "1.09"; + static char *serial_name = "TX39/49 Serial driver"; + ++#ifndef CONFIG_KGDB_TXX9 ++#define CONFIG_KGDB_PORT_NUM -1 ++#endif ++ + #define PASS_LIMIT 256 + + #if !defined(CONFIG_SERIAL_TXX9_STDSERIAL) +@@ -471,6 +475,9 @@ + unsigned long flags; + int retval; + ++ if (up->port.line == CONFIG_KGDB_PORT_NUM) ++ return -EBUSY; ++ + /* + * Clear the FIFO buffers and disable them. + * (they will be reenabled in set_termios()) +@@ -799,6 +806,9 @@ + for (i = 0; i < UART_NR; i++) { + struct uart_txx9_port *up = &serial_txx9_ports[i]; + ++ if (up->port.line == CONFIG_KGDB_PORT_NUM) ++ continue; ++ + up->port.line = i; + up->port.ops = &serial_txx9_pops; + up->port.dev = dev; +@@ -967,6 +977,9 @@ + + mutex_lock(&serial_txx9_mutex); + for (i = 0; i < UART_NR; i++) { ++ if (i == CONFIG_KGDB_PORT_NUM) ++ continue; ++ + uart = &serial_txx9_ports[i]; + if (uart_match_port(&uart->port, port)) { + uart_remove_one_port(&serial_txx9_reg, &uart->port); +diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c +--- linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/drivers/serial/serial_txx9_kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,150 @@ ++/* ++ * drivers/serial/serial_txx9_kgdb.c ++ * ++ * kgdb interface for gdb ++ * ++ * Author: MontaVista Software, Inc. ++ * source@mvista.com ++ * ++ * Copyright (C) 2005-2006 MontaVista Software Inc. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* Speed of the UART. 
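/*
 * [Editor's note -- illustrative sketch, not part of the patch] The three
 * serial_txx9.c hunks above all enforce one rule: the line claimed by the
 * debugger must stay invisible to the normal serial driver (its startup()
 * fails with -EBUSY, and the register and remove loops skip it). Because
 * CONFIG_KGDB_PORT_NUM is defined as -1 whenever CONFIG_KGDB_TXX9 is off,
 * the comparison can never match in a non-KGDB build. The same guard,
 * written once as a hypothetical helper:
 */
static inline int txx9_line_is_kgdb(int line)
{
	return line == CONFIG_KGDB_PORT_NUM; /* -1, i.e. never, without KGDB */
}

/* in startup():         if (txx9_line_is_kgdb(up->port.line)) return -EBUSY; */
/* in the register loop: if (txx9_line_is_kgdb(up->port.line)) continue;      */
/* in the remove loop:   if (txx9_line_is_kgdb(i)) continue;                  */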
*/ ++static unsigned int kgdb_txx9_baud = CONFIG_KGDB_BAUDRATE; ++ ++#define TXX9_NPORT 4 /* TX4939 has 4 UARTs, others only have 2 */ ++ ++static struct uart_port kgdb_txx9_ports[TXX9_NPORT]; ++static struct uart_port *kgdb_port; ++ ++/* TXX9 Serial Registers */ ++#define TXX9_SILCR 0x00 ++#define TXX9_SIDISR 0x08 ++#define TXX9_SISCISR 0x0c ++#define TXX9_SIFCR 0x10 ++#define TXX9_SIFLCR 0x14 ++#define TXX9_SIBGR 0x18 ++#define TXX9_SITFIFO 0x1c ++#define TXX9_SIRFIFO 0x20 ++ ++/* SILCR : Line Control */ ++#define TXX9_SILCR_SCS_IMCLK_BG 0x00000020 ++#define TXX9_SILCR_SCS_SCLK_BG 0x00000060 ++#define TXX9_SILCR_USBL_1BIT 0x00000000 ++#define TXX9_SILCR_UMODE_8BIT 0x00000000 ++ ++/* SIDISR : DMA/Int. Status */ ++#define TXX9_SIDISR_RFDN_MASK 0x0000001f ++ ++/* SISCISR : Status Change Int. Status */ ++#define TXX9_SISCISR_TRDY 0x00000004 ++ ++/* SIFCR : FIFO Control */ ++#define TXX9_SIFCR_SWRST 0x00008000 ++ ++/* SIBGR : Baud Rate Control */ ++#define TXX9_SIBGR_BCLK_T0 0x00000000 ++#define TXX9_SIBGR_BCLK_T2 0x00000100 ++#define TXX9_SIBGR_BCLK_T4 0x00000200 ++#define TXX9_SIBGR_BCLK_T6 0x00000300 ++ ++static inline unsigned int sio_in(struct uart_port *port, int offset) ++{ ++ return *(volatile u32 *)(port->membase + offset); ++} ++ ++static inline void sio_out(struct uart_port *port, int offset, unsigned int value) ++{ ++ *(volatile u32 *)(port->membase + offset) = value; ++} ++ ++void __init txx9_kgdb_add_port(int n, struct uart_port *port) ++{ ++ memcpy(&kgdb_txx9_ports[n], port, sizeof(struct uart_port)); ++} ++ ++static int txx9_kgdb_init(void) ++{ ++ unsigned int quot, sibgr; ++ ++ kgdb_port = &kgdb_txx9_ports[CONFIG_KGDB_PORT_NUM]; ++ ++ if (kgdb_port->iotype != UPIO_MEM && ++ kgdb_port->iotype != UPIO_MEM32) ++ return -1; ++ ++ /* Reset the UART. */ ++ sio_out(kgdb_port, TXX9_SIFCR, TXX9_SIFCR_SWRST); ++#ifdef CONFIG_CPU_TX49XX ++ /* ++ * TX4925 BUG WORKAROUND. Accessing SIOC register ++ * immediately after soft reset causes bus error. ++ */ ++ iob(); ++ udelay(1); ++#endif ++ /* Wait until reset is complete. */ ++ while (sio_in(kgdb_port, TXX9_SIFCR) & TXX9_SIFCR_SWRST); ++ ++ /* Select the frame format and input clock. */ ++ sio_out(kgdb_port, TXX9_SILCR, ++ TXX9_SILCR_UMODE_8BIT | TXX9_SILCR_USBL_1BIT | ++ ((kgdb_port->flags & UPF_MAGIC_MULTIPLIER) ? ++ TXX9_SILCR_SCS_SCLK_BG : TXX9_SILCR_SCS_IMCLK_BG)); ++ ++ /* Select the input clock prescaler that fits the baud rate. */ ++ quot = (kgdb_port->uartclk + 8 * kgdb_txx9_baud) / (16 * kgdb_txx9_baud); ++ if (quot < (256 << 1)) ++ sibgr = (quot >> 1) | TXX9_SIBGR_BCLK_T0; ++ else if (quot < ( 256 << 3)) ++ sibgr = (quot >> 3) | TXX9_SIBGR_BCLK_T2; ++ else if (quot < ( 256 << 5)) ++ sibgr = (quot >> 5) | TXX9_SIBGR_BCLK_T4; ++ else if (quot < ( 256 << 7)) ++ sibgr = (quot >> 7) | TXX9_SIBGR_BCLK_T6; ++ else ++ sibgr = 0xff | TXX9_SIBGR_BCLK_T6; ++ ++ sio_out(kgdb_port, TXX9_SIBGR, sibgr); ++ ++ /* Enable receiver and transmitter. 
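/*
 * [Editor's note -- worked example] txx9_kgdb_init() above picks the
 * input-clock prescaler by testing quot against 256<<1, 256<<3, 256<<5
 * and 256<<7: each BCLK_T tier widens the 8-bit SIBGR divisor field by
 * another factor of four (shifts of 1, 3, 5, 7). Assuming a 14.7456 MHz
 * uartclk and 38400 baud:
 *
 *	quot = (14745600 + 8 * 38400) / (16 * 38400) = 24   (rounded down)
 *	24 < 256<<1  =>  sibgr = (24 >> 1) | TXX9_SIBGR_BCLK_T0 = 0x0c
 *
 * i.e. the fastest clock tier with divisor byte 12. Only when quot
 * reaches 256<<7 (32768) does the code saturate to 0xff | BCLK_T6.
 */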
*/
++ sio_out(kgdb_port, TXX9_SIFLCR, 0);
++
++ return 0;
++}
++
++static void txx9_kgdb_late_init(void)
++{
++ request_mem_region(kgdb_port->mapbase, 0x40, "serial_txx9(debug)");
++}
++
++static int txx9_kgdb_read(void)
++{
++ while (!(sio_in(kgdb_port, TXX9_SIDISR) & TXX9_SIDISR_RFDN_MASK));
++
++ return sio_in(kgdb_port, TXX9_SIRFIFO);
++}
++
++static void txx9_kgdb_write(u8 ch)
++{
++ while (!(sio_in(kgdb_port, TXX9_SISCISR) & TXX9_SISCISR_TRDY));
++
++ sio_out(kgdb_port, TXX9_SITFIFO, ch);
++}
++
++struct kgdb_io kgdb_io_ops = {
++ .read_char = txx9_kgdb_read,
++ .write_char = txx9_kgdb_write,
++ .init = txx9_kgdb_init,
++ .late_init = txx9_kgdb_late_init
++};
+diff -Nurb linux-2.6.22-570/drivers/serial/sh-sci.c linux-2.6.22-591/drivers/serial/sh-sci.c
+--- linux-2.6.22-570/drivers/serial/sh-sci.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/drivers/serial/sh-sci.c 2007-12-21 15:36:12.000000000 -0500
+@@ -118,7 +118,8 @@
+ do {
+ status = sci_in(port, SCxSR);
+ if (status & SCxSR_ERRORS(port)) {
+- handle_error(port);
++ /* Clear error flags. */
++ sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));
+ continue;
+ }
+ } while (!(status & SCxSR_RDxF(port)));
+@@ -184,18 +185,18 @@
+ int h, l;
+
+ c = *p++;
+- h = highhex(c);
+- l = lowhex(c);
++ h = hexchars[c >> 4];
++ l = hexchars[c % 16];
+ put_char(port, h);
+ put_char(port, l);
+ checksum += h + l;
+ }
+ put_char(port, '#');
+- put_char(port, highhex(checksum));
+- put_char(port, lowhex(checksum));
++ put_char(port, hexchars[checksum >> 4]);
++ put_char(port, hexchars[checksum & 15]);
+ } while (get_char(port) != '+');
+ } else
+-#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */
++#endif /* CONFIG_SH_STANDARD_BIOS */
+ for (i=0; iline == KGDBPORT.port.line &&
++ c == 3)
++ breakpoint();
++#endif
++
+ /* Store data and status */
+ if (status&SCxSR_FER(port)) {
+ flag = TTY_FRAME;
+@@ -1279,6 +1290,7 @@
+ console_initcall(sci_console_init);
+ #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
+
++#if 0
+ #ifdef CONFIG_SH_KGDB
+ /*
+ * FIXME: Most of this can go away..
at the moment, we rely on +diff -Nurb linux-2.6.22-570/drivers/spi/at25.c linux-2.6.22-591/drivers/spi/at25.c +--- linux-2.6.22-570/drivers/spi/at25.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/spi/at25.c 2007-12-21 15:36:12.000000000 -0500 +@@ -111,7 +111,8 @@ + } + + static ssize_t +-at25_bin_read(struct kobject *kobj, char *buf, loff_t off, size_t count) ++at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct device *dev; + struct at25_data *at25; +@@ -236,7 +237,8 @@ + } + + static ssize_t +-at25_bin_write(struct kobject *kobj, char *buf, loff_t off, size_t count) ++at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct device *dev; + struct at25_data *at25; +@@ -314,7 +316,6 @@ + */ + at25->bin.attr.name = "eeprom"; + at25->bin.attr.mode = S_IRUSR; +- at25->bin.attr.owner = THIS_MODULE; + at25->bin.read = at25_bin_read; + + at25->bin.size = at25->chip.byte_len; +diff -Nurb linux-2.6.22-570/drivers/usb/atm/cxacru.c linux-2.6.22-591/drivers/usb/atm/cxacru.c +--- linux-2.6.22-570/drivers/usb/atm/cxacru.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/atm/cxacru.c 2007-12-21 15:36:14.000000000 -0500 +@@ -171,7 +171,7 @@ + struct delayed_work poll_work; + u32 card_info[CXINF_MAX]; + struct mutex poll_state_serialize; +- int poll_state; ++ enum cxacru_poll_state poll_state; + + /* contol handles */ + struct mutex cm_serialize; +@@ -226,58 +226,48 @@ + + static ssize_t cxacru_sysfs_showattr_dB(s16 value, char *buf) + { +- if (unlikely(value < 0)) { + return snprintf(buf, PAGE_SIZE, "%d.%02u\n", +- value / 100, -value % 100); +- } else { +- return snprintf(buf, PAGE_SIZE, "%d.%02u\n", +- value / 100, value % 100); +- } ++ value / 100, abs(value) % 100); + } + + static ssize_t cxacru_sysfs_showattr_bool(u32 value, char *buf) + { +- switch (value) { +- case 0: return snprintf(buf, PAGE_SIZE, "no\n"); +- case 1: return snprintf(buf, PAGE_SIZE, "yes\n"); +- default: return 0; +- } ++ static char *str[] = { "no", "yes" }; ++ if (unlikely(value >= ARRAY_SIZE(str))) ++ return snprintf(buf, PAGE_SIZE, "%u\n", value); ++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); + } + + static ssize_t cxacru_sysfs_showattr_LINK(u32 value, char *buf) + { +- switch (value) { +- case 1: return snprintf(buf, PAGE_SIZE, "not connected\n"); +- case 2: return snprintf(buf, PAGE_SIZE, "connected\n"); +- case 3: return snprintf(buf, PAGE_SIZE, "lost\n"); +- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); +- } ++ static char *str[] = { NULL, "not connected", "connected", "lost" }; ++ if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL)) ++ return snprintf(buf, PAGE_SIZE, "%u\n", value); ++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); + } + + static ssize_t cxacru_sysfs_showattr_LINE(u32 value, char *buf) + { +- switch (value) { +- case 0: return snprintf(buf, PAGE_SIZE, "down\n"); +- case 1: return snprintf(buf, PAGE_SIZE, "attempting to activate\n"); +- case 2: return snprintf(buf, PAGE_SIZE, "training\n"); +- case 3: return snprintf(buf, PAGE_SIZE, "channel analysis\n"); +- case 4: return snprintf(buf, PAGE_SIZE, "exchange\n"); +- case 5: return snprintf(buf, PAGE_SIZE, "up\n"); +- case 6: return snprintf(buf, PAGE_SIZE, "waiting\n"); +- case 7: return snprintf(buf, PAGE_SIZE, "initialising\n"); +- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); +- } ++ static char *str[] = { "down", 
"attempting to activate", ++ "training", "channel analysis", "exchange", "up", ++ "waiting", "initialising" ++ }; ++ if (unlikely(value >= ARRAY_SIZE(str))) ++ return snprintf(buf, PAGE_SIZE, "%u\n", value); ++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); + } + + static ssize_t cxacru_sysfs_showattr_MODU(u32 value, char *buf) + { +- switch (value) { +- case 0: return 0; +- case 1: return snprintf(buf, PAGE_SIZE, "ANSI T1.413\n"); +- case 2: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.1 (G.DMT)\n"); +- case 3: return snprintf(buf, PAGE_SIZE, "ITU-T G.992.2 (G.LITE)\n"); +- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); +- } ++ static char *str[] = { ++ NULL, ++ "ANSI T1.413", ++ "ITU-T G.992.1 (G.DMT)", ++ "ITU-T G.992.2 (G.LITE)" ++ }; ++ if (unlikely(value >= ARRAY_SIZE(str) || str[value] == NULL)) ++ return snprintf(buf, PAGE_SIZE, "%u\n", value); ++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); + } + + /* +@@ -308,11 +298,10 @@ + struct cxacru_data *instance = usbatm_instance->driver_data; + u32 value = instance->card_info[CXINF_LINE_STARTABLE]; + +- switch (value) { +- case 0: return snprintf(buf, PAGE_SIZE, "running\n"); +- case 1: return snprintf(buf, PAGE_SIZE, "stopped\n"); +- default: return snprintf(buf, PAGE_SIZE, "unknown (%u)\n", value); +- } ++ static char *str[] = { "running", "stopped" }; ++ if (unlikely(value >= ARRAY_SIZE(str))) ++ return snprintf(buf, PAGE_SIZE, "%u\n", value); ++ return snprintf(buf, PAGE_SIZE, "%s\n", str[value]); + } + + static ssize_t cxacru_sysfs_store_adsl_state(struct device *dev, +diff -Nurb linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c +--- linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/atm/ueagle-atm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1168,6 +1168,7 @@ + struct uea_softc *sc = data; + int ret = -EAGAIN; + ++ set_freezable(); + uea_enters(INS_TO_USBDEV(sc)); + while (!kthread_should_stop()) { + if (ret < 0 || sc->reset) +diff -Nurb linux-2.6.22-570/drivers/usb/core/config.c linux-2.6.22-591/drivers/usb/core/config.c +--- linux-2.6.22-570/drivers/usb/core/config.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/core/config.c 2007-12-21 15:36:14.000000000 -0500 +@@ -274,6 +274,7 @@ + struct usb_descriptor_header *header; + int len, retval; + u8 inums[USB_MAXINTERFACES], nalts[USB_MAXINTERFACES]; ++ unsigned iad_num = 0; + + memcpy(&config->desc, buffer, USB_DT_CONFIG_SIZE); + if (config->desc.bDescriptorType != USB_DT_CONFIG || +@@ -351,6 +352,20 @@ + ++n; + } + ++ } else if (header->bDescriptorType == ++ USB_DT_INTERFACE_ASSOCIATION) { ++ if (iad_num == USB_MAXIADS) { ++ dev_warn(ddev, "found more Interface " ++ "Association Descriptors " ++ "than allocated for in " ++ "configuration %d\n", cfgno); ++ } else { ++ config->intf_assoc[iad_num] = ++ (struct usb_interface_assoc_descriptor ++ *)header; ++ iad_num++; ++ } ++ + } else if (header->bDescriptorType == USB_DT_DEVICE || + header->bDescriptorType == USB_DT_CONFIG) + dev_warn(ddev, "config %d contains an unexpected " +diff -Nurb linux-2.6.22-570/drivers/usb/core/devices.c linux-2.6.22-591/drivers/usb/core/devices.c +--- linux-2.6.22-570/drivers/usb/core/devices.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/core/devices.c 2007-12-21 15:36:14.000000000 -0500 +@@ -102,6 +102,10 @@ + /* C: #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA */ + "C:%c #Ifs=%2d Cfg#=%2d Atr=%02x MxPwr=%3dmA\n"; + ++static const 
char *format_iad = ++/* A: FirstIf#=dd IfCount=dd Cls=xx(sssss) Sub=xx Prot=xx */ ++ "A: FirstIf#=%2d IfCount=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x\n"; ++ + static const char *format_iface = + /* I: If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=xxxx*/ + "I:%c If#=%2d Alt=%2d #EPs=%2d Cls=%02x(%-5s) Sub=%02x Prot=%02x Driver=%s\n"; +@@ -146,6 +150,7 @@ + {USB_CLASS_STILL_IMAGE, "still"}, + {USB_CLASS_CSCID, "scard"}, + {USB_CLASS_CONTENT_SEC, "c-sec"}, ++ {USB_CLASS_VIDEO, "video"}, + {-1, "unk."} /* leave as last */ + }; + +@@ -286,6 +291,21 @@ + return start; + } + ++static char *usb_dump_iad_descriptor(char *start, char *end, ++ const struct usb_interface_assoc_descriptor *iad) ++{ ++ if (start > end) ++ return start; ++ start += sprintf(start, format_iad, ++ iad->bFirstInterface, ++ iad->bInterfaceCount, ++ iad->bFunctionClass, ++ class_decode(iad->bFunctionClass), ++ iad->bFunctionSubClass, ++ iad->bFunctionProtocol); ++ return start; ++} ++ + /* TBD: + * 0. TBDs + * 1. marking active interface altsettings (code lists all, but should mark +@@ -322,6 +342,12 @@ + if (!config) /* getting these some in 2.3.7; none in 2.3.6 */ + return start + sprintf(start, "(null Cfg. desc.)\n"); + start = usb_dump_config_descriptor(start, end, &config->desc, active); ++ for (i = 0; i < USB_MAXIADS; i++) { ++ if (config->intf_assoc[i] == NULL) ++ break; ++ start = usb_dump_iad_descriptor(start, end, ++ config->intf_assoc[i]); ++ } + for (i = 0; i < config->desc.bNumInterfaces; i++) { + intfc = config->intf_cache[i]; + interface = config->interface[i]; +diff -Nurb linux-2.6.22-570/drivers/usb/core/hub.c linux-2.6.22-591/drivers/usb/core/hub.c +--- linux-2.6.22-570/drivers/usb/core/hub.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/usb/core/hub.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2831,6 +2831,7 @@ + + static int hub_thread(void *__unused) + { ++ set_freezable(); + do { + hub_events(); + wait_event_interruptible(khubd_wait, +diff -Nurb linux-2.6.22-570/drivers/usb/core/message.c linux-2.6.22-591/drivers/usb/core/message.c +--- linux-2.6.22-570/drivers/usb/core/message.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/drivers/usb/core/message.c 2007-12-21 15:36:14.000000000 -0500 +@@ -1409,6 +1409,36 @@ + .uevent = usb_if_uevent, + }; + ++static struct usb_interface_assoc_descriptor *find_iad(struct usb_device *dev, ++ struct usb_host_config *config, ++ u8 inum) ++{ ++ struct usb_interface_assoc_descriptor *retval = NULL; ++ struct usb_interface_assoc_descriptor *intf_assoc; ++ int first_intf; ++ int last_intf; ++ int i; ++ ++ for (i = 0; (i < USB_MAXIADS && config->intf_assoc[i]); i++) { ++ intf_assoc = config->intf_assoc[i]; ++ if (intf_assoc->bInterfaceCount == 0) ++ continue; ++ ++ first_intf = intf_assoc->bFirstInterface; ++ last_intf = first_intf + (intf_assoc->bInterfaceCount - 1); ++ if (inum >= first_intf && inum <= last_intf) { ++ if (!retval) ++ retval = intf_assoc; ++ else ++ dev_err(&dev->dev, "Interface #%d referenced" ++ " by multiple IADs\n", inum); ++ } ++ } ++ ++ return retval; ++} ++ ++ + /* + * usb_set_configuration - Makes a particular device setting be current + * @dev: the device whose configuration is being updated +@@ -1555,6 +1585,7 @@ + intfc = cp->intf_cache[i]; + intf->altsetting = intfc->altsetting; + intf->num_altsetting = intfc->num_altsetting; ++ intf->intf_assoc = find_iad(dev, cp, i); + kref_get(&intfc->ref); + + alt = usb_altnum_to_altsetting(intf, 0); +diff -Nurb linux-2.6.22-570/drivers/usb/core/sysfs.c 
linux-2.6.22-591/drivers/usb/core/sysfs.c +--- linux-2.6.22-570/drivers/usb/core/sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/core/sysfs.c 2007-12-21 15:36:14.000000000 -0500 +@@ -424,6 +424,25 @@ + sysfs_remove_group(&dev->kobj, &dev_attr_grp); + } + ++/* Interface Accociation Descriptor fields */ ++#define usb_intf_assoc_attr(field, format_string) \ ++static ssize_t \ ++show_iad_##field (struct device *dev, struct device_attribute *attr, \ ++ char *buf) \ ++{ \ ++ struct usb_interface *intf = to_usb_interface (dev); \ ++ \ ++ return sprintf (buf, format_string, \ ++ intf->intf_assoc->field); \ ++} \ ++static DEVICE_ATTR(iad_##field, S_IRUGO, show_iad_##field, NULL); ++ ++usb_intf_assoc_attr (bFirstInterface, "%02x\n") ++usb_intf_assoc_attr (bInterfaceCount, "%02d\n") ++usb_intf_assoc_attr (bFunctionClass, "%02x\n") ++usb_intf_assoc_attr (bFunctionSubClass, "%02x\n") ++usb_intf_assoc_attr (bFunctionProtocol, "%02x\n") ++ + /* Interface fields */ + #define usb_intf_attr(field, format_string) \ + static ssize_t \ +@@ -487,6 +506,18 @@ + } + static DEVICE_ATTR(modalias, S_IRUGO, show_modalias, NULL); + ++static struct attribute *intf_assoc_attrs[] = { ++ &dev_attr_iad_bFirstInterface.attr, ++ &dev_attr_iad_bInterfaceCount.attr, ++ &dev_attr_iad_bFunctionClass.attr, ++ &dev_attr_iad_bFunctionSubClass.attr, ++ &dev_attr_iad_bFunctionProtocol.attr, ++ NULL, ++}; ++static struct attribute_group intf_assoc_attr_grp = { ++ .attrs = intf_assoc_attrs, ++}; ++ + static struct attribute *intf_attrs[] = { + &dev_attr_bInterfaceNumber.attr, + &dev_attr_bAlternateSetting.attr, +@@ -538,6 +569,8 @@ + alt->string = usb_cache_string(udev, alt->desc.iInterface); + if (alt->string) + retval = device_create_file(dev, &dev_attr_interface); ++ if (intf->intf_assoc) ++ retval = sysfs_create_group(&dev->kobj, &intf_assoc_attr_grp); + usb_create_intf_ep_files(intf, udev); + return 0; + } +@@ -549,4 +582,5 @@ + usb_remove_intf_ep_files(intf); + device_remove_file(dev, &dev_attr_interface); + sysfs_remove_group(&dev->kobj, &intf_attr_grp); ++ sysfs_remove_group(&intf->dev.kobj, &intf_assoc_attr_grp); + } +diff -Nurb linux-2.6.22-570/drivers/usb/gadget/file_storage.c linux-2.6.22-591/drivers/usb/gadget/file_storage.c +--- linux-2.6.22-570/drivers/usb/gadget/file_storage.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/gadget/file_storage.c 2007-12-21 15:36:12.000000000 -0500 +@@ -3434,6 +3434,9 @@ + allow_signal(SIGKILL); + allow_signal(SIGUSR1); + ++ /* Allow the thread to be frozen */ ++ set_freezable(); ++ + /* Arrange for userspace references to be interpreted as kernel + * pointers. That way we can pass a kernel pointer to a routine + * that expects a __user pointer and it will work okay. 
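/*
 * [Editor's note -- illustrative sketch, not part of the patch] The
 * set_freezable() calls added throughout this patch (ueagle-atm, khubd
 * and file_storage above, usb-storage, ps3fb, w1 and cifs below) reflect
 * the new default that kernel threads are not freezable; each thread
 * that should park during suspend must now opt in and poll
 * try_to_freeze(). The canonical shape of such a worker thread:
 */
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/sched.h>

static int my_worker(void *unused)
{
	set_freezable(); /* opt in: the freezer may stop us at suspend */

	while (!kthread_should_stop()) {
		if (try_to_freeze()) /* parks here while the system sleeps */
			continue;
		/* ... do one unit of work, then sleep ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}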
*/ +diff -Nurb linux-2.6.22-570/drivers/usb/storage/usb.c linux-2.6.22-591/drivers/usb/storage/usb.c +--- linux-2.6.22-570/drivers/usb/storage/usb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/usb/storage/usb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -301,8 +301,6 @@ + struct us_data *us = (struct us_data *)__us; + struct Scsi_Host *host = us_to_host(us); + +- current->flags |= PF_NOFREEZE; +- + for(;;) { + US_DEBUGP("*** thread sleeping.\n"); + if(down_interruptible(&us->sema)) +@@ -909,6 +907,7 @@ + printk(KERN_DEBUG + "usb-storage: device found at %d\n", us->pusb_dev->devnum); + ++ set_freezable(); + /* Wait for the timeout to expire or for a disconnect */ + if (delay_use > 0) { + printk(KERN_DEBUG "usb-storage: waiting for device " +diff -Nurb linux-2.6.22-570/drivers/video/Kconfig linux-2.6.22-591/drivers/video/Kconfig +--- linux-2.6.22-570/drivers/video/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -12,6 +12,13 @@ + tristate + default n + ++config VIDEO_OUTPUT_CONTROL ++ tristate "Lowlevel video output switch controls" ++ default m ++ help ++ This framework adds support for low-level control of the video ++ output switch. ++ + config FB + tristate "Support for frame buffer devices" + ---help--- +diff -Nurb linux-2.6.22-570/drivers/video/Makefile linux-2.6.22-591/drivers/video/Makefile +--- linux-2.6.22-570/drivers/video/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -122,3 +122,6 @@ + + # the test framebuffer is last + obj-$(CONFIG_FB_VIRTUAL) += vfb.o ++ ++#video output switch sysfs driver ++obj-$(CONFIG_VIDEO_OUTPUT_CONTROL) += output.o +diff -Nurb linux-2.6.22-570/drivers/video/aty/radeon_base.c linux-2.6.22-591/drivers/video/aty/radeon_base.c +--- linux-2.6.22-570/drivers/video/aty/radeon_base.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/aty/radeon_base.c 2007-12-21 15:36:12.000000000 -0500 +@@ -2102,7 +2102,9 @@ + } + + +-static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t radeon_show_edid1(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct device *dev = container_of(kobj, struct device, kobj); + struct pci_dev *pdev = to_pci_dev(dev); +@@ -2113,7 +2115,9 @@ + } + + +-static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t radeon_show_edid2(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct device *dev = container_of(kobj, struct device, kobj); + struct pci_dev *pdev = to_pci_dev(dev); +@@ -2126,7 +2130,6 @@ + static struct bin_attribute edid1_attr = { + .attr = { + .name = "edid1", +- .owner = THIS_MODULE, + .mode = 0444, + }, + .size = EDID_LENGTH, +@@ -2136,7 +2139,6 @@ + static struct bin_attribute edid2_attr = { + .attr = { + .name = "edid2", +- .owner = THIS_MODULE, + .mode = 0444, + }, + .size = EDID_LENGTH, +diff -Nurb linux-2.6.22-570/drivers/video/backlight/backlight.c linux-2.6.22-591/drivers/video/backlight/backlight.c +--- linux-2.6.22-570/drivers/video/backlight/backlight.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/backlight/backlight.c 2007-12-21 15:36:12.000000000 -0500 +@@ -172,7 +172,7 @@ + + #define DECLARE_ATTR(_name,_mode,_show,_store) \ + { \ +- .attr = { .name = __stringify(_name), .mode = 
_mode, .owner = THIS_MODULE }, \ ++ .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + } +diff -Nurb linux-2.6.22-570/drivers/video/backlight/lcd.c linux-2.6.22-591/drivers/video/backlight/lcd.c +--- linux-2.6.22-570/drivers/video/backlight/lcd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/backlight/lcd.c 2007-12-21 15:36:12.000000000 -0500 +@@ -157,7 +157,7 @@ + + #define DECLARE_ATTR(_name,_mode,_show,_store) \ + { \ +- .attr = { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ ++ .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + } +diff -Nurb linux-2.6.22-570/drivers/video/ps3fb.c linux-2.6.22-591/drivers/video/ps3fb.c +--- linux-2.6.22-570/drivers/video/ps3fb.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/video/ps3fb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -812,6 +812,7 @@ + + static int ps3fbd(void *arg) + { ++ set_freezable(); + while (!kthread_should_stop()) { + try_to_freeze(); + set_current_state(TASK_INTERRUPTIBLE); +diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c +--- linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/w1/slaves/w1_ds2433.c 2007-12-21 15:36:12.000000000 -0500 +@@ -91,8 +91,9 @@ + } + #endif /* CONFIG_W1_SLAVE_DS2433_CRC */ + +-static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++static ssize_t w1_f23_read_bin(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + #ifdef CONFIG_W1_SLAVE_DS2433_CRC +@@ -199,8 +200,9 @@ + return 0; + } + +-static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++static ssize_t w1_f23_write_bin(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int addr, len, idx; +@@ -252,7 +254,6 @@ + .attr = { + .name = "eeprom", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = W1_EEPROM_SIZE, + .read = w1_f23_read_bin, +diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_therm.c linux-2.6.22-591/drivers/w1/slaves/w1_therm.c +--- linux-2.6.22-570/drivers/w1/slaves/w1_therm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/w1/slaves/w1_therm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -42,13 +42,13 @@ + {} + }; + +-static ssize_t w1_therm_read_bin(struct kobject *, char *, loff_t, size_t); ++static ssize_t w1_therm_read_bin(struct kobject *, struct bin_attribute *, ++ char *, loff_t, size_t); + + static struct bin_attribute w1_therm_bin_attr = { + .attr = { + .name = "w1_slave", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = W1_SLAVE_DATA_SIZE, + .read = w1_therm_read_bin, +@@ -159,7 +159,9 @@ + return 0; + } + +-static ssize_t w1_therm_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t w1_therm_read_bin(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + struct w1_master *dev = sl->master; +diff -Nurb linux-2.6.22-570/drivers/w1/w1.c linux-2.6.22-591/drivers/w1/w1.c +--- linux-2.6.22-570/drivers/w1/w1.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/w1/w1.c 2007-12-21 15:36:12.000000000 -0500 +@@ 
-105,7 +105,9 @@ + return sprintf(buf, "%s\n", sl->name); + } + +-static ssize_t w1_slave_read_id(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t w1_slave_read_id(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + +@@ -128,7 +130,6 @@ + .attr = { + .name = "id", + .mode = S_IRUGO, +- .owner = THIS_MODULE, + }, + .size = 8, + .read = w1_slave_read_id, +@@ -136,7 +137,9 @@ + + /* Default family */ + +-static ssize_t w1_default_write(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t w1_default_write(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + +@@ -153,7 +156,9 @@ + return count; + } + +-static ssize_t w1_default_read(struct kobject *kobj, char *buf, loff_t off, size_t count) ++static ssize_t w1_default_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct w1_slave *sl = kobj_to_w1_slave(kobj); + +@@ -167,7 +172,6 @@ + .attr = { + .name = "rw", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE, + }, + .size = PAGE_SIZE, + .read = w1_default_read, +@@ -801,6 +805,7 @@ + struct w1_master *dev, *n; + int have_to_wait = 0; + ++ set_freezable(); + while (!kthread_should_stop() || have_to_wait) { + have_to_wait = 0; + +diff -Nurb linux-2.6.22-570/drivers/zorro/zorro-sysfs.c linux-2.6.22-591/drivers/zorro/zorro-sysfs.c +--- linux-2.6.22-570/drivers/zorro/zorro-sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/drivers/zorro/zorro-sysfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -49,8 +49,9 @@ + + static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL); + +-static ssize_t zorro_read_config(struct kobject *kobj, char *buf, loff_t off, +- size_t count) ++static ssize_t zorro_read_config(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct zorro_dev *z = to_zorro_dev(container_of(kobj, struct device, + kobj)); +@@ -78,7 +79,6 @@ + .attr = { + .name = "config", + .mode = S_IRUGO | S_IWUSR, +- .owner = THIS_MODULE + }, + .size = sizeof(struct ConfigDev), + .read = zorro_read_config, +diff -Nurb linux-2.6.22-570/ed linux-2.6.22-591/ed +--- linux-2.6.22-570/ed 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/ed 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,6 @@ ++vi -o ./fs/proc/proc_misc.c ./fs/proc/proc_misc.c.rej ++vi -o ./fs/proc/array.c ./fs/proc/array.c.rej ++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej ++vi -o ./kernel/time/timekeeping.c ./kernel/time/timekeeping.c.rej ++vi -o ./kernel/timer.c ./kernel/timer.c.rej ++vi -o ./kernel/fork.c ./kernel/fork.c.rej +diff -Nurb linux-2.6.22-570/edit linux-2.6.22-591/edit +--- linux-2.6.22-570/edit 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/edit 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,19 @@ ++vi -o ./fs/proc/root.c ./fs/proc/root.c.rej ++vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej ++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej ++vi -o ./include/net/inet_timewait_sock.h ./include/net/inet_timewait_sock.h.rej ++vi -o ./include/net/route.h ./include/net/route.h.rej ++vi -o ./include/net/sock.h ./include/net/sock.h.rej ++vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej ++vi -o ./lib/Makefile ./lib/Makefile.rej ++vi -o ./net/core/dev.c ./net/core/dev.c.rej ++vi -o ./net/core/rtnetlink.c 
./net/core/rtnetlink.c.rej ++vi -o ./net/core/sock.c ./net/core/sock.c.rej ++vi -o ./net/ipv4/af_inet.c ./net/ipv4/af_inet.c.rej ++vi -o ./net/ipv4/inet_connection_sock.c ./net/ipv4/inet_connection_sock.c.rej ++vi -o ./net/ipv4/inet_hashtables.c ./net/ipv4/inet_hashtables.c.rej ++vi -o ./net/ipv4/raw.c ./net/ipv4/raw.c.rej ++vi -o ./net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c.rej ++vi -o ./net/ipv4/udp.c ./net/ipv4/udp.c.rej ++vi -o ./net/ipv6/addrconf.c ./net/ipv6/addrconf.c.rej ++vi -o ./net/unix/af_unix.c ./net/unix/af_unix.c.rej +diff -Nurb linux-2.6.22-570/fs/Kconfig linux-2.6.22-591/fs/Kconfig +--- linux-2.6.22-570/fs/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -1030,6 +1030,41 @@ + + endmenu + ++menu "Layered filesystems" ++ ++config ECRYPT_FS ++ tristate "eCrypt filesystem layer support (EXPERIMENTAL)" ++ depends on EXPERIMENTAL && KEYS && CRYPTO && NET ++ help ++ Encrypted filesystem that operates on the VFS layer. See ++ to learn more about ++ eCryptfs. Userspace components are required and can be ++ obtained from . ++ ++ To compile this file system support as a module, choose M here: the ++ module will be called ecryptfs. ++ ++config UNION_FS ++ tristate "Union file system (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ help ++ Unionfs is a stackable unification file system, which appears to ++ merge the contents of several directories (branches), while keeping ++ their physical content separate. ++ ++ See for details ++ ++config UNION_FS_XATTR ++ bool "Unionfs extended attributes" ++ depends on UNION_FS ++ help ++ Extended attributes are name:value pairs associated with inodes by ++ the kernel or by users (see the attr(5) manual page). ++ ++ If unsure, say N. ++ ++endmenu ++ + menu "Miscellaneous filesystems" + + config ADFS_FS +@@ -1082,18 +1117,6 @@ + To compile this file system support as a module, choose M here: the + module will be called affs. If unsure, say N. + +-config ECRYPT_FS +- tristate "eCrypt filesystem layer support (EXPERIMENTAL)" +- depends on EXPERIMENTAL && KEYS && CRYPTO && NET +- help +- Encrypted filesystem that operates on the VFS layer. See +- to learn more about +- eCryptfs. Userspace components are required and can be +- obtained from . +- +- To compile this file system support as a module, choose M here: the +- module will be called ecryptfs. 
+- + config HFS_FS + tristate "Apple Macintosh file system support (EXPERIMENTAL)" + depends on BLOCK && EXPERIMENTAL +diff -Nurb linux-2.6.22-570/fs/Makefile linux-2.6.22-591/fs/Makefile +--- linux-2.6.22-570/fs/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -19,6 +19,7 @@ + obj-y += no-block.o + endif + ++obj-$(CONFIG_MMU) += revoke.o revoked_inode.o + obj-$(CONFIG_INOTIFY) += inotify.o + obj-$(CONFIG_INOTIFY_USER) += inotify_user.o + obj-$(CONFIG_EPOLL) += eventpoll.o +@@ -118,3 +119,4 @@ + obj-$(CONFIG_DEBUG_FS) += debugfs/ + obj-$(CONFIG_OCFS2_FS) += ocfs2/ + obj-$(CONFIG_GFS2_FS) += gfs2/ ++obj-$(CONFIG_UNION_FS) += unionfs/ +diff -Nurb linux-2.6.22-570/fs/afs/netdevices.c linux-2.6.22-591/fs/afs/netdevices.c +--- linux-2.6.22-570/fs/afs/netdevices.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/afs/netdevices.c 2007-12-21 15:36:14.000000000 -0500 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include "internal.h" + + /* +@@ -23,7 +24,7 @@ + BUG(); + + rtnl_lock(); +- dev = __dev_getfirstbyhwtype(ARPHRD_ETHER); ++ dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER); + if (dev) { + memcpy(mac, dev->dev_addr, maclen); + ret = 0; +@@ -47,7 +48,7 @@ + ASSERT(maxbufs > 0); + + rtnl_lock(); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (dev->type == ARPHRD_LOOPBACK && !wantloopback) + continue; + idev = __in_dev_get_rtnl(dev); +diff -Nurb linux-2.6.22-570/fs/buffer.c linux-2.6.22-591/fs/buffer.c +--- linux-2.6.22-570/fs/buffer.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/buffer.c 2007-12-21 15:36:12.000000000 -0500 +@@ -982,7 +982,7 @@ + struct buffer_head *bh; + + page = find_or_create_page(inode->i_mapping, index, +- mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); ++ (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); + if (!page) + return NULL; + +@@ -2899,7 +2899,8 @@ + + struct buffer_head *alloc_buffer_head(gfp_t gfp_flags) + { +- struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags); ++ struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, ++ set_migrateflags(gfp_flags, __GFP_RECLAIMABLE)); + if (ret) { + INIT_LIST_HEAD(&ret->b_assoc_buffers); + get_cpu_var(bh_accounting).nr++; +diff -Nurb linux-2.6.22-570/fs/cifs/cifsfs.c linux-2.6.22-591/fs/cifs/cifsfs.c +--- linux-2.6.22-570/fs/cifs/cifsfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/cifs/cifsfs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -849,6 +849,7 @@ + __u16 netfid; + int rc; + ++ set_freezable(); + do { + if (try_to_freeze()) + continue; +diff -Nurb linux-2.6.22-570/fs/cifs/connect.c linux-2.6.22-591/fs/cifs/connect.c +--- linux-2.6.22-570/fs/cifs/connect.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/cifs/connect.c 2007-12-21 15:36:12.000000000 -0500 +@@ -363,6 +363,7 @@ + GFP_KERNEL); + } + ++ set_freezable(); + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; +diff -Nurb linux-2.6.22-570/fs/compat_ioctl.c linux-2.6.22-591/fs/compat_ioctl.c +--- linux-2.6.22-570/fs/compat_ioctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/compat_ioctl.c 2007-12-21 15:36:14.000000000 -0500 +@@ -319,22 +319,21 @@ + + static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) + { +- struct net_device *dev; +- struct ifreq32 ifr32; ++ struct ifreq __user *uifr; + int err; + +- if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32))) ++ uifr = compat_alloc_user_space(sizeof(struct 
ifreq));
++ if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
+ return -EFAULT;
+
+- dev = dev_get_by_index(ifr32.ifr_ifindex);
+- if (!dev)
+- return -ENODEV;
++ err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
++ if (err)
++ return err;
+
+- strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name));
+- dev_put(dev);
++ if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
++ return -EFAULT;
+
+- err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
+- return (err ? -EFAULT : 0);
++ return 0;
+ }
+
+ static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
+diff -Nurb linux-2.6.22-570/fs/configfs/configfs_internal.h linux-2.6.22-591/fs/configfs/configfs_internal.h
+--- linux-2.6.22-570/fs/configfs/configfs_internal.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/configfs_internal.h 2007-12-21 15:36:12.000000000 -0500
+@@ -29,6 +29,7 @@
+
+ struct configfs_dirent {
+ atomic_t s_count;
++ int s_dependent_count;
+ struct list_head s_sibling;
+ struct list_head s_children;
+ struct list_head s_links;
+diff -Nurb linux-2.6.22-570/fs/configfs/dir.c linux-2.6.22-591/fs/configfs/dir.c
+--- linux-2.6.22-570/fs/configfs/dir.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/configfs/dir.c 2007-12-21 15:36:12.000000000 -0500
+@@ -355,6 +355,10 @@
+ /* Mark that we've taken i_mutex */
+ sd->s_type |= CONFIGFS_USET_DROPPING;
+
++ /*
++ * Yup, recursive. If there's a problem, blame
++ * deep nesting of default_groups
++ */
+ ret = configfs_detach_prep(sd->s_dentry);
+ if (!ret)
+ continue;
+@@ -714,6 +718,28 @@
+ }
+
+ /*
++ * After the item has been detached from the filesystem view, we are
++ * ready to tear it out of the hierarchy. Notify the client before
++ * we do that so they can perform any cleanup that requires
++ * navigating the hierarchy. A client does not need to provide this
++ * callback. The subsystem semaphore MUST be held by the caller, and
++ * references must be valid for both items. It also assumes the
++ * caller has validated ci_type.
++ */
++static void client_disconnect_notify(struct config_item *parent_item,
++ struct config_item *item)
++{
++ struct config_item_type *type;
++
++ type = parent_item->ci_type;
++ BUG_ON(!type);
++
++ if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
++ type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
++ item);
++}
++
++/*
+ * Drop the initial reference from make_item()/make_group()
+ * This function assumes that reference is held on item
+ * and that item holds a valid reference to the parent.
Also, it +@@ -738,6 +764,239 @@ + config_item_put(item); + } + ++#ifdef DEBUG ++static void configfs_dump_one(struct configfs_dirent *sd, int level) ++{ ++ printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd)); ++ ++#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type); ++ type_print(CONFIGFS_ROOT); ++ type_print(CONFIGFS_DIR); ++ type_print(CONFIGFS_ITEM_ATTR); ++ type_print(CONFIGFS_ITEM_LINK); ++ type_print(CONFIGFS_USET_DIR); ++ type_print(CONFIGFS_USET_DEFAULT); ++ type_print(CONFIGFS_USET_DROPPING); ++#undef type_print ++} ++ ++static int configfs_dump(struct configfs_dirent *sd, int level) ++{ ++ struct configfs_dirent *child_sd; ++ int ret = 0; ++ ++ configfs_dump_one(sd, level); ++ ++ if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT))) ++ return 0; ++ ++ list_for_each_entry(child_sd, &sd->s_children, s_sibling) { ++ ret = configfs_dump(child_sd, level + 2); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++#endif ++ ++ ++/* ++ * configfs_depend_item() and configfs_undepend_item() ++ * ++ * WARNING: Do not call these from a configfs callback! ++ * ++ * This describes these functions and their helpers. ++ * ++ * Allow another kernel system to depend on a config_item. If this ++ * happens, the item cannot go away until the dependant can live without ++ * it. The idea is to give client modules as simple an interface as ++ * possible. When a system asks them to depend on an item, they just ++ * call configfs_depend_item(). If the item is live and the client ++ * driver is in good shape, we'll happily do the work for them. ++ * ++ * Why is the locking complex? Because configfs uses the VFS to handle ++ * all locking, but this function is called outside the normal ++ * VFS->configfs path. So it must take VFS locks to prevent the ++ * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is ++ * why you can't call these functions underneath configfs callbacks. ++ * ++ * Note, btw, that this can be called at *any* time, even when a configfs ++ * subsystem isn't registered, or when configfs is loading or unloading. ++ * Just like configfs_register_subsystem(). So we take the same ++ * precautions. We pin the filesystem. We lock each i_mutex _in_order_ ++ * on our way down the tree. If we can find the target item in the ++ * configfs tree, it must be part of the subsystem tree as well, so we ++ * do not need the subsystem semaphore. Holding the i_mutex chain locks ++ * out mkdir() and rmdir(), who might be racing us. ++ */ ++ ++/* ++ * configfs_depend_prep() ++ * ++ * Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are ++ * attributes. This is similar but not the same to configfs_detach_prep(). ++ * Note that configfs_detach_prep() expects the parent to be locked when it ++ * is called, but we lock the parent *inside* configfs_depend_prep(). We ++ * do that so we can unlock it if we find nothing. ++ * ++ * Here we do a depth-first search of the dentry hierarchy looking for ++ * our object. We take i_mutex on each step of the way down. IT IS ++ * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch, ++ * we'll drop the i_mutex. ++ * ++ * If the target is not found, -ENOENT is bubbled up and we have released ++ * all locks. If the target was found, the locks will be cleared by ++ * configfs_depend_rollback(). ++ * ++ * This adds a requirement that all config_items be unique! ++ * ++ * This is recursive because the locking traversal is tricky. 
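/*
 * [Editor's note -- illustrative sketch, not part of the patch] Seen from
 * a client subsystem, the API documented above reduces to a pin/unpin
 * pair around any kernel-side use of a user-created item; while pinned,
 * rmdir() on the item fails with -EBUSY (the s_dependent_count check
 * added to configfs_rmdir() further down). Remember the warning above:
 * neither call may be made from inside a configfs callback.
 */
#include <linux/configfs.h>

static int my_consumer_attach(struct configfs_subsystem *subsys,
			      struct config_item *item)
{
	int ret;

	ret = configfs_depend_item(subsys, item); /* may return -ENOENT */
	if (ret)
		return ret;

	/* ... the item cannot be rmdir'ed while we use it ... */
	return 0;
}

static void my_consumer_detach(struct configfs_subsystem *subsys,
			       struct config_item *item)
{
	configfs_undepend_item(subsys, item); /* rmdir() may proceed again */
}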
There isn't ++ * much on the stack, though, so folks that need this function - be careful ++ * about your stack! Patches will be accepted to make it iterative. ++ */ ++static int configfs_depend_prep(struct dentry *origin, ++ struct config_item *target) ++{ ++ struct configfs_dirent *child_sd, *sd = origin->d_fsdata; ++ int ret = 0; ++ ++ BUG_ON(!origin || !sd); ++ ++ /* Lock this guy on the way down */ ++ mutex_lock(&sd->s_dentry->d_inode->i_mutex); ++ if (sd->s_element == target) /* Boo-yah */ ++ goto out; ++ ++ list_for_each_entry(child_sd, &sd->s_children, s_sibling) { ++ if (child_sd->s_type & CONFIGFS_DIR) { ++ ret = configfs_depend_prep(child_sd->s_dentry, ++ target); ++ if (!ret) ++ goto out; /* Child path boo-yah */ ++ } ++ } ++ ++ /* We looped all our children and didn't find target */ ++ mutex_unlock(&sd->s_dentry->d_inode->i_mutex); ++ ret = -ENOENT; ++ ++out: ++ return ret; ++} ++ ++/* ++ * This is ONLY called if configfs_depend_prep() did its job. So we can ++ * trust the entire path from item back up to origin. ++ * ++ * We walk backwards from item, unlocking each i_mutex. We finish by ++ * unlocking origin. ++ */ ++static void configfs_depend_rollback(struct dentry *origin, ++ struct config_item *item) ++{ ++ struct dentry *dentry = item->ci_dentry; ++ ++ while (dentry != origin) { ++ mutex_unlock(&dentry->d_inode->i_mutex); ++ dentry = dentry->d_parent; ++ } ++ ++ mutex_unlock(&origin->d_inode->i_mutex); ++} ++ ++int configfs_depend_item(struct configfs_subsystem *subsys, ++ struct config_item *target) ++{ ++ int ret; ++ struct configfs_dirent *p, *root_sd, *subsys_sd = NULL; ++ struct config_item *s_item = &subsys->su_group.cg_item; ++ ++ /* ++ * Pin the configfs filesystem. This means we can safely access ++ * the root of the configfs filesystem. ++ */ ++ ret = configfs_pin_fs(); ++ if (ret) ++ return ret; ++ ++ /* ++ * Next, lock the root directory. We're going to check that the ++ * subsystem is really registered, and so we need to lock out ++ * configfs_[un]register_subsystem(). ++ */ ++ mutex_lock(&configfs_sb->s_root->d_inode->i_mutex); ++ ++ root_sd = configfs_sb->s_root->d_fsdata; ++ ++ list_for_each_entry(p, &root_sd->s_children, s_sibling) { ++ if (p->s_type & CONFIGFS_DIR) { ++ if (p->s_element == s_item) { ++ subsys_sd = p; ++ break; ++ } ++ } ++ } ++ ++ if (!subsys_sd) { ++ ret = -ENOENT; ++ goto out_unlock_fs; ++ } ++ ++ /* Ok, now we can trust subsys/s_item */ ++ ++ /* Scan the tree, locking i_mutex recursively, return 0 if found */ ++ ret = configfs_depend_prep(subsys_sd->s_dentry, target); ++ if (ret) ++ goto out_unlock_fs; ++ ++ /* We hold all i_mutexes from the subsystem down to the target */ ++ p = target->ci_dentry->d_fsdata; ++ p->s_dependent_count += 1; ++ ++ configfs_depend_rollback(subsys_sd->s_dentry, target); ++ ++out_unlock_fs: ++ mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex); ++ ++ /* ++ * If we succeeded, the fs is pinned via other methods. If not, ++ * we're done with it anyway. So release_fs() is always right. ++ */ ++ configfs_release_fs(); ++ ++ return ret; ++} ++EXPORT_SYMBOL(configfs_depend_item); ++ ++/* ++ * Release the dependent linkage. This is much simpler than ++ * configfs_depend_item() because we know that that the client driver is ++ * pinned, thus the subsystem is pinned, and therefore configfs is pinned. 
++ */ ++void configfs_undepend_item(struct configfs_subsystem *subsys, ++ struct config_item *target) ++{ ++ struct configfs_dirent *sd; ++ ++ /* ++ * Since we can trust everything is pinned, we just need i_mutex ++ * on the item. ++ */ ++ mutex_lock(&target->ci_dentry->d_inode->i_mutex); ++ ++ sd = target->ci_dentry->d_fsdata; ++ BUG_ON(sd->s_dependent_count < 1); ++ ++ sd->s_dependent_count -= 1; ++ ++ /* ++ * After this unlock, we cannot trust the item to stay alive! ++ * DO NOT REFERENCE item after this unlock. ++ */ ++ mutex_unlock(&target->ci_dentry->d_inode->i_mutex); ++} ++EXPORT_SYMBOL(configfs_undepend_item); + + static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) + { +@@ -842,11 +1101,14 @@ + if (ret) { + /* Tear down everything we built up */ + down(&subsys->su_sem); ++ ++ client_disconnect_notify(parent_item, item); + if (group) + unlink_group(group); + else + unlink_obj(item); + client_drop_item(parent_item, item); ++ + up(&subsys->su_sem); + + if (module_got) +@@ -881,6 +1143,13 @@ + if (sd->s_type & CONFIGFS_USET_DEFAULT) + return -EPERM; + ++ /* ++ * Here's where we check for dependents. We're protected by ++ * i_mutex. ++ */ ++ if (sd->s_dependent_count) ++ return -EBUSY; ++ + /* Get a working ref until we have the child */ + parent_item = configfs_get_config_item(dentry->d_parent); + subsys = to_config_group(parent_item)->cg_subsys; +@@ -911,11 +1180,13 @@ + configfs_detach_group(item); + + down(&subsys->su_sem); ++ client_disconnect_notify(parent_item, item); + unlink_group(to_config_group(item)); + } else { + configfs_detach_item(item); + + down(&subsys->su_sem); ++ client_disconnect_notify(parent_item, item); + unlink_obj(item); + } + +diff -Nurb linux-2.6.22-570/fs/configfs/file.c linux-2.6.22-591/fs/configfs/file.c +--- linux-2.6.22-570/fs/configfs/file.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/configfs/file.c 2007-12-21 15:36:12.000000000 -0500 +@@ -27,19 +27,26 @@ + #include + #include + #include ++#include + #include +-#include + + #include + #include "configfs_internal.h" + ++/* ++ * A simple attribute can only be 4096 characters. Why 4k? Because the ++ * original code limited it to PAGE_SIZE. That's a bad idea, though, ++ * because an attribute of 16k on ia64 won't work on x86. So we limit to ++ * 4k, our minimum common page size. 
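/*
 * [Editor's note -- illustrative sketch, not part of the patch] The
 * buffer protocol used by configfs attributes (fill the page once under
 * the newly introduced mutex, then serve arbitrary read() chunks from
 * it) is what gives userspace one consistent snapshot per open file.
 * Stripped of the configfs plumbing, with my_buffer as a hypothetical
 * stand-in for configfs_buffer:
 */
#include <linux/mutex.h>
#include <linux/fs.h>

struct my_buffer {
	struct mutex mutex;
	int needs_read_fill;
	int value;   /* whatever the attribute exposes */
	size_t count;
	char *page;  /* one page, allocated at open time */
};

static ssize_t my_attr_read(struct my_buffer *b, char __user *buf,
			    size_t count, loff_t *ppos)
{
	ssize_t ret;

	mutex_lock(&b->mutex);
	if (b->needs_read_fill) { /* first read(): render the value once */
		b->count = scnprintf(b->page, SIMPLE_ATTR_SIZE, "%d\n",
				     b->value);
		b->needs_read_fill = 0;
	}
	ret = simple_read_from_buffer(buf, count, ppos, b->page, b->count);
	mutex_unlock(&b->mutex);
	return ret;
}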
++ */ ++#define SIMPLE_ATTR_SIZE 4096 + + struct configfs_buffer { + size_t count; + loff_t pos; + char * page; + struct configfs_item_operations * ops; +- struct semaphore sem; ++ struct mutex mutex; + int needs_read_fill; + }; + +@@ -69,7 +76,7 @@ + + count = ops->show_attribute(item,attr,buffer->page); + buffer->needs_read_fill = 0; +- BUG_ON(count > (ssize_t)PAGE_SIZE); ++ BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); + if (count >= 0) + buffer->count = count; + else +@@ -102,7 +109,7 @@ + struct configfs_buffer * buffer = file->private_data; + ssize_t retval = 0; + +- down(&buffer->sem); ++ mutex_lock(&buffer->mutex); + if (buffer->needs_read_fill) { + if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + goto out; +@@ -112,7 +119,7 @@ + retval = simple_read_from_buffer(buf, count, ppos, buffer->page, + buffer->count); + out: +- up(&buffer->sem); ++ mutex_unlock(&buffer->mutex); + return retval; + } + +@@ -137,8 +144,8 @@ + if (!buffer->page) + return -ENOMEM; + +- if (count >= PAGE_SIZE) +- count = PAGE_SIZE - 1; ++ if (count >= SIMPLE_ATTR_SIZE) ++ count = SIMPLE_ATTR_SIZE - 1; + error = copy_from_user(buffer->page,buf,count); + buffer->needs_read_fill = 1; + /* if buf is assumed to contain a string, terminate it by \0, +@@ -193,13 +200,13 @@ + struct configfs_buffer * buffer = file->private_data; + ssize_t len; + +- down(&buffer->sem); ++ mutex_lock(&buffer->mutex); + len = fill_write_buffer(buffer, buf, count); + if (len > 0) + len = flush_write_buffer(file->f_path.dentry, buffer, count); + if (len > 0) + *ppos += len; +- up(&buffer->sem); ++ mutex_unlock(&buffer->mutex); + return len; + } + +@@ -253,7 +260,7 @@ + error = -ENOMEM; + goto Enomem; + } +- init_MUTEX(&buffer->sem); ++ mutex_init(&buffer->mutex); + buffer->needs_read_fill = 1; + buffer->ops = ops; + file->private_data = buffer; +@@ -292,6 +299,7 @@ + if (buffer) { + if (buffer->page) + free_page((unsigned long)buffer->page); ++ mutex_destroy(&buffer->mutex); + kfree(buffer); + } + return 0; +diff -Nurb linux-2.6.22-570/fs/configfs/item.c linux-2.6.22-591/fs/configfs/item.c +--- linux-2.6.22-570/fs/configfs/item.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/configfs/item.c 2007-12-21 15:36:12.000000000 -0500 +@@ -62,7 +62,6 @@ + * dynamically allocated string that @item->ci_name points to. + * Otherwise, use the static @item->ci_namebuf array. + */ +- + int config_item_set_name(struct config_item * item, const char * fmt, ...) + { + int error = 0; +@@ -139,12 +138,7 @@ + return item; + } + +-/** +- * config_item_cleanup - free config_item resources. +- * @item: item. +- */ +- +-void config_item_cleanup(struct config_item * item) ++static void config_item_cleanup(struct config_item * item) + { + struct config_item_type * t = item->ci_type; + struct config_group * s = item->ci_group; +@@ -179,12 +173,10 @@ + kref_put(&item->ci_kref, config_item_release); + } + +- + /** + * config_group_init - initialize a group for use + * @k: group + */ +- + void config_group_init(struct config_group *group) + { + config_item_init(&group->cg_item); +@@ -201,8 +193,8 @@ + * looking for a matching config_item. If matching item is found + * take a reference and return the item. 
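/*
 * [Editor's note -- illustrative sketch, not part of the patch] Because
 * config_group_find_obj() returns its match with a reference already
 * taken, every successful lookup must be balanced by config_item_put();
 * the caller remains responsible for the locking that keeps the group's
 * child list stable across the walk:
 */
static void my_lookup_example(struct config_group *group)
{
	struct config_item *item;

	item = config_group_find_obj(group, "my_child"); /* hypothetical name */
	if (!item)
		return;

	/* ... use the item ... */

	config_item_put(item); /* drop the reference the lookup took */
}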
+ */ +- +-struct config_item * config_group_find_obj(struct config_group * group, const char * name) ++struct config_item *config_group_find_obj(struct config_group *group, ++ const char * name) + { + struct list_head * entry; + struct config_item * ret = NULL; +@@ -219,7 +211,6 @@ + return ret; + } + +- + EXPORT_SYMBOL(config_item_init); + EXPORT_SYMBOL(config_group_init); + EXPORT_SYMBOL(config_item_get); +diff -Nurb linux-2.6.22-570/fs/drop_caches.c linux-2.6.22-591/fs/drop_caches.c +--- linux-2.6.22-570/fs/drop_caches.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/drop_caches.c 2007-12-21 15:36:12.000000000 -0500 +@@ -3,6 +3,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -12,7 +13,7 @@ + /* A global variable is a bit ugly, but it keeps the code simple */ + int sysctl_drop_caches; + +-static void drop_pagecache_sb(struct super_block *sb) ++void drop_pagecache_sb(struct super_block *sb) + { + struct inode *inode; + +@@ -24,6 +25,7 @@ + } + spin_unlock(&inode_lock); + } ++EXPORT_SYMBOL(drop_pagecache_sb); + + void drop_pagecache(void) + { +diff -Nurb linux-2.6.22-570/fs/ecryptfs/inode.c linux-2.6.22-591/fs/ecryptfs/inode.c +--- linux-2.6.22-570/fs/ecryptfs/inode.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ecryptfs/inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -280,7 +280,9 @@ + int rc = 0; + struct dentry *lower_dir_dentry; + struct dentry *lower_dentry; ++ struct dentry *dentry_save; + struct vfsmount *lower_mnt; ++ struct vfsmount *mnt_save; + char *encoded_name; + unsigned int encoded_namelen; + struct ecryptfs_crypt_stat *crypt_stat = NULL; +@@ -308,9 +310,13 @@ + } + ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen " + "= [%d]\n", encoded_name, encoded_namelen); +- lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry, +- encoded_namelen - 1); ++ dentry_save = nd->dentry; ++ mnt_save = nd->mnt; ++ lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry, ++ (encoded_namelen - 1), nd); + kfree(encoded_name); ++ nd->mnt = mnt_save; ++ nd->dentry = dentry_save; + if (IS_ERR(lower_dentry)) { + ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n"); + rc = PTR_ERR(lower_dentry); +diff -Nurb linux-2.6.22-570/fs/ecryptfs/main.c linux-2.6.22-591/fs/ecryptfs/main.c +--- linux-2.6.22-570/fs/ecryptfs/main.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ecryptfs/main.c 2007-12-21 15:36:12.000000000 -0500 +@@ -840,8 +840,6 @@ + goto out; + } + kobj_set_kset_s(&ecryptfs_subsys, fs_subsys); +- sysfs_attr_version.attr.owner = THIS_MODULE; +- sysfs_attr_version_str.attr.owner = THIS_MODULE; + rc = do_sysfs_registration(); + if (rc) { + printk(KERN_ERR "sysfs registration failed\n"); +diff -Nurb linux-2.6.22-570/fs/exec.c linux-2.6.22-591/fs/exec.c +--- linux-2.6.22-570/fs/exec.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/exec.c 2007-12-21 15:36:14.000000000 -0500 +@@ -861,9 +861,9 @@ + current->sas_ss_sp = current->sas_ss_size = 0; + + if (current->euid == current->uid && current->egid == current->gid) +- current->mm->dumpable = 1; ++ set_dumpable(current->mm, 1); + else +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + + name = bprm->filename; + +@@ -889,12 +889,12 @@ + + if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) { + suid_keys(current); +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + current->pdeath_signal = 0; + } else if (file_permission(bprm->file, MAY_READ) || + 
(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { + suid_keys(current); +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + } + + /* An exec changes our domain. We are no longer part of the thread +@@ -1486,6 +1486,55 @@ + return core_waiters; + } + ++/* ++ * set_dumpable converts traditional three-value dumpable to two flags and ++ * stores them into mm->flags. It modifies lower two bits of mm->flags, but ++ * these bits are not changed atomically. So get_dumpable can observe the ++ * intermediate state. To avoid doing unexpected behavior, get get_dumpable ++ * return either old dumpable or new one by paying attention to the order of ++ * modifying the bits. ++ * ++ * dumpable | mm->flags (binary) ++ * old new | initial interim final ++ * ---------+----------------------- ++ * 0 1 | 00 01 01 ++ * 0 2 | 00 10(*) 11 ++ * 1 0 | 01 00 00 ++ * 1 2 | 01 11 11 ++ * 2 0 | 11 10(*) 00 ++ * 2 1 | 11 11 01 ++ * ++ * (*) get_dumpable regards interim value of 10 as 11. ++ */ ++void set_dumpable(struct mm_struct *mm, int value) ++{ ++ switch (value) { ++ case 0: ++ clear_bit(MMF_DUMPABLE, &mm->flags); ++ smp_wmb(); ++ clear_bit(MMF_DUMP_SECURELY, &mm->flags); ++ break; ++ case 1: ++ set_bit(MMF_DUMPABLE, &mm->flags); ++ smp_wmb(); ++ clear_bit(MMF_DUMP_SECURELY, &mm->flags); ++ break; ++ case 2: ++ set_bit(MMF_DUMP_SECURELY, &mm->flags); ++ smp_wmb(); ++ set_bit(MMF_DUMPABLE, &mm->flags); ++ break; ++ } ++} ++ ++int get_dumpable(struct mm_struct *mm) ++{ ++ int ret; ++ ++ ret = mm->flags & 0x3; ++ return (ret >= 2) ? 2 : ret; ++} ++ + int do_coredump(long signr, int exit_code, struct pt_regs * regs) + { + char corename[CORENAME_MAX_SIZE + 1]; +@@ -1504,7 +1553,7 @@ + if (!binfmt || !binfmt->core_dump) + goto fail; + down_write(&mm->mmap_sem); +- if (!mm->dumpable) { ++ if (!get_dumpable(mm)) { + up_write(&mm->mmap_sem); + goto fail; + } +@@ -1514,11 +1563,11 @@ + * process nor do we know its entire history. We only know it + * was tainted so we dump it as root in mode 2. 
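/*
 * [Editor's note -- stand-alone check, not part of the patch] The
 * transition table above can be verified mechanically: whichever bit
 * set_dumpable() flips first, a racing get_dumpable() may only ever
 * observe the old or the new value (the interim pattern 10 deliberately
 * decodes as 2). A plain userspace harness over all nine transitions:
 */
#include <assert.h>

#define DUMPABLE 0x1 /* MMF_DUMPABLE, bit 0 */
#define SECURELY 0x2 /* MMF_DUMP_SECURELY, bit 1 */

static int decode(int flags) /* mirrors get_dumpable() */
{
	int ret = flags & 3;
	return ret >= 2 ? 2 : ret;
}

int main(void)
{
	static const int enc[3] = { 0, DUMPABLE, DUMPABLE | SECURELY };
	int old, new;

	for (old = 0; old <= 2; old++) {
		for (new = 0; new <= 2; new++) {
			int start = enc[old], interim;

			/* the first bit flip, in set_dumpable()'s order */
			if (new == 0)
				interim = start & ~DUMPABLE;
			else if (new == 1)
				interim = start | DUMPABLE;
			else
				interim = start | SECURELY;

			/* the race window only ever shows old or new */
			assert(decode(interim) == old ||
			       decode(interim) == new);
			assert(decode(enc[new]) == new);
		}
	}
	return 0;
}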
+ */ +- if (mm->dumpable == 2) { /* Setuid core dump mode */ ++ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ + flag = O_EXCL; /* Stop rewrite attacks */ + current->fsuid = 0; /* Dump root private */ + } +- mm->dumpable = 0; ++ set_dumpable(mm, 0); + + retval = coredump_wait(exit_code); + if (retval < 0) +diff -Nurb linux-2.6.22-570/fs/gfs2/ops_address.c linux-2.6.22-591/fs/gfs2/ops_address.c +--- linux-2.6.22-570/fs/gfs2/ops_address.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/gfs2/ops_address.c 2007-12-21 15:36:12.000000000 -0500 +@@ -250,7 +250,7 @@ + if (file) { + gf = file->private_data; + if (test_bit(GFF_EXLOCK, &gf->f_flags)) +- /* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */ ++ /* gfs2_sharewrite_fault has grabbed the ip->i_gl already */ + goto skip_lock; + } + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh); +diff -Nurb linux-2.6.22-570/fs/gfs2/ops_file.c linux-2.6.22-591/fs/gfs2/ops_file.c +--- linux-2.6.22-570/fs/gfs2/ops_file.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/gfs2/ops_file.c 2007-12-21 15:36:12.000000000 -0500 +@@ -364,6 +364,8 @@ + else + vma->vm_ops = &gfs2_vm_ops_private; + ++ vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR; ++ + gfs2_glock_dq_uninit(&i_gh); + + return error; +diff -Nurb linux-2.6.22-570/fs/gfs2/ops_vm.c linux-2.6.22-591/fs/gfs2/ops_vm.c +--- linux-2.6.22-570/fs/gfs2/ops_vm.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/gfs2/ops_vm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -27,13 +27,13 @@ + #include "trans.h" + #include "util.h" + +-static struct page *gfs2_private_nopage(struct vm_area_struct *area, +- unsigned long address, int *type) ++static struct page *gfs2_private_fault(struct vm_area_struct *vma, ++ struct fault_data *fdata) + { +- struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host); ++ struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host); + + set_bit(GIF_PAGED, &ip->i_flags); +- return filemap_nopage(area, address, type); ++ return filemap_fault(vma, fdata); + } + + static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) +@@ -104,16 +104,14 @@ + return error; + } + +-static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, +- unsigned long address, int *type) ++static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma, ++ struct fault_data *fdata) + { +- struct file *file = area->vm_file; ++ struct file *file = vma->vm_file; + struct gfs2_file *gf = file->private_data; + struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); + struct gfs2_holder i_gh; + struct page *result = NULL; +- unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + +- area->vm_pgoff; + int alloc_required; + int error; + +@@ -124,21 +122,27 @@ + set_bit(GIF_PAGED, &ip->i_flags); + set_bit(GIF_SW_PAGED, &ip->i_flags); + +- error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT, ++ error = gfs2_write_alloc_required(ip, ++ (u64)fdata->pgoff << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &alloc_required); +- if (error) ++ if (error) { ++ fdata->type = VM_FAULT_OOM; /* XXX: are these right? 
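Any error is folded into VM_FAULT_OOM here, presumably for want of a finer-grained fault code. 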
*/ + goto out; ++ } + + set_bit(GFF_EXLOCK, &gf->f_flags); +- result = filemap_nopage(area, address, type); ++ result = filemap_fault(vma, fdata); + clear_bit(GFF_EXLOCK, &gf->f_flags); +- if (!result || result == NOPAGE_OOM) ++ if (!result) + goto out; + + if (alloc_required) { + error = alloc_page_backing(ip, result); + if (error) { ++ if (vma->vm_flags & VM_CAN_INVALIDATE) ++ unlock_page(result); + page_cache_release(result); ++ fdata->type = VM_FAULT_OOM; + result = NULL; + goto out; + } +@@ -152,10 +156,10 @@ + } + + struct vm_operations_struct gfs2_vm_ops_private = { +- .nopage = gfs2_private_nopage, ++ .fault = gfs2_private_fault, + }; + + struct vm_operations_struct gfs2_vm_ops_sharewrite = { +- .nopage = gfs2_sharewrite_nopage, ++ .fault = gfs2_sharewrite_fault, + }; + +diff -Nurb linux-2.6.22-570/fs/inode.c linux-2.6.22-591/fs/inode.c +--- linux-2.6.22-570/fs/inode.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -149,7 +149,7 @@ + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->flags = 0; +- mapping_set_gfp_mask(mapping, GFP_HIGHUSER); ++ mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; + +@@ -525,7 +525,13 @@ + * new_inode - obtain an inode + * @sb: superblock + * +- * Allocates a new inode for given superblock. ++ * Allocates a new inode for given superblock. The default gfp_mask ++ * for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE. ++ * If HIGHMEM pages are unsuitable or it is known that pages allocated ++ * for the page cache are not reclaimable or migratable, ++ * mapping_set_gfp_mask() must be called with suitable flags on the ++ * newly created inode's mapping ++ * + */ + struct inode *new_inode(struct super_block *sb) + { +diff -Nurb linux-2.6.22-570/fs/jbd/journal.c linux-2.6.22-591/fs/jbd/journal.c +--- linux-2.6.22-570/fs/jbd/journal.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/jbd/journal.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1710,7 +1710,7 @@ + journal_head_cache = kmem_cache_create("journal_head", + sizeof(struct journal_head), + 0, /* offset */ +- 0, /* flags */ ++ SLAB_TEMPORARY, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + retval = 0; +@@ -2007,7 +2007,7 @@ + jbd_handle_cache = kmem_cache_create("journal_handle", + sizeof(handle_t), + 0, /* offset */ +- 0, /* flags */ ++ SLAB_TEMPORARY, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + if (jbd_handle_cache == NULL) { +diff -Nurb linux-2.6.22-570/fs/jbd/revoke.c linux-2.6.22-591/fs/jbd/revoke.c +--- linux-2.6.22-570/fs/jbd/revoke.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/jbd/revoke.c 2007-12-21 15:36:12.000000000 -0500 +@@ -169,13 +169,17 @@ + { + revoke_record_cache = kmem_cache_create("revoke_record", + sizeof(struct jbd_revoke_record_s), +- 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ 0, ++ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, ++ NULL, NULL); + if (revoke_record_cache == 0) + return -ENOMEM; + + revoke_table_cache = kmem_cache_create("revoke_table", + sizeof(struct jbd_revoke_table_s), +- 0, 0, NULL, NULL); ++ 0, ++ SLAB_TEMPORARY, ++ NULL, NULL); + if (revoke_table_cache == 0) { + kmem_cache_destroy(revoke_record_cache); + revoke_record_cache = NULL; +diff -Nurb linux-2.6.22-570/fs/jffs2/background.c linux-2.6.22-591/fs/jffs2/background.c +--- linux-2.6.22-570/fs/jffs2/background.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/jffs2/background.c 
2007-12-21 15:36:12.000000000 -0500 +@@ -81,6 +81,7 @@ + + set_user_nice(current, 10); + ++ set_freezable(); + for (;;) { + allow_signal(SIGHUP); + +diff -Nurb linux-2.6.22-570/fs/lockd/host.c linux-2.6.22-591/fs/lockd/host.c +--- linux-2.6.22-570/fs/lockd/host.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/lockd/host.c 2007-12-21 15:36:12.000000000 -0500 +@@ -161,15 +161,9 @@ + */ + nsm_unmonitor(host); + +- if ((clnt = host->h_rpcclnt) != NULL) { +- if (atomic_read(&clnt->cl_users)) { +- printk(KERN_WARNING +- "lockd: active RPC handle\n"); +- clnt->cl_dead = 1; +- } else { +- rpc_destroy_client(host->h_rpcclnt); +- } +- } ++ clnt = host->h_rpcclnt; ++ if (clnt != NULL) ++ rpc_shutdown_client(clnt); + kfree(host); + } + +diff -Nurb linux-2.6.22-570/fs/lockd/mon.c linux-2.6.22-591/fs/lockd/mon.c +--- linux-2.6.22-570/fs/lockd/mon.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/lockd/mon.c 2007-12-21 15:36:12.000000000 -0500 +@@ -61,6 +61,7 @@ + status); + else + status = 0; ++ rpc_shutdown_client(clnt); + out: + return status; + } +@@ -138,7 +139,6 @@ + .program = &nsm_program, + .version = SM_VERSION, + .authflavor = RPC_AUTH_NULL, +- .flags = (RPC_CLNT_CREATE_ONESHOT), + }; + + return rpc_create(&args); +diff -Nurb linux-2.6.22-570/fs/lockd/svc.c linux-2.6.22-591/fs/lockd/svc.c +--- linux-2.6.22-570/fs/lockd/svc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/lockd/svc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -119,13 +120,11 @@ + complete(&lockd_start_done); + + daemonize("lockd"); ++ set_freezable(); + + /* Process request with signals blocked, but allow SIGKILL. */ + allow_signal(SIGKILL); + +- /* kick rpciod */ +- rpciod_up(); +- + dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); + + if (!nlm_timeout) +@@ -202,9 +201,6 @@ + /* Exit the RPC thread */ + svc_exit_thread(rqstp); + +- /* release rpciod */ +- rpciod_down(); +- + /* Release module */ + unlock_kernel(); + module_put_and_exit(0); +diff -Nurb linux-2.6.22-570/fs/namei.c linux-2.6.22-591/fs/namei.c +--- linux-2.6.22-570/fs/namei.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/namei.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1386,7 +1386,8 @@ + return 0; + } + +-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) ++struct dentry *lookup_one_len_nd(const char *name, struct dentry *base, ++ int len, struct nameidata *nd) + { + int err; + struct qstr this; +@@ -1394,7 +1395,7 @@ + err = __lookup_one_len(name, &this, base, len); + if (err) + return ERR_PTR(err); +- return __lookup_hash(&this, base, NULL); ++ return __lookup_hash(&this, base, nd); + } + + struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len) +@@ -3086,7 +3087,7 @@ + EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ + EXPORT_SYMBOL(getname); + EXPORT_SYMBOL(lock_rename); +-EXPORT_SYMBOL(lookup_one_len); ++EXPORT_SYMBOL(lookup_one_len_nd); + EXPORT_SYMBOL(page_follow_link_light); + EXPORT_SYMBOL(page_put_link); + EXPORT_SYMBOL(page_readlink); +diff -Nurb linux-2.6.22-570/fs/namespace.c linux-2.6.22-591/fs/namespace.c +--- linux-2.6.22-570/fs/namespace.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/namespace.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1538,7 +1538,7 @@ + + new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); + if (!new_ns) +- return NULL; ++ return ERR_PTR(-ENOMEM); + + atomic_set(&new_ns->count, 1); + 
INIT_LIST_HEAD(&new_ns->list);
+@@ -1552,7 +1552,7 @@
+ if (!new_ns->root) {
+ up_write(&namespace_sem);
+ kfree(new_ns);
+- return NULL;
++ return ERR_PTR(-ENOMEM);
+ }
+ spin_lock(&vfsmount_lock);
+ list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
+@@ -1597,7 +1597,7 @@
+ return new_ns;
+ }
+
+-struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
++struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+ struct fs_struct *new_fs)
+ {
+ struct mnt_namespace *new_ns;
+diff -Nurb linux-2.6.22-570/fs/ncpfs/mmap.c linux-2.6.22-591/fs/ncpfs/mmap.c
+--- linux-2.6.22-570/fs/ncpfs/mmap.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ncpfs/mmap.c 2007-12-21 15:36:12.000000000 -0500
+@@ -25,8 +25,8 @@
+ /*
+ * Fill in the supplied page for mmap
+ */
+-static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
+- unsigned long address, int *type)
++static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
++ struct fault_data *fdata)
+ {
+ struct file *file = area->vm_file;
+ struct dentry *dentry = file->f_path.dentry;
+@@ -40,15 +40,17 @@
+
+ page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
+ as long as recvmsg and memset works on it */
+- if (!page)
+- return page;
++ if (!page) {
++ fdata->type = VM_FAULT_OOM;
++ return NULL;
++ }
+ pg_addr = kmap(page);
+- address &= PAGE_MASK;
+- pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
++ pos = fdata->pgoff << PAGE_SHIFT;
+
+ count = PAGE_SIZE;
+- if (address + PAGE_SIZE > area->vm_end) {
+- count = area->vm_end - address;
++ if (fdata->address + PAGE_SIZE > area->vm_end) {
++ WARN_ON(1); /* shouldn't happen? */
++ count = area->vm_end - fdata->address;
+ }
+ /* what we can read in one go */
+ bufsize = NCP_SERVER(inode)->buffer_size;
+@@ -91,15 +93,14 @@
+ * fetches from the network, here the analogue of disk.
+ * -- wli
+ */
+- if (type)
+- *type = VM_FAULT_MAJOR;
++ fdata->type = VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ return page;
+ }
+
+ static struct vm_operations_struct ncp_file_mmap =
+ {
+- .nopage = ncp_file_mmap_nopage,
++ .fault = ncp_file_mmap_fault,
+ };
+
+
+@@ -123,6 +124,7 @@
+ return -EFBIG;
+
+ vma->vm_ops = &ncp_file_mmap;
++ vma->vm_flags |= VM_CAN_INVALIDATE;
+ file_accessed(file);
+ return 0;
+ }
+diff -Nurb linux-2.6.22-570/fs/nfs/callback.c linux-2.6.22-591/fs/nfs/callback.c
+--- linux-2.6.22-570/fs/nfs/callback.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/nfs/callback.c 2007-12-21 15:36:12.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+
+@@ -67,6 +68,7 @@
+ daemonize("nfsv4-svc");
+ /* Process request with signals blocked, but allow SIGKILL. */
+ allow_signal(SIGKILL);
++ set_freezable();
+
+ complete(&nfs_callback_info.started);
+
+diff -Nurb linux-2.6.22-570/fs/nfs/client.c linux-2.6.22-591/fs/nfs/client.c
+--- linux-2.6.22-570/fs/nfs/client.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/nfs/client.c 2007-12-21 15:36:12.000000000 -0500
+@@ -102,19 +102,10 @@
+ int nfsversion)
+ {
+ struct nfs_client *clp;
+- int error;
+
+ if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+ goto error_0;
+
+- error = rpciod_up();
+- if (error < 0) {
+- dprintk("%s: couldn't start rpciod! 
Error = %d\n", +- __FUNCTION__, error); +- goto error_1; +- } +- __set_bit(NFS_CS_RPCIOD, &clp->cl_res_state); +- + if (nfsversion == 4) { + if (nfs_callback_up() < 0) + goto error_2; +@@ -154,9 +145,6 @@ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(); + error_2: +- rpciod_down(); +- __clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state); +-error_1: + kfree(clp); + error_0: + return NULL; +@@ -198,9 +186,6 @@ + if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) + nfs_callback_down(); + +- if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state)) +- rpciod_down(); +- + kfree(clp->cl_hostname); + kfree(clp); + +diff -Nurb linux-2.6.22-570/fs/nfs/delegation.c linux-2.6.22-591/fs/nfs/delegation.c +--- linux-2.6.22-570/fs/nfs/delegation.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/delegation.c 2007-12-21 15:36:12.000000000 -0500 +@@ -74,7 +74,7 @@ + continue; + get_nfs_open_context(ctx); + spin_unlock(&inode->i_lock); +- err = nfs4_open_delegation_recall(ctx->dentry, state); ++ err = nfs4_open_delegation_recall(ctx, state); + if (err >= 0) + err = nfs_delegation_claim_locks(ctx, state); + put_nfs_open_context(ctx); +diff -Nurb linux-2.6.22-570/fs/nfs/delegation.h linux-2.6.22-591/fs/nfs/delegation.h +--- linux-2.6.22-570/fs/nfs/delegation.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/delegation.h 2007-12-21 15:36:12.000000000 -0500 +@@ -39,7 +39,7 @@ + + /* NFSv4 delegation-related procedures */ + int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); +-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); ++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state); + int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl); + int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode); + +diff -Nurb linux-2.6.22-570/fs/nfs/dir.c linux-2.6.22-591/fs/nfs/dir.c +--- linux-2.6.22-570/fs/nfs/dir.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/nfs/dir.c 2007-12-21 15:36:12.000000000 -0500 +@@ -898,14 +898,13 @@ + return (nd->intent.open.flags & O_EXCL) != 0; + } + +-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir, +- struct nfs_fh *fh, struct nfs_fattr *fattr) ++static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) + { + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) +- /* Revalidate fsid on root dir */ +- return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode); ++ /* Revalidate fsid using the parent directory */ ++ return __nfs_revalidate_inode(server, dir); + return 0; + } + +@@ -947,7 +946,7 @@ + res = ERR_PTR(error); + goto out_unlock; + } +- error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr); ++ error = nfs_reval_fsid(dir, &fattr); + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; +@@ -1247,7 +1246,7 @@ + attr.ia_mode = mode; + attr.ia_valid = ATTR_MODE; + +- if (nd && (nd->flags & LOOKUP_CREATE)) ++ if ((nd->flags & LOOKUP_CREATE) != 0) + open_flags = nd->intent.open.flags; + + lock_kernel(); +@@ -1747,8 +1746,8 @@ + struct nfs_inode *nfsi; + struct nfs_access_entry *cache; + +- spin_lock(&nfs_access_lru_lock); + restart: ++ spin_lock(&nfs_access_lru_lock); + list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { + struct inode *inode; + +@@ -1773,6 +1772,7 @@ + clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); + 
} + spin_unlock(&inode->i_lock); ++ spin_unlock(&nfs_access_lru_lock); + iput(inode); + goto restart; + } +diff -Nurb linux-2.6.22-570/fs/nfs/direct.c linux-2.6.22-591/fs/nfs/direct.c +--- linux-2.6.22-570/fs/nfs/direct.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/direct.c 2007-12-21 15:36:12.000000000 -0500 +@@ -266,7 +266,7 @@ + static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos) + { + struct nfs_open_context *ctx = dreq->ctx; +- struct inode *inode = ctx->dentry->d_inode; ++ struct inode *inode = ctx->path.dentry->d_inode; + size_t rsize = NFS_SERVER(inode)->rsize; + unsigned int pgbase; + int result; +@@ -295,10 +295,15 @@ + break; + } + if ((unsigned)result < data->npages) { ++ bytes = result * PAGE_SIZE; ++ if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_readdata_release(data); + break; + } ++ bytes -= pgbase; ++ data->npages = result; ++ } + + get_dreq(dreq); + +@@ -601,7 +606,7 @@ + static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync) + { + struct nfs_open_context *ctx = dreq->ctx; +- struct inode *inode = ctx->dentry->d_inode; ++ struct inode *inode = ctx->path.dentry->d_inode; + size_t wsize = NFS_SERVER(inode)->wsize; + unsigned int pgbase; + int result; +@@ -630,10 +635,15 @@ + break; + } + if ((unsigned)result < data->npages) { ++ bytes = result * PAGE_SIZE; ++ if (bytes <= pgbase) { + nfs_direct_release_pages(data->pagevec, result); + nfs_writedata_release(data); + break; + } ++ bytes -= pgbase; ++ data->npages = result; ++ } + + get_dreq(dreq); + +@@ -763,10 +773,8 @@ + (unsigned long) count, (long long) pos); + + if (nr_segs != 1) +- return -EINVAL; +- +- if (count < 0) + goto out; ++ + retval = -EFAULT; + if (!access_ok(VERIFY_WRITE, buf, count)) + goto out; +@@ -814,7 +822,7 @@ + ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) + { +- ssize_t retval; ++ ssize_t retval = -EINVAL; + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ +@@ -827,7 +835,7 @@ + (unsigned long) count, (long long) pos); + + if (nr_segs != 1) +- return -EINVAL; ++ goto out; + + retval = generic_write_checks(file, &pos, &count, 0); + if (retval) +diff -Nurb linux-2.6.22-570/fs/nfs/inode.c linux-2.6.22-591/fs/nfs/inode.c +--- linux-2.6.22-570/fs/nfs/inode.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/nfs/inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -466,14 +466,14 @@ + + ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { +- atomic_set(&ctx->count, 1); +- ctx->dentry = dget(dentry); +- ctx->vfsmnt = mntget(mnt); ++ ctx->path.dentry = dget(dentry); ++ ctx->path.mnt = mntget(mnt); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->lockowner = current->files; + ctx->error = 0; + ctx->dir_cookie = 0; ++ kref_init(&ctx->kref); + } + return ctx; + } +@@ -481,27 +481,33 @@ + struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) + { + if (ctx != NULL) +- atomic_inc(&ctx->count); ++ kref_get(&ctx->kref); + return ctx; + } + +-void put_nfs_open_context(struct nfs_open_context *ctx) ++static void nfs_free_open_context(struct kref *kref) + { +- if (atomic_dec_and_test(&ctx->count)) { ++ struct nfs_open_context *ctx = container_of(kref, ++ struct nfs_open_context, kref); ++ + if (!list_empty(&ctx->list)) { +- struct inode *inode = 
ctx->dentry->d_inode; ++ struct inode *inode = ctx->path.dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } + if (ctx->state != NULL) +- nfs4_close_state(ctx->state, ctx->mode); ++ nfs4_close_state(&ctx->path, ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); +- dput(ctx->dentry); +- mntput(ctx->vfsmnt); ++ dput(ctx->path.dentry); ++ mntput(ctx->path.mnt); + kfree(ctx); +- } ++} ++ ++void put_nfs_open_context(struct nfs_open_context *ctx) ++{ ++ kref_put(&ctx->kref, nfs_free_open_context); + } + + /* +@@ -977,8 +983,8 @@ + goto out_changed; + + server = NFS_SERVER(inode); +- /* Update the fsid if and only if this is the root directory */ +- if (inode == inode->i_sb->s_root->d_inode ++ /* Update the fsid? */ ++ if (S_ISDIR(inode->i_mode) + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + +@@ -1125,27 +1131,10 @@ + */ + void nfs4_clear_inode(struct inode *inode) + { +- struct nfs_inode *nfsi = NFS_I(inode); +- + /* If we are holding a delegation, return it! */ + nfs_inode_return_delegation(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +- /* Now clear out any remaining state */ +- while (!list_empty(&nfsi->open_states)) { +- struct nfs4_state *state; +- +- state = list_entry(nfsi->open_states.next, +- struct nfs4_state, +- inode_states); +- dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", +- __FUNCTION__, +- inode->i_sb->s_id, +- (long long)NFS_FILEID(inode), +- state); +- BUG_ON(atomic_read(&state->count) != 1); +- nfs4_close_state(state, state->state); +- } + } + #endif + +@@ -1188,14 +1177,11 @@ + + inode_init_once(&nfsi->vfs_inode); + spin_lock_init(&nfsi->req_lock); +- INIT_LIST_HEAD(&nfsi->dirty); +- INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); + INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); + INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); +- nfsi->ndirty = 0; + nfsi->ncommit = 0; + nfsi->npages = 0; + nfs4_init_once(nfsi); +diff -Nurb linux-2.6.22-570/fs/nfs/mount_clnt.c linux-2.6.22-591/fs/nfs/mount_clnt.c +--- linux-2.6.22-570/fs/nfs/mount_clnt.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/mount_clnt.c 2007-12-21 15:36:12.000000000 -0500 +@@ -69,6 +69,7 @@ + msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT]; + + status = rpc_call_sync(mnt_clnt, &msg, 0); ++ rpc_shutdown_client(mnt_clnt); + return status < 0? status : (result.status? 
-EACCES : 0); + } + +@@ -84,8 +85,7 @@ + .program = &mnt_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, +- .flags = (RPC_CLNT_CREATE_ONESHOT | +- RPC_CLNT_CREATE_INTR), ++ .flags = RPC_CLNT_CREATE_INTR, + }; + + return rpc_create(&args); +diff -Nurb linux-2.6.22-570/fs/nfs/nfs3proc.c linux-2.6.22-591/fs/nfs/nfs3proc.c +--- linux-2.6.22-570/fs/nfs/nfs3proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/nfs3proc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -335,9 +335,7 @@ + * not sure this buys us anything (and I'd have + * to revamp the NFSv3 XDR code) */ + status = nfs3_proc_setattr(dentry, &fattr, sattr); +- if (status == 0) +- nfs_setattr_update_inode(dentry->d_inode, sattr); +- nfs_refresh_inode(dentry->d_inode, &fattr); ++ nfs_post_op_update_inode(dentry->d_inode, &fattr); + dprintk("NFS reply setattr (post-create): %d\n", status); + } + if (status != 0) +diff -Nurb linux-2.6.22-570/fs/nfs/nfs4_fs.h linux-2.6.22-591/fs/nfs/nfs4_fs.h +--- linux-2.6.22-570/fs/nfs/nfs4_fs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/nfs4_fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -165,7 +165,7 @@ + extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); + extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); + extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); +-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state); ++extern int nfs4_do_close(struct path *path, struct nfs4_state *state); + extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); + extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); + extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +@@ -196,7 +196,7 @@ + extern void nfs4_drop_state_owner(struct nfs4_state_owner *); + extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); + extern void nfs4_put_open_state(struct nfs4_state *); +-extern void nfs4_close_state(struct nfs4_state *, mode_t); ++extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t); + extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t); + extern void nfs4_schedule_state_recovery(struct nfs_client *); + extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); +@@ -222,7 +222,7 @@ + + #else + +-#define nfs4_close_state(a, b) do { } while (0) ++#define nfs4_close_state(a, b, c) do { } while (0) + + #endif /* CONFIG_NFS_V4 */ + #endif /* __LINUX_FS_NFS_NFS4_FS.H */ +diff -Nurb linux-2.6.22-570/fs/nfs/nfs4proc.c linux-2.6.22-591/fs/nfs/nfs4proc.c +--- linux-2.6.22-570/fs/nfs/nfs4proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/nfs4proc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -214,14 +214,14 @@ + } + + struct nfs4_opendata { +- atomic_t count; ++ struct kref kref; + struct nfs_openargs o_arg; + struct nfs_openres o_res; + struct nfs_open_confirmargs c_arg; + struct nfs_open_confirmres c_res; + struct nfs_fattr f_attr; + struct nfs_fattr dir_attr; +- struct dentry *dentry; ++ struct path path; + struct dentry *dir; + struct nfs4_state_owner *owner; + struct iattr attrs; +@@ -230,11 +230,11 @@ + int cancelled; + }; + +-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, ++static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, + struct nfs4_state_owner *sp, int flags, + const struct iattr *attrs) + { +- struct dentry *parent = dget_parent(dentry); ++ struct 
dentry *parent = dget_parent(path->dentry); + struct inode *dir = parent->d_inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_opendata *p; +@@ -245,8 +245,8 @@ + p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + if (p->o_arg.seqid == NULL) + goto err_free; +- atomic_set(&p->count, 1); +- p->dentry = dget(dentry); ++ p->path.mnt = mntget(path->mnt); ++ p->path.dentry = dget(path->dentry); + p->dir = parent; + p->owner = sp; + atomic_inc(&sp->so_count); +@@ -254,7 +254,7 @@ + p->o_arg.open_flags = flags, + p->o_arg.clientid = server->nfs_client->cl_clientid; + p->o_arg.id = sp->so_id; +- p->o_arg.name = &dentry->d_name; ++ p->o_arg.name = &p->path.dentry->d_name; + p->o_arg.server = server; + p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; +@@ -274,6 +274,7 @@ + p->c_arg.fh = &p->o_res.fh; + p->c_arg.stateid = &p->o_res.stateid; + p->c_arg.seqid = p->o_arg.seqid; ++ kref_init(&p->kref); + return p; + err_free: + kfree(p); +@@ -282,27 +283,23 @@ + return NULL; + } + +-static void nfs4_opendata_free(struct nfs4_opendata *p) ++static void nfs4_opendata_free(struct kref *kref) + { +- if (p != NULL && atomic_dec_and_test(&p->count)) { ++ struct nfs4_opendata *p = container_of(kref, ++ struct nfs4_opendata, kref); ++ + nfs_free_seqid(p->o_arg.seqid); + nfs4_put_state_owner(p->owner); + dput(p->dir); +- dput(p->dentry); ++ dput(p->path.dentry); ++ mntput(p->path.mnt); + kfree(p); +- } + } + +-/* Helper for asynchronous RPC calls */ +-static int nfs4_call_async(struct rpc_clnt *clnt, +- const struct rpc_call_ops *tk_ops, void *calldata) ++static void nfs4_opendata_put(struct nfs4_opendata *p) + { +- struct rpc_task *task; +- +- if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata))) +- return -ENOMEM; +- rpc_execute(task); +- return 0; ++ if (p != NULL) ++ kref_put(&p->kref, nfs4_opendata_free); + } + + static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task) +@@ -451,7 +448,7 @@ + opendata->owner->so_cred, + &opendata->o_res); + } +- nfs4_close_state(newstate, opendata->o_arg.open_flags); ++ nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags); + } + if (newstate != state) + return -ESTALE; +@@ -462,7 +459,7 @@ + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. 
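++ * The recovery OPEN is sent with claim type NFS4_OPEN_CLAIM_PREVIOUS,
++ * replaying whatever delegation type was held before the reboot.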
+ */ +-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) ++static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs_delegation *delegation = NFS_I(state->inode)->delegation; + struct nfs4_opendata *opendata; +@@ -478,7 +475,7 @@ + } + delegation_type = delegation->type; + } +- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); ++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL); + if (opendata == NULL) + return -ENOMEM; + opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS; +@@ -486,17 +483,17 @@ + nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh); + opendata->o_arg.u.delegation_type = delegation_type; + status = nfs4_open_recover(opendata, state); +- nfs4_opendata_free(opendata); ++ nfs4_opendata_put(opendata); + return status; + } + +-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) ++static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { +- err = _nfs4_do_open_reclaim(sp, state, dentry); ++ err = _nfs4_do_open_reclaim(ctx, state); + if (err != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, err, &exception); +@@ -512,12 +509,12 @@ + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); +- ret = nfs4_do_open_reclaim(sp, state, ctx->dentry); ++ ret = nfs4_do_open_reclaim(ctx, state); + put_nfs_open_context(ctx); + return ret; + } + +-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) ++static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs4_state_owner *sp = state->owner; + struct nfs4_opendata *opendata; +@@ -525,24 +522,24 @@ + + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + return 0; +- opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL); ++ opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL); + if (opendata == NULL) + return -ENOMEM; + opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR; + memcpy(opendata->o_arg.u.delegation.data, state->stateid.data, + sizeof(opendata->o_arg.u.delegation.data)); + ret = nfs4_open_recover(opendata, state); +- nfs4_opendata_free(opendata); ++ nfs4_opendata_put(opendata); + return ret; + } + +-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) ++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct nfs4_exception exception = { }; +- struct nfs_server *server = NFS_SERVER(dentry->d_inode); ++ struct nfs_server *server = NFS_SERVER(state->inode); + int err; + do { +- err = _nfs4_open_delegation_recall(dentry, state); ++ err = _nfs4_open_delegation_recall(ctx, state); + switch (err) { + case 0: + return err; +@@ -601,9 +598,9 @@ + nfs_confirm_seqid(&data->owner->so_seqid, 0); + state = nfs4_opendata_to_nfs4_state(data); + if (state != NULL) +- nfs4_close_state(state, data->o_arg.open_flags); ++ nfs4_close_state(&data->path, state, data->o_arg.open_flags); + out_free: +- nfs4_opendata_free(data); ++ nfs4_opendata_put(data); + } + + static const struct rpc_call_ops nfs4_open_confirm_ops = { +@@ -621,7 +618,7 @@ + struct rpc_task *task; + int status; + +- atomic_inc(&data->count); ++ kref_get(&data->kref); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes 
the 'error' code path. +@@ -704,9 +701,9 @@ + nfs_confirm_seqid(&data->owner->so_seqid, 0); + state = nfs4_opendata_to_nfs4_state(data); + if (state != NULL) +- nfs4_close_state(state, data->o_arg.open_flags); ++ nfs4_close_state(&data->path, state, data->o_arg.open_flags); + out_free: +- nfs4_opendata_free(data); ++ nfs4_opendata_put(data); + } + + static const struct rpc_call_ops nfs4_open_ops = { +@@ -727,7 +724,7 @@ + struct rpc_task *task; + int status; + +- atomic_inc(&data->count); ++ kref_get(&data->kref); + /* + * If rpc_run_task() ends up calling ->rpc_release(), we + * want to ensure that it takes the 'error' code path. +@@ -811,7 +808,7 @@ + * reclaim state on the server after a network partition. + * Assumes caller holds the appropriate lock + */ +-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) ++static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) + { + struct inode *inode = state->inode; + struct nfs_delegation *delegation = NFS_I(inode)->delegation; +@@ -820,34 +817,34 @@ + int ret; + + if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { +- ret = _nfs4_do_access(inode, sp->so_cred, openflags); ++ ret = _nfs4_do_access(inode, ctx->cred, openflags); + if (ret < 0) + return ret; + memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; + } +- opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL); ++ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, openflags, NULL); + if (opendata == NULL) + return -ENOMEM; + ret = nfs4_open_recover(opendata, state); + if (ret == -ESTALE) { + /* Invalidate the state owner so we don't ever use it again */ +- nfs4_drop_state_owner(sp); +- d_drop(dentry); ++ nfs4_drop_state_owner(state->owner); ++ d_drop(ctx->path.dentry); + } +- nfs4_opendata_free(opendata); ++ nfs4_opendata_put(opendata); + return ret; + } + +-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) ++static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) + { +- struct nfs_server *server = NFS_SERVER(dentry->d_inode); ++ struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + + do { +- err = _nfs4_open_expired(sp, state, dentry); ++ err = _nfs4_open_expired(ctx, state); + if (err == -NFS4ERR_DELAY) + nfs4_handle_exception(server, err, &exception); + } while (exception.retry); +@@ -862,7 +859,7 @@ + ctx = nfs4_state_find_open_context(state); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); +- ret = nfs4_do_open_expired(sp, state, ctx->dentry); ++ ret = nfs4_do_open_expired(ctx, state); + put_nfs_open_context(ctx); + return ret; + } +@@ -953,9 +950,25 @@ + } + + /* ++ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* ++ * fields corresponding to attributes that were used to store the verifier. 
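++ * In practice these are the time_access and time_modify words tested below.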
++ * Make sure we clobber those fields in the later setattr call ++ */ ++static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr) ++{ ++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) && ++ !(sattr->ia_valid & ATTR_ATIME_SET)) ++ sattr->ia_valid |= ATTR_ATIME; ++ ++ if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) && ++ !(sattr->ia_valid & ATTR_MTIME_SET)) ++ sattr->ia_valid |= ATTR_MTIME; ++} ++ ++/* + * Returns a referenced nfs4_state + */ +-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) ++static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) + { + struct nfs4_state_owner *sp; + struct nfs4_state *state = NULL; +@@ -975,27 +988,30 @@ + goto err_put_state_owner; + down_read(&clp->cl_sem); + status = -ENOMEM; +- opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr); ++ opendata = nfs4_opendata_alloc(path, sp, flags, sattr); + if (opendata == NULL) + goto err_release_rwsem; + + status = _nfs4_proc_open(opendata); + if (status != 0) +- goto err_opendata_free; ++ goto err_opendata_put; ++ ++ if (opendata->o_arg.open_flags & O_EXCL) ++ nfs4_exclusive_attrset(opendata, sattr); + + status = -ENOMEM; + state = nfs4_opendata_to_nfs4_state(opendata); + if (state == NULL) +- goto err_opendata_free; ++ goto err_opendata_put; + if (opendata->o_res.delegation_type != 0) + nfs_inode_set_delegation(state->inode, cred, &opendata->o_res); +- nfs4_opendata_free(opendata); ++ nfs4_opendata_put(opendata); + nfs4_put_state_owner(sp); + up_read(&clp->cl_sem); + *res = state; + return 0; +-err_opendata_free: +- nfs4_opendata_free(opendata); ++err_opendata_put: ++ nfs4_opendata_put(opendata); + err_release_rwsem: + up_read(&clp->cl_sem); + err_put_state_owner: +@@ -1006,14 +1022,14 @@ + } + + +-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) ++static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred) + { + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int status; + + do { +- status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res); ++ status = _nfs4_do_open(dir, path, flags, sattr, cred, &res); + if (status == 0) + break; + /* NOTE: BAD_SEQID means the server and client disagree about the +@@ -1101,6 +1117,7 @@ + } + + struct nfs4_closedata { ++ struct path path; + struct inode *inode; + struct nfs4_state *state; + struct nfs_closeargs arg; +@@ -1117,6 +1134,8 @@ + nfs4_put_open_state(calldata->state); + nfs_free_seqid(calldata->arg.seqid); + nfs4_put_state_owner(sp); ++ dput(calldata->path.dentry); ++ mntput(calldata->path.mnt); + kfree(calldata); + } + +@@ -1209,18 +1228,20 @@ + * + * NOTE: Caller must be holding the sp->so_owner semaphore! 
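++ * The close runs as an asynchronous RPC; the nfs4_closedata takes its
++ * own references on the vfsmount and dentry so the path stays pinned
++ * until the call completes.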
+ */ +-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) ++int nfs4_do_close(struct path *path, struct nfs4_state *state) + { +- struct nfs_server *server = NFS_SERVER(inode); ++ struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_closedata *calldata; ++ struct nfs4_state_owner *sp = state->owner; ++ struct rpc_task *task; + int status = -ENOMEM; + + calldata = kmalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out; +- calldata->inode = inode; ++ calldata->inode = state->inode; + calldata->state = state; +- calldata->arg.fh = NFS_FH(inode); ++ calldata->arg.fh = NFS_FH(state->inode); + calldata->arg.stateid = &state->stateid; + /* Serialization for the sequence id */ + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); +@@ -1229,36 +1250,44 @@ + calldata->arg.bitmask = server->attr_bitmask; + calldata->res.fattr = &calldata->fattr; + calldata->res.server = server; ++ calldata->path.mnt = mntget(path->mnt); ++ calldata->path.dentry = dget(path->dentry); + +- status = nfs4_call_async(server->client, &nfs4_close_ops, calldata); +- if (status == 0) +- goto out; +- +- nfs_free_seqid(calldata->arg.seqid); ++ task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata); ++ if (IS_ERR(task)) ++ return PTR_ERR(task); ++ rpc_put_task(task); ++ return 0; + out_free_calldata: + kfree(calldata); + out: ++ nfs4_put_open_state(state); ++ nfs4_put_state_owner(sp); + return status; + } + +-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) ++static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state) + { + struct file *filp; + +- filp = lookup_instantiate_filp(nd, dentry, NULL); ++ filp = lookup_instantiate_filp(nd, path->dentry, NULL); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + return 0; + } +- nfs4_close_state(state, nd->intent.open.flags); ++ nfs4_close_state(path, state, nd->intent.open.flags); + return PTR_ERR(filp); + } + + struct dentry * + nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) + { ++ struct path path = { ++ .mnt = nd->mnt, ++ .dentry = dentry, ++ }; + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; +@@ -1277,7 +1306,7 @@ + cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0); + if (IS_ERR(cred)) + return (struct dentry *)cred; +- state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) +@@ -1287,13 +1316,17 @@ + res = d_add_unique(dentry, igrab(state->inode)); + if (res != NULL) + dentry = res; +- nfs4_intent_set_file(nd, dentry, state); ++ nfs4_intent_set_file(nd, &path, state); + return res; + } + + int + nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) + { ++ struct path path = { ++ .mnt = nd->mnt, ++ .dentry = dentry, ++ }; + struct rpc_cred *cred; + struct nfs4_state *state; + +@@ -1302,7 +1335,7 @@ + return PTR_ERR(cred); + state = nfs4_open_delegated(dentry->d_inode, openflags, cred); + if (IS_ERR(state)) +- state = nfs4_do_open(dir, dentry, openflags, NULL, cred); ++ state = nfs4_do_open(dir, &path, openflags, NULL, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + switch (PTR_ERR(state)) { +@@ -1318,10 +1351,10 @@ + } + } + if (state->inode == 
dentry->d_inode) { +- nfs4_intent_set_file(nd, dentry, state); ++ nfs4_intent_set_file(nd, &path, state); + return 1; + } +- nfs4_close_state(state, openflags); ++ nfs4_close_state(&path, state, openflags); + out_drop: + d_drop(dentry); + return 0; +@@ -1752,6 +1785,10 @@ + nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + int flags, struct nameidata *nd) + { ++ struct path path = { ++ .mnt = nd->mnt, ++ .dentry = dentry, ++ }; + struct nfs4_state *state; + struct rpc_cred *cred; + int status = 0; +@@ -1761,7 +1798,7 @@ + status = PTR_ERR(cred); + goto out; + } +- state = nfs4_do_open(dir, dentry, flags, sattr, cred); ++ state = nfs4_do_open(dir, &path, flags, sattr, cred); + put_rpccred(cred); + if (IS_ERR(state)) { + status = PTR_ERR(state); +@@ -1773,11 +1810,12 @@ + status = nfs4_do_setattr(state->inode, &fattr, sattr, state); + if (status == 0) + nfs_setattr_update_inode(state->inode, sattr); ++ nfs_post_op_update_inode(state->inode, &fattr); + } +- if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) +- status = nfs4_intent_set_file(nd, dentry, state); ++ if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) ++ status = nfs4_intent_set_file(nd, &path, state); + else +- nfs4_close_state(state, flags); ++ nfs4_close_state(&path, state, flags); + out: + return status; + } +@@ -3285,7 +3323,7 @@ + memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, + sizeof(data->lsp->ls_stateid.data)); + data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; +- renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); ++ renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); + } + nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid); + out: +diff -Nurb linux-2.6.22-570/fs/nfs/nfs4state.c linux-2.6.22-591/fs/nfs/nfs4state.c +--- linux-2.6.22-570/fs/nfs/nfs4state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/nfs4state.c 2007-12-21 15:36:12.000000000 -0500 +@@ -341,7 +341,7 @@ + /* + * Close the current file. + */ +-void nfs4_close_state(struct nfs4_state *state, mode_t mode) ++void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode) + { + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; +@@ -375,10 +375,11 @@ + spin_unlock(&inode->i_lock); + spin_unlock(&owner->so_lock); + +- if (oldstate != newstate && nfs4_do_close(inode, state) == 0) +- return; ++ if (oldstate == newstate) { + nfs4_put_open_state(state); + nfs4_put_state_owner(owner); ++ } else ++ nfs4_do_close(path, state); + } + + /* +diff -Nurb linux-2.6.22-570/fs/nfs/nfs4xdr.c linux-2.6.22-591/fs/nfs/nfs4xdr.c +--- linux-2.6.22-570/fs/nfs/nfs4xdr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/nfs4xdr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -3269,7 +3269,7 @@ + static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) + { + __be32 *p; +- uint32_t bmlen; ++ uint32_t savewords, bmlen, i; + int status; + + status = decode_op_hdr(xdr, OP_OPEN); +@@ -3287,7 +3287,12 @@ + goto xdr_error; + + READ_BUF(bmlen << 2); +- p += bmlen; ++ savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); ++ for (i = 0; i < savewords; ++i) ++ READ32(res->attrset[i]); ++ ++ p += (bmlen - savewords); ++ + return decode_delegation(xdr, res); + xdr_error: + dprintk("%s: Bitmap too large! 
Length = %u\n", __FUNCTION__, bmlen); +diff -Nurb linux-2.6.22-570/fs/nfs/pagelist.c linux-2.6.22-591/fs/nfs/pagelist.c +--- linux-2.6.22-570/fs/nfs/pagelist.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/pagelist.c 2007-12-21 15:36:12.000000000 -0500 +@@ -85,9 +85,8 @@ + req->wb_offset = offset; + req->wb_pgbase = offset; + req->wb_bytes = count; +- atomic_set(&req->wb_count, 1); + req->wb_context = get_nfs_open_context(ctx); +- ++ kref_init(&req->wb_kref); + return req; + } + +@@ -109,29 +108,29 @@ + } + + /** +- * nfs_set_page_writeback_locked - Lock a request for writeback ++ * nfs_set_page_tag_locked - Tag a request as locked + * @req: + */ +-int nfs_set_page_writeback_locked(struct nfs_page *req) ++static int nfs_set_page_tag_locked(struct nfs_page *req) + { +- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); ++ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); + + if (!nfs_lock_request(req)) + return 0; +- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); ++ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + return 1; + } + + /** +- * nfs_clear_page_writeback - Unlock request and wake up sleepers ++ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers + */ +-void nfs_clear_page_writeback(struct nfs_page *req) ++void nfs_clear_page_tag_locked(struct nfs_page *req) + { +- struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); ++ struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode); + + if (req->wb_page != NULL) { + spin_lock(&nfsi->req_lock); +- radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK); ++ radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + spin_unlock(&nfsi->req_lock); + } + nfs_unlock_request(req); +@@ -160,11 +159,9 @@ + * + * Note: Should never be called with the spinlock held! + */ +-void +-nfs_release_request(struct nfs_page *req) ++static void nfs_free_request(struct kref *kref) + { +- if (!atomic_dec_and_test(&req->wb_count)) +- return; ++ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref); + + /* Release struct file or cached credential */ + nfs_clear_request(req); +@@ -172,6 +169,11 @@ + nfs_page_free(req); + } + ++void nfs_release_request(struct nfs_page *req) ++{ ++ kref_put(&req->wb_kref, nfs_free_request); ++} ++ + static int nfs_wait_bit_interruptible(void *word) + { + int ret = 0; +@@ -193,7 +195,7 @@ + int + nfs_wait_on_request(struct nfs_page *req) + { +- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->dentry->d_inode); ++ struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode); + sigset_t oldmask; + int ret = 0; + +@@ -379,10 +381,10 @@ + /** + * nfs_scan_list - Scan a list for matching requests + * @nfsi: NFS inode +- * @head: One of the NFS inode request lists + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. ++ * @tag: tag to scan for + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space +@@ -390,9 +392,9 @@ + * The requests are *not* checked to ensure that they form a contiguous set. 
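++ * Only requests carrying @tag are considered; the tag is cleared as
++ * each request is locked and moved onto @dst.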
+ * You must be holding the inode's req_lock when calling this function + */ +-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, ++int nfs_scan_list(struct nfs_inode *nfsi, + struct list_head *dst, pgoff_t idx_start, +- unsigned int npages) ++ unsigned int npages, int tag) + { + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; +@@ -407,9 +409,9 @@ + idx_end = idx_start + npages - 1; + + for (;;) { +- found = radix_tree_gang_lookup(&nfsi->nfs_page_tree, ++ found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, +- NFS_SCAN_MAXENTRIES); ++ NFS_SCAN_MAXENTRIES, tag); + if (found <= 0) + break; + for (i = 0; i < found; i++) { +@@ -417,15 +419,18 @@ + if (req->wb_index > idx_end) + goto out; + idx_start = req->wb_index + 1; +- if (req->wb_list_head != head) +- continue; +- if (nfs_set_page_writeback_locked(req)) { ++ if (nfs_set_page_tag_locked(req)) { + nfs_list_remove_request(req); ++ radix_tree_tag_clear(&nfsi->nfs_page_tree, ++ req->wb_index, tag); + nfs_list_add_request(req, dst); + res++; ++ if (res == INT_MAX) ++ goto out; + } + } +- ++ /* for latency reduction */ ++ cond_resched_lock(&nfsi->req_lock); + } + out: + return res; +diff -Nurb linux-2.6.22-570/fs/nfs/read.c linux-2.6.22-591/fs/nfs/read.c +--- linux-2.6.22-570/fs/nfs/read.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/read.c 2007-12-21 15:36:12.000000000 -0500 +@@ -145,8 +145,8 @@ + unlock_page(req->wb_page); + + dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", +- req->wb_context->dentry->d_inode->i_sb->s_id, +- (long long)NFS_FILEID(req->wb_context->dentry->d_inode), ++ req->wb_context->path.dentry->d_inode->i_sb->s_id, ++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + nfs_clear_request(req); +@@ -164,7 +164,7 @@ + int flags; + + data->req = req; +- data->inode = inode = req->wb_context->dentry->d_inode; ++ data->inode = inode = req->wb_context->path.dentry->d_inode; + data->cred = req->wb_context->cred; + + data->args.fh = NFS_FH(inode); +@@ -483,17 +483,19 @@ + */ + error = nfs_wb_page(inode, page); + if (error) +- goto out_error; ++ goto out_unlock; ++ if (PageUptodate(page)) ++ goto out_unlock; + + error = -ESTALE; + if (NFS_STALE(inode)) +- goto out_error; ++ goto out_unlock; + + if (file == NULL) { + error = -EBADF; + ctx = nfs_find_open_context(inode, NULL, FMODE_READ); + if (ctx == NULL) +- goto out_error; ++ goto out_unlock; + } else + ctx = get_nfs_open_context((struct nfs_open_context *) + file->private_data); +@@ -502,8 +504,7 @@ + + put_nfs_open_context(ctx); + return error; +- +-out_error: ++out_unlock: + unlock_page(page); + return error; + } +@@ -520,21 +521,32 @@ + struct inode *inode = page->mapping->host; + struct nfs_page *new; + unsigned int len; ++ int error; ++ ++ error = nfs_wb_page(inode, page); ++ if (error) ++ goto out_unlock; ++ if (PageUptodate(page)) ++ goto out_unlock; + +- nfs_wb_page(inode, page); + len = nfs_page_length(page); + if (len == 0) + return nfs_return_empty_page(page); ++ + new = nfs_create_request(desc->ctx, inode, page, 0, len); +- if (IS_ERR(new)) { +- SetPageError(page); +- unlock_page(page); +- return PTR_ERR(new); +- } ++ if (IS_ERR(new)) ++ goto out_error; ++ + if (len < PAGE_CACHE_SIZE) + zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); + nfs_pageio_add_request(desc->pgio, new); + return 0; ++out_error: ++ error = PTR_ERR(new); ++ SetPageError(page); ++out_unlock: ++ unlock_page(page); ++ return error; + } + + int 
nfs_readpages(struct file *filp, struct address_space *mapping, +diff -Nurb linux-2.6.22-570/fs/nfs/super.c linux-2.6.22-591/fs/nfs/super.c +--- linux-2.6.22-570/fs/nfs/super.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/nfs/super.c 2007-12-21 15:36:12.000000000 -0500 +@@ -292,6 +292,7 @@ + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, + { NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" }, ++ { NFS_MOUNT_UNSHARED, ",nosharecache", ""}, + { NFS_MOUNT_TAGGED, ",tag", "" }, + { 0, NULL, NULL } + }; +@@ -432,7 +433,20 @@ + */ + static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags) + { ++ struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb); ++ struct rpc_clnt *rpc; ++ + shrink_submounts(vfsmnt, &nfs_automount_list); ++ ++ if (!(flags & MNT_FORCE)) ++ return; ++ /* -EIO all pending I/O */ ++ rpc = server->client_acl; ++ if (!IS_ERR(rpc)) ++ rpc_killall_tasks(rpc); ++ rpc = server->client; ++ if (!IS_ERR(rpc)) ++ rpc_killall_tasks(rpc); + } + + /* +@@ -602,13 +616,51 @@ + { + struct nfs_server *server = data, *old = NFS_SB(sb); + +- if (old->nfs_client != server->nfs_client) ++ if (memcmp(&old->nfs_client->cl_addr, ++ &server->nfs_client->cl_addr, ++ sizeof(old->nfs_client->cl_addr)) != 0) ++ return 0; ++ /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ ++ if (old->flags & NFS_MOUNT_UNSHARED) + return 0; + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) + return 0; + return 1; + } + ++#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) ++ ++static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) ++{ ++ const struct nfs_server *a = s->s_fs_info; ++ const struct rpc_clnt *clnt_a = a->client; ++ const struct rpc_clnt *clnt_b = b->client; ++ ++ if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) ++ goto Ebusy; ++ if (a->nfs_client != b->nfs_client) ++ goto Ebusy; ++ if (a->flags != b->flags) ++ goto Ebusy; ++ if (a->wsize != b->wsize) ++ goto Ebusy; ++ if (a->rsize != b->rsize) ++ goto Ebusy; ++ if (a->acregmin != b->acregmin) ++ goto Ebusy; ++ if (a->acregmax != b->acregmax) ++ goto Ebusy; ++ if (a->acdirmin != b->acdirmin) ++ goto Ebusy; ++ if (a->acdirmax != b->acdirmax) ++ goto Ebusy; ++ if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) ++ goto Ebusy; ++ return 0; ++Ebusy: ++ return -EBUSY; ++} ++ + static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) + { +@@ -617,6 +669,7 @@ + struct nfs_fh mntfh; + struct nfs_mount_data *data = raw_data; + struct dentry *mntroot; ++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super; + int error; + + /* Validate the mount data */ +@@ -631,16 +684,22 @@ + goto out_err_noserver; + } + ++ if (server->flags & NFS_MOUNT_UNSHARED) ++ compare_super = NULL; ++ + /* Get a superblock - note that we may end up sharing one that already exists */ +- s = sget(fs_type, nfs_compare_super, nfs_set_super, server); ++ s = sget(fs_type, compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; + } + + if (s->s_fs_info != server) { ++ error = nfs_compare_mount_options(s, server, flags); + nfs_free_server(server); + server = NULL; ++ if (error < 0) ++ goto error_splat_super; + } + + if (!s->s_root) { +@@ -693,6 +752,7 @@ + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; ++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super; + int error; + + 
dprintk("--> nfs_xdev_get_sb()\n"); +@@ -704,8 +764,11 @@ + goto out_err_noserver; + } + ++ if (server->flags & NFS_MOUNT_UNSHARED) ++ compare_super = NULL; ++ + /* Get a superblock - note that we may end up sharing one that already exists */ +- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); ++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; +@@ -810,6 +873,7 @@ + struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, ip_addr[16]; + void *p; ++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super; + int error; + + if (data == NULL) { +@@ -881,16 +945,22 @@ + goto out_err_noserver; + } + ++ if (server->flags & NFS4_MOUNT_UNSHARED) ++ compare_super = NULL; ++ + /* Get a superblock - note that we may end up sharing one that already exists */ +- s = sget(fs_type, nfs_compare_super, nfs_set_super, server); ++ s = sget(fs_type, compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_free; + } + + if (s->s_fs_info != server) { ++ error = nfs_compare_mount_options(s, server, flags); + nfs_free_server(server); + server = NULL; ++ if (error < 0) ++ goto error_splat_super; + } + + if (!s->s_root) { +@@ -951,6 +1021,7 @@ + struct super_block *s; + struct nfs_server *server; + struct dentry *mntroot; ++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super; + int error; + + dprintk("--> nfs4_xdev_get_sb()\n"); +@@ -962,8 +1033,11 @@ + goto out_err_noserver; + } + ++ if (server->flags & NFS4_MOUNT_UNSHARED) ++ compare_super = NULL; ++ + /* Get a superblock - note that we may end up sharing one that already exists */ +- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); ++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; +@@ -1018,6 +1092,7 @@ + struct nfs_server *server; + struct dentry *mntroot; + struct nfs_fh mntfh; ++ int (*compare_super)(struct super_block *,void *) = nfs_compare_super; + int error; + + dprintk("--> nfs4_referral_get_sb()\n"); +@@ -1029,8 +1104,11 @@ + goto out_err_noserver; + } + ++ if (server->flags & NFS4_MOUNT_UNSHARED) ++ compare_super = NULL; ++ + /* Get a superblock - note that we may end up sharing one that already exists */ +- s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); ++ s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_nosb; +diff -Nurb linux-2.6.22-570/fs/nfs/write.c linux-2.6.22-591/fs/nfs/write.c +--- linux-2.6.22-570/fs/nfs/write.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfs/write.c 2007-12-21 15:36:12.000000000 -0500 +@@ -117,7 +117,7 @@ + if (PagePrivate(page)) { + req = (struct nfs_page *)page_private(page); + if (req != NULL) +- atomic_inc(&req->wb_count); ++ kref_get(&req->wb_kref); + } + return req; + } +@@ -191,8 +191,6 @@ + } + /* Update file length */ + nfs_grow_file(page, offset, count); +- /* Set the PG_uptodate flag? 
*/ +- nfs_mark_uptodate(page, offset, count); + nfs_unlock_request(req); + return 0; + } +@@ -291,7 +289,7 @@ + BUG(); + } + radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, +- NFS_PAGE_TAG_WRITEBACK); ++ NFS_PAGE_TAG_LOCKED); + ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); + spin_unlock(req_lock); + nfs_pageio_add_request(pgio, req); +@@ -400,7 +398,7 @@ + if (PageDirty(req->wb_page)) + set_bit(PG_NEED_FLUSH, &req->wb_flags); + nfsi->npages++; +- atomic_inc(&req->wb_count); ++ kref_get(&req->wb_kref); + return 0; + } + +@@ -409,7 +407,7 @@ + */ + static void nfs_inode_remove_request(struct nfs_page *req) + { +- struct inode *inode = req->wb_context->dentry->d_inode; ++ struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + BUG_ON (!NFS_WBACK_BUSY(req)); +@@ -457,13 +455,15 @@ + static void + nfs_mark_request_commit(struct nfs_page *req) + { +- struct inode *inode = req->wb_context->dentry->d_inode; ++ struct inode *inode = req->wb_context->path.dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); +- nfs_list_add_request(req, &nfsi->commit); + nfsi->ncommit++; + set_bit(PG_NEED_COMMIT, &(req)->wb_flags); ++ radix_tree_tag_set(&nfsi->nfs_page_tree, ++ req->wb_index, ++ NFS_PAGE_TAG_COMMIT); + spin_unlock(&nfsi->req_lock); + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); +@@ -526,14 +526,14 @@ + idx_end = idx_start + npages - 1; + + next = idx_start; +- while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) { ++ while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) { + if (req->wb_index > idx_end) + break; + + next = req->wb_index + 1; + BUG_ON(!NFS_WBACK_BUSY(req)); + +- atomic_inc(&req->wb_count); ++ kref_get(&req->wb_kref); + spin_unlock(&nfsi->req_lock); + error = nfs_wait_on_request(req); + nfs_release_request(req); +@@ -577,10 +577,9 @@ + int res = 0; + + if (nfsi->ncommit != 0) { +- res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages); ++ res = nfs_scan_list(nfsi, dst, idx_start, npages, ++ NFS_PAGE_TAG_COMMIT); + nfsi->ncommit -= res; +- if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) +- printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } + return res; + } +@@ -751,12 +750,17 @@ + static void nfs_writepage_release(struct nfs_page *req) + { + +- if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) { ++ if (PageError(req->wb_page)) { ++ nfs_end_page_writeback(req->wb_page); ++ nfs_inode_remove_request(req); ++ } else if (!nfs_reschedule_unstable_write(req)) { ++ /* Set the PG_uptodate flag */ ++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes); + nfs_end_page_writeback(req->wb_page); + nfs_inode_remove_request(req); + } else + nfs_end_page_writeback(req->wb_page); +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + } + + static inline int flush_task_priority(int how) +@@ -786,7 +790,7 @@ + * NB: take care not to mess about with data->commit et al. 
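The wb_count to wb_kref conversions in the hunks above move nfs_page reference counting onto the standard kref pattern. An illustrative sketch of that pattern (my_req is an invented type used only for the example; the kref_* calls are the regular <linux/kref.h> API):

#include <linux/kref.h>
#include <linux/slab.h>

struct my_req {
	struct kref ref;
	/* ... payload ... */
};

static void my_req_release(struct kref *kref)
{
	struct my_req *req = container_of(kref, struct my_req, ref);

	kfree(req);
}

/* Take an extra reference while the object is visible elsewhere. */
static void my_req_hold(struct my_req *req)
{
	kref_get(&req->ref);
}

/* Drop a reference; the final put calls the release function. */
static void my_req_put(struct my_req *req)
{
	kref_put(&req->ref, my_req_release);
}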
*/ + + data->req = req; +- data->inode = inode = req->wb_context->dentry->d_inode; ++ data->inode = inode = req->wb_context->path.dentry->d_inode; + data->cred = req->wb_context->cred; + + data->args.fh = NFS_FH(inode); +@@ -885,7 +889,7 @@ + } + nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + return -ENOMEM; + } + +@@ -928,7 +932,7 @@ + nfs_list_remove_request(req); + nfs_redirty_request(req); + nfs_end_page_writeback(req->wb_page); +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + } + return -ENOMEM; + } +@@ -954,8 +958,8 @@ + struct page *page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", +- req->wb_context->dentry->d_inode->i_sb->s_id, +- (long long)NFS_FILEID(req->wb_context->dentry->d_inode), ++ req->wb_context->path.dentry->d_inode->i_sb->s_id, ++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + +@@ -1020,8 +1024,8 @@ + page = req->wb_page; + + dprintk("NFS: write (%s/%Ld %d@%Ld)", +- req->wb_context->dentry->d_inode->i_sb->s_id, +- (long long)NFS_FILEID(req->wb_context->dentry->d_inode), ++ req->wb_context->path.dentry->d_inode->i_sb->s_id, ++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + +@@ -1039,12 +1043,14 @@ + dprintk(" marked for commit\n"); + goto next; + } ++ /* Set the PG_uptodate flag? */ ++ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); + dprintk(" OK\n"); + remove_request: + nfs_end_page_writeback(page); + nfs_inode_remove_request(req); + next: +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + } + } + +@@ -1157,7 +1163,7 @@ + + list_splice_init(head, &data->pages); + first = nfs_list_entry(data->pages.next); +- inode = first->wb_context->dentry->d_inode; ++ inode = first->wb_context->path.dentry->d_inode; + + data->inode = inode; + data->cred = first->wb_context->cred; +@@ -1207,7 +1213,7 @@ + nfs_list_remove_request(req); + nfs_mark_request_commit(req); + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + } + return -ENOMEM; + } +@@ -1234,8 +1240,8 @@ + dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + + dprintk("NFS: commit (%s/%Ld %d@%Ld)", +- req->wb_context->dentry->d_inode->i_sb->s_id, +- (long long)NFS_FILEID(req->wb_context->dentry->d_inode), ++ req->wb_context->path.dentry->d_inode->i_sb->s_id, ++ (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), + req->wb_bytes, + (long long)req_offset(req)); + if (task->tk_status < 0) { +@@ -1249,6 +1255,9 @@ + * returned by the server against all stored verfs. */ + if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) { + /* We have a match */ ++ /* Set the PG_uptodate flag */ ++ nfs_mark_uptodate(req->wb_page, req->wb_pgbase, ++ req->wb_bytes); + nfs_inode_remove_request(req); + dprintk(" OK\n"); + goto next; +@@ -1257,7 +1266,7 @@ + dprintk(" mismatch\n"); + nfs_redirty_request(req); + next: +- nfs_clear_page_writeback(req); ++ nfs_clear_page_tag_locked(req); + } + } + +diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4callback.c linux-2.6.22-591/fs/nfsd/nfs4callback.c +--- linux-2.6.22-570/fs/nfsd/nfs4callback.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfsd/nfs4callback.c 2007-12-21 15:36:12.000000000 -0500 +@@ -429,29 +429,23 @@ + goto out_err; + } + +- /* Kick rpciod, put the call on the wire. 
*/ +- if (rpciod_up() != 0) +- goto out_clnt; +- + /* the task holds a reference to the nfs4_client struct */ + atomic_inc(&clp->cl_count); + + msg.rpc_cred = nfsd4_lookupcred(clp,0); + if (IS_ERR(msg.rpc_cred)) +- goto out_rpciod; ++ goto out_release_clp; + status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL); + put_rpccred(msg.rpc_cred); + + if (status != 0) { + dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); +- goto out_rpciod; ++ goto out_release_clp; + } + return; + +-out_rpciod: ++out_release_clp: + atomic_dec(&clp->cl_count); +- rpciod_down(); +-out_clnt: + rpc_shutdown_client(cb->cb_client); + out_err: + cb->cb_client = NULL; +diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4state.c linux-2.6.22-591/fs/nfsd/nfs4state.c +--- linux-2.6.22-570/fs/nfsd/nfs4state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfsd/nfs4state.c 2007-12-21 15:36:12.000000000 -0500 +@@ -378,7 +378,6 @@ + if (clnt) { + clp->cl_callback.cb_client = NULL; + rpc_shutdown_client(clnt); +- rpciod_down(); + } + } + +diff -Nurb linux-2.6.22-570/fs/nfsd/nfssvc.c linux-2.6.22-591/fs/nfsd/nfssvc.c +--- linux-2.6.22-570/fs/nfsd/nfssvc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/nfsd/nfssvc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include <linux/freezer.h> + #include + + #include +@@ -432,6 +433,7 @@ + * dirty pages. + */ + current->flags |= PF_LESS_THROTTLE; ++ set_freezable(); + + /* + * The main request loop +diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.c linux-2.6.22-591/fs/ocfs2/alloc.c +--- linux-2.6.22-570/fs/ocfs2/alloc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/alloc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -50,6 +50,8 @@ + #include "buffer_head_io.h" + + static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); ++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, ++ struct ocfs2_extent_block *eb); + + /* + * Structures which describe a path through a btree, and functions to +@@ -117,6 +119,31 @@ + } + + /* ++ * Copy all the elements of src into dest. After this call, src could be freed ++ * without affecting dest. ++ * ++ * Both paths should have the same root. Any non-root elements of dest ++ * will be freed. ++ */ ++static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src) ++{ ++ int i; ++ ++ BUG_ON(path_root_bh(dest) != path_root_bh(src)); ++ BUG_ON(path_root_el(dest) != path_root_el(src)); ++ ++ ocfs2_reinit_path(dest, 1); ++ ++ for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) { ++ dest->p_node[i].bh = src->p_node[i].bh; ++ dest->p_node[i].el = src->p_node[i].el; ++ ++ if (dest->p_node[i].bh) ++ get_bh(dest->p_node[i].bh); ++ } ++} ++ ++/* + * Make the *dest path the same as src and re-initialize src path to + * have a root only. + */ +@@ -212,10 +239,41 @@ + return ret; + } + ++/* ++ * Return the index of the extent record which contains cluster #v_cluster. ++ * -1 is returned if it was not found. ++ * ++ * Should work fine on interior and exterior nodes.
++ */ ++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster) ++{ ++ int ret = -1; ++ int i; ++ struct ocfs2_extent_rec *rec; ++ u32 rec_end, rec_start, clusters; ++ ++ for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { ++ rec = &el->l_recs[i]; ++ ++ rec_start = le32_to_cpu(rec->e_cpos); ++ clusters = ocfs2_rec_clusters(el, rec); ++ ++ rec_end = rec_start + clusters; ++ ++ if (v_cluster >= rec_start && v_cluster < rec_end) { ++ ret = i; ++ break; ++ } ++ } ++ ++ return ret; ++} ++ + enum ocfs2_contig_type { + CONTIG_NONE = 0, + CONTIG_LEFT, +- CONTIG_RIGHT ++ CONTIG_RIGHT, ++ CONTIG_LEFTRIGHT, + }; + + +@@ -253,6 +311,14 @@ + { + u64 blkno = le64_to_cpu(insert_rec->e_blkno); + ++ /* ++ * Refuse to coalesce extent records with different flag ++ * fields - we don't want to mix unwritten extents with user ++ * data. ++ */ ++ if (ext->e_flags != insert_rec->e_flags) ++ return CONTIG_NONE; ++ + if (ocfs2_extents_adjacent(ext, insert_rec) && + ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) + return CONTIG_RIGHT; +@@ -277,7 +343,14 @@ + APPEND_TAIL, + }; + ++enum ocfs2_split_type { ++ SPLIT_NONE = 0, ++ SPLIT_LEFT, ++ SPLIT_RIGHT, ++}; ++ + struct ocfs2_insert_type { ++ enum ocfs2_split_type ins_split; + enum ocfs2_append_type ins_appending; + enum ocfs2_contig_type ins_contig; + int ins_contig_index; +@@ -285,6 +358,13 @@ + int ins_tree_depth; + }; + ++struct ocfs2_merge_ctxt { ++ enum ocfs2_contig_type c_contig_type; ++ int c_has_empty_extent; ++ int c_split_covers_rec; ++ int c_used_tail_recs; ++}; ++ + /* + * How many free extents have we got before we need more meta data? + */ +@@ -384,13 +464,7 @@ + strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE); + eb->h_blkno = cpu_to_le64(first_blkno); + eb->h_fs_generation = cpu_to_le32(osb->fs_generation); +- +-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS +- /* we always use slot zero's suballocator */ +- eb->h_suballoc_slot = 0; +-#else + eb->h_suballoc_slot = cpu_to_le16(osb->slot_num); +-#endif + eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); + eb->h_list.l_count = + cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); +@@ -461,7 +535,7 @@ + struct inode *inode, + struct buffer_head *fe_bh, + struct buffer_head *eb_bh, +- struct buffer_head *last_eb_bh, ++ struct buffer_head **last_eb_bh, + struct ocfs2_alloc_context *meta_ac) + { + int status, new_blocks, i; +@@ -476,7 +550,7 @@ + + mlog_entry_void(); + +- BUG_ON(!last_eb_bh); ++ BUG_ON(!last_eb_bh || !*last_eb_bh); + + fe = (struct ocfs2_dinode *) fe_bh->b_data; + +@@ -507,7 +581,7 @@ + goto bail; + } + +- eb = (struct ocfs2_extent_block *)last_eb_bh->b_data; ++ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; + new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list); + + /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be +@@ -568,7 +642,7 @@ + * journal_dirty erroring as it won't unless we've aborted the + * handle (in which case we would never be here) so reserving + * the write with journal_access is all we need to do. */ +- status = ocfs2_journal_access(handle, inode, last_eb_bh, ++ status = ocfs2_journal_access(handle, inode, *last_eb_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); +@@ -601,10 +675,10 @@ + * next_leaf on the previously last-extent-block. 
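ocfs2_search_extent_list() above treats every record as the half-open cluster range [e_cpos, e_cpos + clusters). The same search, reduced to plain integers in a standalone sketch (struct and function names invented for illustration):

struct crange {
	unsigned int start;	/* first cluster covered */
	unsigned int len;	/* number of clusters */
};

/* Return the index of the range containing cluster v, or -1 if no
 * range covers it - the same convention as the helper above. */
static int find_crange(const struct crange *r, int nr, unsigned int v)
{
	int i;

	for (i = 0; i < nr; i++)
		if (v >= r[i].start && v < r[i].start + r[i].len)
			return i;
	return -1;
}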
*/ + fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); + +- eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; ++ eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; + eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); + +- status = ocfs2_journal_dirty(handle, last_eb_bh); ++ status = ocfs2_journal_dirty(handle, *last_eb_bh); + if (status < 0) + mlog_errno(status); + status = ocfs2_journal_dirty(handle, fe_bh); +@@ -616,6 +690,14 @@ + mlog_errno(status); + } + ++ /* ++ * Some callers want to track the rightmost leaf so pass it ++ * back here. ++ */ ++ brelse(*last_eb_bh); ++ get_bh(new_eb_bhs[0]); ++ *last_eb_bh = new_eb_bhs[0]; ++ + status = 0; + bail: + if (new_eb_bhs) { +@@ -829,6 +911,87 @@ + } + + /* ++ * Grow a b-tree so that it has more records. ++ * ++ * We might shift the tree depth in which case existing paths should ++ * be considered invalid. ++ * ++ * Tree depth after the grow is returned via *final_depth. ++ * ++ * *last_eb_bh will be updated by ocfs2_add_branch(). ++ */ ++static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, ++ struct buffer_head *di_bh, int *final_depth, ++ struct buffer_head **last_eb_bh, ++ struct ocfs2_alloc_context *meta_ac) ++{ ++ int ret, shift; ++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; ++ int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); ++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ struct buffer_head *bh = NULL; ++ ++ BUG_ON(meta_ac == NULL); ++ ++ shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); ++ if (shift < 0) { ++ ret = shift; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ /* We traveled all the way to the bottom of the allocation tree ++ * and didn't find room for any more extents - we need to add ++ * another tree level */ ++ if (shift) { ++ BUG_ON(bh); ++ mlog(0, "need to shift tree depth (current = %d)\n", depth); ++ ++ /* ocfs2_shift_tree_depth will return us a buffer with ++ * the new extent block (so we can pass that to ++ * ocfs2_add_branch). */ ++ ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, ++ meta_ac, &bh); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out; ++ } ++ depth++; ++ if (depth == 1) { ++ /* ++ * Special case: we have room now if we shifted from ++ * tree_depth 0, so no more work needs to be done. ++ * ++ * We won't be calling add_branch, so pass ++ * back *last_eb_bh as the new leaf. At depth ++ * zero, it should always be null so there's ++ * no reason to brelse. ++ */ ++ BUG_ON(*last_eb_bh); ++ get_bh(bh); ++ *last_eb_bh = bh; ++ goto out; ++ } ++ } ++ ++ /* call ocfs2_add_branch to add the final part of the tree with ++ * the new data. */ ++ mlog(0, "add branch. bh = %p\n", bh); ++ ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, ++ meta_ac); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++out: ++ if (final_depth) ++ *final_depth = depth; ++ brelse(bh); ++ return ret; ++} ++ ++/* + * This is only valid for leaf nodes, which are the only ones that can + * have empty extents anyway. + */ +@@ -934,6 +1097,22 @@ + + } + ++static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el) ++{ ++ int size, num_recs = le16_to_cpu(el->l_next_free_rec); ++ ++ BUG_ON(num_recs == 0); ++ ++ if (ocfs2_is_empty_extent(&el->l_recs[0])) { ++ num_recs--; ++ size = num_recs * sizeof(struct ocfs2_extent_rec); ++ memmove(&el->l_recs[0], &el->l_recs[1], size); ++ memset(&el->l_recs[num_recs], 0, ++ sizeof(struct ocfs2_extent_rec)); ++ el->l_next_free_rec = cpu_to_le16(num_recs); ++ } ++} ++ + /* + * Create an empty extent record . 
+ * +@@ -1211,6 +1390,10 @@ + * immediately to their right. + */ + left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); ++ if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { ++ BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); ++ left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); ++ } + left_clusters -= le32_to_cpu(left_rec->e_cpos); + left_rec->e_int_clusters = cpu_to_le32(left_clusters); + +@@ -1531,10 +1714,16 @@ + return ret; + } + ++/* ++ * Extend the transaction by enough credits to complete the rotation, ++ * and still leave at least the original number of credits allocated ++ * to this transaction. ++ */ + static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth, ++ int op_credits, + struct ocfs2_path *path) + { +- int credits = (path->p_tree_depth - subtree_depth) * 2 + 1; ++ int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; + + if (handle->h_buffer_credits < credits) + return ocfs2_extend_trans(handle, credits); +@@ -1568,6 +1757,29 @@ + return 0; + } + ++static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos) ++{ ++ int next_free = le16_to_cpu(el->l_next_free_rec); ++ unsigned int range; ++ struct ocfs2_extent_rec *rec; ++ ++ if (next_free == 0) ++ return 0; ++ ++ rec = &el->l_recs[0]; ++ if (ocfs2_is_empty_extent(rec)) { ++ /* Empty list. */ ++ if (next_free == 1) ++ return 0; ++ rec = &el->l_recs[1]; ++ } ++ ++ range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); ++ if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range) ++ return 1; ++ return 0; ++} ++ + /* + * Rotate all the records in a btree right one record, starting at insert_cpos. + * +@@ -1586,11 +1798,12 @@ + */ + static int ocfs2_rotate_tree_right(struct inode *inode, + handle_t *handle, ++ enum ocfs2_split_type split, + u32 insert_cpos, + struct ocfs2_path *right_path, + struct ocfs2_path **ret_left_path) + { +- int ret, start; ++ int ret, start, orig_credits = handle->h_buffer_credits; + u32 cpos; + struct ocfs2_path *left_path = NULL; + +@@ -1657,9 +1870,9 @@ + (unsigned long long) + path_leaf_bh(left_path)->b_blocknr); + +- if (ocfs2_rotate_requires_path_adjustment(left_path, ++ if (split == SPLIT_NONE && ++ ocfs2_rotate_requires_path_adjustment(left_path, + insert_cpos)) { +- mlog(0, "Path adjustment required\n"); + + /* + * We've rotated the tree as much as we +@@ -1687,7 +1900,7 @@ + right_path->p_tree_depth); + + ret = ocfs2_extend_rotate_transaction(handle, start, +- right_path); ++ orig_credits, right_path); + if (ret) { + mlog_errno(ret); + goto out; +@@ -1700,6 +1913,24 @@ + goto out; + } + ++ if (split != SPLIT_NONE && ++ ocfs2_leftmost_rec_contains(path_leaf_el(right_path), ++ insert_cpos)) { ++ /* ++ * A rotate moves the rightmost left leaf ++ * record over to the leftmost right leaf ++ * slot. If we're doing an extent split ++ * instead of a real insert, then we have to ++ * check that the extent to be split wasn't ++ * just moved over. If it was, then we can ++ * exit here, passing left_path back - ++ * ocfs2_split_extent() is smart enough to ++ * search both leaves. ++ */ ++ *ret_left_path = left_path; ++ goto out_ret_path; ++ } ++ + /* + * There is no need to re-read the next right path + * as we know that it'll be our current left +@@ -1722,124 +1953,935 @@ + return ret; + } + +-/* +- * Do the final bits of extent record insertion at the target leaf +- * list. If this leaf is part of an allocation tree, it is assumed +- * that the tree above has been prepared. 
+- */ +-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, +- struct ocfs2_extent_list *el, +- struct ocfs2_insert_type *insert, +- struct inode *inode) ++static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, ++ struct ocfs2_path *path) + { +- int i = insert->ins_contig_index; +- unsigned int range; ++ int i, idx; + struct ocfs2_extent_rec *rec; ++ struct ocfs2_extent_list *el; ++ struct ocfs2_extent_block *eb; ++ u32 range; + +- BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); ++ /* Path should always be rightmost. */ ++ eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; ++ BUG_ON(eb->h_next_leaf_blk != 0ULL); + +- /* +- * Contiguous insert - either left or right. +- */ +- if (insert->ins_contig != CONTIG_NONE) { +- rec = &el->l_recs[i]; +- if (insert->ins_contig == CONTIG_LEFT) { +- rec->e_blkno = insert_rec->e_blkno; +- rec->e_cpos = insert_rec->e_cpos; +- } +- le16_add_cpu(&rec->e_leaf_clusters, +- le16_to_cpu(insert_rec->e_leaf_clusters)); +- return; +- } ++ el = &eb->h_list; ++ BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0); ++ idx = le16_to_cpu(el->l_next_free_rec) - 1; ++ rec = &el->l_recs[idx]; ++ range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); + +- /* +- * Handle insert into an empty leaf. +- */ +- if (le16_to_cpu(el->l_next_free_rec) == 0 || +- ((le16_to_cpu(el->l_next_free_rec) == 1) && +- ocfs2_is_empty_extent(&el->l_recs[0]))) { +- el->l_recs[0] = *insert_rec; +- el->l_next_free_rec = cpu_to_le16(1); +- return; +- } ++ for (i = 0; i < path->p_tree_depth; i++) { ++ el = path->p_node[i].el; ++ idx = le16_to_cpu(el->l_next_free_rec) - 1; ++ rec = &el->l_recs[idx]; + +- /* +- * Appending insert. +- */ +- if (insert->ins_appending == APPEND_TAIL) { +- i = le16_to_cpu(el->l_next_free_rec) - 1; +- rec = &el->l_recs[i]; +- range = le32_to_cpu(rec->e_cpos) +- + le16_to_cpu(rec->e_leaf_clusters); +- BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); ++ rec->e_int_clusters = cpu_to_le32(range); ++ le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos)); + +- mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= +- le16_to_cpu(el->l_count), +- "inode %lu, depth %u, count %u, next free %u, " +- "rec.cpos %u, rec.clusters %u, " +- "insert.cpos %u, insert.clusters %u\n", +- inode->i_ino, +- le16_to_cpu(el->l_tree_depth), +- le16_to_cpu(el->l_count), +- le16_to_cpu(el->l_next_free_rec), +- le32_to_cpu(el->l_recs[i].e_cpos), +- le16_to_cpu(el->l_recs[i].e_leaf_clusters), +- le32_to_cpu(insert_rec->e_cpos), +- le16_to_cpu(insert_rec->e_leaf_clusters)); +- i++; +- el->l_recs[i] = *insert_rec; +- le16_add_cpu(&el->l_next_free_rec, 1); +- return; ++ ocfs2_journal_dirty(handle, path->p_node[i].bh); + } +- +- /* +- * Ok, we have to rotate. +- * +- * At this point, it is safe to assume that inserting into an +- * empty leaf and appending to a leaf have both been handled +- * above. +- * +- * This leaf needs to have space, either by the empty 1st +- * extent record, or by virtue of an l_next_rec < l_count. 
+- */ +- ocfs2_rotate_leaf(el, insert_rec); +-} +- +-static inline void ocfs2_update_dinode_clusters(struct inode *inode, +- struct ocfs2_dinode *di, +- u32 clusters) +-{ +- le32_add_cpu(&di->i_clusters, clusters); +- spin_lock(&OCFS2_I(inode)->ip_lock); +- OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); +- spin_unlock(&OCFS2_I(inode)->ip_lock); + } + +-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, +- struct ocfs2_extent_rec *insert_rec, ++static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, ++ struct ocfs2_path *left_path, + struct ocfs2_path *right_path, +- struct ocfs2_path **ret_left_path) ++ int subtree_index, ++ struct ocfs2_cached_dealloc_ctxt *dealloc) + { +- int ret, i, next_free; +- struct buffer_head *bh; ++ int ret, i; ++ struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; ++ struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el; + struct ocfs2_extent_list *el; +- struct ocfs2_path *left_path = NULL; ++ struct ocfs2_extent_block *eb; ++ struct buffer_head *bh; + +- *ret_left_path = NULL; ++ el = path_leaf_el(left_path); + +- /* +- * This shouldn't happen for non-trees. The extent rec cluster +- * count manipulation below only works for interior nodes. +- */ +- BUG_ON(right_path->p_tree_depth == 0); ++ eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data; + +- /* +- * If our appending insert is at the leftmost edge of a leaf, +- * then we might need to update the rightmost records of the +- * neighboring path. +- */ +- el = path_leaf_el(right_path); +- next_free = le16_to_cpu(el->l_next_free_rec); ++ for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) ++ if (root_el->l_recs[i].e_blkno == eb->h_blkno) ++ break; ++ ++ BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec)); ++ ++ memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); ++ le16_add_cpu(&root_el->l_next_free_rec, -1); ++ ++ eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; ++ eb->h_next_leaf_blk = 0; ++ ++ ocfs2_journal_dirty(handle, root_bh); ++ ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); ++ ++ for(i = subtree_index + 1; i < path_num_items(right_path); i++) { ++ bh = right_path->p_node[i].bh; ++ ++ eb = (struct ocfs2_extent_block *)bh->b_data; ++ /* ++ * Not all nodes might have had their final count ++ * decremented by the caller - handle this here. 
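ocfs2_unlink_subtree() above detaches a branch by wiping its record in the parent extent list and shrinking the live-record count. A simplified sketch of that bookkeeping, using plain integers for the little-endian on-disk fields and assuming, as in the rotate-left path above, that the branch being unlinked sits in the last live slot:

#include <string.h>

struct branch_rec {
	unsigned long long blkno;
	unsigned int cpos;
};

static void unlink_last_branch(struct branch_rec *recs,
			       unsigned int *next_free, unsigned int i)
{
	/* Zero the slot and drop the count; no compaction is needed
	 * because slot i is the last live record. */
	memset(&recs[i], 0, sizeof(recs[i]));
	(*next_free)--;
}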
++ */ ++ el = &eb->h_list; ++ if (le16_to_cpu(el->l_next_free_rec) > 1) { ++ mlog(ML_ERROR, ++ "Inode %llu, attempted to remove extent block " ++ "%llu with %u records\n", ++ (unsigned long long)OCFS2_I(inode)->ip_blkno, ++ (unsigned long long)le64_to_cpu(eb->h_blkno), ++ le16_to_cpu(el->l_next_free_rec)); ++ ++ ocfs2_journal_dirty(handle, bh); ++ ocfs2_remove_from_cache(inode, bh); ++ continue; ++ } ++ ++ el->l_next_free_rec = 0; ++ memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); ++ ++ ocfs2_journal_dirty(handle, bh); ++ ++ ret = ocfs2_cache_extent_block_free(dealloc, eb); ++ if (ret) ++ mlog_errno(ret); ++ ++ ocfs2_remove_from_cache(inode, bh); ++ } ++} ++ ++static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, ++ struct ocfs2_path *left_path, ++ struct ocfs2_path *right_path, ++ int subtree_index, ++ struct ocfs2_cached_dealloc_ctxt *dealloc, ++ int *deleted) ++{ ++ int ret, i, del_right_subtree = 0; ++ struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); ++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; ++ struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; ++ struct ocfs2_extent_block *eb; ++ ++ *deleted = 0; ++ ++ right_leaf_el = path_leaf_el(right_path); ++ left_leaf_el = path_leaf_el(left_path); ++ root_bh = left_path->p_node[subtree_index].bh; ++ BUG_ON(root_bh != right_path->p_node[subtree_index].bh); ++ ++ if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0])) ++ return 0; ++ ++ if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0])) ++ return -EAGAIN; ++ ++ eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data; ++ if (eb->h_next_leaf_blk == 0ULL && ++ le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) { ++ /* ++ * We have to update i_last_eb_blk during the meta ++ * data delete. ++ */ ++ ret = ocfs2_journal_access(handle, inode, di_bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ del_right_subtree = 1; ++ } ++ ++ ret = ocfs2_journal_access(handle, inode, root_bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ for(i = subtree_index + 1; i < path_num_items(right_path); i++) { ++ ret = ocfs2_journal_access(handle, inode, ++ right_path->p_node[i].bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ret = ocfs2_journal_access(handle, inode, ++ left_path->p_node[i].bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ } ++ ++ ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]); ++ memset(&right_leaf_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); ++ if (eb->h_next_leaf_blk == 0ULL) { ++ /* ++ * XXX: move recs over to get rid of empty extent, ++ * decrease next_free. how does this play with the ++ * delete code below? 
++ */ ++ ocfs2_remove_empty_extent(right_leaf_el); ++ } ++ ++ ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); ++ if (ret) ++ mlog_errno(ret); ++ ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); ++ if (ret) ++ mlog_errno(ret); ++ ++ if (del_right_subtree) { ++ ocfs2_unlink_subtree(inode, handle, left_path, right_path, ++ subtree_index, dealloc); ++ ocfs2_update_edge_lengths(inode, handle, left_path); ++ ++ eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; ++ di->i_last_eb_blk = eb->h_blkno; ++ ret = ocfs2_journal_dirty(handle, di_bh); ++ if (ret) ++ mlog_errno(ret); ++ ++ *deleted = 1; ++ } else ++ ocfs2_complete_edge_insert(inode, handle, left_path, right_path, ++ subtree_index); ++ ++out: ++ return ret; ++} ++ ++/* ++ * Given a full path, determine what cpos value would return us a path ++ * containing the leaf immediately to the right of the current one. ++ * ++ * Will return zero if the path passed in is already the rightmost path. ++ * ++ * This looks similar, but is subtly different to ++ * ocfs2_find_cpos_for_left_leaf(). ++ */ ++static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, ++ struct ocfs2_path *path, u32 *cpos) ++{ ++ int i, j, ret = 0; ++ u64 blkno; ++ struct ocfs2_extent_list *el; ++ ++ *cpos = 0; ++ ++ if (path->p_tree_depth == 0) ++ return 0; ++ ++ blkno = path_leaf_bh(path)->b_blocknr; ++ ++ /* Start at the tree node just above the leaf and work our way up. */ ++ i = path->p_tree_depth - 1; ++ while (i >= 0) { ++ int next_free; ++ ++ el = path->p_node[i].el; ++ ++ /* ++ * Find the extent record just after the one in our ++ * path. ++ */ ++ next_free = le16_to_cpu(el->l_next_free_rec); ++ for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) { ++ if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) { ++ if (j == (next_free - 1)) { ++ if (i == 0) { ++ /* ++ * We've determined that the ++ * path specified is already ++ * the rightmost one - return a ++ * cpos of zero. ++ */ ++ goto out; ++ } ++ /* ++ * The rightmost record points to our ++ * leaf - we need to travel up the ++ * tree one level. ++ */ ++ goto next_node; ++ } ++ ++ *cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos); ++ goto out; ++ } ++ } ++ ++ /* ++ * If we got here, we never found a valid node where ++ * the tree indicated one should be. ++ */ ++ ocfs2_error(sb, ++ "Invalid extent tree at extent block %llu\n", ++ (unsigned long long)blkno); ++ ret = -EROFS; ++ goto out; ++ ++next_node: ++ blkno = path->p_node[i].bh->b_blocknr; ++ i--; ++ } ++ ++out: ++ return ret; ++} ++ ++static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, ++ handle_t *handle, ++ struct buffer_head *bh, ++ struct ocfs2_extent_list *el, ++ int *rotated_any) ++{ ++ int ret; ++ ++ if (rotated_any) ++ *rotated_any = 0; ++ ++ if (!ocfs2_is_empty_extent(&el->l_recs[0])) ++ return 0; ++ ++ if (le16_to_cpu(el->l_next_free_rec) == 1) ++ return -EAGAIN; ++ ++ ret = ocfs2_journal_access(handle, inode, bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ocfs2_remove_empty_extent(el); ++ ++ ret = ocfs2_journal_dirty(handle, bh); ++ if (ret) ++ mlog_errno(ret); ++ ++ if (rotated_any) ++ *rotated_any = 1; ++out: ++ return ret; ++} ++ ++/* ++ * Left rotation of btree records. ++ * ++ * In many ways, this is (unsurprisingly) the opposite of right ++ * rotation. We start at some non-rightmost path containing an empty ++ * extent in the leaf block. The code works its way to the rightmost ++ * path by rotating records to the left in every subtree. 
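ocfs2_find_cpos_for_right_leaf() above climbs from the leaf toward the root until the node it arrived from is not its parent's rightmost child, then reads the next sibling's cpos. The same walk over a simplified in-memory tree (invented types, not the on-disk ocfs2 structures):

struct tnode {
	struct tnode *parent;
	int slot;			/* index of this node in its parent */
	int nr_children;
	unsigned int child_cpos[16];	/* starting cpos of each child */
};

/* Return the cpos of the leaf immediately to the right of 'leaf', or
 * 0 if 'leaf' is already the rightmost, mirroring the helper above. */
static unsigned int cpos_for_right_leaf(const struct tnode *leaf)
{
	const struct tnode *n = leaf;

	while (n->parent) {
		const struct tnode *p = n->parent;

		if (n->slot + 1 < p->nr_children)
			return p->child_cpos[n->slot + 1];
		n = p;	/* rightmost child: keep climbing */
	}
	return 0;
}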
++ * ++ * There are a few places where we might want to do this: ++ * - merging extent records ++ * - left/right contiguousness during insert ++ * - merging two previously unwritten extents ++ * - truncate ++ * - via ocfs2_truncate, if we ever fix it up to use this code ++ * - via ioctl at the request of user (reverse fallocate) ++ * - "compressing" a tree with empty extents ++ * - as a result of a user defrag request ++ * - perhaps as a preventative measure if we notice a tree needs ++ * this during any of the above operations. ++ * ++ * The major difference between those states above is the ability to ++ * lock one of the meta data allocators so that we can remove unused ++ * extent blocks. It might be unrealistic for us to assume that any ++ * merging cases will want to lock the meta data allocator. Luckily, ++ * the merges are an optimization. ++ * ++ * So, merging won't happen if it would result in an empty rightmost ++ * path (this is illegal). ++ * ++ * This function will move extents left until it runs out of leaves to ++ * rotate, or it hits a right leaf that already contains an empty ++ * extent, in which case it will exit early. This means that we might ++ * never rotate anything if the 1st right leaf contains an empty ++ * extent. ++ * ++ * Truncate cases will have to happen as a second step. I'm not ++ * completely sure how we want to handle those yet. ++ */ ++static int ocfs2_rotate_tree_left(struct inode *inode, ++ handle_t *handle, ++ struct ocfs2_path *path, ++ struct ocfs2_cached_dealloc_ctxt *dealloc, ++ int *rotated_any) ++{ ++ int ret, subtree_root, deleted, orig_credits = handle->h_buffer_credits; ++ u32 right_cpos; ++ struct ocfs2_path *left_path = NULL; ++ struct ocfs2_path *right_path = NULL; ++ ++ BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); ++ ++ if (rotated_any) ++ *rotated_any = 0; ++ ++ ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, ++ &right_cpos); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ if (path->p_tree_depth == 0 || right_cpos == 0) { ++ /* ++ * Two cases where rotation of adjacent leaves isn't ++ * necessary: ++ * - in-inode extents (no btree) ++ * - path passed is already rightmost ++ */ ++ ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, ++ path_leaf_bh(path), ++ path_leaf_el(path), ++ rotated_any); ++ if (ret) ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ left_path = ocfs2_new_path(path_root_bh(path), ++ path_root_el(path)); ++ if (!left_path) { ++ ret = -ENOMEM; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ocfs2_cp_path(left_path, path); ++ ++ right_path = ocfs2_new_path(path_root_bh(path), ++ path_root_el(path)); ++ if (!right_path) { ++ ret = -ENOMEM; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ while (right_cpos) { ++ ret = ocfs2_find_path(inode, right_path, right_cpos); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ subtree_root = ocfs2_find_subtree_root(inode, left_path, ++ right_path); ++ ++ mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", ++ subtree_root, ++ (unsigned long long) ++ right_path->p_node[subtree_root].bh->b_blocknr, ++ right_path->p_tree_depth); ++ ++ ret = ocfs2_extend_rotate_transaction(handle, subtree_root, ++ orig_credits, left_path); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ret = ocfs2_rotate_subtree_left(inode, handle, left_path, ++ right_path, subtree_root, ++ dealloc, &deleted); ++ if (ret) { ++ if (ret != -EAGAIN) ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ if (rotated_any) ++ *rotated_any = 1; ++ ++ /* ++ * The subtree rotate might have
removed records on ++ * the rightmost edge. If so, then rotation is ++ * complete. ++ */ ++ if (deleted) ++ break; ++ ++ ocfs2_mv_path(left_path, right_path); ++ ++ ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, ++ &right_cpos); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ } ++ ++out: ++ ocfs2_free_path(right_path); ++ ocfs2_free_path(left_path); ++ ++ return ret; ++} ++ ++static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el, ++ int index) ++{ ++ struct ocfs2_extent_rec *rec = &el->l_recs[index]; ++ unsigned int size; ++ ++ if (rec->e_leaf_clusters == 0) { ++ /* ++ * We consumed all of the merged-from record. An empty ++ * extent cannot exist anywhere but the 1st array ++ * position, so move things over if the merged-from ++ * record doesn't occupy that position. ++ * ++ * This creates a new empty extent so the caller ++ * should be smart enough to have removed any existing ++ * ones. ++ */ ++ if (index > 0) { ++ BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); ++ size = index * sizeof(struct ocfs2_extent_rec); ++ memmove(&el->l_recs[1], &el->l_recs[0], size); ++ } ++ ++ /* ++ * Always memset - the caller doesn't check whether it ++ * created an empty extent, so there could be junk in ++ * the other fields. ++ */ ++ memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); ++ } ++} ++ ++/* ++ * Remove split_rec clusters from the record at index and merge them ++ * onto the beginning of the record at index + 1. ++ */ ++static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh, ++ handle_t *handle, ++ struct ocfs2_extent_rec *split_rec, ++ struct ocfs2_extent_list *el, int index) ++{ ++ int ret; ++ unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); ++ struct ocfs2_extent_rec *left_rec; ++ struct ocfs2_extent_rec *right_rec; ++ ++ BUG_ON(index >= le16_to_cpu(el->l_next_free_rec)); ++ ++ left_rec = &el->l_recs[index]; ++ right_rec = &el->l_recs[index + 1]; ++ ++ ret = ocfs2_journal_access(handle, inode, bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters); ++ ++ le32_add_cpu(&right_rec->e_cpos, -split_clusters); ++ le64_add_cpu(&right_rec->e_blkno, ++ -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); ++ le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); ++ ++ ocfs2_cleanup_merge(el, index); ++ ++ ret = ocfs2_journal_dirty(handle, bh); ++ if (ret) ++ mlog_errno(ret); ++ ++out: ++ return ret; ++} ++ ++/* ++ * Remove split_rec clusters from the record at index and merge them ++ * onto the tail of the record at index - 1. ++ */ ++static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh, ++ handle_t *handle, ++ struct ocfs2_extent_rec *split_rec, ++ struct ocfs2_extent_list *el, int index) ++{ ++ int ret, has_empty_extent = 0; ++ unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters); ++ struct ocfs2_extent_rec *left_rec; ++ struct ocfs2_extent_rec *right_rec; ++ ++ BUG_ON(index <= 0); ++ ++ left_rec = &el->l_recs[index - 1]; ++ right_rec = &el->l_recs[index]; ++ if (ocfs2_is_empty_extent(&el->l_recs[0])) ++ has_empty_extent = 1; ++ ++ ret = ocfs2_journal_access(handle, inode, bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ if (has_empty_extent && index == 1) { ++ /* ++ * The easy case - we can just plop the record right in. 
++ */ ++ *left_rec = *split_rec; ++ ++ has_empty_extent = 0; ++ } else { ++ le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters); ++ } ++ ++ le32_add_cpu(&right_rec->e_cpos, split_clusters); ++ le64_add_cpu(&right_rec->e_blkno, ++ ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); ++ le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); ++ ++ ocfs2_cleanup_merge(el, index); ++ ++ ret = ocfs2_journal_dirty(handle, bh); ++ if (ret) ++ mlog_errno(ret); ++ ++out: ++ return ret; ++} ++ ++static int ocfs2_try_to_merge_extent(struct inode *inode, ++ handle_t *handle, ++ struct ocfs2_path *left_path, ++ int split_index, ++ struct ocfs2_extent_rec *split_rec, ++ struct ocfs2_cached_dealloc_ctxt *dealloc, ++ struct ocfs2_merge_ctxt *ctxt) ++ ++{ ++ int ret = 0, rotated, delete_tail_recs = 0; ++ struct ocfs2_extent_list *el = path_leaf_el(left_path); ++ struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; ++ ++ BUG_ON(ctxt->c_contig_type == CONTIG_NONE); ++ ++ if (ctxt->c_split_covers_rec) { ++ delete_tail_recs++; ++ ++ if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || ++ ctxt->c_has_empty_extent) ++ delete_tail_recs++; ++ ++ if (ctxt->c_has_empty_extent) { ++ /* ++ * The merge code will need to create an empty ++ * extent to take the place of the newly ++ * emptied slot. Remove any pre-existing empty ++ * extents - having more than one in a leaf is ++ * illegal. ++ */ ++ ret = ocfs2_rotate_tree_left(inode, handle, left_path, ++ dealloc, &rotated); ++ if (rotated) { ++ split_index--; ++ rec = &el->l_recs[split_index]; ++ } ++ if (ret) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ goto straight_insert; ++ } ++ ++ mlog_errno(ret); ++ goto out; ++ } ++ } ++ } ++ ++ if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { ++ /* ++ * Left-right contig implies this. ++ */ ++ BUG_ON(!ctxt->c_split_covers_rec); ++ BUG_ON(split_index == 0); ++ ++ /* ++ * Since the leftright insert always covers the entire ++ * extent, this call will delete the insert record ++ * entirely, resulting in an empty extent record added to ++ * the extent block. ++ * ++ * Since the adding of an empty extent shifts ++ * everything back to the right, there's no need to ++ * update split_index here. ++ */ ++ ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path), ++ handle, split_rec, el, split_index); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ /* ++ * We can only get this from logic error above. ++ */ ++ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); ++ ++ /* ++ * The left merge left us with an empty extent, remove ++ * it. ++ */ ++ ret = ocfs2_rotate_tree_left(inode, handle, left_path, ++ dealloc, &rotated); ++ if (rotated) { ++ split_index--; ++ rec = &el->l_recs[split_index]; ++ } ++ if (ret) { ++ if (ret == -EAGAIN) { ++ ret = 0; ++ goto straight_insert; ++ } ++ ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ /* ++ * Note that we don't pass split_rec here on purpose - ++ * we've merged it into the left side. ++ */ ++ ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path), ++ handle, rec, el, split_index); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); ++ ++ ret = ocfs2_rotate_tree_left(inode, handle, left_path, ++ dealloc, NULL); ++ /* ++ * Error from this last rotate is not critical, so ++ * print but don't bubble it up. ++ */ ++ if (ret && ret != -EAGAIN) ++ mlog_errno(ret); ++ ret = 0; ++ } else { ++ /* ++ * Merge a record to the left or right. 
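The cluster bookkeeping in ocfs2_merge_rec_right() and ocfs2_merge_rec_left() above boils down to moving n clusters across the boundary between two adjacent records. A sketch of the right-merge case with plain integers standing in for the le16/le32/le64 on-disk fields (names invented):

struct mrec {
	unsigned int cpos;		/* first cluster */
	unsigned int clusters;		/* length in clusters */
	unsigned long long blkno;	/* first disk block */
};

/* Move n clusters from the tail of 'left' onto the front of 'right':
 * the right record's start (cpos and blkno) slides back by n while
 * its length grows. bpc is the filesystem's blocks-per-cluster. */
static void merge_right(struct mrec *left, struct mrec *right,
			unsigned int n, unsigned int bpc)
{
	left->clusters -= n;
	right->cpos -= n;
	right->blkno -= (unsigned long long)n * bpc;
	right->clusters += n;
}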
++ * ++ * 'contig_type' is relative to the existing record, ++ * so for example, if we're "right contig", it's to ++ * the record on the left (hence the left merge). ++ */ ++ if (ctxt->c_contig_type == CONTIG_RIGHT) { ++ ret = ocfs2_merge_rec_left(inode, ++ path_leaf_bh(left_path), ++ handle, split_rec, el, ++ split_index); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ } else { ++ ret = ocfs2_merge_rec_right(inode, ++ path_leaf_bh(left_path), ++ handle, split_rec, el, ++ split_index); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ } ++ ++ if (ctxt->c_split_covers_rec) { ++ /* ++ * The merge may have left an empty extent in ++ * our leaf. Try to rotate it away. ++ */ ++ ret = ocfs2_rotate_tree_left(inode, handle, left_path, ++ dealloc, &rotated); ++ if (ret) ++ mlog_errno(ret); ++ ret = 0; ++ } ++ } ++ ++out: ++ return ret; ++ ++straight_insert: ++ el->l_recs[split_index] = *split_rec; ++ goto out; ++} ++ ++static void ocfs2_subtract_from_rec(struct super_block *sb, ++ enum ocfs2_split_type split, ++ struct ocfs2_extent_rec *rec, ++ struct ocfs2_extent_rec *split_rec) ++{ ++ u64 len_blocks; ++ ++ len_blocks = ocfs2_clusters_to_blocks(sb, ++ le16_to_cpu(split_rec->e_leaf_clusters)); ++ ++ if (split == SPLIT_LEFT) { ++ /* ++ * Region is on the left edge of the existing ++ * record. ++ */ ++ le32_add_cpu(&rec->e_cpos, ++ le16_to_cpu(split_rec->e_leaf_clusters)); ++ le64_add_cpu(&rec->e_blkno, len_blocks); ++ le16_add_cpu(&rec->e_leaf_clusters, ++ -le16_to_cpu(split_rec->e_leaf_clusters)); ++ } else { ++ /* ++ * Region is on the right edge of the existing ++ * record. ++ */ ++ le16_add_cpu(&rec->e_leaf_clusters, ++ -le16_to_cpu(split_rec->e_leaf_clusters)); ++ } ++} ++ ++/* ++ * Do the final bits of extent record insertion at the target leaf ++ * list. If this leaf is part of an allocation tree, it is assumed ++ * that the tree above has been prepared. ++ */ ++static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, ++ struct ocfs2_extent_list *el, ++ struct ocfs2_insert_type *insert, ++ struct inode *inode) ++{ ++ int i = insert->ins_contig_index; ++ unsigned int range; ++ struct ocfs2_extent_rec *rec; ++ ++ BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); ++ ++ if (insert->ins_split != SPLIT_NONE) { ++ i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); ++ BUG_ON(i == -1); ++ rec = &el->l_recs[i]; ++ ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, ++ insert_rec); ++ goto rotate; ++ } ++ ++ /* ++ * Contiguous insert - either left or right. ++ */ ++ if (insert->ins_contig != CONTIG_NONE) { ++ rec = &el->l_recs[i]; ++ if (insert->ins_contig == CONTIG_LEFT) { ++ rec->e_blkno = insert_rec->e_blkno; ++ rec->e_cpos = insert_rec->e_cpos; ++ } ++ le16_add_cpu(&rec->e_leaf_clusters, ++ le16_to_cpu(insert_rec->e_leaf_clusters)); ++ return; ++ } ++ ++ /* ++ * Handle insert into an empty leaf. ++ */ ++ if (le16_to_cpu(el->l_next_free_rec) == 0 || ++ ((le16_to_cpu(el->l_next_free_rec) == 1) && ++ ocfs2_is_empty_extent(&el->l_recs[0]))) { ++ el->l_recs[0] = *insert_rec; ++ el->l_next_free_rec = cpu_to_le16(1); ++ return; ++ } ++ ++ /* ++ * Appending insert. 
++ */ ++ if (insert->ins_appending == APPEND_TAIL) { ++ i = le16_to_cpu(el->l_next_free_rec) - 1; ++ rec = &el->l_recs[i]; ++ range = le32_to_cpu(rec->e_cpos) ++ + le16_to_cpu(rec->e_leaf_clusters); ++ BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range); ++ ++ mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= ++ le16_to_cpu(el->l_count), ++ "inode %lu, depth %u, count %u, next free %u, " ++ "rec.cpos %u, rec.clusters %u, " ++ "insert.cpos %u, insert.clusters %u\n", ++ inode->i_ino, ++ le16_to_cpu(el->l_tree_depth), ++ le16_to_cpu(el->l_count), ++ le16_to_cpu(el->l_next_free_rec), ++ le32_to_cpu(el->l_recs[i].e_cpos), ++ le16_to_cpu(el->l_recs[i].e_leaf_clusters), ++ le32_to_cpu(insert_rec->e_cpos), ++ le16_to_cpu(insert_rec->e_leaf_clusters)); ++ i++; ++ el->l_recs[i] = *insert_rec; ++ le16_add_cpu(&el->l_next_free_rec, 1); ++ return; ++ } ++ ++rotate: ++ /* ++ * Ok, we have to rotate. ++ * ++ * At this point, it is safe to assume that inserting into an ++ * empty leaf and appending to a leaf have both been handled ++ * above. ++ * ++ * This leaf needs to have space, either by the empty 1st ++ * extent record, or by virtue of an l_next_rec < l_count. ++ */ ++ ocfs2_rotate_leaf(el, insert_rec); ++} ++ ++static inline void ocfs2_update_dinode_clusters(struct inode *inode, ++ struct ocfs2_dinode *di, ++ u32 clusters) ++{ ++ le32_add_cpu(&di->i_clusters, clusters); ++ spin_lock(&OCFS2_I(inode)->ip_lock); ++ OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); ++ spin_unlock(&OCFS2_I(inode)->ip_lock); ++} ++ ++static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, ++ struct ocfs2_extent_rec *insert_rec, ++ struct ocfs2_path *right_path, ++ struct ocfs2_path **ret_left_path) ++{ ++ int ret, i, next_free; ++ struct buffer_head *bh; ++ struct ocfs2_extent_list *el; ++ struct ocfs2_path *left_path = NULL; ++ ++ *ret_left_path = NULL; ++ ++ /* ++ * This shouldn't happen for non-trees. The extent rec cluster ++ * count manipulation below only works for interior nodes. ++ */ ++ BUG_ON(right_path->p_tree_depth == 0); ++ ++ /* ++ * If our appending insert is at the leftmost edge of a leaf, ++ * then we might need to update the rightmost records of the ++ * neighboring path. ++ */ ++ el = path_leaf_el(right_path); ++ next_free = le16_to_cpu(el->l_next_free_rec); + if (next_free == 0 || + (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { + u32 left_cpos; +@@ -1931,6 +2973,83 @@ + return ret; + } + ++static void ocfs2_split_record(struct inode *inode, ++ struct ocfs2_path *left_path, ++ struct ocfs2_path *right_path, ++ struct ocfs2_extent_rec *split_rec, ++ enum ocfs2_split_type split) ++{ ++ int index; ++ u32 cpos = le32_to_cpu(split_rec->e_cpos); ++ struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el; ++ struct ocfs2_extent_rec *rec, *tmprec; ++ ++ right_el = path_leaf_el(right_path); ++ if (left_path) ++ left_el = path_leaf_el(left_path); ++ ++ el = right_el; ++ insert_el = right_el; ++ index = ocfs2_search_extent_list(el, cpos); ++ if (index != -1) { ++ if (index == 0 && left_path) { ++ BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0])); ++ ++ /* ++ * This typically means that the record ++ * started in the left path but moved to the ++ * right as a result of rotation. We either ++ * move the existing record to the left, or we ++ * do the later insert there. ++ * ++ * In this case, the left path should always ++ * exist as the rotate code will have passed ++ * it back for a post-insert update.
++ */ ++ ++ if (split == SPLIT_LEFT) { ++ /* ++ * It's a left split. Since we know ++ * that the rotate code gave us an ++ * empty extent in the left path, we ++ * can just do the insert there. ++ */ ++ insert_el = left_el; ++ } else { ++ /* ++ * Right split - we have to move the ++ * existing record over to the left ++ * leaf. The insert will be into the ++ * newly created empty extent in the ++ * right leaf. ++ */ ++ tmprec = &right_el->l_recs[index]; ++ ocfs2_rotate_leaf(left_el, tmprec); ++ el = left_el; ++ ++ memset(tmprec, 0, sizeof(*tmprec)); ++ index = ocfs2_search_extent_list(left_el, cpos); ++ BUG_ON(index == -1); ++ } ++ } ++ } else { ++ BUG_ON(!left_path); ++ BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0])); ++ /* ++ * Left path is easy - we can just allow the insert to ++ * happen. ++ */ ++ el = left_el; ++ insert_el = left_el; ++ index = ocfs2_search_extent_list(el, cpos); ++ BUG_ON(index == -1); ++ } ++ ++ rec = &el->l_recs[index]; ++ ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); ++ ocfs2_rotate_leaf(insert_el, split_rec); ++} ++ + /* + * This function only does inserts on an allocation b-tree. For dinode + * lists, ocfs2_insert_at_leaf() is called directly. +@@ -1948,7 +3067,6 @@ + { + int ret, subtree_index; + struct buffer_head *leaf_bh = path_leaf_bh(right_path); +- struct ocfs2_extent_list *el; + + /* + * Pass both paths to the journal. The majority of inserts +@@ -1984,9 +3102,18 @@ + } + } + +- el = path_leaf_el(right_path); ++ if (insert->ins_split != SPLIT_NONE) { ++ /* ++ * We could call ocfs2_insert_at_leaf() for some types ++ * of splits, but it's easier to just let one separate ++ * function sort it all out. ++ */ ++ ocfs2_split_record(inode, left_path, right_path, ++ insert_rec, insert->ins_split); ++ } else ++ ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), ++ insert, inode); + +- ocfs2_insert_at_leaf(insert_rec, el, insert, inode); + ret = ocfs2_journal_dirty(handle, leaf_bh); + if (ret) + mlog_errno(ret); +@@ -2075,7 +3202,7 @@ + * can wind up skipping both of these two special cases... + */ + if (rotate) { +- ret = ocfs2_rotate_tree_right(inode, handle, ++ ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, + le32_to_cpu(insert_rec->e_cpos), + right_path, &left_path); + if (ret) { +@@ -2100,6 +3227,7 @@ + } + + out_update_clusters: ++ if (type->ins_split == SPLIT_NONE) + ocfs2_update_dinode_clusters(inode, di, + le16_to_cpu(insert_rec->e_leaf_clusters)); + +@@ -2114,6 +3242,44 @@ + return ret; + } + ++static enum ocfs2_contig_type ++ocfs2_figure_merge_contig_type(struct inode *inode, ++ struct ocfs2_extent_list *el, int index, ++ struct ocfs2_extent_rec *split_rec) ++{ ++ struct ocfs2_extent_rec *rec; ++ enum ocfs2_contig_type ret = CONTIG_NONE; ++ ++ /* ++ * We're careful to check for an empty extent record here - ++ * the merge code will know what to do if it sees one.
++ */ ++ ++ if (index > 0) { ++ rec = &el->l_recs[index - 1]; ++ if (index == 1 && ocfs2_is_empty_extent(rec)) { ++ if (split_rec->e_cpos == el->l_recs[index].e_cpos) ++ ret = CONTIG_RIGHT; ++ } else { ++ ret = ocfs2_extent_contig(inode, rec, split_rec); ++ } ++ } ++ ++ if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) { ++ enum ocfs2_contig_type contig_type; ++ ++ rec = &el->l_recs[index + 1]; ++ contig_type = ocfs2_extent_contig(inode, rec, split_rec); ++ ++ if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) ++ ret = CONTIG_LEFTRIGHT; ++ else if (ret == CONTIG_NONE) ++ ret = contig_type; ++ } ++ ++ return ret; ++} ++ + static void ocfs2_figure_contig_type(struct inode *inode, + struct ocfs2_insert_type *insert, + struct ocfs2_extent_list *el, +@@ -2205,6 +3371,8 @@ + struct ocfs2_path *path = NULL; + struct buffer_head *bh = NULL; + ++ insert->ins_split = SPLIT_NONE; ++ + el = &di->id2.i_list; + insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); + +@@ -2305,130 +3473,425 @@ + ocfs2_figure_appending_type(insert, el, insert_rec); + } + +-out: +- ocfs2_free_path(path); ++out: ++ ocfs2_free_path(path); ++ ++ if (ret == 0) ++ *last_eb_bh = bh; ++ else ++ brelse(bh); ++ return ret; ++} ++ ++/* ++ * Insert an extent into an inode btree. ++ * ++ * The caller needs to update fe->i_clusters ++ */ ++int ocfs2_insert_extent(struct ocfs2_super *osb, ++ handle_t *handle, ++ struct inode *inode, ++ struct buffer_head *fe_bh, ++ u32 cpos, ++ u64 start_blk, ++ u32 new_clusters, ++ u8 flags, ++ struct ocfs2_alloc_context *meta_ac) ++{ ++ int status; ++ struct buffer_head *last_eb_bh = NULL; ++ struct buffer_head *bh = NULL; ++ struct ocfs2_insert_type insert = {0, }; ++ struct ocfs2_extent_rec rec; ++ ++ mlog(0, "add %u clusters at position %u to inode %llu\n", ++ new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); ++ ++ mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && ++ (OCFS2_I(inode)->ip_clusters != cpos), ++ "Device %s, asking for sparse allocation: inode %llu, " ++ "cpos %u, clusters %u\n", ++ osb->dev_str, ++ (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, ++ OCFS2_I(inode)->ip_clusters); ++ ++ memset(&rec, 0, sizeof(rec)); ++ rec.e_cpos = cpu_to_le32(cpos); ++ rec.e_blkno = cpu_to_le64(start_blk); ++ rec.e_leaf_clusters = cpu_to_le16(new_clusters); ++ rec.e_flags = flags; ++ ++ status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, ++ &insert); ++ if (status < 0) { ++ mlog_errno(status); ++ goto bail; ++ } ++ ++ mlog(0, "Insert.appending: %u, Insert.Contig: %u, " ++ "Insert.contig_index: %d, Insert.free_records: %d, " ++ "Insert.tree_depth: %d\n", ++ insert.ins_appending, insert.ins_contig, insert.ins_contig_index, ++ insert.ins_free_records, insert.ins_tree_depth); ++ ++ if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { ++ status = ocfs2_grow_tree(inode, handle, fe_bh, ++ &insert.ins_tree_depth, &last_eb_bh, ++ meta_ac); ++ if (status) { ++ mlog_errno(status); ++ goto bail; ++ } ++ } ++ ++ /* Finally, we can add clusters. This might rotate the tree for us. 
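The decision in ocfs2_figure_merge_contig_type() above combines how the split record relates to its left and right neighbours; contiguity on both sides yields CONTIG_LEFTRIGHT, which later drives a double merge. A compact sketch of the combination step (a plain enum mirroring the one in this patch):

enum contig { C_NONE, C_LEFT, C_RIGHT, C_LEFTRIGHT };

/* with_prev: relation to the record at index - 1 (C_RIGHT means the
 * split record glues onto that record's tail); with_next: relation
 * to the record at index + 1. */
static enum contig combine_contig(enum contig with_prev, enum contig with_next)
{
	if (with_prev == C_RIGHT && with_next == C_LEFT)
		return C_LEFTRIGHT;
	if (with_prev != C_NONE)
		return with_prev;
	return with_next;
}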
*/ ++ status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); ++ if (status < 0) ++ mlog_errno(status); ++ else ++ ocfs2_extent_map_insert_rec(inode, &rec); ++ ++bail: ++ if (bh) ++ brelse(bh); ++ ++ if (last_eb_bh) ++ brelse(last_eb_bh); ++ ++ mlog_exit(status); ++ return status; ++} ++ ++static int ocfs2_split_and_insert(struct inode *inode, ++ handle_t *handle, ++ struct ocfs2_path *path, ++ struct buffer_head *di_bh, ++ struct buffer_head **last_eb_bh, ++ int split_index, ++ struct ocfs2_extent_rec *orig_split_rec, ++ struct ocfs2_alloc_context *meta_ac) ++{ ++ int ret = 0, depth; ++ unsigned int insert_range, rec_range, do_leftright = 0; ++ struct ocfs2_extent_rec tmprec; ++ struct ocfs2_extent_list *rightmost_el; ++ struct ocfs2_extent_rec rec; ++ struct ocfs2_extent_rec split_rec = *orig_split_rec; ++ struct ocfs2_insert_type insert; ++ struct ocfs2_extent_block *eb; ++ struct ocfs2_dinode *di; ++ ++leftright: ++ /* ++ * Store a copy of the record on the stack - it might move ++ * around as the tree is manipulated below. ++ */ ++ rec = path_leaf_el(path)->l_recs[split_index]; ++ ++ di = (struct ocfs2_dinode *)di_bh->b_data; ++ rightmost_el = &di->id2.i_list; ++ ++ depth = le16_to_cpu(rightmost_el->l_tree_depth); ++ if (depth) { ++ BUG_ON(!(*last_eb_bh)); ++ eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; ++ rightmost_el = &eb->h_list; ++ } ++ ++ if (le16_to_cpu(rightmost_el->l_next_free_rec) == ++ le16_to_cpu(rightmost_el->l_count)) { ++ int old_depth = depth; ++ ++ ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, ++ meta_ac); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ if (old_depth != depth) { ++ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data; ++ rightmost_el = &eb->h_list; ++ } ++ } ++ ++ memset(&insert, 0, sizeof(struct ocfs2_insert_type)); ++ insert.ins_appending = APPEND_NONE; ++ insert.ins_contig = CONTIG_NONE; ++ insert.ins_free_records = le16_to_cpu(rightmost_el->l_count) ++ - le16_to_cpu(rightmost_el->l_next_free_rec); ++ insert.ins_tree_depth = depth; ++ ++ insert_range = le32_to_cpu(split_rec.e_cpos) + ++ le16_to_cpu(split_rec.e_leaf_clusters); ++ rec_range = le32_to_cpu(rec.e_cpos) + ++ le16_to_cpu(rec.e_leaf_clusters); ++ ++ if (split_rec.e_cpos == rec.e_cpos) { ++ insert.ins_split = SPLIT_LEFT; ++ } else if (insert_range == rec_range) { ++ insert.ins_split = SPLIT_RIGHT; ++ } else { ++ /* ++ * Left/right split. We fake this as a right split ++ * first and then make a second pass as a left split. 
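The left/right two-pass in ocfs2_split_and_insert() above rests on a bit of range arithmetic: when the region being marked sits strictly inside an existing record, a temporary record is built for the remainder to its right, the right split is done first, and the left half is handled on the second pass. A sketch of the remainder computation (plain integers, invented names; it assumes the caller verified that split lies inside rec):

struct piece {
	unsigned int cpos;	/* first cluster */
	unsigned int clusters;	/* length in clusters */
};

/* The piece covering [end of split, end of rec) - what the code
 * above stores in tmprec before faking the right split. */
static struct piece remainder_right_of(struct piece rec, struct piece split)
{
	struct piece tmp;
	unsigned int split_end = split.cpos + split.clusters;

	tmp.cpos = split_end;
	tmp.clusters = rec.cpos + rec.clusters - split_end;
	return tmp;
}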
++ */
++ insert.ins_split = SPLIT_RIGHT;
++
++ memset(&tmprec, 0, sizeof(tmprec));
++
++ tmprec.e_cpos = cpu_to_le32(insert_range);
++ tmprec.e_leaf_clusters = cpu_to_le16(rec_range - insert_range);
++ tmprec.e_flags = rec.e_flags;
++ tmprec.e_blkno = split_rec.e_blkno;
++ le64_add_cpu(&tmprec.e_blkno,
++ ocfs2_clusters_to_blocks(inode->i_sb,
++ le16_to_cpu(split_rec.e_leaf_clusters)));
++ split_rec = tmprec;
++
++ BUG_ON(do_leftright);
++ do_leftright = 1;
++ }
++
++ ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
++ &insert);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (do_leftright == 1) {
++ u32 cpos;
++ struct ocfs2_extent_list *el;
++
++ do_leftright++;
++ split_rec = *orig_split_rec;
++
++ ocfs2_reinit_path(path, 1);
++
++ cpos = le32_to_cpu(split_rec.e_cpos);
++ ret = ocfs2_find_path(inode, path, cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ el = path_leaf_el(path);
++ split_index = ocfs2_search_extent_list(el, cpos);
++ goto leftright;
++ }
++out:
++
++ return ret;
++}
++
++/*
++ * Mark part or all of the extent record at split_index in the leaf
++ * pointed to by path as written. This removes the unwritten
++ * extent flag.
++ *
++ * Care is taken to handle contiguousness so as to not grow the tree.
++ *
++ * meta_ac is not strictly necessary - we only truly need it if growth
++ * of the tree is required. All other cases will degrade into a less
++ * optimal tree layout.
++ *
++ * last_eb_bh should be the rightmost leaf block for any inode with a
++ * btree. Since a split may grow the tree or a merge might shrink it,
++ * the caller cannot trust the contents of that buffer after this
++ * call.
++ *
++ * This code is optimized for readability - several passes might be
++ * made over certain portions of the tree. All of those blocks will
++ * have been brought into cache (and pinned via the journal), so the
++ * extra overhead is not expressed in terms of disk reads.
++ */
++static int __ocfs2_mark_extent_written(struct inode *inode,
++ struct buffer_head *di_bh,
++ handle_t *handle,
++ struct ocfs2_path *path,
++ int split_index,
++ struct ocfs2_extent_rec *split_rec,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_cached_dealloc_ctxt *dealloc)
++{
++ int ret = 0;
++ struct ocfs2_extent_list *el = path_leaf_el(path);
++ struct buffer_head *eb_bh, *last_eb_bh = NULL;
++ struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
++ struct ocfs2_merge_ctxt ctxt;
++ struct ocfs2_extent_list *rightmost_el;
++
++ if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
++ ret = -EIO;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
++ ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
++ (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
++ ret = -EIO;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ eb_bh = path_leaf_bh(path);
++ ret = ocfs2_journal_access(handle, inode, eb_bh,
++ OCFS2_JOURNAL_ACCESS_WRITE);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
++ split_index,
++ split_rec);
++
++ /*
++ * The core merge / split code wants to know how much room is
++ * left in this inode's allocation tree, so we pass the
++ * rightmost extent list.
++ */ ++ if (path->p_tree_depth) { ++ struct ocfs2_extent_block *eb; ++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; ++ ++ ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), ++ le64_to_cpu(di->i_last_eb_blk), ++ &last_eb_bh, OCFS2_BH_CACHED, inode); ++ if (ret) { ++ mlog_exit(ret); ++ goto out; ++ } ++ ++ eb = (struct ocfs2_extent_block *) last_eb_bh->b_data; ++ if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { ++ OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); ++ ret = -EROFS; ++ goto out; ++ } + +- if (ret == 0) +- *last_eb_bh = bh; ++ rightmost_el = &eb->h_list; ++ } else ++ rightmost_el = path_root_el(path); ++ ++ ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec); ++ if (ctxt.c_used_tail_recs > 0 && ++ ocfs2_is_empty_extent(&rightmost_el->l_recs[0])) ++ ctxt.c_used_tail_recs--; ++ ++ if (rec->e_cpos == split_rec->e_cpos && ++ rec->e_leaf_clusters == split_rec->e_leaf_clusters) ++ ctxt.c_split_covers_rec = 1; + else +- brelse(bh); ++ ctxt.c_split_covers_rec = 0; ++ ++ ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); ++ ++ mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " ++ "has_empty: %u, split_covers: %u\n", split_index, ++ ctxt.c_contig_type, ctxt.c_used_tail_recs, ++ ctxt.c_has_empty_extent, ctxt.c_split_covers_rec); ++ ++ if (ctxt.c_contig_type == CONTIG_NONE) { ++ if (ctxt.c_split_covers_rec) ++ el->l_recs[split_index] = *split_rec; ++ else ++ ret = ocfs2_split_and_insert(inode, handle, path, di_bh, ++ &last_eb_bh, split_index, ++ split_rec, meta_ac); ++ if (ret) ++ mlog_errno(ret); ++ } else { ++ ret = ocfs2_try_to_merge_extent(inode, handle, path, ++ split_index, split_rec, ++ dealloc, &ctxt); ++ if (ret) ++ mlog_errno(ret); ++ } ++ ++ ocfs2_journal_dirty(handle, eb_bh); ++ ++out: ++ brelse(last_eb_bh); + return ret; + } + + /* +- * Insert an extent into an inode btree. ++ * Mark the already-existing extent at cpos as written for len clusters. + * +- * The caller needs to update fe->i_clusters ++ * If the existing extent is larger than the request, initiate a ++ * split. An attempt will be made at merging with adjacent extents. ++ * ++ * The caller is responsible for passing down meta_ac if we'll need it. 
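
To make the dispatch at the bottom of __ocfs2_mark_extent_written() concrete, here is a tiny stand-alone model of the c_split_covers_rec decision; the strings stand in for the three code paths, and everything else is simplified away:

#include <stdio.h>

struct rec { unsigned cpos, clusters; };

int main(void)
{
        struct rec rec = { 8, 4 }, split = { 8, 4 };
        int split_covers_rec = (rec.cpos == split.cpos &&
                                rec.clusters == split.clusters);
        int contig = 0; /* CONTIG_NONE for this example */

        /* No neighbour to merge with: either the written range covers
         * the whole record (overwrite it in place) or only part of it
         * (a split is unavoidable). Any contiguity goes to the merge
         * code instead. */
        if (!contig)
                puts(split_covers_rec ? "overwrite record in place"
                                      : "ocfs2_split_and_insert()");
        else
                puts("ocfs2_try_to_merge_extent()");
        return 0;
}
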
+ */ +-int ocfs2_insert_extent(struct ocfs2_super *osb, +- handle_t *handle, +- struct inode *inode, +- struct buffer_head *fe_bh, +- u32 cpos, +- u64 start_blk, +- u32 new_clusters, +- struct ocfs2_alloc_context *meta_ac) ++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, ++ handle_t *handle, u32 cpos, u32 len, u32 phys, ++ struct ocfs2_alloc_context *meta_ac, ++ struct ocfs2_cached_dealloc_ctxt *dealloc) + { +- int status, shift; +- struct buffer_head *last_eb_bh = NULL; +- struct buffer_head *bh = NULL; +- struct ocfs2_insert_type insert = {0, }; +- struct ocfs2_extent_rec rec; +- +- mlog(0, "add %u clusters at position %u to inode %llu\n", +- new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); +- +- mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && +- (OCFS2_I(inode)->ip_clusters != cpos), +- "Device %s, asking for sparse allocation: inode %llu, " +- "cpos %u, clusters %u\n", +- osb->dev_str, +- (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, +- OCFS2_I(inode)->ip_clusters); ++ int ret, index; ++ u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); ++ struct ocfs2_extent_rec split_rec; ++ struct ocfs2_path *left_path = NULL; ++ struct ocfs2_extent_list *el; + +- memset(&rec, 0, sizeof(rec)); +- rec.e_cpos = cpu_to_le32(cpos); +- rec.e_blkno = cpu_to_le64(start_blk); +- rec.e_leaf_clusters = cpu_to_le16(new_clusters); ++ mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n", ++ inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno); + +- status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, +- &insert); +- if (status < 0) { +- mlog_errno(status); +- goto bail; ++ if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) { ++ ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents " ++ "that are being written to, but the feature bit " ++ "is not set in the super block.", ++ (unsigned long long)OCFS2_I(inode)->ip_blkno); ++ ret = -EROFS; ++ goto out; + } + +- mlog(0, "Insert.appending: %u, Insert.Contig: %u, " +- "Insert.contig_index: %d, Insert.free_records: %d, " +- "Insert.tree_depth: %d\n", +- insert.ins_appending, insert.ins_contig, insert.ins_contig_index, +- insert.ins_free_records, insert.ins_tree_depth); +- + /* +- * Avoid growing the tree unless we're out of records and the +- * insert type requres one. ++ * XXX: This should be fixed up so that we just re-insert the ++ * next extent records. + */ +- if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records) +- goto out_add; ++ ocfs2_extent_map_trunc(inode, 0); + +- shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh); +- if (shift < 0) { +- status = shift; +- mlog_errno(status); +- goto bail; ++ left_path = ocfs2_new_inode_path(di_bh); ++ if (!left_path) { ++ ret = -ENOMEM; ++ mlog_errno(ret); ++ goto out; + } + +- /* We traveled all the way to the bottom of the allocation tree +- * and didn't find room for any more extents - we need to add +- * another tree level */ +- if (shift) { +- BUG_ON(bh); +- mlog(0, "need to shift tree depth " +- "(current = %d)\n", insert.ins_tree_depth); +- +- /* ocfs2_shift_tree_depth will return us a buffer with +- * the new extent block (so we can pass that to +- * ocfs2_add_branch). 
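
ocfs2_search_extent_list(), used above and exported via alloc.h further down, is essentially a linear scan for the record containing a virtual cluster. A stand-alone sketch with plain integers (the real function walks l_recs[] up to l_next_free_rec using the little-endian record fields):

#include <stdio.h>

struct rec { unsigned cpos, clusters; };

/* Return the index of the record whose range contains v, or -1. */
static int search_extent_list(const struct rec *recs, int nr, unsigned v)
{
        int i;

        for (i = 0; i < nr; i++)
                if (recs[i].cpos <= v && v < recs[i].cpos + recs[i].clusters)
                        return i;
        return -1;
}

int main(void)
{
        struct rec el[] = { { 0, 4 }, { 4, 8 }, { 16, 2 } }; /* hole at [12,16) */

        printf("%d %d\n", search_extent_list(el, 3, 5),   /* 1 */
               search_extent_list(el, 3, 13));            /* -1: in the hole */
        return 0;
}
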
*/ +- status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh, +- meta_ac, &bh); +- if (status < 0) { +- mlog_errno(status); +- goto bail; +- } +- insert.ins_tree_depth++; +- /* Special case: we have room now if we shifted from +- * tree_depth 0 */ +- if (insert.ins_tree_depth == 1) +- goto out_add; ++ ret = ocfs2_find_path(inode, left_path, cpos); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; + } ++ el = path_leaf_el(left_path); + +- /* call ocfs2_add_branch to add the final part of the tree with +- * the new data. */ +- mlog(0, "add branch. bh = %p\n", bh); +- status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh, +- meta_ac); +- if (status < 0) { +- mlog_errno(status); +- goto bail; ++ index = ocfs2_search_extent_list(el, cpos); ++ if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { ++ ocfs2_error(inode->i_sb, ++ "Inode %llu has an extent at cpos %u which can no " ++ "longer be found.\n", ++ (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); ++ ret = -EROFS; ++ goto out; + } + +-out_add: +- /* Finally, we can add clusters. This might rotate the tree for us. */ +- status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); +- if (status < 0) +- mlog_errno(status); +- else +- ocfs2_extent_map_insert_rec(inode, &rec); +- +-bail: +- if (bh) +- brelse(bh); ++ memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); ++ split_rec.e_cpos = cpu_to_le32(cpos); ++ split_rec.e_leaf_clusters = cpu_to_le16(len); ++ split_rec.e_blkno = cpu_to_le64(start_blkno); ++ split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; ++ split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; + +- if (last_eb_bh) +- brelse(last_eb_bh); ++ ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, ++ index, &split_rec, meta_ac, dealloc); ++ if (ret) ++ mlog_errno(ret); + +- mlog_exit(status); +- return status; ++out: ++ ocfs2_free_path(left_path); ++ return ret; + } + + static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) +@@ -2957,6 +4420,219 @@ + return status; + } + ++/* ++ * Delayed de-allocation of suballocator blocks. ++ * ++ * Some sets of block de-allocations might involve multiple suballocator inodes. ++ * ++ * The locking for this can get extremely complicated, especially when ++ * the suballocator inodes to delete from aren't known until deep ++ * within an unrelated codepath. ++ * ++ * ocfs2_extent_block structures are a good example of this - an inode ++ * btree could have been grown by any number of nodes each allocating ++ * out of their own suballoc inode. ++ * ++ * These structures allow the delay of block de-allocation until a ++ * later time, when locking of multiple cluster inodes won't cause ++ * deadlock. 
++ */ ++ ++/* ++ * Describes a single block free from a suballocator ++ */ ++struct ocfs2_cached_block_free { ++ struct ocfs2_cached_block_free *free_next; ++ u64 free_blk; ++ unsigned int free_bit; ++}; ++ ++struct ocfs2_per_slot_free_list { ++ struct ocfs2_per_slot_free_list *f_next_suballocator; ++ int f_inode_type; ++ int f_slot; ++ struct ocfs2_cached_block_free *f_first; ++}; ++ ++static int ocfs2_free_cached_items(struct ocfs2_super *osb, ++ int sysfile_type, ++ int slot, ++ struct ocfs2_cached_block_free *head) ++{ ++ int ret; ++ u64 bg_blkno; ++ handle_t *handle; ++ struct inode *inode; ++ struct buffer_head *di_bh = NULL; ++ struct ocfs2_cached_block_free *tmp; ++ ++ inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); ++ if (!inode) { ++ ret = -EINVAL; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ mutex_lock(&inode->i_mutex); ++ ++ ret = ocfs2_meta_lock(inode, &di_bh, 1); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_mutex; ++ } ++ ++ handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); ++ if (IS_ERR(handle)) { ++ ret = PTR_ERR(handle); ++ mlog_errno(ret); ++ goto out_unlock; ++ } ++ ++ while (head) { ++ bg_blkno = ocfs2_which_suballoc_group(head->free_blk, ++ head->free_bit); ++ mlog(0, "Free bit: (bit %u, blkno %llu)\n", ++ head->free_bit, (unsigned long long)head->free_blk); ++ ++ ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, ++ head->free_bit, bg_blkno, 1); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_journal; ++ } ++ ++ ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_journal; ++ } ++ ++ tmp = head; ++ head = head->free_next; ++ kfree(tmp); ++ } ++ ++out_journal: ++ ocfs2_commit_trans(osb, handle); ++ ++out_unlock: ++ ocfs2_meta_unlock(inode, 1); ++ brelse(di_bh); ++out_mutex: ++ mutex_unlock(&inode->i_mutex); ++ iput(inode); ++out: ++ while(head) { ++ /* Premature exit may have left some dangling items. 
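
The two-level list built here is easy to model outside the kernel. A stand-alone C sketch of the same shape - one list per (inode type, slot) pair, each holding single-bit frees - with malloc/free standing in for the kernel allocators and for the journaled bitmap update:

#include <stdlib.h>
#include <stdio.h>

struct block_free { struct block_free *next; unsigned long long blk; unsigned bit; };
struct slot_list { struct slot_list *next; int type, slot; struct block_free *first; };
struct dealloc_ctxt { struct slot_list *first; };

static int cache_block_dealloc(struct dealloc_ctxt *c, int type, int slot,
                               unsigned long long blk, unsigned bit)
{
        struct slot_list *fl;
        struct block_free *item;

        for (fl = c->first; fl; fl = fl->next)
                if (fl->type == type && fl->slot == slot)
                        break;
        if (!fl) {
                fl = calloc(1, sizeof(*fl));
                if (!fl)
                        return -1;
                fl->type = type;
                fl->slot = slot;
                fl->next = c->first;
                c->first = fl;
        }
        item = malloc(sizeof(*item));
        if (!item)
                return -1;
        item->blk = blk;
        item->bit = bit;
        item->next = fl->first;
        fl->first = item;       /* push, LIFO like the kernel lists */
        return 0;
}

int main(void)
{
        struct dealloc_ctxt ctxt = { NULL };

        cache_block_dealloc(&ctxt, 0 /* extent alloc */, 0 /* slot */, 5000, 12);
        cache_block_dealloc(&ctxt, 0, 0, 5001, 13);

        /* run_deallocs(): walk each per-slot list once, freeing as we go */
        while (ctxt.first) {
                struct slot_list *fl = ctxt.first;
                while (fl->first) {
                        struct block_free *bf = fl->first;
                        printf("free blk %llu bit %u (type %d, slot %d)\n",
                               bf->blk, bf->bit, fl->type, fl->slot);
                        fl->first = bf->next;
                        free(bf);
                }
                ctxt.first = fl->next;
                free(fl);
        }
        return 0;
}
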
*/ ++ tmp = head; ++ head = head->free_next; ++ kfree(tmp); ++ } ++ ++ return ret; ++} ++ ++int ocfs2_run_deallocs(struct ocfs2_super *osb, ++ struct ocfs2_cached_dealloc_ctxt *ctxt) ++{ ++ int ret = 0, ret2; ++ struct ocfs2_per_slot_free_list *fl; ++ ++ if (!ctxt) ++ return 0; ++ ++ while (ctxt->c_first_suballocator) { ++ fl = ctxt->c_first_suballocator; ++ ++ if (fl->f_first) { ++ mlog(0, "Free items: (type %u, slot %d)\n", ++ fl->f_inode_type, fl->f_slot); ++ ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, ++ fl->f_slot, fl->f_first); ++ if (ret2) ++ mlog_errno(ret2); ++ if (!ret) ++ ret = ret2; ++ } ++ ++ ctxt->c_first_suballocator = fl->f_next_suballocator; ++ kfree(fl); ++ } ++ ++ return ret; ++} ++ ++static struct ocfs2_per_slot_free_list * ++ocfs2_find_per_slot_free_list(int type, ++ int slot, ++ struct ocfs2_cached_dealloc_ctxt *ctxt) ++{ ++ struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; ++ ++ while (fl) { ++ if (fl->f_inode_type == type && fl->f_slot == slot) ++ return fl; ++ ++ fl = fl->f_next_suballocator; ++ } ++ ++ fl = kmalloc(sizeof(*fl), GFP_NOFS); ++ if (fl) { ++ fl->f_inode_type = type; ++ fl->f_slot = slot; ++ fl->f_first = NULL; ++ fl->f_next_suballocator = ctxt->c_first_suballocator; ++ ++ ctxt->c_first_suballocator = fl; ++ } ++ return fl; ++} ++ ++static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, ++ int type, int slot, u64 blkno, ++ unsigned int bit) ++{ ++ int ret; ++ struct ocfs2_per_slot_free_list *fl; ++ struct ocfs2_cached_block_free *item; ++ ++ fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); ++ if (fl == NULL) { ++ ret = -ENOMEM; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ item = kmalloc(sizeof(*item), GFP_NOFS); ++ if (item == NULL) { ++ ret = -ENOMEM; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", ++ type, slot, bit, (unsigned long long)blkno); ++ ++ item->free_blk = blkno; ++ item->free_bit = bit; ++ item->free_next = fl->f_first; ++ ++ fl->f_first = item; ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, ++ struct ocfs2_extent_block *eb) ++{ ++ return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, ++ le16_to_cpu(eb->h_suballoc_slot), ++ le64_to_cpu(eb->h_blkno), ++ le16_to_cpu(eb->h_suballoc_bit)); ++} ++ + /* This function will figure out whether the currently last extent + * block will be deleted, and if it will, what the new last extent + * block will be so we can update his h_next_leaf_blk field, as well +@@ -3238,27 +4914,10 @@ + BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); + BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno)); + +- if (le16_to_cpu(eb->h_suballoc_slot) == 0) { +- /* +- * This code only understands how to +- * lock the suballocator in slot 0, +- * which is fine because allocation is +- * only ever done out of that +- * suballocator too. A future version +- * might change that however, so avoid +- * a free if we don't know how to +- * handle it. This way an fs incompat +- * bit will not be necessary. +- */ +- ret = ocfs2_free_extent_block(handle, +- tc->tc_ext_alloc_inode, +- tc->tc_ext_alloc_bh, +- eb); +- ++ ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb); + /* An error here is not fatal. 
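
Note the error handling pattern in ocfs2_run_deallocs() above: draining continues past a failure, but only the first error is reported. A compact stand-alone model of that idiom (the -5 is an arbitrary stand-in for a kernel errno):

#include <stdio.h>

static int step(int i) { return (i == 1) ? -5 : 0; }

int main(void)
{
        int i, ret = 0;

        for (i = 0; i < 3; i++) {
                int ret2 = step(i);
                if (ret2)
                        fprintf(stderr, "step %d failed: %d\n", i, ret2);
                if (!ret)
                        ret = ret2;     /* remember only the first failure */
        }
        printf("ret = %d\n", ret);      /* -5; steps 0 and 2 still ran */
        return 0;
}
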
*/ + if (ret < 0) + mlog_errno(ret); +- } + } else { + deleted_eb = 0; + } +@@ -3631,8 +5290,6 @@ + + mlog_entry_void(); + +- down_write(&OCFS2_I(inode)->ip_alloc_sem); +- + new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, + i_size_read(inode)); + +@@ -3754,7 +5411,6 @@ + goto start; + + bail: +- up_write(&OCFS2_I(inode)->ip_alloc_sem); + + ocfs2_schedule_truncate_log_flush(osb, 1); + +@@ -3764,6 +5420,8 @@ + if (handle) + ocfs2_commit_trans(osb, handle); + ++ ocfs2_run_deallocs(osb, &tc->tc_dealloc); ++ + ocfs2_free_path(path); + + /* This will drop the ext_alloc cluster lock for us */ +@@ -3774,23 +5432,18 @@ + } + + /* +- * Expects the inode to already be locked. This will figure out which +- * inodes need to be locked and will put them on the returned truncate +- * context. ++ * Expects the inode to already be locked. + */ + int ocfs2_prepare_truncate(struct ocfs2_super *osb, + struct inode *inode, + struct buffer_head *fe_bh, + struct ocfs2_truncate_context **tc) + { +- int status, metadata_delete, i; ++ int status; + unsigned int new_i_clusters; + struct ocfs2_dinode *fe; + struct ocfs2_extent_block *eb; +- struct ocfs2_extent_list *el; + struct buffer_head *last_eb_bh = NULL; +- struct inode *ext_alloc_inode = NULL; +- struct buffer_head *ext_alloc_bh = NULL; + + mlog_entry_void(); + +@@ -3810,12 +5463,9 @@ + mlog_errno(status); + goto bail; + } ++ ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); + +- metadata_delete = 0; + if (fe->id2.i_list.l_tree_depth) { +- /* If we have a tree, then the truncate may result in +- * metadata deletes. Figure this out from the +- * rightmost leaf block.*/ + status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), + &last_eb_bh, OCFS2_BH_CACHED, inode); + if (status < 0) { +@@ -3830,43 +5480,10 @@ + status = -EIO; + goto bail; + } +- el = &(eb->h_list); +- +- i = 0; +- if (ocfs2_is_empty_extent(&el->l_recs[0])) +- i = 1; +- /* +- * XXX: Should we check that next_free_rec contains +- * the extent? +- */ +- if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters) +- metadata_delete = 1; + } + + (*tc)->tc_last_eb_bh = last_eb_bh; + +- if (metadata_delete) { +- mlog(0, "Will have to delete metadata for this trunc. " +- "locking allocator.\n"); +- ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0); +- if (!ext_alloc_inode) { +- status = -ENOMEM; +- mlog_errno(status); +- goto bail; +- } +- +- mutex_lock(&ext_alloc_inode->i_mutex); +- (*tc)->tc_ext_alloc_inode = ext_alloc_inode; +- +- status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1); +- if (status < 0) { +- mlog_errno(status); +- goto bail; +- } +- (*tc)->tc_ext_alloc_bh = ext_alloc_bh; +- (*tc)->tc_ext_alloc_locked = 1; +- } +- + status = 0; + bail: + if (status < 0) { +@@ -3880,16 +5497,13 @@ + + static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) + { +- if (tc->tc_ext_alloc_inode) { +- if (tc->tc_ext_alloc_locked) +- ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1); +- +- mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex); +- iput(tc->tc_ext_alloc_inode); +- } +- +- if (tc->tc_ext_alloc_bh) +- brelse(tc->tc_ext_alloc_bh); ++ /* ++ * The caller is responsible for completing deallocation ++ * before freeing the context. 
++ */ ++ if (tc->tc_dealloc.c_first_suballocator != NULL) ++ mlog(ML_NOTICE, ++ "Truncate completion has non-empty dealloc context\n"); + + if (tc->tc_last_eb_bh) + brelse(tc->tc_last_eb_bh); +diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.h linux-2.6.22-591/fs/ocfs2/alloc.h +--- linux-2.6.22-570/fs/ocfs2/alloc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/alloc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -34,7 +34,13 @@ + u32 cpos, + u64 start_blk, + u32 new_clusters, ++ u8 flags, + struct ocfs2_alloc_context *meta_ac); ++struct ocfs2_cached_dealloc_ctxt; ++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, ++ handle_t *handle, u32 cpos, u32 len, u32 phys, ++ struct ocfs2_alloc_context *meta_ac, ++ struct ocfs2_cached_dealloc_ctxt *dealloc); + int ocfs2_num_free_extents(struct ocfs2_super *osb, + struct inode *inode, + struct ocfs2_dinode *fe); +@@ -63,9 +69,27 @@ + int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, + struct ocfs2_dinode *tl_copy); + ++/* ++ * Process local structure which describes the block unlinks done ++ * during an operation. This is populated via ++ * ocfs2_cache_block_dealloc(). ++ * ++ * ocfs2_run_deallocs() should be called after the potentially ++ * de-allocating routines. No journal handles should be open, and most ++ * locks should have been dropped. ++ */ ++struct ocfs2_cached_dealloc_ctxt { ++ struct ocfs2_per_slot_free_list *c_first_suballocator; ++}; ++static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) ++{ ++ c->c_first_suballocator = NULL; ++} ++int ocfs2_run_deallocs(struct ocfs2_super *osb, ++ struct ocfs2_cached_dealloc_ctxt *ctxt); ++ + struct ocfs2_truncate_context { +- struct inode *tc_ext_alloc_inode; +- struct buffer_head *tc_ext_alloc_bh; ++ struct ocfs2_cached_dealloc_ctxt tc_dealloc; + int tc_ext_alloc_locked; /* is it cluster locked? */ + /* these get destroyed once it's passed to ocfs2_commit_truncate. */ + struct buffer_head *tc_last_eb_bh; +@@ -84,6 +108,7 @@ + + int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, + u32 cpos, struct buffer_head **leaf_bh); ++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); + + /* + * Helper function to look at the # of clusters in an extent record. +diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.c linux-2.6.22-591/fs/ocfs2/aops.c +--- linux-2.6.22-570/fs/ocfs2/aops.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/aops.c 2007-12-21 15:36:12.000000000 -0500 +@@ -232,7 +232,7 @@ + * might now be discovering a truncate that hit on another node. + * block_read_full_page->get_block freaks out if it is asked to read + * beyond the end of a file, so we check here. Callers +- * (generic_file_read, fault->nopage) are clever enough to check i_size ++ * (generic_file_read, vm_ops->fault) are clever enough to check i_size + * and notice that the page they just read isn't needed. + * + * XXX sys_readahead() seems to get that wrong? +@@ -705,6 +705,8 @@ + bh = bh->b_this_page, block_start += bsize) { + block_end = block_start + bsize; + ++ clear_buffer_new(bh); ++ + /* + * Ignore blocks outside of our i/o range - + * they may belong to unallocated clusters. +@@ -719,9 +721,8 @@ + * For an allocating write with cluster size >= page + * size, we always write the entire page. 
+ */ +- +- if (buffer_new(bh)) +- clear_buffer_new(bh); ++ if (new) ++ set_buffer_new(bh); + + if (!buffer_mapped(bh)) { + map_bh(bh, inode->i_sb, *p_blkno); +@@ -760,18 +761,13 @@ + bh = head; + block_start = 0; + do { +- void *kaddr; +- + block_end = block_start + bsize; + if (block_end <= from) + goto next_bh; + if (block_start >= to) + break; + +- kaddr = kmap_atomic(page, KM_USER0); +- memset(kaddr+block_start, 0, bh->b_size); +- flush_dcache_page(page); +- kunmap_atomic(kaddr, KM_USER0); ++ zero_user_page(page, block_start, bh->b_size, KM_USER0); + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + +@@ -783,217 +779,240 @@ + return ret; + } + ++#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE) ++#define OCFS2_MAX_CTXT_PAGES 1 ++#else ++#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE) ++#endif ++ ++#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE) ++ + /* +- * This will copy user data from the buffer page in the splice +- * context. +- * +- * For now, we ignore SPLICE_F_MOVE as that would require some extra +- * communication out all the way to ocfs2_write(). ++ * Describe the state of a single cluster to be written to. + */ +-int ocfs2_map_and_write_splice_data(struct inode *inode, +- struct ocfs2_write_ctxt *wc, u64 *p_blkno, +- unsigned int *ret_from, unsigned int *ret_to) ++struct ocfs2_write_cluster_desc { ++ u32 c_cpos; ++ u32 c_phys; ++ /* ++ * Give this a unique field because c_phys eventually gets ++ * filled. ++ */ ++ unsigned c_new; ++ unsigned c_unwritten; ++}; ++ ++static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d) + { +- int ret; +- unsigned int to, from, cluster_start, cluster_end; +- char *src, *dst; +- struct ocfs2_splice_write_priv *sp = wc->w_private; +- struct pipe_buffer *buf = sp->s_buf; +- unsigned long bytes, src_from; +- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ return d->c_new || d->c_unwritten; ++} + +- ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, +- &cluster_end); ++struct ocfs2_write_ctxt { ++ /* Logical cluster position / len of write */ ++ u32 w_cpos; ++ u32 w_clen; + +- from = sp->s_offset; +- src_from = sp->s_buf_offset; +- bytes = wc->w_count; ++ struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; + +- if (wc->w_large_pages) { + /* +- * For cluster size < page size, we have to +- * calculate pos within the cluster and obey +- * the rightmost boundary. +- */ +- bytes = min(bytes, (unsigned long)(osb->s_clustersize +- - (wc->w_pos & (osb->s_clustersize - 1)))); +- } +- to = from + bytes; +- +- BUG_ON(from > PAGE_CACHE_SIZE); +- BUG_ON(to > PAGE_CACHE_SIZE); +- BUG_ON(from < cluster_start); +- BUG_ON(to > cluster_end); +- +- if (wc->w_this_page_new) +- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, +- cluster_start, cluster_end, 1); +- else +- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, +- from, to, 0); +- if (ret) { +- mlog_errno(ret); +- goto out; ++ * This is true if page_size > cluster_size. ++ * ++ * It triggers a set of special cases during write which might ++ * have to deal with allocating writes to partial pages. ++ */ ++ unsigned int w_large_pages; ++ ++ /* ++ * Pages involved in this write. ++ * ++ * w_target_page is the page being written to by the user. 
++ * ++ * w_pages is an array of pages which always contains ++ * w_target_page, and in the case of an allocating write with ++ * page_size < cluster size, it will contain zero'd and mapped ++ * pages adjacent to w_target_page which need to be written ++ * out in so that future reads from that region will get ++ * zero's. ++ */ ++ struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; ++ unsigned int w_num_pages; ++ struct page *w_target_page; ++ ++ /* ++ * ocfs2_write_end() uses this to know what the real range to ++ * write in the target should be. ++ */ ++ unsigned int w_target_from; ++ unsigned int w_target_to; ++ ++ /* ++ * We could use journal_current_handle() but this is cleaner, ++ * IMHO -Mark ++ */ ++ handle_t *w_handle; ++ ++ struct buffer_head *w_di_bh; ++ ++ struct ocfs2_cached_dealloc_ctxt w_dealloc; ++}; ++ ++static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) ++{ ++ int i; ++ ++ for(i = 0; i < wc->w_num_pages; i++) { ++ if (wc->w_pages[i] == NULL) ++ continue; ++ ++ unlock_page(wc->w_pages[i]); ++ mark_page_accessed(wc->w_pages[i]); ++ page_cache_release(wc->w_pages[i]); + } + +- src = buf->ops->map(sp->s_pipe, buf, 1); +- dst = kmap_atomic(wc->w_this_page, KM_USER1); +- memcpy(dst + from, src + src_from, bytes); +- kunmap_atomic(wc->w_this_page, KM_USER1); +- buf->ops->unmap(sp->s_pipe, buf, src); ++ brelse(wc->w_di_bh); ++ kfree(wc); ++} ++ ++static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, ++ struct ocfs2_super *osb, loff_t pos, ++ unsigned len, struct buffer_head *di_bh) ++{ ++ struct ocfs2_write_ctxt *wc; ++ ++ wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); ++ if (!wc) ++ return -ENOMEM; ++ ++ wc->w_cpos = pos >> osb->s_clustersize_bits; ++ wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len); ++ get_bh(di_bh); ++ wc->w_di_bh = di_bh; + +- wc->w_finished_copy = 1; ++ if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) ++ wc->w_large_pages = 1; ++ else ++ wc->w_large_pages = 0; + +- *ret_from = from; +- *ret_to = to; +-out: ++ ocfs2_init_dealloc_ctxt(&wc->w_dealloc); + +- return bytes ? (unsigned int)bytes : ret; ++ *wcp = wc; ++ ++ return 0; + } + + /* +- * This will copy user data from the iovec in the buffered write +- * context. ++ * If a page has any new buffers, zero them out here, and mark them uptodate ++ * and dirty so they'll be written out (in order to prevent uninitialised ++ * block data from leaking). And clear the new bit. + */ +-int ocfs2_map_and_write_user_data(struct inode *inode, +- struct ocfs2_write_ctxt *wc, u64 *p_blkno, +- unsigned int *ret_from, unsigned int *ret_to) ++static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) + { +- int ret; +- unsigned int to, from, cluster_start, cluster_end; +- unsigned long bytes, src_from; +- char *dst; +- struct ocfs2_buffered_write_priv *bp = wc->w_private; +- const struct iovec *cur_iov = bp->b_cur_iov; +- char __user *buf; +- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); +- +- ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start, +- &cluster_end); ++ unsigned int block_start, block_end; ++ struct buffer_head *head, *bh; + +- buf = cur_iov->iov_base + bp->b_cur_off; +- src_from = (unsigned long)buf & ~PAGE_CACHE_MASK; ++ BUG_ON(!PageLocked(page)); ++ if (!page_has_buffers(page)) ++ return; + +- from = wc->w_pos & (PAGE_CACHE_SIZE - 1); ++ bh = head = page_buffers(page); ++ block_start = 0; ++ do { ++ block_end = block_start + bh->b_size; + +- /* +- * This is a lot of comparisons, but it reads quite +- * easily, which is important here. 
+- */ +- /* Stay within the src page */ +- bytes = PAGE_SIZE - src_from; +- /* Stay within the vector */ +- bytes = min(bytes, +- (unsigned long)(cur_iov->iov_len - bp->b_cur_off)); +- /* Stay within count */ +- bytes = min(bytes, (unsigned long)wc->w_count); +- /* +- * For clustersize > page size, just stay within +- * target page, otherwise we have to calculate pos +- * within the cluster and obey the rightmost +- * boundary. +- */ +- if (wc->w_large_pages) { +- /* +- * For cluster size < page size, we have to +- * calculate pos within the cluster and obey +- * the rightmost boundary. +- */ +- bytes = min(bytes, (unsigned long)(osb->s_clustersize +- - (wc->w_pos & (osb->s_clustersize - 1)))); +- } else { +- /* +- * cluster size > page size is the most common +- * case - we just stay within the target page +- * boundary. +- */ +- bytes = min(bytes, PAGE_CACHE_SIZE - from); +- } ++ if (buffer_new(bh)) { ++ if (block_end > from && block_start < to) { ++ if (!PageUptodate(page)) { ++ unsigned start, end; + +- to = from + bytes; ++ start = max(from, block_start); ++ end = min(to, block_end); + +- BUG_ON(from > PAGE_CACHE_SIZE); +- BUG_ON(to > PAGE_CACHE_SIZE); +- BUG_ON(from < cluster_start); +- BUG_ON(to > cluster_end); ++ zero_user_page(page, start, end - start, KM_USER0); ++ set_buffer_uptodate(bh); ++ } + +- if (wc->w_this_page_new) +- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, +- cluster_start, cluster_end, 1); +- else +- ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode, +- from, to, 0); +- if (ret) { +- mlog_errno(ret); +- goto out; ++ clear_buffer_new(bh); ++ mark_buffer_dirty(bh); ++ } + } + +- dst = kmap(wc->w_this_page); +- memcpy(dst + from, bp->b_src_buf + src_from, bytes); +- kunmap(wc->w_this_page); ++ block_start = block_end; ++ bh = bh->b_this_page; ++ } while (bh != head); ++} + +- /* +- * XXX: This is slow, but simple. The caller of +- * ocfs2_buffered_write_cluster() is responsible for +- * passing through the iovecs, so it's difficult to +- * predict what our next step is in here after our +- * initial write. A future version should be pushing +- * that iovec manipulation further down. +- * +- * By setting this, we indicate that a copy from user +- * data was done, and subsequent calls for this +- * cluster will skip copying more data. ++/* ++ * Only called when we have a failure during allocating write to write ++ * zero's to the newly allocated region. + */ +- wc->w_finished_copy = 1; ++static void ocfs2_write_failure(struct inode *inode, ++ struct ocfs2_write_ctxt *wc, ++ loff_t user_pos, unsigned user_len) ++{ ++ int i; ++ unsigned from, to; ++ struct page *tmppage; + +- *ret_from = from; +- *ret_to = to; +-out: ++ ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); ++ ++ if (wc->w_large_pages) { ++ from = wc->w_target_from; ++ to = wc->w_target_to; ++ } else { ++ from = 0; ++ to = PAGE_CACHE_SIZE; ++ } + +- return bytes ? (unsigned int)bytes : ret; ++ for(i = 0; i < wc->w_num_pages; i++) { ++ tmppage = wc->w_pages[i]; ++ ++ if (ocfs2_should_order_data(inode)) ++ walk_page_buffers(wc->w_handle, page_buffers(tmppage), ++ from, to, NULL, ++ ocfs2_journal_dirty_data); ++ ++ block_commit_write(tmppage, from, to); ++ } + } + +-/* +- * Map, fill and write a page to disk. +- * +- * The work of copying data is done via callback. Newly allocated +- * pages which don't take user data will be zero'd (set 'new' to +- * indicate an allocating write) +- * +- * Returns a negative error code or the number of bytes copied into +- * the page. 
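
The clamping arithmetic in ocfs2_zero_new_buffers() above is worth seeing in isolation: on a failed or short write, a new buffer is zeroed only where it overlaps the byte range the write was supposed to fill. A stand-alone example with a hypothetical 512-byte buffer:

#include <stdio.h>

static unsigned max_u(unsigned a, unsigned b) { return a > b ? a : b; }
static unsigned min_u(unsigned a, unsigned b) { return a < b ? a : b; }

int main(void)
{
        unsigned from = 300, to = 700;  /* byte range of the write */
        unsigned block_start = 512, block_end = 1024; /* one new buffer */

        /* Overlap test and clamp, exactly as in the function above */
        if (block_end > from && block_start < to) {
                unsigned start = max_u(from, block_start);
                unsigned end = min_u(to, block_end);
                printf("zero [%u, %u) = %u bytes\n", start, end, end - start);
                /* -> zero [512, 700): the written part of this buffer */
        }
        return 0;
}
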
+- */ +-static int ocfs2_write_data_page(struct inode *inode, handle_t *handle, +- u64 *p_blkno, struct page *page, +- struct ocfs2_write_ctxt *wc, int new) ++static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, ++ struct ocfs2_write_ctxt *wc, ++ struct page *page, u32 cpos, ++ loff_t user_pos, unsigned user_len, ++ int new) + { +- int ret, copied = 0; +- unsigned int from = 0, to = 0; ++ int ret; ++ unsigned int map_from = 0, map_to = 0; + unsigned int cluster_start, cluster_end; +- unsigned int zero_from = 0, zero_to = 0; ++ unsigned int user_data_from = 0, user_data_to = 0; + +- ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos, ++ ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos, + &cluster_start, &cluster_end); + +- if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index +- && !wc->w_finished_copy) { ++ if (page == wc->w_target_page) { ++ map_from = user_pos & (PAGE_CACHE_SIZE - 1); ++ map_to = map_from + user_len; + +- wc->w_this_page = page; +- wc->w_this_page_new = new; +- ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to); +- if (ret < 0) { ++ if (new) ++ ret = ocfs2_map_page_blocks(page, p_blkno, inode, ++ cluster_start, cluster_end, ++ new); ++ else ++ ret = ocfs2_map_page_blocks(page, p_blkno, inode, ++ map_from, map_to, new); ++ if (ret) { + mlog_errno(ret); + goto out; + } + +- copied = ret; +- +- zero_from = from; +- zero_to = to; ++ user_data_from = map_from; ++ user_data_to = map_to; + if (new) { +- from = cluster_start; +- to = cluster_end; ++ map_from = cluster_start; ++ map_to = cluster_end; + } ++ ++ wc->w_target_from = map_from; ++ wc->w_target_to = map_to; + } else { + /* + * If we haven't allocated the new page yet, we +@@ -1002,11 +1021,11 @@ + */ + BUG_ON(!new); + +- from = cluster_start; +- to = cluster_end; ++ map_from = cluster_start; ++ map_to = cluster_end; + + ret = ocfs2_map_page_blocks(page, p_blkno, inode, +- cluster_start, cluster_end, 1); ++ cluster_start, cluster_end, new); + if (ret) { + mlog_errno(ret); + goto out; +@@ -1025,108 +1044,113 @@ + */ + if (new && !PageUptodate(page)) + ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), +- wc->w_cpos, zero_from, zero_to); ++ cpos, user_data_from, user_data_to); + + flush_dcache_page(page); + +- if (ocfs2_should_order_data(inode)) { +- ret = walk_page_buffers(handle, +- page_buffers(page), +- from, to, NULL, +- ocfs2_journal_dirty_data); +- if (ret < 0) +- mlog_errno(ret); +- } +- +- /* +- * We don't use generic_commit_write() because we need to +- * handle our own i_size update. +- */ +- ret = block_commit_write(page, from, to); +- if (ret) +- mlog_errno(ret); + out: +- +- return copied ? copied : ret; ++ return ret; + } + + /* +- * Do the actual write of some data into an inode. Optionally allocate +- * in order to fulfill the write. +- * +- * cpos is the logical cluster offset within the file to write at +- * +- * 'phys' is the physical mapping of that offset. a 'phys' value of +- * zero indicates that allocation is required. In this case, data_ac +- * and meta_ac should be valid (meta_ac can be null if metadata +- * allocation isn't required). ++ * This function will only grab one clusters worth of pages. 
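
For the cluster-smaller-than-page case handled above, ocfs2_figure_cluster_boundaries() reduces to modular arithmetic. A stand-alone sketch with hypothetical sizes (4K pages, 1K clusters):

#include <stdio.h>

int main(void)
{
        unsigned page_size = 4096, csize = 1024;
        unsigned cpos = 13;                     /* logical cluster number */
        unsigned per_page = page_size / csize;  /* 4 clusters per page */
        unsigned start = (cpos % per_page) * csize;
        unsigned end = start + csize;

        /* cluster 13 -> page 3, bytes [1024, 2048) of that page */
        printf("cluster %u -> page %u, bytes [%u, %u)\n",
               cpos, cpos / per_page, start, end);
        return 0;
}
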
+ */ +-static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle, +- struct buffer_head *di_bh, +- struct ocfs2_alloc_context *data_ac, +- struct ocfs2_alloc_context *meta_ac, +- struct ocfs2_write_ctxt *wc) ++static int ocfs2_grab_pages_for_write(struct address_space *mapping, ++ struct ocfs2_write_ctxt *wc, ++ u32 cpos, loff_t user_pos, int new, ++ struct page *mmap_page) + { +- int ret, i, numpages = 1, new; +- unsigned int copied = 0; +- u32 tmp_pos; +- u64 v_blkno, p_blkno; +- struct address_space *mapping = file->f_mapping; ++ int ret = 0, i; ++ unsigned long start, target_index, index; + struct inode *inode = mapping->host; +- unsigned long index, start; +- struct page **cpages; + +- new = phys == 0 ? 1 : 0; ++ target_index = user_pos >> PAGE_CACHE_SHIFT; + + /* + * Figure out how many pages we'll be manipulating here. For + * non allocating write, we just change the one + * page. Otherwise, we'll need a whole clusters worth. + */ +- if (new) +- numpages = ocfs2_pages_per_cluster(inode->i_sb); +- +- cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS); +- if (!cpages) { +- ret = -ENOMEM; +- mlog_errno(ret); +- return ret; +- } +- +- /* +- * Fill our page array first. That way we've grabbed enough so +- * that we can zero and flush if we error after adding the +- * extent. +- */ + if (new) { +- start = ocfs2_align_clusters_to_page_index(inode->i_sb, +- wc->w_cpos); +- v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos); ++ wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); ++ start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); + } else { +- start = wc->w_pos >> PAGE_CACHE_SHIFT; +- v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits; ++ wc->w_num_pages = 1; ++ start = target_index; + } + +- for(i = 0; i < numpages; i++) { ++ for(i = 0; i < wc->w_num_pages; i++) { + index = start + i; + +- cpages[i] = find_or_create_page(mapping, index, GFP_NOFS); +- if (!cpages[i]) { ++ if (index == target_index && mmap_page) { ++ /* ++ * ocfs2_pagemkwrite() is a little different ++ * and wants us to directly use the page ++ * passed in. ++ */ ++ lock_page(mmap_page); ++ ++ if (mmap_page->mapping != mapping) { ++ unlock_page(mmap_page); ++ /* ++ * Sanity check - the locking in ++ * ocfs2_pagemkwrite() should ensure ++ * that this code doesn't trigger. ++ */ ++ ret = -EINVAL; ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ page_cache_get(mmap_page); ++ wc->w_pages[i] = mmap_page; ++ } else { ++ wc->w_pages[i] = find_or_create_page(mapping, index, ++ GFP_NOFS); ++ if (!wc->w_pages[i]) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + } + ++ if (index == target_index) ++ wc->w_target_page = wc->w_pages[i]; ++ } ++out: ++ return ret; ++} ++ ++/* ++ * Prepare a single cluster for write one cluster into the file. ++ */ ++static int ocfs2_write_cluster(struct address_space *mapping, ++ u32 phys, unsigned int unwritten, ++ struct ocfs2_alloc_context *data_ac, ++ struct ocfs2_alloc_context *meta_ac, ++ struct ocfs2_write_ctxt *wc, u32 cpos, ++ loff_t user_pos, unsigned user_len) ++{ ++ int ret, i, new, should_zero = 0; ++ u64 v_blkno, p_blkno; ++ struct inode *inode = mapping->host; ++ ++ new = phys == 0 ? 1 : 0; ++ if (new || unwritten) ++ should_zero = 1; ++ + if (new) { ++ u32 tmp_pos; ++ + /* + * This is safe to call with the page locks - it won't take + * any additional semaphores or cluster locks. 
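
The page-selection rule in ocfs2_grab_pages_for_write() above can be shown with plain shifts: a non-allocating write touches only the target page, while an allocating write with clusters larger than pages must take every page of the cluster so the new region can be zeroed. A stand-alone example assuming a hypothetical 16K-cluster / 4K-page geometry:

#include <stdio.h>

int main(void)
{
        unsigned long long pos = 0x6234;        /* byte offset of the write */
        unsigned page_shift = 12, csize_shift = 14;
        unsigned pages_per_cluster = 1u << (csize_shift - page_shift); /* 4 */
        unsigned long target = pos >> page_shift;       /* page 6 */
        unsigned long cpos = pos >> csize_shift;        /* cluster 1 */
        unsigned long start = cpos * pages_per_cluster; /* page 4 */

        printf("non-allocating: pages [%lu, %lu]\n", target, target);
        printf("allocating:     pages [%lu, %lu]\n", start,
               start + pages_per_cluster - 1);          /* pages 4..7 */
        return 0;
}
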
+ */
+- tmp_pos = wc->w_cpos;
++ tmp_pos = cpos;
+ ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
+- &tmp_pos, 1, di_bh, handle,
+- data_ac, meta_ac, NULL);
++ &tmp_pos, 1, 0, wc->w_di_bh,
++ wc->w_handle, data_ac,
++ meta_ac, NULL);
+ /*
+ * This shouldn't happen because we must have already
+ * calculated the correct meta data allocation required. The
+@@ -1143,159 +1167,433 @@
+ mlog_errno(ret);
+ goto out;
+ }
++ } else if (unwritten) {
++ ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
++ wc->w_handle, cpos, 1, phys,
++ meta_ac, &wc->w_dealloc);
++ if (ret < 0) {
++ mlog_errno(ret);
++ goto out;
++ }
+ }
+
++ if (should_zero)
++ v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
++ else
++ v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
++
++ /*
++ * The only reason this should fail is due to an inability to
++ * find the extent added.
++ */
+ ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+ NULL);
+ if (ret < 0) {
++ ocfs2_error(inode->i_sb, "Corrupting extent for inode %llu, "
++ "at logical block %llu",
++ (unsigned long long)OCFS2_I(inode)->ip_blkno,
++ (unsigned long long)v_blkno);
++ goto out;
++ }
++
++ BUG_ON(p_blkno == 0);
++
++ for(i = 0; i < wc->w_num_pages; i++) {
++ int tmpret;
++
++ tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
++ wc->w_pages[i], cpos,
++ user_pos, user_len,
++ should_zero);
++ if (tmpret) {
++ mlog_errno(tmpret);
++ if (ret == 0)
++ ret = tmpret;
++ }
++ }
+
+ /*
+- * XXX: Should we go readonly here?
++ * We only have cleanup to do in case of allocating write.
+ */
++ if (ret && new)
++ ocfs2_write_failure(inode, wc, user_pos, user_len);
+
+- mlog_errno(ret);
+- goto out;
+- }
++out:
+
+- BUG_ON(p_blkno == 0);
++ return ret;
++}
+
+- for(i = 0; i < numpages; i++) {
+- ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i],
+- wc, new);
+- if (ret < 0) {
++static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
++ struct ocfs2_alloc_context *data_ac,
++ struct ocfs2_alloc_context *meta_ac,
++ struct ocfs2_write_ctxt *wc,
++ loff_t pos, unsigned len)
++{
++ int ret, i;
++ struct ocfs2_write_cluster_desc *desc;
++
++ for (i = 0; i < wc->w_clen; i++) {
++ desc = &wc->w_desc[i];
++
++ ret = ocfs2_write_cluster(mapping, desc->c_phys,
++ desc->c_unwritten, data_ac, meta_ac,
++ wc, desc->c_cpos, pos, len);
++ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+-
+- copied += ret;
+ }
+
++ ret = 0;
+ out:
+- for(i = 0; i < numpages; i++) {
+- unlock_page(cpages[i]);
+- mark_page_accessed(cpages[i]);
+- page_cache_release(cpages[i]);
+- }
+- kfree(cpages);
+-
+- return copied ? copied : ret;
++ return ret;
+ }
+
+-static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc,
+- struct ocfs2_super *osb, loff_t pos,
+- size_t count, ocfs2_page_writer *cb,
+- void *cb_priv)
++/*
++ * ocfs2_write_end() wants to know which parts of the target page it
++ * should complete the write on. It's easiest to compute them ahead of
++ * time when a more complete view of the write is available.
++ */ ++static void ocfs2_set_target_boundaries(struct ocfs2_super *osb, ++ struct ocfs2_write_ctxt *wc, ++ loff_t pos, unsigned len, int alloc) + { +- wc->w_count = count; +- wc->w_pos = pos; +- wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits; +- wc->w_finished_copy = 0; ++ struct ocfs2_write_cluster_desc *desc; + +- if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) +- wc->w_large_pages = 1; +- else +- wc->w_large_pages = 0; ++ wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1); ++ wc->w_target_to = wc->w_target_from + len; ++ ++ if (alloc == 0) ++ return; ++ ++ /* ++ * Allocating write - we may have different boundaries based ++ * on page size and cluster size. ++ * ++ * NOTE: We can no longer compute one value from the other as ++ * the actual write length and user provided length may be ++ * different. ++ */ + +- wc->w_write_data_page = cb; +- wc->w_private = cb_priv; ++ if (wc->w_large_pages) { ++ /* ++ * We only care about the 1st and last cluster within ++ * our range and whether they should be zero'd or not. Either ++ * value may be extended out to the start/end of a ++ * newly allocated cluster. ++ */ ++ desc = &wc->w_desc[0]; ++ if (ocfs2_should_zero_cluster(desc)) ++ ocfs2_figure_cluster_boundaries(osb, ++ desc->c_cpos, ++ &wc->w_target_from, ++ NULL); ++ ++ desc = &wc->w_desc[wc->w_clen - 1]; ++ if (ocfs2_should_zero_cluster(desc)) ++ ocfs2_figure_cluster_boundaries(osb, ++ desc->c_cpos, ++ NULL, ++ &wc->w_target_to); ++ } else { ++ wc->w_target_from = 0; ++ wc->w_target_to = PAGE_CACHE_SIZE; ++ } + } + + /* +- * Write a cluster to an inode. The cluster may not be allocated yet, +- * in which case it will be. This only exists for buffered writes - +- * O_DIRECT takes a more "traditional" path through the kernel. ++ * Populate each single-cluster write descriptor in the write context ++ * with information about the i/o to be done. + * +- * The caller is responsible for incrementing pos, written counts, etc +- * +- * For file systems that don't support sparse files, pre-allocation +- * and page zeroing up until cpos should be done prior to this +- * function call. +- * +- * Callers should be holding i_sem, and the rw cluster lock. ++ * Returns the number of clusters that will have to be allocated, as ++ * well as a worst case estimate of the number of extent records that ++ * would have to be created during a write to an unwritten region. ++ */ ++static int ocfs2_populate_write_desc(struct inode *inode, ++ struct ocfs2_write_ctxt *wc, ++ unsigned int *clusters_to_alloc, ++ unsigned int *extents_to_split) ++{ ++ int ret; ++ struct ocfs2_write_cluster_desc *desc; ++ unsigned int num_clusters = 0; ++ unsigned int ext_flags = 0; ++ u32 phys = 0; ++ int i; ++ ++ *clusters_to_alloc = 0; ++ *extents_to_split = 0; ++ ++ for (i = 0; i < wc->w_clen; i++) { ++ desc = &wc->w_desc[i]; ++ desc->c_cpos = wc->w_cpos + i; ++ ++ if (num_clusters == 0) { ++ /* ++ * Need to look up the next extent record. ++ */ ++ ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys, ++ &num_clusters, &ext_flags); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ /* ++ * Assume worst case - that we're writing in ++ * the middle of the extent. + * +- * Returns the number of user bytes written, or less than zero for +- * error. ++ * We can assume that the write proceeds from ++ * left to right, in which case the extent ++ * insert code is smart enough to coalesce the ++ * next splits into the previous records created. 
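
A stand-alone model of the per-cluster descriptor walk that ocfs2_populate_write_desc() performs (its body continues just below). The fake per-cluster arrays stand in for ocfs2_get_clusters(), and the counting of extents_to_split is simplified to once per unwritten run, in the same worst-case spirit as the estimate above:

#include <stdio.h>

struct desc { unsigned cpos, phys, new, unwritten; };

int main(void)
{
        /* per-cluster (phys, unwritten) for clusters 10..13 of a file:
         * two holes, then an unwritten extent */
        unsigned phys[4]      = { 0, 0, 500, 501 };
        unsigned unwritten[4] = { 0, 0, 1,   1   };
        struct desc d[4];
        unsigned i, to_alloc = 0, to_split = 0;

        for (i = 0; i < 4; i++) {
                d[i].cpos = 10 + i;
                d[i].phys = phys[i];
                d[i].new = (phys[i] == 0);
                d[i].unwritten = unwritten[i];
                if (d[i].new)
                        to_alloc++;     /* hole: needs a fresh cluster */
                else if (d[i].unwritten && (i == 0 || !unwritten[i - 1]))
                        to_split += 2;  /* worst case: a split in the middle
                                           of an extent costs two records */
        }
        printf("clusters_to_alloc=%u extents_to_split=%u\n",
               to_alloc, to_split);     /* 2 and 2 for this layout */
        return 0;
}
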
+ */ +-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, +- size_t count, ocfs2_page_writer *actor, +- void *priv) ++ if (ext_flags & OCFS2_EXT_UNWRITTEN) ++ *extents_to_split = *extents_to_split + 2; ++ } else if (phys) { ++ /* ++ * Only increment phys if it doesn't describe ++ * a hole. ++ */ ++ phys++; ++ } ++ ++ desc->c_phys = phys; ++ if (phys == 0) { ++ desc->c_new = 1; ++ *clusters_to_alloc = *clusters_to_alloc + 1; ++ } ++ if (ext_flags & OCFS2_EXT_UNWRITTEN) ++ desc->c_unwritten = 1; ++ ++ num_clusters--; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++int ocfs2_write_begin_nolock(struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata, ++ struct buffer_head *di_bh, struct page *mmap_page) + { + int ret, credits = OCFS2_INODE_UPDATE_CREDITS; +- ssize_t written = 0; +- u32 phys; +- struct inode *inode = file->f_mapping->host; ++ unsigned int clusters_to_alloc, extents_to_split; ++ struct ocfs2_write_ctxt *wc; ++ struct inode *inode = mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); +- struct buffer_head *di_bh = NULL; + struct ocfs2_dinode *di; + struct ocfs2_alloc_context *data_ac = NULL; + struct ocfs2_alloc_context *meta_ac = NULL; + handle_t *handle; +- struct ocfs2_write_ctxt wc; + +- ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv); ++ ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); ++ if (ret) { ++ mlog_errno(ret); ++ return ret; ++ } + +- ret = ocfs2_meta_lock(inode, &di_bh, 1); ++ ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, ++ &extents_to_split); + if (ret) { + mlog_errno(ret); + goto out; + } +- di = (struct ocfs2_dinode *)di_bh->b_data; ++ ++ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; + + /* +- * Take alloc sem here to prevent concurrent lookups. That way +- * the mapping, zeroing and tree manipulation within +- * ocfs2_write() will be safe against ->readpage(). This +- * should also serve to lock out allocation from a shared +- * writeable region. ++ * We set w_target_from, w_target_to here so that ++ * ocfs2_write_end() knows which range in the target page to ++ * write out. An allocation requires that we write the entire ++ * cluster range. + */ +- down_write(&OCFS2_I(inode)->ip_alloc_sem); +- +- ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL); ++ if (clusters_to_alloc || extents_to_split) { ++ /* ++ * XXX: We are stretching the limits of ++ * ocfs2_lock_allocators(). It greatly over-estimates ++ * the work to be done. ++ */ ++ ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, ++ extents_to_split, &data_ac, &meta_ac); + if (ret) { + mlog_errno(ret); +- goto out_meta; ++ goto out; + } + +- /* phys == 0 means that allocation is required. */ +- if (phys == 0) { +- ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac); ++ credits = ocfs2_calc_extend_credits(inode->i_sb, di, ++ clusters_to_alloc); ++ ++ } ++ ++ ocfs2_set_target_boundaries(osb, wc, pos, len, ++ clusters_to_alloc + extents_to_split); ++ ++ handle = ocfs2_start_trans(osb, credits); ++ if (IS_ERR(handle)) { ++ ret = PTR_ERR(handle); ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ wc->w_handle = handle; ++ ++ /* ++ * We don't want this to fail in ocfs2_write_end(), so do it ++ * here. ++ */ ++ ret = ocfs2_journal_access(handle, inode, wc->w_di_bh, ++ OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); +- goto out_meta; ++ goto out_commit; + } + +- credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1); ++ /* ++ * Fill our page array first. 
That way we've grabbed enough so ++ * that we can zero and flush if we error after adding the ++ * extent. ++ */ ++ ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, ++ clusters_to_alloc + extents_to_split, ++ mmap_page); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_commit; + } + +- ret = ocfs2_data_lock(inode, 1); ++ ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos, ++ len); + if (ret) { + mlog_errno(ret); +- goto out_meta; ++ goto out_commit; + } + +- handle = ocfs2_start_trans(osb, credits); +- if (IS_ERR(handle)) { +- ret = PTR_ERR(handle); ++ if (data_ac) ++ ocfs2_free_alloc_context(data_ac); ++ if (meta_ac) ++ ocfs2_free_alloc_context(meta_ac); ++ ++ *pagep = wc->w_target_page; ++ *fsdata = wc; ++ return 0; ++out_commit: ++ ocfs2_commit_trans(osb, handle); ++ ++out: ++ ocfs2_free_write_ctxt(wc); ++ ++ if (data_ac) ++ ocfs2_free_alloc_context(data_ac); ++ if (meta_ac) ++ ocfs2_free_alloc_context(meta_ac); ++ return ret; ++} ++ ++int ocfs2_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata) ++{ ++ int ret; ++ struct buffer_head *di_bh = NULL; ++ struct inode *inode = mapping->host; ++ ++ ret = ocfs2_meta_lock(inode, &di_bh, 1); ++ if (ret) { + mlog_errno(ret); +- goto out_data; ++ return ret; + } + +- written = ocfs2_write(file, phys, handle, di_bh, data_ac, +- meta_ac, &wc); +- if (written < 0) { +- ret = written; ++ /* ++ * Take alloc sem here to prevent concurrent lookups. That way ++ * the mapping, zeroing and tree manipulation within ++ * ocfs2_write() will be safe against ->readpage(). This ++ * should also serve to lock out allocation from a shared ++ * writeable region. ++ */ ++ down_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ++ ret = ocfs2_data_lock(inode, 1); ++ if (ret) { + mlog_errno(ret); +- goto out_commit; ++ goto out_fail; + } + +- ret = ocfs2_journal_access(handle, inode, di_bh, +- OCFS2_JOURNAL_ACCESS_WRITE); ++ ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, ++ fsdata, di_bh, NULL); + if (ret) { + mlog_errno(ret); +- goto out_commit; ++ goto out_fail_data; ++ } ++ ++ brelse(di_bh); ++ ++ return 0; ++ ++out_fail_data: ++ ocfs2_data_unlock(inode, 1); ++out_fail: ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ++ brelse(di_bh); ++ ocfs2_meta_unlock(inode, 1); ++ ++ return ret; ++} ++ ++int ocfs2_write_end_nolock(struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ ++ int i; ++ unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1); ++ struct inode *inode = mapping->host; ++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ struct ocfs2_write_ctxt *wc = fsdata; ++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; ++ handle_t *handle = wc->w_handle; ++ struct page *tmppage; ++ ++ if (unlikely(copied < len)) { ++ if (!PageUptodate(wc->w_target_page)) ++ copied = 0; ++ ++ ocfs2_zero_new_buffers(wc->w_target_page, start+copied, ++ start+len); ++ } ++ flush_dcache_page(wc->w_target_page); ++ ++ for(i = 0; i < wc->w_num_pages; i++) { ++ tmppage = wc->w_pages[i]; ++ ++ if (tmppage == wc->w_target_page) { ++ from = wc->w_target_from; ++ to = wc->w_target_to; ++ ++ BUG_ON(from > PAGE_CACHE_SIZE || ++ to > PAGE_CACHE_SIZE || ++ to < from); ++ } else { ++ /* ++ * Pages adjacent to the target (if any) imply ++ * a hole-filling write in which case we want ++ * to flush their entire range. 
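
The short-copy handling that ocfs2_write_end_nolock() performs above is a small amount of arithmetic; here is a stand-alone model, with a plain flag standing in for PageUptodate():

#include <stdio.h>

int main(void)
{
        unsigned start = 100, len = 900, copied = 300; /* short copy */
        int page_uptodate = 0;

        if (copied < len) {
                /* A partially filled, not-uptodate page cannot be
                 * trusted, so the whole write is dropped and the
                 * prepared region re-zeroed. */
                if (!page_uptodate)
                        copied = 0;
                printf("re-zero bytes [%u, %u)\n",
                       start + copied, start + len);
        }
        printf("commit %u bytes\n", copied);
        return 0;
}
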
++ */ ++ from = 0; ++ to = PAGE_CACHE_SIZE; ++ } ++ ++ if (ocfs2_should_order_data(inode)) ++ walk_page_buffers(wc->w_handle, page_buffers(tmppage), ++ from, to, NULL, ++ ocfs2_journal_dirty_data); ++ ++ block_commit_write(tmppage, from, to); + } + +- pos += written; ++ pos += copied; + if (pos > inode->i_size) { + i_size_write(inode, pos); + mark_inode_dirty(inode); +@@ -1306,28 +1604,31 @@ + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + +- ret = ocfs2_journal_dirty(handle, di_bh); +- if (ret) +- mlog_errno(ret); ++ ocfs2_journal_dirty(handle, wc->w_di_bh); + +-out_commit: + ocfs2_commit_trans(osb, handle); + +-out_data: +- ocfs2_data_unlock(inode, 1); ++ ocfs2_run_deallocs(osb, &wc->w_dealloc); ++ ++ ocfs2_free_write_ctxt(wc); ++ ++ return copied; ++} ++ ++int ocfs2_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata) ++{ ++ int ret; ++ struct inode *inode = mapping->host; ++ ++ ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata); + +-out_meta: ++ ocfs2_data_unlock(inode, 1); + up_write(&OCFS2_I(inode)->ip_alloc_sem); + ocfs2_meta_unlock(inode, 1); + +-out: +- brelse(di_bh); +- if (data_ac) +- ocfs2_free_alloc_context(data_ac); +- if (meta_ac) +- ocfs2_free_alloc_context(meta_ac); +- +- return written ? written : ret; ++ return ret; + } + + const struct address_space_operations ocfs2_aops = { +diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.h linux-2.6.22-591/fs/ocfs2/aops.h +--- linux-2.6.22-570/fs/ocfs2/aops.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/aops.h 2007-12-21 15:36:12.000000000 -0500 +@@ -42,57 +42,22 @@ + int (*fn)( handle_t *handle, + struct buffer_head *bh)); + +-struct ocfs2_write_ctxt; +-typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *, +- u64 *, unsigned int *, unsigned int *); +- +-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos, +- size_t count, ocfs2_page_writer *actor, +- void *priv); +- +-struct ocfs2_write_ctxt { +- size_t w_count; +- loff_t w_pos; +- u32 w_cpos; +- unsigned int w_finished_copy; +- +- /* This is true if page_size > cluster_size */ +- unsigned int w_large_pages; +- +- /* Filler callback and private data */ +- ocfs2_page_writer *w_write_data_page; +- void *w_private; +- +- /* Only valid for the filler callback */ +- struct page *w_this_page; +- unsigned int w_this_page_new; +-}; +- +-struct ocfs2_buffered_write_priv { +- char *b_src_buf; +- const struct iovec *b_cur_iov; /* Current iovec */ +- size_t b_cur_off; /* Offset in the +- * current iovec */ +-}; +-int ocfs2_map_and_write_user_data(struct inode *inode, +- struct ocfs2_write_ctxt *wc, +- u64 *p_blkno, +- unsigned int *ret_from, +- unsigned int *ret_to); +- +-struct ocfs2_splice_write_priv { +- struct splice_desc *s_sd; +- struct pipe_buffer *s_buf; +- struct pipe_inode_info *s_pipe; +- /* Neither offset value is ever larger than one page */ +- unsigned int s_offset; +- unsigned int s_buf_offset; +-}; +-int ocfs2_map_and_write_splice_data(struct inode *inode, +- struct ocfs2_write_ctxt *wc, +- u64 *p_blkno, +- unsigned int *ret_from, +- unsigned int *ret_to); ++int ocfs2_write_begin(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata); ++ ++int ocfs2_write_end(struct file *file, struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned 
copied, ++ struct page *page, void *fsdata); ++ ++int ocfs2_write_end_nolock(struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned copied, ++ struct page *page, void *fsdata); ++ ++int ocfs2_write_begin_nolock(struct address_space *mapping, ++ loff_t pos, unsigned len, unsigned flags, ++ struct page **pagep, void **fsdata, ++ struct buffer_head *di_bh, struct page *mmap_page); + + /* all ocfs2_dio_end_io()'s fault */ + #define ocfs2_iocb_is_rw_locked(iocb) \ +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c +--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1335,6 +1335,7 @@ + ret = wait_event_interruptible(o2hb_steady_queue, + atomic_read(®->hr_steady_iterations) == 0); + if (ret) { ++ /* We got interrupted (hello ptrace!). Clean up */ + spin_lock(&o2hb_live_lock); + hb_task = reg->hr_task; + reg->hr_task = NULL; +@@ -1345,7 +1346,16 @@ + goto out; + } + ++ /* Ok, we were woken. Make sure it wasn't by drop_item() */ ++ spin_lock(&o2hb_live_lock); ++ hb_task = reg->hr_task; ++ spin_unlock(&o2hb_live_lock); ++ ++ if (hb_task) + ret = count; ++ else ++ ret = -EIO; ++ + out: + if (filp) + fput(filp); +@@ -1523,6 +1533,15 @@ + if (hb_task) + kthread_stop(hb_task); + ++ /* ++ * If we're racing a dev_write(), we need to wake them. They will ++ * check reg->hr_task ++ */ ++ if (atomic_read(®->hr_steady_iterations) != 0) { ++ atomic_set(®->hr_steady_iterations, 0); ++ wake_up(&o2hb_steady_queue); ++ } ++ + config_item_put(item); + } + +@@ -1665,7 +1684,67 @@ + } + EXPORT_SYMBOL_GPL(o2hb_setup_callback); + +-int o2hb_register_callback(struct o2hb_callback_func *hc) ++static struct o2hb_region *o2hb_find_region(const char *region_uuid) ++{ ++ struct o2hb_region *p, *reg = NULL; ++ ++ assert_spin_locked(&o2hb_live_lock); ++ ++ list_for_each_entry(p, &o2hb_all_regions, hr_all_item) { ++ if (!strcmp(region_uuid, config_item_name(&p->hr_item))) { ++ reg = p; ++ break; ++ } ++ } ++ ++ return reg; ++} ++ ++static int o2hb_region_get(const char *region_uuid) ++{ ++ int ret = 0; ++ struct o2hb_region *reg; ++ ++ spin_lock(&o2hb_live_lock); ++ ++ reg = o2hb_find_region(region_uuid); ++ if (!reg) ++ ret = -ENOENT; ++ spin_unlock(&o2hb_live_lock); ++ ++ if (ret) ++ goto out; ++ ++ ret = o2nm_depend_this_node(); ++ if (ret) ++ goto out; ++ ++ ret = o2nm_depend_item(®->hr_item); ++ if (ret) ++ o2nm_undepend_this_node(); ++ ++out: ++ return ret; ++} ++ ++static void o2hb_region_put(const char *region_uuid) ++{ ++ struct o2hb_region *reg; ++ ++ spin_lock(&o2hb_live_lock); ++ ++ reg = o2hb_find_region(region_uuid); ++ ++ spin_unlock(&o2hb_live_lock); ++ ++ if (reg) { ++ o2nm_undepend_item(®->hr_item); ++ o2nm_undepend_this_node(); ++ } ++} ++ ++int o2hb_register_callback(const char *region_uuid, ++ struct o2hb_callback_func *hc) + { + struct o2hb_callback_func *tmp; + struct list_head *iter; +@@ -1681,6 +1760,12 @@ + goto out; + } + ++ if (region_uuid) { ++ ret = o2hb_region_get(region_uuid); ++ if (ret) ++ goto out; ++ } ++ + down_write(&o2hb_callback_sem); + + list_for_each(iter, &hbcall->list) { +@@ -1702,16 +1787,21 @@ + } + EXPORT_SYMBOL_GPL(o2hb_register_callback); + +-void o2hb_unregister_callback(struct o2hb_callback_func *hc) ++void o2hb_unregister_callback(const char *region_uuid, ++ struct o2hb_callback_func *hc) + { + BUG_ON(hc->hc_magic != O2HB_CB_MAGIC); + + mlog(ML_HEARTBEAT, "on behalf of %p for funcs 
%p\n", + __builtin_return_address(0), hc); + ++ /* XXX Can this happen _with_ a region reference? */ + if (list_empty(&hc->hc_item)) + return; + ++ if (region_uuid) ++ o2hb_region_put(region_uuid); ++ + down_write(&o2hb_callback_sem); + + list_del_init(&hc->hc_item); +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h +--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/heartbeat.h 2007-12-21 15:36:12.000000000 -0500 +@@ -69,8 +69,10 @@ + o2hb_cb_func *func, + void *data, + int priority); +-int o2hb_register_callback(struct o2hb_callback_func *hc); +-void o2hb_unregister_callback(struct o2hb_callback_func *hc); ++int o2hb_register_callback(const char *region_uuid, ++ struct o2hb_callback_func *hc); ++void o2hb_unregister_callback(const char *region_uuid, ++ struct o2hb_callback_func *hc); + void o2hb_fill_node_map(unsigned long *map, + unsigned bytes); + void o2hb_init(void); +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/masklog.c linux-2.6.22-591/fs/ocfs2/cluster/masklog.c +--- linux-2.6.22-570/fs/ocfs2/cluster/masklog.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/masklog.c 2007-12-21 15:36:12.000000000 -0500 +@@ -74,7 +74,6 @@ + #define define_mask(_name) { \ + .attr = { \ + .name = #_name, \ +- .owner = THIS_MODULE, \ + .mode = S_IRUGO | S_IWUSR, \ + }, \ + .mask = ML_##_name, \ +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c +--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.c 2007-12-21 15:36:12.000000000 -0500 +@@ -900,6 +900,46 @@ + }, + }; + ++int o2nm_depend_item(struct config_item *item) ++{ ++ return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item); ++} ++ ++void o2nm_undepend_item(struct config_item *item) ++{ ++ configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item); ++} ++ ++int o2nm_depend_this_node(void) ++{ ++ int ret = 0; ++ struct o2nm_node *local_node; ++ ++ local_node = o2nm_get_node_by_num(o2nm_this_node()); ++ if (!local_node) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ ret = o2nm_depend_item(&local_node->nd_item); ++ o2nm_node_put(local_node); ++ ++out: ++ return ret; ++} ++ ++void o2nm_undepend_this_node(void) ++{ ++ struct o2nm_node *local_node; ++ ++ local_node = o2nm_get_node_by_num(o2nm_this_node()); ++ BUG_ON(!local_node); ++ ++ o2nm_undepend_item(&local_node->nd_item); ++ o2nm_node_put(local_node); ++} ++ ++ + static void __exit exit_o2nm(void) + { + if (ocfs2_table_header) +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h +--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/nodemanager.h 2007-12-21 15:36:12.000000000 -0500 +@@ -77,4 +77,9 @@ + void o2nm_node_get(struct o2nm_node *node); + void o2nm_node_put(struct o2nm_node *node); + ++int o2nm_depend_item(struct config_item *item); ++void o2nm_undepend_item(struct config_item *item); ++int o2nm_depend_this_node(void); ++void o2nm_undepend_this_node(void); ++ + #endif /* O2CLUSTER_NODEMANAGER_H */ +diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/tcp.c linux-2.6.22-591/fs/ocfs2/cluster/tcp.c +--- linux-2.6.22-570/fs/ocfs2/cluster/tcp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/cluster/tcp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -261,14 
+261,12 @@ + + static void o2net_complete_nodes_nsw(struct o2net_node *nn) + { +- struct list_head *iter, *tmp; ++ struct o2net_status_wait *nsw, *tmp; + unsigned int num_kills = 0; +- struct o2net_status_wait *nsw; + + assert_spin_locked(&nn->nn_lock); + +- list_for_each_safe(iter, tmp, &nn->nn_status_list) { +- nsw = list_entry(iter, struct o2net_status_wait, ns_node_item); ++ list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) { + o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0); + num_kills++; + } +@@ -764,13 +762,10 @@ + + void o2net_unregister_handler_list(struct list_head *list) + { +- struct list_head *pos, *n; +- struct o2net_msg_handler *nmh; ++ struct o2net_msg_handler *nmh, *n; + + write_lock(&o2net_handler_lock); +- list_for_each_safe(pos, n, list) { +- nmh = list_entry(pos, struct o2net_msg_handler, +- nh_unregister_item); ++ list_for_each_entry_safe(nmh, n, list, nh_unregister_item) { + mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n", + nmh->nh_func, nmh->nh_msg_type, nmh->nh_key); + rb_erase(&nmh->nh_node, &o2net_handler_tree); +@@ -1638,8 +1633,8 @@ + + void o2net_unregister_hb_callbacks(void) + { +- o2hb_unregister_callback(&o2net_hb_up); +- o2hb_unregister_callback(&o2net_hb_down); ++ o2hb_unregister_callback(NULL, &o2net_hb_up); ++ o2hb_unregister_callback(NULL, &o2net_hb_down); + } + + int o2net_register_hb_callbacks(void) +@@ -1651,9 +1646,9 @@ + o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB, + o2net_hb_node_up_cb, NULL, O2NET_HB_PRI); + +- ret = o2hb_register_callback(&o2net_hb_up); ++ ret = o2hb_register_callback(NULL, &o2net_hb_up); + if (ret == 0) +- ret = o2hb_register_callback(&o2net_hb_down); ++ ret = o2hb_register_callback(NULL, &o2net_hb_down); + + if (ret) + o2net_unregister_hb_callbacks(); +diff -Nurb linux-2.6.22-570/fs/ocfs2/dir.c linux-2.6.22-591/fs/ocfs2/dir.c +--- linux-2.6.22-570/fs/ocfs2/dir.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/dir.c 2007-12-21 15:36:12.000000000 -0500 +@@ -368,7 +368,7 @@ + u32 offset = OCFS2_I(dir)->ip_clusters; + + status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, +- 1, parent_fe_bh, handle, ++ 1, 0, parent_fe_bh, handle, + data_ac, meta_ac, NULL); + BUG_ON(status == -EAGAIN); + if (status < 0) { +diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c +--- linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmdomain.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1128,8 +1128,8 @@ + + static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm) + { +- o2hb_unregister_callback(&dlm->dlm_hb_up); +- o2hb_unregister_callback(&dlm->dlm_hb_down); ++ o2hb_unregister_callback(NULL, &dlm->dlm_hb_up); ++ o2hb_unregister_callback(NULL, &dlm->dlm_hb_down); + o2net_unregister_handler_list(&dlm->dlm_domain_handlers); + } + +@@ -1141,13 +1141,13 @@ + + o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB, + dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI); +- status = o2hb_register_callback(&dlm->dlm_hb_down); ++ status = o2hb_register_callback(NULL, &dlm->dlm_hb_down); + if (status) + goto bail; + + o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB, + dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI); +- status = o2hb_register_callback(&dlm->dlm_hb_up); ++ status = o2hb_register_callback(NULL, &dlm->dlm_hb_up); + if (status) + goto bail; + +diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c +--- 
linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmmaster.c 2007-12-21 15:36:12.000000000 -0500 +@@ -192,25 +192,20 @@ + static void dlm_dump_mles(struct dlm_ctxt *dlm) + { + struct dlm_master_list_entry *mle; +- struct list_head *iter; + + mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name); + spin_lock(&dlm->master_lock); +- list_for_each(iter, &dlm->master_list) { +- mle = list_entry(iter, struct dlm_master_list_entry, list); ++ list_for_each_entry(mle, &dlm->master_list, list) + dlm_print_one_mle(mle); +- } + spin_unlock(&dlm->master_lock); + } + + int dlm_dump_all_mles(const char __user *data, unsigned int len) + { +- struct list_head *iter; + struct dlm_ctxt *dlm; + + spin_lock(&dlm_domain_lock); +- list_for_each(iter, &dlm_domains) { +- dlm = list_entry (iter, struct dlm_ctxt, list); ++ list_for_each_entry(dlm, &dlm_domains, list) { + mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name); + dlm_dump_mles(dlm); + } +@@ -454,12 +449,10 @@ + char *name, unsigned int namelen) + { + struct dlm_master_list_entry *tmpmle; +- struct list_head *iter; + + assert_spin_locked(&dlm->master_lock); + +- list_for_each(iter, &dlm->master_list) { +- tmpmle = list_entry(iter, struct dlm_master_list_entry, list); ++ list_for_each_entry(tmpmle, &dlm->master_list, list) { + if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) + continue; + dlm_get_mle(tmpmle); +@@ -472,13 +465,10 @@ + void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) + { + struct dlm_master_list_entry *mle; +- struct list_head *iter; + + assert_spin_locked(&dlm->spinlock); + +- list_for_each(iter, &dlm->mle_hb_events) { +- mle = list_entry(iter, struct dlm_master_list_entry, +- hb_events); ++ list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { + if (node_up) + dlm_mle_node_up(dlm, mle, NULL, idx); + else +@@ -2434,7 +2424,7 @@ + int ret; + int i; + int count = 0; +- struct list_head *queue, *iter; ++ struct list_head *queue; + struct dlm_lock *lock; + + assert_spin_locked(&res->spinlock); +@@ -2453,8 +2443,7 @@ + ret = 0; + queue = &res->granted; + for (i = 0; i < 3; i++) { +- list_for_each(iter, queue) { +- lock = list_entry(iter, struct dlm_lock, list); ++ list_for_each_entry(lock, queue, list) { + ++count; + if (lock->ml.node == dlm->node_num) { + mlog(0, "found a lock owned by this node still " +@@ -2923,18 +2912,16 @@ + static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res) + { +- struct list_head *iter, *iter2; + struct list_head *queue = &res->granted; + int i, bit; +- struct dlm_lock *lock; ++ struct dlm_lock *lock, *next; + + assert_spin_locked(&res->spinlock); + + BUG_ON(res->owner == dlm->node_num); + + for (i=0; i<3; i++) { +- list_for_each_safe(iter, iter2, queue) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry_safe(lock, next, queue, list) { + if (lock->ml.node != dlm->node_num) { + mlog(0, "putting lock for node %u\n", + lock->ml.node); +@@ -2976,7 +2963,6 @@ + { + int i; + struct list_head *queue = &res->granted; +- struct list_head *iter; + struct dlm_lock *lock; + int nodenum; + +@@ -2984,10 +2970,9 @@ + + spin_lock(&res->spinlock); + for (i=0; i<3; i++) { +- list_for_each(iter, queue) { ++ list_for_each_entry(lock, queue, list) { + /* up to the caller to make sure this node + * is alive */ +- lock = list_entry (iter, struct dlm_lock, list); + if (lock->ml.node != dlm->node_num) { + spin_unlock(&res->spinlock); + return lock->ml.node; +@@ 
-3234,8 +3219,7 @@ + + void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) + { +- struct list_head *iter, *iter2; +- struct dlm_master_list_entry *mle; ++ struct dlm_master_list_entry *mle, *next; + struct dlm_lock_resource *res; + unsigned int hash; + +@@ -3245,9 +3229,7 @@ + + /* clean the master list */ + spin_lock(&dlm->master_lock); +- list_for_each_safe(iter, iter2, &dlm->master_list) { +- mle = list_entry(iter, struct dlm_master_list_entry, list); +- ++ list_for_each_entry_safe(mle, next, &dlm->master_list, list) { + BUG_ON(mle->type != DLM_MLE_BLOCK && + mle->type != DLM_MLE_MASTER && + mle->type != DLM_MLE_MIGRATION); +diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c +--- linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/dlm/dlmrecovery.c 2007-12-21 15:36:12.000000000 -0500 +@@ -158,8 +158,7 @@ + struct dlm_ctxt *dlm = + container_of(work, struct dlm_ctxt, dispatched_work); + LIST_HEAD(tmp_list); +- struct list_head *iter, *iter2; +- struct dlm_work_item *item; ++ struct dlm_work_item *item, *next; + dlm_workfunc_t *workfunc; + int tot=0; + +@@ -167,13 +166,12 @@ + list_splice_init(&dlm->work_list, &tmp_list); + spin_unlock(&dlm->work_lock); + +- list_for_each_safe(iter, iter2, &tmp_list) { ++ list_for_each_entry(item, &tmp_list, list) { + tot++; + } + mlog(0, "%s: work thread has %d work items\n", dlm->name, tot); + +- list_for_each_safe(iter, iter2, &tmp_list) { +- item = list_entry(iter, struct dlm_work_item, list); ++ list_for_each_entry_safe(item, next, &tmp_list, list) { + workfunc = item->func; + list_del_init(&item->list); + +@@ -549,7 +547,6 @@ + { + int status = 0; + struct dlm_reco_node_data *ndata; +- struct list_head *iter; + int all_nodes_done; + int destroy = 0; + int pass = 0; +@@ -567,8 +564,7 @@ + + /* safe to access the node data list without a lock, since this + * process is the only one to change the list */ +- list_for_each(iter, &dlm->reco.node_data) { +- ndata = list_entry (iter, struct dlm_reco_node_data, list); ++ list_for_each_entry(ndata, &dlm->reco.node_data, list) { + BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT); + ndata->state = DLM_RECO_NODE_DATA_REQUESTING; + +@@ -655,9 +651,7 @@ + * done, or if anyone died */ + all_nodes_done = 1; + spin_lock(&dlm_reco_state_lock); +- list_for_each(iter, &dlm->reco.node_data) { +- ndata = list_entry (iter, struct dlm_reco_node_data, list); +- ++ list_for_each_entry(ndata, &dlm->reco.node_data, list) { + mlog(0, "checking recovery state of node %u\n", + ndata->node_num); + switch (ndata->state) { +@@ -774,16 +768,14 @@ + + static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node) + { +- struct list_head *iter, *iter2; +- struct dlm_reco_node_data *ndata; ++ struct dlm_reco_node_data *ndata, *next; + LIST_HEAD(tmplist); + + spin_lock(&dlm_reco_state_lock); + list_splice_init(&dlm->reco.node_data, &tmplist); + spin_unlock(&dlm_reco_state_lock); + +- list_for_each_safe(iter, iter2, &tmplist) { +- ndata = list_entry (iter, struct dlm_reco_node_data, list); ++ list_for_each_entry_safe(ndata, next, &tmplist, list) { + list_del_init(&ndata->list); + kfree(ndata); + } +@@ -876,7 +868,6 @@ + struct dlm_lock_resource *res; + struct dlm_ctxt *dlm; + LIST_HEAD(resources); +- struct list_head *iter; + int ret; + u8 dead_node, reco_master; + int skip_all_done = 0; +@@ -920,8 +911,7 @@ + + /* any errors returned will be due to the new_master dying, + * the dlm_reco_thread should detect this 
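+ *
+ * The iterator conversion applied throughout this patch, for
+ * reference - the open-coded pair
+ *
+ *     list_for_each(iter, &resources) {
+ *             res = list_entry(iter, struct dlm_lock_resource,
+ *                              recovering);
+ *
+ * collapses to
+ *
+ *     list_for_each_entry(res, &resources, recovering) {
+ *
+ * with list_for_each_entry_safe() standing in for
+ * list_for_each_safe() wherever the loop body deletes entries.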
*/ +- list_for_each(iter, &resources) { +- res = list_entry (iter, struct dlm_lock_resource, recovering); ++ list_for_each_entry(res, &resources, recovering) { + ret = dlm_send_one_lockres(dlm, res, mres, reco_master, + DLM_MRES_RECOVERY); + if (ret < 0) { +@@ -983,7 +973,6 @@ + { + struct dlm_ctxt *dlm = data; + struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf; +- struct list_head *iter; + struct dlm_reco_node_data *ndata = NULL; + int ret = -EINVAL; + +@@ -1000,8 +989,7 @@ + dlm->reco.dead_node, done->node_idx, dlm->node_num); + + spin_lock(&dlm_reco_state_lock); +- list_for_each(iter, &dlm->reco.node_data) { +- ndata = list_entry (iter, struct dlm_reco_node_data, list); ++ list_for_each_entry(ndata, &dlm->reco.node_data, list) { + if (ndata->node_num != done->node_idx) + continue; + +@@ -1049,13 +1037,11 @@ + struct list_head *list, + u8 dead_node) + { +- struct dlm_lock_resource *res; +- struct list_head *iter, *iter2; ++ struct dlm_lock_resource *res, *next; + struct dlm_lock *lock; + + spin_lock(&dlm->spinlock); +- list_for_each_safe(iter, iter2, &dlm->reco.resources) { +- res = list_entry (iter, struct dlm_lock_resource, recovering); ++ list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { + /* always prune any $RECOVERY entries for dead nodes, + * otherwise hangs can occur during later recovery */ + if (dlm_is_recovery_lock(res->lockname.name, +@@ -1169,7 +1155,7 @@ + u8 flags, u8 master) + { + /* mres here is one full page */ +- memset(mres, 0, PAGE_SIZE); ++ clear_page(mres); + mres->lockname_len = namelen; + memcpy(mres->lockname, lockname, namelen); + mres->num_locks = 0; +@@ -1252,7 +1238,7 @@ + struct dlm_migratable_lockres *mres, + u8 send_to, u8 flags) + { +- struct list_head *queue, *iter; ++ struct list_head *queue; + int total_locks, i; + u64 mig_cookie = 0; + struct dlm_lock *lock; +@@ -1278,9 +1264,7 @@ + total_locks = 0; + for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) { + queue = dlm_list_idx_to_ptr(res, i); +- list_for_each(iter, queue) { +- lock = list_entry (iter, struct dlm_lock, list); +- ++ list_for_each_entry(lock, queue, list) { + /* add another lock. 
*/ + total_locks++; + if (!dlm_add_lock_to_array(lock, mres, i)) +@@ -1717,7 +1701,6 @@ + struct dlm_lockstatus *lksb = NULL; + int ret = 0; + int i, j, bad; +- struct list_head *iter; + struct dlm_lock *lock = NULL; + u8 from = O2NM_MAX_NODES; + unsigned int added = 0; +@@ -1755,8 +1738,7 @@ + spin_lock(&res->spinlock); + for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { + tmpq = dlm_list_idx_to_ptr(res, j); +- list_for_each(iter, tmpq) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry(lock, tmpq, list) { + if (lock->ml.cookie != ml->cookie) + lock = NULL; + else +@@ -1930,8 +1912,8 @@ + struct dlm_lock_resource *res) + { + int i; +- struct list_head *queue, *iter, *iter2; +- struct dlm_lock *lock; ++ struct list_head *queue; ++ struct dlm_lock *lock, *next; + + res->state |= DLM_LOCK_RES_RECOVERING; + if (!list_empty(&res->recovering)) { +@@ -1947,8 +1929,7 @@ + /* find any pending locks and put them back on proper list */ + for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) { + queue = dlm_list_idx_to_ptr(res, i); +- list_for_each_safe(iter, iter2, queue) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry_safe(lock, next, queue, list) { + dlm_lock_get(lock); + if (lock->convert_pending) { + /* move converting lock back to granted */ +@@ -2013,18 +1994,15 @@ + u8 dead_node, u8 new_master) + { + int i; +- struct list_head *iter, *iter2; + struct hlist_node *hash_iter; + struct hlist_head *bucket; +- +- struct dlm_lock_resource *res; ++ struct dlm_lock_resource *res, *next; + + mlog_entry_void(); + + assert_spin_locked(&dlm->spinlock); + +- list_for_each_safe(iter, iter2, &dlm->reco.resources) { +- res = list_entry (iter, struct dlm_lock_resource, recovering); ++ list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) { + if (res->owner == dead_node) { + list_del_init(&res->recovering); + spin_lock(&res->spinlock); +@@ -2099,7 +2077,7 @@ + static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 dead_node) + { +- struct list_head *iter, *queue; ++ struct list_head *queue; + struct dlm_lock *lock; + int blank_lvb = 0, local = 0; + int i; +@@ -2121,8 +2099,7 @@ + + for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) { + queue = dlm_list_idx_to_ptr(res, i); +- list_for_each(iter, queue) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry(lock, queue, list) { + if (lock->ml.node == search_node) { + if (dlm_lvb_needs_invalidation(lock, local)) { + /* zero the lksb lvb and lockres lvb */ +@@ -2143,8 +2120,7 @@ + static void dlm_free_dead_locks(struct dlm_ctxt *dlm, + struct dlm_lock_resource *res, u8 dead_node) + { +- struct list_head *iter, *tmpiter; +- struct dlm_lock *lock; ++ struct dlm_lock *lock, *next; + unsigned int freed = 0; + + /* this node is the lockres master: +@@ -2155,24 +2131,21 @@ + assert_spin_locked(&res->spinlock); + + /* TODO: check pending_asts, pending_basts here */ +- list_for_each_safe(iter, tmpiter, &res->granted) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry_safe(lock, next, &res->granted, list) { + if (lock->ml.node == dead_node) { + list_del_init(&lock->list); + dlm_lock_put(lock); + freed++; + } + } +- list_for_each_safe(iter, tmpiter, &res->converting) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry_safe(lock, next, &res->converting, list) { + if (lock->ml.node == dead_node) { + list_del_init(&lock->list); + dlm_lock_put(lock); + freed++; + } + } +- list_for_each_safe(iter, 
tmpiter, &res->blocked) { +- lock = list_entry (iter, struct dlm_lock, list); ++ list_for_each_entry_safe(lock, next, &res->blocked, list) { + if (lock->ml.node == dead_node) { + list_del_init(&lock->list); + dlm_lock_put(lock); +diff -Nurb linux-2.6.22-570/fs/ocfs2/dlmglue.c linux-2.6.22-591/fs/ocfs2/dlmglue.c +--- linux-2.6.22-570/fs/ocfs2/dlmglue.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/dlmglue.c 2007-12-21 15:36:12.000000000 -0500 +@@ -600,15 +600,13 @@ + static void lockres_set_flags(struct ocfs2_lock_res *lockres, + unsigned long newflags) + { +- struct list_head *pos, *tmp; +- struct ocfs2_mask_waiter *mw; ++ struct ocfs2_mask_waiter *mw, *tmp; + + assert_spin_locked(&lockres->l_lock); + + lockres->l_flags = newflags; + +- list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) { +- mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item); ++ list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) { + if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal) + continue; + +diff -Nurb linux-2.6.22-570/fs/ocfs2/endian.h linux-2.6.22-591/fs/ocfs2/endian.h +--- linux-2.6.22-570/fs/ocfs2/endian.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/endian.h 2007-12-21 15:36:12.000000000 -0500 +@@ -32,6 +32,11 @@ + *var = cpu_to_le32(le32_to_cpu(*var) + val); + } + ++static inline void le64_add_cpu(__le64 *var, u64 val) ++{ ++ *var = cpu_to_le64(le64_to_cpu(*var) + val); ++} ++ + static inline void le32_and_cpu(__le32 *var, u32 val) + { + *var = cpu_to_le32(le32_to_cpu(*var) & val); +diff -Nurb linux-2.6.22-570/fs/ocfs2/extent_map.c linux-2.6.22-591/fs/ocfs2/extent_map.c +--- linux-2.6.22-570/fs/ocfs2/extent_map.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/extent_map.c 2007-12-21 15:36:12.000000000 -0500 +@@ -109,17 +109,14 @@ + */ + void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos) + { +- struct list_head *p, *n; +- struct ocfs2_extent_map_item *emi; ++ struct ocfs2_extent_map_item *emi, *n; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + struct ocfs2_extent_map *em = &oi->ip_extent_map; + LIST_HEAD(tmp_list); + unsigned int range; + + spin_lock(&oi->ip_lock); +- list_for_each_safe(p, n, &em->em_list) { +- emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); +- ++ list_for_each_entry_safe(emi, n, &em->em_list, ei_list) { + if (emi->ei_cpos >= cpos) { + /* Full truncate of this record. */ + list_move(&emi->ei_list, &tmp_list); +@@ -136,8 +133,7 @@ + } + spin_unlock(&oi->ip_lock); + +- list_for_each_safe(p, n, &tmp_list) { +- emi = list_entry(p, struct ocfs2_extent_map_item, ei_list); ++ list_for_each_entry_safe(emi, n, &tmp_list, ei_list) { + list_del(&emi->ei_list); + kfree(emi); + } +@@ -377,37 +373,6 @@ + return ret; + } + +-/* +- * Return the index of the extent record which contains cluster #v_cluster. +- * -1 is returned if it was not found. +- * +- * Should work fine on interior and exterior nodes. 
+- */ +-static int ocfs2_search_extent_list(struct ocfs2_extent_list *el, +- u32 v_cluster) +-{ +- int ret = -1; +- int i; +- struct ocfs2_extent_rec *rec; +- u32 rec_end, rec_start, clusters; +- +- for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { +- rec = &el->l_recs[i]; +- +- rec_start = le32_to_cpu(rec->e_cpos); +- clusters = ocfs2_rec_clusters(el, rec); +- +- rec_end = rec_start + clusters; +- +- if (v_cluster >= rec_start && v_cluster < rec_end) { +- ret = i; +- break; +- } +- } +- +- return ret; +-} +- + int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, + u32 *p_cluster, u32 *num_clusters, + unsigned int *extent_flags) +diff -Nurb linux-2.6.22-570/fs/ocfs2/file.c linux-2.6.22-591/fs/ocfs2/file.c +--- linux-2.6.22-570/fs/ocfs2/file.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/file.c 2007-12-21 15:36:12.000000000 -0500 +@@ -326,9 +326,6 @@ + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)new_i_size); + +- unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); +- truncate_inode_pages(inode->i_mapping, new_i_size); +- + fe = (struct ocfs2_dinode *) di_bh->b_data; + if (!OCFS2_IS_VALID_DINODE(fe)) { + OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); +@@ -363,16 +360,23 @@ + if (new_i_size == le64_to_cpu(fe->i_size)) + goto bail; + ++ down_write(&OCFS2_I(inode)->ip_alloc_sem); ++ + /* This forces other nodes to sync and drop their pages. Do + * this even if we have a truncate without allocation change - + * ocfs2 cluster sizes can be much greater than page size, so + * we have to truncate them anyway. */ + status = ocfs2_data_lock(inode, 1); + if (status < 0) { ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ + mlog_errno(status); + goto bail; + } + ++ unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); ++ truncate_inode_pages(inode->i_mapping, new_i_size); ++ + /* alright, we're going to need to do a full blown alloc size + * change. Orphan the inode so that recovery can complete the + * truncate if necessary. This does the task of marking +@@ -399,6 +403,8 @@ + bail_unlock_data: + ocfs2_data_unlock(inode, 1); + ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ + bail: + + mlog_exit(status); +@@ -419,6 +425,7 @@ + struct inode *inode, + u32 *logical_offset, + u32 clusters_to_add, ++ int mark_unwritten, + struct buffer_head *fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *data_ac, +@@ -431,9 +438,13 @@ + enum ocfs2_alloc_restarted reason = RESTART_NONE; + u32 bit_off, num_bits; + u64 block; ++ u8 flags = 0; + + BUG_ON(!clusters_to_add); + ++ if (mark_unwritten) ++ flags = OCFS2_EXT_UNWRITTEN; ++ + free_extents = ocfs2_num_free_extents(osb, inode, fe); + if (free_extents < 0) { + status = free_extents; +@@ -483,7 +494,7 @@ + num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); + status = ocfs2_insert_extent(osb, handle, inode, fe_bh, + *logical_offset, block, num_bits, +- meta_ac); ++ flags, meta_ac); + if (status < 0) { + mlog_errno(status); + goto leave; +@@ -516,25 +527,28 @@ + * For a given allocation, determine which allocators will need to be + * accessed, and lock them, reserving the appropriate number of bits. + * +- * Called from ocfs2_extend_allocation() for file systems which don't +- * support holes, and from ocfs2_write() for file systems which +- * understand sparse inodes. ++ * Sparse file systems call this from ocfs2_write_begin_nolock() ++ * and ocfs2_allocate_unwritten_extents(). 
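++ *
++ * The reservation math, worked through: one cluster written
++ * into the middle of an unwritten extent can split it into
++ * three records (head, new cluster, tail) - up to two extra
++ * records per split - which is where the function's
++ * max_recs_needed = clusters_to_add + 2 * extents_to_split
++ * comes from.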
++ * ++ * File systems which don't support holes call this from ++ * ocfs2_extend_allocation(). + */ + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, +- u32 clusters_to_add, ++ u32 clusters_to_add, u32 extents_to_split, + struct ocfs2_alloc_context **data_ac, + struct ocfs2_alloc_context **meta_ac) + { + int ret, num_free_extents; ++ unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + + *meta_ac = NULL; + *data_ac = NULL; + + mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " +- "clusters_to_add = %u\n", ++ "clusters_to_add = %u, extents_to_split = %u\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode), +- le32_to_cpu(di->i_clusters), clusters_to_add); ++ le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); + + num_free_extents = ocfs2_num_free_extents(osb, inode, di); + if (num_free_extents < 0) { +@@ -552,9 +566,12 @@ + * + * Most of the time we'll only be seeing this 1 cluster at a time + * anyway. ++ * ++ * Always lock for any unwritten extents - we might want to ++ * remove blocks for a merge. + */ + if (!num_free_extents || +- (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) { ++ (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { + ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); + if (ret < 0) { + if (ret != -ENOSPC) +@@ -585,14 +602,13 @@ + return ret; + } + +-static int ocfs2_extend_allocation(struct inode *inode, +- u32 clusters_to_add) ++static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, ++ u32 clusters_to_add, int mark_unwritten) + { + int status = 0; + int restart_func = 0; +- int drop_alloc_sem = 0; + int credits; +- u32 prev_clusters, logical_start; ++ u32 prev_clusters; + struct buffer_head *bh = NULL; + struct ocfs2_dinode *fe = NULL; + handle_t *handle = NULL; +@@ -607,7 +623,7 @@ + * This function only exists for file systems which don't + * support holes. + */ +- BUG_ON(ocfs2_sparse_alloc(osb)); ++ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); + + status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, + OCFS2_BH_CACHED, inode); +@@ -623,19 +639,10 @@ + goto leave; + } + +- logical_start = OCFS2_I(inode)->ip_clusters; +- + restart_all: + BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); + +- /* blocks peope in read/write from reading our allocation +- * until we're done changing it. We depend on i_mutex to block +- * other extend/truncate calls while we're here. Ordering wrt +- * start_trans is important here -- always do it before! 
*/ +- down_write(&OCFS2_I(inode)->ip_alloc_sem); +- drop_alloc_sem = 1; +- +- status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac, ++ status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, + &meta_ac); + if (status) { + mlog_errno(status); +@@ -668,6 +675,7 @@ + inode, + &logical_start, + clusters_to_add, ++ mark_unwritten, + bh, + handle, + data_ac, +@@ -720,10 +728,6 @@ + OCFS2_I(inode)->ip_clusters, i_size_read(inode)); + + leave: +- if (drop_alloc_sem) { +- up_write(&OCFS2_I(inode)->ip_alloc_sem); +- drop_alloc_sem = 0; +- } + if (handle) { + ocfs2_commit_trans(osb, handle); + handle = NULL; +@@ -749,6 +753,25 @@ + return status; + } + ++static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, ++ u32 clusters_to_add, int mark_unwritten) ++{ ++ int ret; ++ ++ /* ++ * The alloc sem blocks peope in read/write from reading our ++ * allocation until we're done changing it. We depend on ++ * i_mutex to block other extend/truncate calls while we're ++ * here. ++ */ ++ down_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add, ++ mark_unwritten); ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ++ return ret; ++} ++ + /* Some parts of this taken from generic_cont_expand, which turned out + * to be too fragile to do exactly what we need without us having to + * worry about recursive locking in ->prepare_write() and +@@ -890,7 +913,9 @@ + } + + if (clusters_to_add) { +- ret = ocfs2_extend_allocation(inode, clusters_to_add); ++ ret = ocfs2_extend_allocation(inode, ++ OCFS2_I(inode)->ip_clusters, ++ clusters_to_add, 0); + if (ret < 0) { + mlog_errno(ret); + goto out_unlock; +@@ -997,6 +1022,13 @@ + goto bail_unlock; + } + ++ /* ++ * This will intentionally not wind up calling vmtruncate(), ++ * since all the work for a size change has been done above. ++ * Otherwise, we could get into problems with truncate as ++ * ip_alloc_sem is used there to protect against i_size ++ * changes. 
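++ *
++ * For reference, the ordering the call sites here follow:
++ * i_mutex, then the rw/meta cluster locks, then
++ * ip_alloc_sem, then the data lock - with ip_alloc_sem
++ * always taken before a transaction is started.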
++ */ + status = inode_setattr(inode, attr); + if (status < 0) { + mlog_errno(status); +@@ -1072,17 +1104,16 @@ + return ret; + } + +-static int ocfs2_write_remove_suid(struct inode *inode) ++static int __ocfs2_write_remove_suid(struct inode *inode, ++ struct buffer_head *bh) + { + int ret; +- struct buffer_head *bh = NULL; +- struct ocfs2_inode_info *oi = OCFS2_I(inode); + handle_t *handle; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_dinode *di; + + mlog_entry("(Inode %llu, mode 0%o)\n", +- (unsigned long long)oi->ip_blkno, inode->i_mode); ++ (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (handle == NULL) { +@@ -1091,17 +1122,11 @@ + goto out; + } + +- ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); +- if (ret < 0) { +- mlog_errno(ret); +- goto out_trans; +- } +- + ret = ocfs2_journal_access(handle, inode, bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret < 0) { + mlog_errno(ret); +- goto out_bh; ++ goto out_trans; + } + + inode->i_mode &= ~S_ISUID; +@@ -1114,8 +1139,7 @@ + ret = ocfs2_journal_dirty(handle, bh); + if (ret < 0) + mlog_errno(ret); +-out_bh: +- brelse(bh); ++ + out_trans: + ocfs2_commit_trans(osb, handle); + out: +@@ -1161,6 +1185,211 @@ + return ret; + } + ++static int ocfs2_write_remove_suid(struct inode *inode) ++{ ++ int ret; ++ struct buffer_head *bh = NULL; ++ struct ocfs2_inode_info *oi = OCFS2_I(inode); ++ ++ ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), ++ oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ret = __ocfs2_write_remove_suid(inode, bh); ++out: ++ brelse(bh); ++ return ret; ++} ++ ++/* ++ * Allocate enough extents to cover the region starting at byte offset ++ * start for len bytes. Existing extents are skipped, any extents ++ * added are marked as "unwritten". ++ */ ++static int ocfs2_allocate_unwritten_extents(struct inode *inode, ++ u64 start, u64 len) ++{ ++ int ret; ++ u32 cpos, phys_cpos, clusters, alloc_size; ++ ++ /* ++ * We consider both start and len to be inclusive. ++ */ ++ cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; ++ clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len); ++ clusters -= cpos; ++ ++ while (clusters) { ++ ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, ++ &alloc_size, NULL); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ /* ++ * Hole or existing extent len can be arbitrary, so ++ * cap it to our own allocation request. ++ */ ++ if (alloc_size > clusters) ++ alloc_size = clusters; ++ ++ if (phys_cpos) { ++ /* ++ * We already have an allocation at this ++ * region so we can safely skip it. 
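++ *
++ * An example pass over a hypothetical layout: reserving
++ * clusters 0-9 of a file with data in 0-3 and a hole at
++ * 4-9 loops twice. ocfs2_get_clusters() first reports the
++ * existing extent, which is skipped; it then reports the
++ * hole, and only clusters 4-9 are allocated (unwritten).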
++ */ ++ goto next; ++ } ++ ++ ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1); ++ if (ret) { ++ if (ret != -ENOSPC) ++ mlog_errno(ret); ++ goto out; ++ } ++ ++next: ++ cpos += alloc_size; ++ clusters -= alloc_size; ++ } ++ ++ ret = 0; ++out: ++ return ret; ++} ++ ++/* ++ * Parts of this function taken from xfs_change_file_space() ++ */ ++int ocfs2_change_file_space(struct file *file, unsigned int cmd, ++ struct ocfs2_space_resv *sr) ++{ ++ int ret; ++ s64 llen; ++ struct inode *inode = file->f_path.dentry->d_inode; ++ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); ++ struct buffer_head *di_bh = NULL; ++ handle_t *handle; ++ unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits); ++ ++ if (!ocfs2_writes_unwritten_extents(osb)) ++ return -ENOTTY; ++ ++ if (!S_ISREG(inode->i_mode)) ++ return -EINVAL; ++ ++ if (!(file->f_mode & FMODE_WRITE)) ++ return -EBADF; ++ ++ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) ++ return -EROFS; ++ ++ mutex_lock(&inode->i_mutex); ++ ++ /* ++ * This prevents concurrent writes on other nodes ++ */ ++ ret = ocfs2_rw_lock(inode, 1); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ret = ocfs2_meta_lock(inode, &di_bh, 1); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_rw_unlock; ++ } ++ ++ if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) { ++ ret = -EPERM; ++ goto out_meta_unlock; ++ } ++ ++ switch (sr->l_whence) { ++ case 0: /*SEEK_SET*/ ++ break; ++ case 1: /*SEEK_CUR*/ ++ sr->l_start += file->f_pos; ++ break; ++ case 2: /*SEEK_END*/ ++ sr->l_start += i_size_read(inode); ++ break; ++ default: ++ ret = -EINVAL; ++ goto out_meta_unlock; ++ } ++ sr->l_whence = 0; ++ ++ llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len; ++ ++ if (sr->l_start < 0 ++ || sr->l_start > max_off ++ || (sr->l_start + llen) < 0 ++ || (sr->l_start + llen) > max_off) { ++ ret = -EINVAL; ++ goto out_meta_unlock; ++ } ++ ++ if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { ++ if (sr->l_len <= 0) { ++ ret = -EINVAL; ++ goto out_meta_unlock; ++ } ++ } ++ ++ if (should_remove_suid(file->f_path.dentry)) { ++ ret = __ocfs2_write_remove_suid(inode, di_bh); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_meta_unlock; ++ } ++ } ++ ++ down_write(&OCFS2_I(inode)->ip_alloc_sem); ++ /* ++ * This takes unsigned offsets, but the signed ones we pass ++ * have been checked against overflow above. 
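++ *
++ * For illustration, a userspace caller could reserve a
++ * megabyte at the current end of file like so (a sketch
++ * only - "fd" is assumed open for writing, error handling
++ * omitted):
++ *
++ *     struct ocfs2_space_resv sr = {
++ *             .l_whence = 2,          /* SEEK_END */
++ *             .l_start = 0,
++ *             .l_len = 1024 * 1024,
++ *     };
++ *     ioctl(fd, OCFS2_IOC_RESVSP64, &sr);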
++ */ ++ ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, sr->l_len); ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ if (ret) { ++ mlog_errno(ret); ++ goto out_meta_unlock; ++ } ++ ++ /* ++ * We update c/mtime for these changes ++ */ ++ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); ++ if (IS_ERR(handle)) { ++ ret = PTR_ERR(handle); ++ mlog_errno(ret); ++ goto out_meta_unlock; ++ } ++ ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ ret = ocfs2_mark_inode_dirty(handle, inode, di_bh); ++ if (ret < 0) ++ mlog_errno(ret); ++ ++ ocfs2_commit_trans(osb, handle); ++ ++out_meta_unlock: ++ brelse(di_bh); ++ ocfs2_meta_unlock(inode, 1); ++out_rw_unlock: ++ ocfs2_rw_unlock(inode, 1); ++ ++ mutex_unlock(&inode->i_mutex); ++out: ++ return ret; ++} ++ + static int ocfs2_prepare_inode_for_write(struct dentry *dentry, + loff_t *ppos, + size_t count, +@@ -1331,15 +1560,16 @@ + *basep = base; + } + +-static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp, ++static struct page * ocfs2_get_write_source(char **ret_src_buf, + const struct iovec *cur_iov, + size_t iov_offset) + { + int ret; +- char *buf; ++ char *buf = cur_iov->iov_base + iov_offset; + struct page *src_page = NULL; ++ unsigned long off; + +- buf = cur_iov->iov_base + iov_offset; ++ off = (unsigned long)(buf) & ~PAGE_CACHE_MASK; + + if (!segment_eq(get_fs(), KERNEL_DS)) { + /* +@@ -1378,10 +1608,12 @@ + { + int ret = 0; + ssize_t copied, total = 0; +- size_t iov_offset = 0; ++ size_t iov_offset = 0, bytes; ++ loff_t pos; + const struct iovec *cur_iov = iov; +- struct ocfs2_buffered_write_priv bp; +- struct page *page; ++ struct page *user_page, *page; ++ char *buf, *dst; ++ void *fsdata; + + /* + * handle partial DIO write. Adjust cur_iov if needed. +@@ -1389,21 +1621,38 @@ + ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written); + + do { +- bp.b_cur_off = iov_offset; +- bp.b_cur_iov = cur_iov; ++ pos = *ppos; + +- page = ocfs2_get_write_source(&bp, cur_iov, iov_offset); +- if (IS_ERR(page)) { +- ret = PTR_ERR(page); ++ user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset); ++ if (IS_ERR(user_page)) { ++ ret = PTR_ERR(user_page); + goto out; + } + +- copied = ocfs2_buffered_write_cluster(file, *ppos, count, +- ocfs2_map_and_write_user_data, +- &bp); ++ /* Stay within our page boundaries */ ++ bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)), ++ (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK))); ++ /* Stay within the vector boundary */ ++ bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset); ++ /* Stay within count */ ++ bytes = min(bytes, count); ++ ++ page = NULL; ++ ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0, ++ &page, &fsdata); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } + +- ocfs2_put_write_source(&bp, page); ++ dst = kmap_atomic(page, KM_USER0); ++ memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes); ++ kunmap_atomic(dst, KM_USER0); ++ flush_dcache_page(page); ++ ocfs2_put_write_source(user_page); + ++ copied = ocfs2_write_end(file, file->f_mapping, pos, bytes, ++ bytes, page, fsdata); + if (copied < 0) { + mlog_errno(copied); + ret = copied; +@@ -1411,7 +1660,7 @@ + } + + total += copied; +- *ppos = *ppos + copied; ++ *ppos = pos + copied; + count -= copied; + + ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied); +@@ -1581,52 +1830,46 @@ + struct pipe_buffer *buf, + struct splice_desc *sd) + { +- int ret, count, total = 0; ++ int ret, count; + ssize_t copied = 0; +- struct ocfs2_splice_write_priv 
sp; ++ struct file *file = sd->file; ++ unsigned int offset; ++ struct page *page = NULL; ++ void *fsdata; ++ char *src, *dst; + + ret = buf->ops->pin(pipe, buf); + if (ret) + goto out; + +- sp.s_sd = sd; +- sp.s_buf = buf; +- sp.s_pipe = pipe; +- sp.s_offset = sd->pos & ~PAGE_CACHE_MASK; +- sp.s_buf_offset = buf->offset; +- ++ offset = sd->pos & ~PAGE_CACHE_MASK; + count = sd->len; +- if (count + sp.s_offset > PAGE_CACHE_SIZE) +- count = PAGE_CACHE_SIZE - sp.s_offset; ++ if (count + offset > PAGE_CACHE_SIZE) ++ count = PAGE_CACHE_SIZE - offset; + +- do { +- /* +- * splice wants us to copy up to one page at a +- * time. For pagesize > cluster size, this means we +- * might enter ocfs2_buffered_write_cluster() more +- * than once, so keep track of our progress here. +- */ +- copied = ocfs2_buffered_write_cluster(sd->file, +- (loff_t)sd->pos + total, +- count, +- ocfs2_map_and_write_splice_data, +- &sp); ++ ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0, ++ &page, &fsdata); ++ if (ret) { ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ src = buf->ops->map(pipe, buf, 1); ++ dst = kmap_atomic(page, KM_USER1); ++ memcpy(dst + offset, src + buf->offset, count); ++ kunmap_atomic(page, KM_USER1); ++ buf->ops->unmap(pipe, buf, src); ++ ++ copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count, ++ page, fsdata); + if (copied < 0) { + mlog_errno(copied); + ret = copied; + goto out; + } +- +- count -= copied; +- sp.s_offset += copied; +- sp.s_buf_offset += copied; +- total += copied; +- } while (count); +- +- ret = 0; + out: + +- return total ? total : ret; ++ return copied ? copied : ret; + } + + static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe, +diff -Nurb linux-2.6.22-570/fs/ocfs2/file.h linux-2.6.22-591/fs/ocfs2/file.h +--- linux-2.6.22-570/fs/ocfs2/file.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/file.h 2007-12-21 15:36:12.000000000 -0500 +@@ -39,15 +39,16 @@ + }; + int ocfs2_do_extend_allocation(struct ocfs2_super *osb, + struct inode *inode, +- u32 *cluster_start, ++ u32 *logical_offset, + u32 clusters_to_add, ++ int mark_unwritten, + struct buffer_head *fe_bh, + handle_t *handle, + struct ocfs2_alloc_context *data_ac, + struct ocfs2_alloc_context *meta_ac, +- enum ocfs2_alloc_restarted *reason); ++ enum ocfs2_alloc_restarted *reason_ret); + int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, +- u32 clusters_to_add, ++ u32 clusters_to_add, u32 extents_to_split, + struct ocfs2_alloc_context **data_ac, + struct ocfs2_alloc_context **meta_ac); + int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); +@@ -61,4 +62,7 @@ + int ocfs2_update_inode_atime(struct inode *inode, + struct buffer_head *bh); + ++int ocfs2_change_file_space(struct file *file, unsigned int cmd, ++ struct ocfs2_space_resv *sr); ++ + #endif /* OCFS2_FILE_H */ +diff -Nurb linux-2.6.22-570/fs/ocfs2/heartbeat.c linux-2.6.22-591/fs/ocfs2/heartbeat.c +--- linux-2.6.22-570/fs/ocfs2/heartbeat.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/heartbeat.c 2007-12-21 15:36:12.000000000 -0500 +@@ -157,16 +157,16 @@ + if (ocfs2_mount_local(osb)) + return 0; + +- status = o2hb_register_callback(&osb->osb_hb_down); ++ status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down); + if (status < 0) { + mlog_errno(status); + goto bail; + } + +- status = o2hb_register_callback(&osb->osb_hb_up); ++ status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up); + if (status < 0) { + mlog_errno(status); +- 
o2hb_unregister_callback(&osb->osb_hb_down); ++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); + } + + bail: +@@ -178,8 +178,8 @@ + if (ocfs2_mount_local(osb)) + return; + +- o2hb_unregister_callback(&osb->osb_hb_down); +- o2hb_unregister_callback(&osb->osb_hb_up); ++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down); ++ o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up); + } + + void ocfs2_stop_heartbeat(struct ocfs2_super *osb) +@@ -209,7 +209,7 @@ + envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; + envp[2] = NULL; + +- ret = call_usermodehelper(argv[0], argv, envp, 1); ++ ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); + if (ret < 0) + mlog_errno(ret); + } +diff -Nurb linux-2.6.22-570/fs/ocfs2/ioctl.c linux-2.6.22-591/fs/ocfs2/ioctl.c +--- linux-2.6.22-570/fs/ocfs2/ioctl.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/ioctl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -14,6 +14,7 @@ + #include "ocfs2.h" + #include "alloc.h" + #include "dlmglue.h" ++#include "file.h" + #include "inode.h" + #include "journal.h" + +@@ -115,6 +116,7 @@ + { + unsigned int flags; + int status; ++ struct ocfs2_space_resv sr; + + switch (cmd) { + case OCFS2_IOC_GETFLAGS: +@@ -130,6 +132,12 @@ + + return ocfs2_set_inode_attr(inode, flags, + OCFS2_FL_MODIFIABLE); ++ case OCFS2_IOC_RESVSP: ++ case OCFS2_IOC_RESVSP64: ++ if (copy_from_user(&sr, (int __user *) arg, sizeof(sr))) ++ return -EFAULT; ++ ++ return ocfs2_change_file_space(filp, cmd, &sr); + default: + return -ENOTTY; + } +@@ -148,6 +156,9 @@ + case OCFS2_IOC32_SETFLAGS: + cmd = OCFS2_IOC_SETFLAGS; + break; ++ case OCFS2_IOC_RESVSP: ++ case OCFS2_IOC_RESVSP64: ++ break; + default: + return -ENOIOCTLCMD; + } +diff -Nurb linux-2.6.22-570/fs/ocfs2/journal.c linux-2.6.22-591/fs/ocfs2/journal.c +--- linux-2.6.22-570/fs/ocfs2/journal.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/journal.c 2007-12-21 15:36:12.000000000 -0500 +@@ -722,8 +722,7 @@ + container_of(work, struct ocfs2_journal, j_recovery_work); + struct ocfs2_super *osb = journal->j_osb; + struct ocfs2_dinode *la_dinode, *tl_dinode; +- struct ocfs2_la_recovery_item *item; +- struct list_head *p, *n; ++ struct ocfs2_la_recovery_item *item, *n; + LIST_HEAD(tmp_la_list); + + mlog_entry_void(); +@@ -734,8 +733,7 @@ + list_splice_init(&journal->j_la_cleanups, &tmp_la_list); + spin_unlock(&journal->j_lock); + +- list_for_each_safe(p, n, &tmp_la_list) { +- item = list_entry(p, struct ocfs2_la_recovery_item, lri_list); ++ list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { + list_del_init(&item->lri_list); + + mlog(0, "Complete recovery for slot %d\n", item->lri_slot); +diff -Nurb linux-2.6.22-570/fs/ocfs2/mmap.c linux-2.6.22-591/fs/ocfs2/mmap.c +--- linux-2.6.22-570/fs/ocfs2/mmap.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/mmap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -37,38 +37,48 @@ + + #include "ocfs2.h" + ++#include "aops.h" + #include "dlmglue.h" + #include "file.h" + #include "inode.h" + #include "mmap.h" + +-static struct page *ocfs2_nopage(struct vm_area_struct * area, +- unsigned long address, +- int *type) ++static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset) + { +- struct page *page = NOPAGE_SIGBUS; +- sigset_t blocked, oldset; +- int ret; +- +- mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address, +- type); +- +- /* The best way to deal with signals in this path is ++ /* The best way to deal with signals in the vm path is + * to block them 
upfront, rather than allowing the + * locking paths to return -ERESTARTSYS. */ +- sigfillset(&blocked); ++ sigfillset(blocked); + +- /* We should technically never get a bad ret return ++ /* We should technically never get a bad return value + * from sigprocmask */ +- ret = sigprocmask(SIG_BLOCK, &blocked, &oldset); ++ return sigprocmask(SIG_BLOCK, blocked, oldset); ++} ++ ++static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset) ++{ ++ return sigprocmask(SIG_SETMASK, oldset, NULL); ++} ++ ++static struct page *ocfs2_fault(struct vm_area_struct *area, ++ struct fault_data *fdata) ++{ ++ struct page *page = NULL; ++ sigset_t blocked, oldset; ++ int ret; ++ ++ mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff); ++ ++ ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); + if (ret < 0) { ++ fdata->type = VM_FAULT_SIGBUS; + mlog_errno(ret); + goto out; + } + +- page = filemap_nopage(area, address, type); ++ page = filemap_fault(area, fdata); + +- ret = sigprocmask(SIG_SETMASK, &oldset, NULL); ++ ret = ocfs2_vm_op_unblock_sigs(&oldset); + if (ret < 0) + mlog_errno(ret); + out: +@@ -76,28 +86,136 @@ + return page; + } + +-static struct vm_operations_struct ocfs2_file_vm_ops = { +- .nopage = ocfs2_nopage, +-}; ++static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, ++ struct page *page) ++{ ++ int ret; ++ struct address_space *mapping = inode->i_mapping; ++ loff_t pos = page->index << PAGE_CACHE_SHIFT; ++ unsigned int len = PAGE_CACHE_SIZE; ++ pgoff_t last_index; ++ struct page *locked_page = NULL; ++ void *fsdata; ++ loff_t size = i_size_read(inode); + +-int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ++ /* ++ * Another node might have truncated while we were waiting on ++ * cluster locks. ++ */ ++ last_index = size >> PAGE_CACHE_SHIFT; ++ if (page->index > last_index) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The i_size check above doesn't catch the case where nodes ++ * truncated and then re-extended the file. We'll re-check the ++ * page mapping after taking the page lock inside of ++ * ocfs2_write_begin_nolock(). ++ */ ++ if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ++ ret = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * Call ocfs2_write_begin() and ocfs2_write_end() to take ++ * advantage of the allocation code there. We pass a write ++ * length of the whole page (chopped to i_size) to make sure ++ * the whole thing is allocated. ++ * ++ * Since we know the page is up to date, we don't have to ++ * worry about ocfs2_write_begin() skipping some buffer reads ++ * because the "write" would invalidate their data. 
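++ *
++ * The length chop below, worked through assuming 4k pages:
++ * with i_size = 10000, last_index is 2. A fault on page 2
++ * writes len = 10000 & ~PAGE_CACHE_MASK = 1808 bytes, while
++ * faults on pages 0 and 1 use the full PAGE_CACHE_SIZE.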
++ */ ++ if (page->index == last_index) ++ len = size & ~PAGE_CACHE_MASK; ++ ++ ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, ++ &fsdata, di_bh, page); ++ if (ret) { ++ if (ret != -ENOSPC) ++ mlog_errno(ret); ++ goto out; ++ } ++ ++ ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page, ++ fsdata); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out; ++ } ++ BUG_ON(ret != len); ++ ret = 0; ++out: ++ return ret; ++} ++ ++static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) + { +- int ret = 0, lock_level = 0; +- struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); ++ struct inode *inode = vma->vm_file->f_path.dentry->d_inode; ++ struct buffer_head *di_bh = NULL; ++ sigset_t blocked, oldset; ++ int ret, ret2; ++ ++ ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); ++ if (ret < 0) { ++ mlog_errno(ret); ++ return ret; ++ } + + /* +- * Only support shared writeable mmap for local mounts which +- * don't know about holes. ++ * The cluster locks taken will block a truncate from another ++ * node. Taking the data lock will also ensure that we don't ++ * attempt page truncation as part of a downconvert. + */ +- if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && +- ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && +- ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { +- mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); +- /* This is -EINVAL because generic_file_readonly_mmap +- * returns it in a similar situation. */ +- return -EINVAL; ++ ret = ocfs2_meta_lock(inode, &di_bh, 1); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out; + } + ++ /* ++ * The alloc sem should be enough to serialize with ++ * ocfs2_truncate_file() changing i_size as well as any thread ++ * modifying the inode btree. 
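++ *
++ * End to end, the first write to a MAP_SHARED page now runs:
++ * ocfs2_fault() reads the page in, the write fault invokes
++ * this handler, and __ocfs2_page_mkwrite() reuses
++ * ocfs2_write_begin_nolock()/ocfs2_write_end_nolock() to
++ * allocate backing clusters before the page is made writable.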
++ */ ++ down_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ++ ret = ocfs2_data_lock(inode, 1); ++ if (ret < 0) { ++ mlog_errno(ret); ++ goto out_meta_unlock; ++ } ++ ++ ret = __ocfs2_page_mkwrite(inode, di_bh, page); ++ ++ ocfs2_data_unlock(inode, 1); ++ ++out_meta_unlock: ++ up_write(&OCFS2_I(inode)->ip_alloc_sem); ++ ++ brelse(di_bh); ++ ocfs2_meta_unlock(inode, 1); ++ ++out: ++ ret2 = ocfs2_vm_op_unblock_sigs(&oldset); ++ if (ret2 < 0) ++ mlog_errno(ret2); ++ ++ return ret; ++} ++ ++static struct vm_operations_struct ocfs2_file_vm_ops = { ++ .nopage = ocfs2_fault, ++ .page_mkwrite = ocfs2_page_mkwrite, ++}; ++ ++int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int ret = 0, lock_level = 0; ++ + ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, + file->f_vfsmnt, &lock_level); + if (ret < 0) { +@@ -107,6 +225,7 @@ + ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); + out: + vma->vm_ops = &ocfs2_file_vm_ops; ++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; + return 0; + } + +diff -Nurb linux-2.6.22-570/fs/ocfs2/namei.c linux-2.6.22-591/fs/ocfs2/namei.c +--- linux-2.6.22-570/fs/ocfs2/namei.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/namei.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1684,7 +1684,7 @@ + u32 offset = 0; + + inode->i_op = &ocfs2_symlink_inode_operations; +- status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, ++ status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, + new_fe_bh, + handle, data_ac, NULL, + NULL); +diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2.h linux-2.6.22-591/fs/ocfs2/ocfs2.h +--- linux-2.6.22-570/fs/ocfs2/ocfs2.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/ocfs2.h 2007-12-21 15:36:12.000000000 -0500 +@@ -220,6 +220,7 @@ + u16 max_slots; + s16 node_num; + s16 slot_num; ++ s16 preferred_slot; + int s_sectsize_bits; + int s_clustersize; + int s_clustersize_bits; +@@ -306,6 +307,19 @@ + return 0; + } + ++static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb) ++{ ++ /* ++ * Support for sparse files is a pre-requisite ++ */ ++ if (!ocfs2_sparse_alloc(osb)) ++ return 0; ++ ++ if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN) ++ return 1; ++ return 0; ++} ++ + /* set / clear functions because cluster events can make these happen + * in parallel so we want the transitions to be atomic. this also + * means that any future flags osb_flags must be protected by spinlock +diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h +--- linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/ocfs2_fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -88,7 +88,7 @@ + #define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB + #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ + | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) +-#define OCFS2_FEATURE_RO_COMPAT_SUPP 0 ++#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN + + /* + * Heartbeat-only devices are missing journals and other files. The +@@ -116,6 +116,11 @@ + */ + #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 + ++/* ++ * Unwritten extents support. ++ */ ++#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 ++ + /* The byte offset of the first backup block will be 1G. + * The following will be 4G, 16G, 64G, 256G and 1T. 
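+ *
+ * (Each backup thus sits at four times the previous offset,
+ * 1G * 4^n for n = 0..5, so smaller volumes carry only the
+ * backups that fit below their size.)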
+ */ +@@ -174,6 +179,32 @@ + #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) + + /* ++ * Space reservation / allocation / free ioctls and argument structure ++ * are designed to be compatible with XFS. ++ * ++ * ALLOCSP* and FREESP* are not and will never be supported, but are ++ * included here for completeness. ++ */ ++struct ocfs2_space_resv { ++ __s16 l_type; ++ __s16 l_whence; ++ __s64 l_start; ++ __s64 l_len; /* len == 0 means until end of file */ ++ __s32 l_sysid; ++ __u32 l_pid; ++ __s32 l_pad[4]; /* reserve area */ ++}; ++ ++#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv) ++#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv) ++#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv) ++#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv) ++#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv) ++#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv) ++#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv) ++#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv) ++ ++/* + * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) + */ + #define OCFS2_JOURNAL_DIRTY_FL (0x00000001) /* Journal needs recovery */ +diff -Nurb linux-2.6.22-570/fs/ocfs2/slot_map.c linux-2.6.22-591/fs/ocfs2/slot_map.c +--- linux-2.6.22-570/fs/ocfs2/slot_map.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/slot_map.c 2007-12-21 15:36:12.000000000 -0500 +@@ -121,17 +121,25 @@ + return ret; + } + +-static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si) ++static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred) + { + int i; + s16 ret = OCFS2_INVALID_SLOT; + ++ if (preferred >= 0 && preferred < si->si_num_slots) { ++ if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) { ++ ret = preferred; ++ goto out; ++ } ++ } ++ + for(i = 0; i < si->si_num_slots; i++) { + if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) { + ret = (s16) i; + break; + } + } ++out: + return ret; + } + +@@ -248,7 +256,7 @@ + if (slot == OCFS2_INVALID_SLOT) { + /* if no slot yet, then just take 1st available + * one. */ +- slot = __ocfs2_find_empty_slot(si); ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (slot == OCFS2_INVALID_SLOT) { + spin_unlock(&si->si_lock); + mlog(ML_ERROR, "no free slots available!\n"); +diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.c linux-2.6.22-591/fs/ocfs2/suballoc.c +--- linux-2.6.22-570/fs/ocfs2/suballoc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/suballoc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -98,14 +98,6 @@ + u16 chain); + static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, + u32 wanted); +-static int ocfs2_free_suballoc_bits(handle_t *handle, +- struct inode *alloc_inode, +- struct buffer_head *alloc_bh, +- unsigned int start_bit, +- u64 bg_blkno, +- unsigned int count); +-static inline u64 ocfs2_which_suballoc_group(u64 block, +- unsigned int bit); + static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, + u64 bg_blkno, + u16 bg_bit_off); +@@ -496,13 +488,7 @@ + + (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); + (*ac)->ac_which = OCFS2_AC_USE_META; +- +-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS +- slot = 0; +-#else + slot = osb->slot_num; +-#endif +- + (*ac)->ac_group_search = ocfs2_block_group_search; + + status = ocfs2_reserve_suballoc_bits(osb, (*ac), +@@ -1626,7 +1612,7 @@ + /* + * expects the suballoc inode to already be locked. 
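+ * It is now also exported via suballoc.h, presumably so that the
+ * unwritten extent / space reservation code added elsewhere in this
+ * patch can free suballocator bits directly.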
+ */ +-static int ocfs2_free_suballoc_bits(handle_t *handle, ++int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, +@@ -1703,13 +1689,6 @@ + return status; + } + +-static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) +-{ +- u64 group = block - (u64) bit; +- +- return group; +-} +- + int ocfs2_free_dinode(handle_t *handle, + struct inode *inode_alloc_inode, + struct buffer_head *inode_alloc_bh, +@@ -1723,19 +1702,6 @@ + inode_alloc_bh, bit, bg_blkno, 1); + } + +-int ocfs2_free_extent_block(handle_t *handle, +- struct inode *eb_alloc_inode, +- struct buffer_head *eb_alloc_bh, +- struct ocfs2_extent_block *eb) +-{ +- u64 blk = le64_to_cpu(eb->h_blkno); +- u16 bit = le16_to_cpu(eb->h_suballoc_bit); +- u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); +- +- return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, +- bit, bg_blkno, 1); +-} +- + int ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, +diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.h linux-2.6.22-591/fs/ocfs2/suballoc.h +--- linux-2.6.22-570/fs/ocfs2/suballoc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ocfs2/suballoc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -86,20 +86,29 @@ + u32 *cluster_start, + u32 *num_clusters); + ++int ocfs2_free_suballoc_bits(handle_t *handle, ++ struct inode *alloc_inode, ++ struct buffer_head *alloc_bh, ++ unsigned int start_bit, ++ u64 bg_blkno, ++ unsigned int count); + int ocfs2_free_dinode(handle_t *handle, + struct inode *inode_alloc_inode, + struct buffer_head *inode_alloc_bh, + struct ocfs2_dinode *di); +-int ocfs2_free_extent_block(handle_t *handle, +- struct inode *eb_alloc_inode, +- struct buffer_head *eb_alloc_bh, +- struct ocfs2_extent_block *eb); + int ocfs2_free_clusters(handle_t *handle, + struct inode *bitmap_inode, + struct buffer_head *bitmap_bh, + u64 start_blk, + unsigned int num_clusters); + ++static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) ++{ ++ u64 group = block - (u64) bit; ++ ++ return group; ++} ++ + static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, + u64 bg_blkno) + { +diff -Nurb linux-2.6.22-570/fs/ocfs2/super.c linux-2.6.22-591/fs/ocfs2/super.c +--- linux-2.6.22-570/fs/ocfs2/super.c 2007-12-21 15:36:07.000000000 -0500 ++++ linux-2.6.22-591/fs/ocfs2/super.c 2007-12-21 15:36:12.000000000 -0500 +@@ -82,7 +82,8 @@ + MODULE_LICENSE("GPL"); + + static int ocfs2_parse_options(struct super_block *sb, char *options, +- unsigned long *mount_opt, int is_remount); ++ unsigned long *mount_opt, s16 *slot, ++ int is_remount); + static void ocfs2_put_super(struct super_block *sb); + static int ocfs2_mount_volume(struct super_block *sb); + static int ocfs2_remount(struct super_block *sb, int *flags, char *data); +@@ -114,8 +115,6 @@ + static struct inode *ocfs2_alloc_inode(struct super_block *sb); + static void ocfs2_destroy_inode(struct inode *inode); + +-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift); +- + static const struct super_operations ocfs2_sops = { + .statfs = ocfs2_statfs, + .alloc_inode = ocfs2_alloc_inode, +@@ -323,7 +322,7 @@ + /* From xfs_super.c:xfs_max_file_offset + * Copyright (c) 2000-2004 Silicon Graphics, Inc. 
+ */
+-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+ {
+ unsigned int pagefactor = 1;
+ unsigned int bitshift = BITS_PER_LONG - 1;
+@@ -360,9 +359,10 @@
+ int incompat_features;
+ int ret = 0;
+ unsigned long parsed_options;
++ s16 slot;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
+
+- if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
++ if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+@@ -546,6 +546,7 @@
+ struct dentry *root;
+ int status, sector_size;
+ unsigned long parsed_opt;
++ s16 slot;
+ struct inode *inode = NULL;
+ struct ocfs2_super *osb = NULL;
+ struct buffer_head *bh = NULL;
+@@ -553,7 +554,7 @@
+
+ mlog_entry("%p, %p, %i", sb, data, silent);
+
+- if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
++ if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
+ status = -EINVAL;
+ goto read_super_error;
+ }
+@@ -583,6 +584,7 @@
+ brelse(bh);
+ bh = NULL;
+ osb->s_mount_opt = parsed_opt;
++ osb->preferred_slot = slot;
+
+ sb->s_magic = OCFS2_SUPER_MAGIC;
+
+@@ -728,6 +730,7 @@
+ static int ocfs2_parse_options(struct super_block *sb,
+ char *options,
+ unsigned long *mount_opt,
++ s16 *slot,
+ int is_remount)
+ {
+ int status;
+@@ -737,6 +740,7 @@
+ options ? options : "(none)");
+
+ *mount_opt = 0;
++ *slot = OCFS2_INVALID_SLOT;
+
+ if (!options) {
+ status = 1;
+diff -Nurb linux-2.6.22-570/fs/ocfs2/super.h linux-2.6.22-591/fs/ocfs2/super.h
+--- linux-2.6.22-570/fs/ocfs2/super.h 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/ocfs2/super.h 2007-12-21 15:36:12.000000000 -0500
+@@ -45,4 +45,6 @@
+
+ #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
+
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
++
+ #endif /* OCFS2_SUPER_H */
+diff -Nurb linux-2.6.22-570/fs/open.c linux-2.6.22-591/fs/open.c
+--- linux-2.6.22-570/fs/open.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/open.c 2007-12-21 15:36:12.000000000 -0500
+@@ -362,6 +362,92 @@
+ #endif
+
+ /*
++ * sys_fallocate - preallocate blocks or free preallocated blocks
++ * @fd: the file descriptor
++ * @mode: mode specifies if fallocate should preallocate blocks OR free
++ * (unallocate) preallocated blocks. Currently only FA_ALLOCATE and
++ * FA_DEALLOCATE modes are supported.
++ * @offset: The offset within file, from where (un)allocation is being
++ * requested. It should not have a negative value.
++ * @len: The amount (in bytes) of space to be (un)allocated, from the offset.
++ *
++ * This system call, depending on the mode, preallocates or unallocates blocks
++ * for a file. The range of blocks depends on the value of offset and len
++ * arguments provided by the user/application. For FA_ALLOCATE mode, if this
++ * system call succeeds, subsequent writes to the file in the given range
++ * (specified by offset & len) should not fail - even if the file system
++ * later becomes full. Hence the preallocation done is persistent (valid
++ * even after reopen of the file and remount/reboot).
++ *
++ * It is expected that the ->fallocate() inode operation implemented by the
++ * individual file systems will update the file size and/or ctime/mtime
++ * depending on the mode and also on the success of the operation.
++ *
++ * Note: In case the file system does not support preallocation,
++ * posix_fallocate() should fall back to the library implementation (i.e.
++ * allocating zero-filled new blocks to the file).
++ *
++ * Return Values
++ * 0 : On SUCCESS a value of zero is returned.
++ * error : On Failure, an error code will be returned.
++ * An error code of -ENOSYS or -EOPNOTSUPP should make posix_fallocate()
++ * fall back on the library implementation of fallocate.
++ *
++ * A generic fallocate is to be added for file systems that do not
++ * support it.
++ */
++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
++{
++ struct file *file;
++ struct inode *inode;
++ long ret = -EINVAL;
++
++ if (offset < 0 || len <= 0)
++ goto out;
++
++ /* Return error if mode is not supported */
++ ret = -EOPNOTSUPP;
++ if (mode != FA_ALLOCATE && mode != FA_DEALLOCATE)
++ goto out;
++
++ ret = -EBADF;
++ file = fget(fd);
++ if (!file)
++ goto out;
++ if (!(file->f_mode & FMODE_WRITE))
++ goto out_fput;
++
++ inode = file->f_path.dentry->d_inode;
++
++ ret = -ESPIPE;
++ if (S_ISFIFO(inode->i_mode))
++ goto out_fput;
++
++ ret = -ENODEV;
++ /*
++ * Let the individual file system decide if it supports preallocation
++ * for directories or not.
++ */
++ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
++ goto out_fput;
++
++ ret = -EFBIG;
++ /* Check for wrap through zero too */
++ if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
++ goto out_fput;
++
++ if (inode->i_op && inode->i_op->fallocate)
++ ret = inode->i_op->fallocate(inode, mode, offset, len);
++ else
++ ret = -ENOSYS;
++
++out_fput:
++ fput(file);
++out:
++ return ret;
++}
++
++/*
+ * access() needs to use the real uid/gid, not the effective uid/gid.
+ * We do this by temporarily clearing all FS-related capabilities and
+ * switching the fsuid/fsgid around to the real ones.
+diff -Nurb linux-2.6.22-570/fs/partitions/check.c linux-2.6.22-591/fs/partitions/check.c
+--- linux-2.6.22-570/fs/partitions/check.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/partitions/check.c 2007-12-21 15:36:12.000000000 -0500
+@@ -397,7 +397,6 @@
+ static struct attribute addpartattr = {
+ .name = "whole_disk",
+ .mode = S_IRUSR | S_IRGRP | S_IROTH,
+- .owner = THIS_MODULE,
+ };
+
+ sysfs_create_file(&p->kobj, &addpartattr);
+diff -Nurb linux-2.6.22-570/fs/proc/Makefile linux-2.6.22-591/fs/proc/Makefile
+--- linux-2.6.22-570/fs/proc/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/fs/proc/Makefile 2007-12-21 15:36:14.000000000 -0500
+@@ -11,6 +11,7 @@
+ proc_tty.o proc_misc.o
+
+ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
++proc-$(CONFIG_NET) += proc_net.o
+ proc-$(CONFIG_PROC_KCORE) += kcore.o
+ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
+ proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
+diff -Nurb linux-2.6.22-570/fs/proc/array.c linux-2.6.22-591/fs/proc/array.c
+--- linux-2.6.22-570/fs/proc/array.c 2007-12-21 15:35:59.000000000 -0500
++++ linux-2.6.22-591/fs/proc/array.c 2007-12-21 15:36:12.000000000 -0500
+@@ -291,6 +291,15 @@
+ return buffer;
+ }
+
++static inline char *task_context_switch_counts(struct task_struct *p,
++ char *buffer)
++{
++ return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
++ "nonvoluntary_ctxt_switches:\t%lu\n",
++ p->nvcsw,
++ p->nivcsw);
++}
++
+ static inline char *task_cap(struct task_struct *p, char *buffer)
+ {
+ struct vx_info *vxi = p->vx_info;
+@@ -328,6 +337,7 @@
+ #if defined(CONFIG_S390)
+ buffer = task_show_regs(task, buffer);
+ #endif
++ buffer = task_context_switch_counts(task, buffer);
+ return buffer - orig;
+ }
+
+@@ -426,8 +436,9 @@
+
+ /* Temporary variable needed for gcc-2.96 */
+
/* convert timespec -> nsec*/ +- start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC +- + task->start_time.tv_nsec; ++ start_time = ++ (unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC ++ + task->real_start_time.tv_nsec; + /* convert nsec -> ticks */ + start_time = nsec_to_clock_t(start_time); + +diff -Nurb linux-2.6.22-570/fs/proc/base.c linux-2.6.22-591/fs/proc/base.c +--- linux-2.6.22-570/fs/proc/base.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/base.c 2007-12-21 15:36:12.000000000 -0500 +@@ -67,7 +67,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -490,7 +490,7 @@ + count = PROC_BLOCK_SIZE; + + length = -ENOMEM; +- if (!(page = __get_free_page(GFP_KERNEL))) ++ if (!(page = __get_free_page(GFP_TEMPORARY))) + goto out; + + length = PROC_I(inode)->op.proc_read(task, (char*)page); +@@ -530,7 +530,7 @@ + goto out; + + ret = -ENOMEM; +- page = (char *)__get_free_page(GFP_USER); ++ page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) + goto out; + +@@ -600,7 +600,7 @@ + goto out; + + copied = -ENOMEM; +- page = (char *)__get_free_page(GFP_USER); ++ page = (char *)__get_free_page(GFP_TEMPORARY); + if (!page) + goto out; + +@@ -633,7 +633,7 @@ + } + #endif + +-static loff_t mem_lseek(struct file * file, loff_t offset, int orig) ++loff_t mem_lseek(struct file * file, loff_t offset, int orig) + { + switch (orig) { + case 0: +@@ -711,42 +711,6 @@ + .write = oom_adjust_write, + }; + +-#ifdef CONFIG_MMU +-static ssize_t clear_refs_write(struct file *file, const char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct task_struct *task; +- char buffer[PROC_NUMBUF], *end; +- struct mm_struct *mm; +- +- memset(buffer, 0, sizeof(buffer)); +- if (count > sizeof(buffer) - 1) +- count = sizeof(buffer) - 1; +- if (copy_from_user(buffer, buf, count)) +- return -EFAULT; +- if (!simple_strtol(buffer, &end, 0)) +- return -EINVAL; +- if (*end == '\n') +- end++; +- task = get_proc_task(file->f_path.dentry->d_inode); +- if (!task) +- return -ESRCH; +- mm = get_task_mm(task); +- if (mm) { +- clear_refs_smap(mm); +- mmput(mm); +- } +- put_task_struct(task); +- if (end - buffer == 0) +- return -EIO; +- return end - buffer; +-} +- +-static struct file_operations proc_clear_refs_operations = { +- .write = clear_refs_write, +-}; +-#endif +- + #ifdef CONFIG_AUDITSYSCALL + #define TMPBUFLEN 21 + static ssize_t proc_loginuid_read(struct file * file, char __user * buf, +@@ -786,7 +750,7 @@ + /* No partial writes. 
*/ + return -EINVAL; + } +- page = (char*)__get_free_page(GFP_USER); ++ page = (char*)__get_free_page(GFP_TEMPORARY); + if (!page) + return -ENOMEM; + length = -EFAULT; +@@ -815,71 +779,6 @@ + }; + #endif + +-#ifdef CONFIG_SECCOMP +-static ssize_t seccomp_read(struct file *file, char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); +- char __buf[20]; +- size_t len; +- +- if (!tsk) +- return -ESRCH; +- /* no need to print the trailing zero, so use only len */ +- len = sprintf(__buf, "%u\n", tsk->seccomp.mode); +- put_task_struct(tsk); +- +- return simple_read_from_buffer(buf, count, ppos, __buf, len); +-} +- +-static ssize_t seccomp_write(struct file *file, const char __user *buf, +- size_t count, loff_t *ppos) +-{ +- struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode); +- char __buf[20], *end; +- unsigned int seccomp_mode; +- ssize_t result; +- +- result = -ESRCH; +- if (!tsk) +- goto out_no_task; +- +- /* can set it only once to be even more secure */ +- result = -EPERM; +- if (unlikely(tsk->seccomp.mode)) +- goto out; +- +- result = -EFAULT; +- memset(__buf, 0, sizeof(__buf)); +- count = min(count, sizeof(__buf) - 1); +- if (copy_from_user(__buf, buf, count)) +- goto out; +- +- seccomp_mode = simple_strtoul(__buf, &end, 0); +- if (*end == '\n') +- end++; +- result = -EINVAL; +- if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { +- tsk->seccomp.mode = seccomp_mode; +- set_tsk_thread_flag(tsk, TIF_SECCOMP); +- } else +- goto out; +- result = -EIO; +- if (unlikely(!(end - __buf))) +- goto out; +- result = end - __buf; +-out: +- put_task_struct(tsk); +-out_no_task: +- return result; +-} +- +-static const struct file_operations proc_seccomp_operations = { +- .read = seccomp_read, +- .write = seccomp_write, +-}; +-#endif /* CONFIG_SECCOMP */ +- + #ifdef CONFIG_FAULT_INJECTION + static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +@@ -954,7 +853,8 @@ + char __user *buffer, int buflen) + { + struct inode * inode; +- char *tmp = (char*)__get_free_page(GFP_KERNEL), *path; ++ char *tmp = (char*)__get_free_page(GFP_TEMPORARY); ++ char *path; + int len; + + if (!tmp) +@@ -1015,7 +915,7 @@ + task_lock(task); + mm = task->mm; + if (mm) +- dumpable = mm->dumpable; ++ dumpable = get_dumpable(mm); + task_unlock(task); + if(dumpable == 1) + return 1; +@@ -1744,7 +1644,7 @@ + goto out; + + length = -ENOMEM; +- page = (char*)__get_free_page(GFP_USER); ++ page = (char*)__get_free_page(GFP_TEMPORARY); + if (!page) + goto out; + +@@ -1804,6 +1704,91 @@ + + #endif + ++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) ++static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct task_struct *task = get_proc_task(file->f_dentry->d_inode); ++ struct mm_struct *mm; ++ char buffer[PROC_NUMBUF]; ++ size_t len; ++ int ret; ++ ++ if (!task) ++ return -ESRCH; ++ ++ ret = 0; ++ mm = get_task_mm(task); ++ if (mm) { ++ len = snprintf(buffer, sizeof(buffer), "%08lx\n", ++ ((mm->flags & MMF_DUMP_FILTER_MASK) >> ++ MMF_DUMP_FILTER_SHIFT)); ++ mmput(mm); ++ ret = simple_read_from_buffer(buf, count, ppos, buffer, len); ++ } ++ ++ put_task_struct(task); ++ ++ return ret; ++} ++ ++static ssize_t proc_coredump_filter_write(struct file *file, ++ const char __user *buf, ++ size_t count, ++ loff_t *ppos) ++{ ++ struct task_struct *task; ++ struct mm_struct *mm; ++ char buffer[PROC_NUMBUF], *end; ++ unsigned int val; ++ int 
ret; ++ int i; ++ unsigned long mask; ++ ++ ret = -EFAULT; ++ memset(buffer, 0, sizeof(buffer)); ++ if (count > sizeof(buffer) - 1) ++ count = sizeof(buffer) - 1; ++ if (copy_from_user(buffer, buf, count)) ++ goto out_no_task; ++ ++ ret = -EINVAL; ++ val = (unsigned int)simple_strtoul(buffer, &end, 0); ++ if (*end == '\n') ++ end++; ++ if (end - buffer == 0) ++ goto out_no_task; ++ ++ ret = -ESRCH; ++ task = get_proc_task(file->f_dentry->d_inode); ++ if (!task) ++ goto out_no_task; ++ ++ ret = end - buffer; ++ mm = get_task_mm(task); ++ if (!mm) ++ goto out_no_mm; ++ ++ for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { ++ if (val & mask) ++ set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); ++ else ++ clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); ++ } ++ ++ mmput(mm); ++ out_no_mm: ++ put_task_struct(task); ++ out_no_task: ++ return ret; ++} ++ ++static const struct file_operations proc_coredump_filter_operations = { ++ .read = proc_coredump_filter_read, ++ .write = proc_coredump_filter_write, ++}; ++#endif ++ + /* + * /proc/self: + */ +@@ -1995,18 +1980,22 @@ + REG("numa_maps", S_IRUGO, numa_maps), + #endif + REG("mem", S_IRUSR|S_IWUSR, mem), +-#ifdef CONFIG_SECCOMP +- REG("seccomp", S_IRUSR|S_IWUSR, seccomp), +-#endif + LNK("cwd", cwd), + LNK("root", root), + LNK("exe", exe), + REG("mounts", S_IRUGO, mounts), + REG("mountstats", S_IRUSR, mountstats), + #ifdef CONFIG_MMU ++#ifdef CONFIG_PROC_CLEAR_REFS + REG("clear_refs", S_IWUSR, clear_refs), ++#endif ++#ifdef CONFIG_PROC_SMAPS + REG("smaps", S_IRUGO, smaps), + #endif ++#ifdef CONFIG_PROC_PAGEMAP ++ REG("pagemap", S_IRUSR, pagemap), ++#endif ++#endif + #ifdef CONFIG_SECURITY + DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + #endif +@@ -2016,7 +2005,7 @@ + #ifdef CONFIG_SCHEDSTATS + INF("schedstat", S_IRUGO, pid_schedstat), + #endif +-#ifdef CONFIG_CPUSETS ++#ifdef CONFIG_PROC_PID_CPUSET + REG("cpuset", S_IRUGO, cpuset), + #endif + INF("vinfo", S_IRUGO, pid_vx_info), +@@ -2029,6 +2018,9 @@ + #ifdef CONFIG_FAULT_INJECTION + REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), + #endif ++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) ++ REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), ++#endif + #ifdef CONFIG_TASK_IO_ACCOUNTING + INF("io", S_IRUGO, pid_io_accounting), + #endif +@@ -2285,17 +2277,21 @@ + REG("numa_maps", S_IRUGO, numa_maps), + #endif + REG("mem", S_IRUSR|S_IWUSR, mem), +-#ifdef CONFIG_SECCOMP +- REG("seccomp", S_IRUSR|S_IWUSR, seccomp), +-#endif + LNK("cwd", cwd), + LNK("root", root), + LNK("exe", exe), + REG("mounts", S_IRUGO, mounts), + #ifdef CONFIG_MMU ++#ifdef CONFIG_PROC_CLEAR_REFS + REG("clear_refs", S_IWUSR, clear_refs), ++#endif ++#ifdef CONFIG_PROC_SMAPS + REG("smaps", S_IRUGO, smaps), + #endif ++#ifdef CONFIG_PROC_PAGEMAP ++ REG("pagemap", S_IRUSR, pagemap), ++#endif ++#endif + #ifdef CONFIG_SECURITY + DIR("attr", S_IRUGO|S_IXUGO, attr_dir), + #endif +@@ -2305,9 +2301,12 @@ + #ifdef CONFIG_SCHEDSTATS + INF("schedstat", S_IRUGO, pid_schedstat), + #endif +-#ifdef CONFIG_CPUSETS ++#ifdef CONFIG_PROC_PID_CPUSET + REG("cpuset", S_IRUGO, cpuset), + #endif ++#ifdef CONFIG_CONTAINERS ++ REG("container", S_IRUGO, container), ++#endif + INF("oom_score", S_IRUGO, oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), + #ifdef CONFIG_AUDITSYSCALL +diff -Nurb linux-2.6.22-570/fs/proc/generic.c linux-2.6.22-591/fs/proc/generic.c +--- linux-2.6.22-570/fs/proc/generic.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/generic.c 2007-12-21 15:36:12.000000000 -0500 +@@ -74,7 
+74,7 @@ + nbytes = MAX_NON_LFS - pos; + + dp = PDE(inode); +- if (!(page = (char*) __get_free_page(GFP_KERNEL))) ++ if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) + return -ENOMEM; + + while ((nbytes > 0) && !eof) { +diff -Nurb linux-2.6.22-570/fs/proc/internal.h linux-2.6.22-591/fs/proc/internal.h +--- linux-2.6.22-570/fs/proc/internal.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/internal.h 2007-12-21 15:36:14.000000000 -0500 +@@ -17,6 +17,11 @@ + #else + static inline void proc_sys_init(void) { } + #endif ++#ifdef CONFIG_NET ++extern int proc_net_init(void); ++#else ++static inline int proc_net_init(void) { return 0; } ++#endif + + struct vmalloc_info { + unsigned long used; +@@ -46,15 +51,13 @@ + extern int proc_tgid_stat(struct task_struct *, char *); + extern int proc_pid_status(struct task_struct *, char *); + extern int proc_pid_statm(struct task_struct *, char *); ++extern loff_t mem_lseek(struct file * file, loff_t offset, int orig); + + extern const struct file_operations proc_maps_operations; + extern const struct file_operations proc_numa_maps_operations; + extern const struct file_operations proc_smaps_operations; +- +-extern const struct file_operations proc_maps_operations; +-extern const struct file_operations proc_numa_maps_operations; +-extern const struct file_operations proc_smaps_operations; +- ++extern const struct file_operations proc_clear_refs_operations; ++extern const struct file_operations proc_pagemap_operations; + + void free_proc_entry(struct proc_dir_entry *de); + +diff -Nurb linux-2.6.22-570/fs/proc/proc_misc.c linux-2.6.22-591/fs/proc/proc_misc.c +--- linux-2.6.22-570/fs/proc/proc_misc.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/proc_misc.c 2007-12-21 15:36:14.000000000 -0500 +@@ -122,6 +122,7 @@ + cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + + do_posix_clock_monotonic_gettime(&uptime); ++ monotonic_to_bootbased(&uptime); + cputime_to_timespec(idletime, &idle); + if (vx_flags(VXF_VIRT_UPTIME, 0)) + vx_vsi_uptime(&uptime, &idle); +@@ -463,12 +464,14 @@ + unsigned long jif; + cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; + u64 sum = 0; ++ struct timespec boottime; + + user = nice = system = idle = iowait = + irq = softirq = steal = cputime64_zero; +- jif = - wall_to_monotonic.tv_sec; +- if (wall_to_monotonic.tv_nsec) +- --jif; ++ getboottime(&boottime); ++ jif = boottime.tv_sec; ++ if (boottime.tv_nsec) ++ ++jif; + + for_each_possible_cpu(i) { + int j; +diff -Nurb linux-2.6.22-570/fs/proc/proc_net.c linux-2.6.22-591/fs/proc/proc_net.c +--- linux-2.6.22-570/fs/proc/proc_net.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/proc_net.c 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,154 @@ ++/* ++ * linux/fs/proc/net.c ++ * ++ * Copyright (C) 2007 ++ * ++ * Author: Eric Biederman ++ * ++ * proc net directory handling functions ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++static struct proc_dir_entry *proc_net_shadow; ++ ++static struct dentry *proc_net_shadow_dentry(struct dentry *parent, ++ struct proc_dir_entry *de) ++{ ++ struct dentry *shadow = NULL; ++ struct inode *inode; ++ if (!de) ++ goto out; ++ de_get(de); ++ inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de); ++ if (!inode) ++ goto out_de_put; ++ shadow = d_alloc_name(parent, de->name); ++ if (!shadow) ++ goto out_iput; ++ 
shadow->d_op = parent->d_op; /* proc_dentry_operations */ ++ d_instantiate(shadow, inode); ++out: ++ return shadow; ++out_iput: ++ iput(inode); ++out_de_put: ++ de_put(de); ++ goto out; ++} ++ ++static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd) ++{ ++ struct net *net = current->nsproxy->net_ns; ++ struct dentry *shadow; ++ shadow = proc_net_shadow_dentry(parent, net->proc_net); ++ if (!shadow) ++ return ERR_PTR(-ENOENT); ++ ++ dput(nd->dentry); ++ /* My dentry count is 1 and that should be enough as the ++ * shadow dentry is thrown away immediately. ++ */ ++ nd->dentry = shadow; ++ return NULL; ++} ++ ++static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct net *net = current->nsproxy->net_ns; ++ struct dentry *shadow; ++ ++ shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net); ++ if (!shadow) ++ return ERR_PTR(-ENOENT); ++ ++ dput(nd->dentry); ++ nd->dentry = shadow; ++ ++ return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd); ++} ++ ++static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr) ++{ ++ struct net *net = current->nsproxy->net_ns; ++ struct dentry *shadow; ++ int ret; ++ ++ shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net); ++ if (!shadow) ++ return -ENOENT; ++ ret = shadow->d_inode->i_op->setattr(shadow, iattr); ++ dput(shadow); ++ return ret; ++} ++ ++static const struct file_operations proc_net_dir_operations = { ++ .read = generic_read_dir, ++}; ++ ++static struct inode_operations proc_net_dir_inode_operations = { ++ .follow_link = proc_net_follow_link, ++ .lookup = proc_net_lookup, ++ .setattr = proc_net_setattr, ++}; ++ ++ ++static int proc_net_ns_init(struct net *net) ++{ ++ struct proc_dir_entry *netd, *net_statd; ++ ++ netd = proc_mkdir("net", &net->proc_net_root); ++ if (!netd) ++ return -EEXIST; ++ ++ net_statd = proc_mkdir("stat", netd); ++ if (!net_statd) { ++ remove_proc_entry("net", &net->proc_net_root); ++ return -EEXIST; ++ } ++ ++ netd->data = net; ++ net_statd->data = net; ++ net->proc_net_root.data = net; ++ net->proc_net = netd; ++ net->proc_net_stat = net_statd; ++ ++ return 0; ++} ++ ++static void proc_net_ns_exit(struct net *net) ++{ ++ remove_proc_entry("stat", net->proc_net); ++ remove_proc_entry("net", &net->proc_net_root); ++ ++} ++ ++struct pernet_operations proc_net_ns_ops = { ++ .init = proc_net_ns_init, ++ .exit = proc_net_ns_exit, ++}; ++ ++int proc_net_init(void) ++{ ++ proc_net_shadow = proc_mkdir("net", NULL); ++ proc_net_shadow->proc_iops = &proc_net_dir_inode_operations; ++ proc_net_shadow->proc_fops = &proc_net_dir_operations; ++ ++ return register_pernet_subsys(&proc_net_ns_ops); ++} +diff -Nurb linux-2.6.22-570/fs/proc/root.c linux-2.6.22-591/fs/proc/root.c +--- linux-2.6.22-570/fs/proc/root.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/proc/root.c 2007-12-21 15:36:14.000000000 -0500 +@@ -21,11 +21,11 @@ + + #include "internal.h" + +-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver; + struct proc_dir_entry *proc_virtual; + + extern void proc_vx_init(void); + ++struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; + static int proc_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) + { +@@ -64,8 +64,8 @@ + return; + } + proc_misc_init(); +- proc_net = proc_mkdir("net", NULL); +- proc_net_stat = proc_mkdir("net/stat", NULL); ++ ++ proc_net_init(); + + #ifdef 
CONFIG_SYSVIPC + proc_mkdir("sysvipc", NULL); +@@ -163,7 +163,5 @@ + EXPORT_SYMBOL(remove_proc_entry); + EXPORT_SYMBOL(proc_root); + EXPORT_SYMBOL(proc_root_fs); +-EXPORT_SYMBOL(proc_net); +-EXPORT_SYMBOL(proc_net_stat); + EXPORT_SYMBOL(proc_bus); + EXPORT_SYMBOL(proc_root_driver); +diff -Nurb linux-2.6.22-570/fs/proc/task_mmu.c linux-2.6.22-591/fs/proc/task_mmu.c +--- linux-2.6.22-570/fs/proc/task_mmu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/proc/task_mmu.c 2007-12-21 15:36:12.000000000 -0500 +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -114,24 +115,123 @@ + seq_printf(m, "%*c", len, ' '); + } + +-struct mem_size_stats ++static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) + { +- unsigned long resident; +- unsigned long shared_clean; +- unsigned long shared_dirty; +- unsigned long private_clean; +- unsigned long private_dirty; +- unsigned long referenced; +-}; ++ if (vma && vma != priv->tail_vma) { ++ struct mm_struct *mm = vma->vm_mm; ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ } ++} + +-struct pmd_walker { +- struct vm_area_struct *vma; +- void *private; +- void (*action)(struct vm_area_struct *, pmd_t *, unsigned long, +- unsigned long, void *); +-}; ++static void *m_start(struct seq_file *m, loff_t *pos) ++{ ++ struct proc_maps_private *priv = m->private; ++ unsigned long last_addr = m->version; ++ struct mm_struct *mm; ++ struct vm_area_struct *vma, *tail_vma = NULL; ++ loff_t l = *pos; ++ ++ /* Clear the per syscall fields in priv */ ++ priv->task = NULL; ++ priv->tail_vma = NULL; ++ ++ /* ++ * We remember last_addr rather than next_addr to hit with ++ * mmap_cache most of the time. We have zero last_addr at ++ * the beginning and also after lseek. We will have -1 last_addr ++ * after the end of the vmas. ++ */ ++ ++ if (last_addr == -1UL) ++ return NULL; ++ ++ priv->task = get_pid_task(priv->pid, PIDTYPE_PID); ++ if (!priv->task) ++ return NULL; ++ ++ mm = get_task_mm(priv->task); ++ if (!mm) ++ return NULL; ++ ++ priv->tail_vma = tail_vma = get_gate_vma(priv->task); ++ down_read(&mm->mmap_sem); ++ ++ /* Start with last addr hint */ ++ if (last_addr && (vma = find_vma(mm, last_addr))) { ++ vma = vma->vm_next; ++ goto out; ++ } ++ ++ /* ++ * Check the vma index is within the range and do ++ * sequential scan until m_index. ++ */ ++ vma = NULL; ++ if ((unsigned long)l < mm->map_count) { ++ vma = mm->mmap; ++ while (l-- && vma) ++ vma = vma->vm_next; ++ goto out; ++ } ++ ++ if (l != mm->map_count) ++ tail_vma = NULL; /* After gate vma */ ++ ++out: ++ if (vma) ++ return vma; ++ ++ /* End of vmas has been reached */ ++ m->version = (tail_vma != NULL)? 0: -1UL; ++ up_read(&mm->mmap_sem); ++ mmput(mm); ++ return tail_vma; ++} ++ ++static void *m_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct proc_maps_private *priv = m->private; ++ struct vm_area_struct *vma = v; ++ struct vm_area_struct *tail_vma = priv->tail_vma; ++ ++ (*pos)++; ++ if (vma && (vma != tail_vma) && vma->vm_next) ++ return vma->vm_next; ++ vma_stop(priv, vma); ++ return (vma != tail_vma)? 
tail_vma: NULL; ++} ++ ++static void m_stop(struct seq_file *m, void *v) ++{ ++ struct proc_maps_private *priv = m->private; ++ struct vm_area_struct *vma = v; ++ ++ vma_stop(priv, vma); ++ if (priv->task) ++ put_task_struct(priv->task); ++} ++ ++static int do_maps_open(struct inode *inode, struct file *file, ++ struct seq_operations *ops) ++{ ++ struct proc_maps_private *priv; ++ int ret = -ENOMEM; ++ priv = kzalloc(sizeof(*priv), GFP_KERNEL); ++ if (priv) { ++ priv->pid = proc_pid(inode); ++ ret = seq_open(file, ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = priv; ++ } else { ++ kfree(priv); ++ } ++ } ++ return ret; ++} + +-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss) ++static int show_map(struct seq_file *m, void *v) + { + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; +@@ -191,38 +291,47 @@ + } + seq_putc(m, '\n'); + +- if (mss) +- seq_printf(m, +- "Size: %8lu kB\n" +- "Rss: %8lu kB\n" +- "Shared_Clean: %8lu kB\n" +- "Shared_Dirty: %8lu kB\n" +- "Private_Clean: %8lu kB\n" +- "Private_Dirty: %8lu kB\n" +- "Referenced: %8lu kB\n", +- (vma->vm_end - vma->vm_start) >> 10, +- mss->resident >> 10, +- mss->shared_clean >> 10, +- mss->shared_dirty >> 10, +- mss->private_clean >> 10, +- mss->private_dirty >> 10, +- mss->referenced >> 10); +- + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + return 0; + } + +-static int show_map(struct seq_file *m, void *v) ++static struct seq_operations proc_pid_maps_op = { ++ .start = m_start, ++ .next = m_next, ++ .stop = m_stop, ++ .show = show_map ++}; ++ ++static int maps_open(struct inode *inode, struct file *file) + { +- return show_map_internal(m, v, NULL); ++ return do_maps_open(inode, file, &proc_pid_maps_op); + } + +-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd, +- unsigned long addr, unsigned long end, ++const struct file_operations proc_maps_operations = { ++ .open = maps_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++ ++#ifdef CONFIG_PROC_SMAPS ++struct mem_size_stats ++{ ++ struct vm_area_struct *vma; ++ unsigned long resident; ++ unsigned long shared_clean; ++ unsigned long shared_dirty; ++ unsigned long private_clean; ++ unsigned long private_dirty; ++ unsigned long referenced; ++}; ++ ++static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + void *private) + { + struct mem_size_stats *mss = private; ++ struct vm_area_struct *vma = mss->vma; + pte_t *pte, ptent; + spinlock_t *ptl; + struct page *page; +@@ -256,12 +365,71 @@ + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); ++ return 0; + } + +-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd, +- unsigned long addr, unsigned long end, +- void *private) ++static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range }; ++ ++static int show_smap(struct seq_file *m, void *v) + { ++ struct vm_area_struct *vma = v; ++ struct mem_size_stats mss; ++ int ret; ++ ++ memset(&mss, 0, sizeof mss); ++ mss.vma = vma; ++ if (vma->vm_mm && !is_vm_hugetlb_page(vma)) ++ walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end, ++ &smaps_walk, &mss); ++ ++ ret = show_map(m, v); ++ if (ret) ++ return ret; ++ ++ seq_printf(m, ++ "Size: %8lu kB\n" ++ "Rss: %8lu kB\n" ++ "Shared_Clean: %8lu kB\n" ++ "Shared_Dirty: %8lu kB\n" ++ "Private_Clean: %8lu kB\n" ++ "Private_Dirty: %8lu kB\n" ++ "Referenced: %8lu kB\n", ++ 
(vma->vm_end - vma->vm_start) >> 10, ++ mss.resident >> 10, ++ mss.shared_clean >> 10, ++ mss.shared_dirty >> 10, ++ mss.private_clean >> 10, ++ mss.private_dirty >> 10, ++ mss.referenced >> 10); ++ ++ return ret; ++} ++ ++static struct seq_operations proc_pid_smaps_op = { ++ .start = m_start, ++ .next = m_next, ++ .stop = m_stop, ++ .show = show_smap ++}; ++ ++static int smaps_open(struct inode *inode, struct file *file) ++{ ++ return do_maps_open(inode, file, &proc_pid_smaps_op); ++} ++ ++const struct file_operations proc_smaps_operations = { ++ .open = smaps_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release_private, ++}; ++#endif ++ ++#ifdef CONFIG_PROC_CLEAR_REFS ++static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, ++ unsigned long end, void *private) ++{ ++ struct vm_area_struct *vma = private; + pte_t *pte, ptent; + spinlock_t *ptl; + struct page *page; +@@ -282,236 +450,52 @@ + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); ++ return 0; + } + +-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud, +- unsigned long addr, unsigned long end) +-{ +- pmd_t *pmd; +- unsigned long next; +- +- for (pmd = pmd_offset(pud, addr); addr != end; +- pmd++, addr = next) { +- next = pmd_addr_end(addr, end); +- if (pmd_none_or_clear_bad(pmd)) +- continue; +- walker->action(walker->vma, pmd, addr, next, walker->private); +- } +-} +- +-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd, +- unsigned long addr, unsigned long end) +-{ +- pud_t *pud; +- unsigned long next; +- +- for (pud = pud_offset(pgd, addr); addr != end; +- pud++, addr = next) { +- next = pud_addr_end(addr, end); +- if (pud_none_or_clear_bad(pud)) +- continue; +- walk_pmd_range(walker, pud, addr, next); +- } +-} +- +-/* +- * walk_page_range - walk the page tables of a VMA with a callback +- * @vma - VMA to walk +- * @action - callback invoked for every bottom-level (PTE) page table +- * @private - private data passed to the callback function +- * +- * Recursively walk the page table for the memory area in a VMA, calling +- * a callback for every bottom-level (PTE) page table. 
+- */ +-static inline void walk_page_range(struct vm_area_struct *vma, +- void (*action)(struct vm_area_struct *, +- pmd_t *, unsigned long, +- unsigned long, void *), +- void *private) +-{ +- unsigned long addr = vma->vm_start; +- unsigned long end = vma->vm_end; +- struct pmd_walker walker = { +- .vma = vma, +- .private = private, +- .action = action, +- }; +- pgd_t *pgd; +- unsigned long next; +- +- for (pgd = pgd_offset(vma->vm_mm, addr); addr != end; +- pgd++, addr = next) { +- next = pgd_addr_end(addr, end); +- if (pgd_none_or_clear_bad(pgd)) +- continue; +- walk_pud_range(&walker, pgd, addr, next); +- } +-} +- +-static int show_smap(struct seq_file *m, void *v) +-{ +- struct vm_area_struct *vma = v; +- struct mem_size_stats mss; ++static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range }; + +- memset(&mss, 0, sizeof mss); +- if (vma->vm_mm && !is_vm_hugetlb_page(vma)) +- walk_page_range(vma, smaps_pte_range, &mss); +- return show_map_internal(m, v, &mss); +-} +- +-void clear_refs_smap(struct mm_struct *mm) ++static ssize_t clear_refs_write(struct file *file, const char __user *buf, ++ size_t count, loff_t *ppos) + { ++ struct task_struct *task; ++ char buffer[13], *end; ++ struct mm_struct *mm; + struct vm_area_struct *vma; + ++ memset(buffer, 0, sizeof(buffer)); ++ if (count > sizeof(buffer) - 1) ++ count = sizeof(buffer) - 1; ++ if (copy_from_user(buffer, buf, count)) ++ return -EFAULT; ++ if (!simple_strtol(buffer, &end, 0)) ++ return -EINVAL; ++ if (*end == '\n') ++ end++; ++ task = get_proc_task(file->f_path.dentry->d_inode); ++ if (!task) ++ return -ESRCH; ++ mm = get_task_mm(task); ++ if (mm) { + down_read(&mm->mmap_sem); + for (vma = mm->mmap; vma; vma = vma->vm_next) +- if (vma->vm_mm && !is_vm_hugetlb_page(vma)) +- walk_page_range(vma, clear_refs_pte_range, NULL); ++ if (!is_vm_hugetlb_page(vma)) ++ walk_page_range(mm, vma->vm_start, vma->vm_end, ++ &clear_refs_walk, vma); + flush_tlb_mm(mm); + up_read(&mm->mmap_sem); +-} +- +-static void *m_start(struct seq_file *m, loff_t *pos) +-{ +- struct proc_maps_private *priv = m->private; +- unsigned long last_addr = m->version; +- struct mm_struct *mm; +- struct vm_area_struct *vma, *tail_vma = NULL; +- loff_t l = *pos; +- +- /* Clear the per syscall fields in priv */ +- priv->task = NULL; +- priv->tail_vma = NULL; +- +- /* +- * We remember last_addr rather than next_addr to hit with +- * mmap_cache most of the time. We have zero last_addr at +- * the beginning and also after lseek. We will have -1 last_addr +- * after the end of the vmas. +- */ +- +- if (last_addr == -1UL) +- return NULL; +- +- priv->task = get_pid_task(priv->pid, PIDTYPE_PID); +- if (!priv->task) +- return NULL; +- +- mm = get_task_mm(priv->task); +- if (!mm) +- return NULL; +- +- priv->tail_vma = tail_vma = get_gate_vma(priv->task); +- down_read(&mm->mmap_sem); +- +- /* Start with last addr hint */ +- if (last_addr && (vma = find_vma(mm, last_addr))) { +- vma = vma->vm_next; +- goto out; +- } +- +- /* +- * Check the vma index is within the range and do +- * sequential scan until m_index. +- */ +- vma = NULL; +- if ((unsigned long)l < mm->map_count) { +- vma = mm->mmap; +- while (l-- && vma) +- vma = vma->vm_next; +- goto out; +- } +- +- if (l != mm->map_count) +- tail_vma = NULL; /* After gate vma */ +- +-out: +- if (vma) +- return vma; +- +- /* End of vmas has been reached */ +- m->version = (tail_vma != NULL)? 
0: -1UL; +- up_read(&mm->mmap_sem); +- mmput(mm); +- return tail_vma; +-} +- +-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma) +-{ +- if (vma && vma != priv->tail_vma) { +- struct mm_struct *mm = vma->vm_mm; +- up_read(&mm->mmap_sem); + mmput(mm); + } ++ put_task_struct(task); ++ if (end - buffer == 0) ++ return -EIO; ++ return end - buffer; + } + +-static void *m_next(struct seq_file *m, void *v, loff_t *pos) +-{ +- struct proc_maps_private *priv = m->private; +- struct vm_area_struct *vma = v; +- struct vm_area_struct *tail_vma = priv->tail_vma; +- +- (*pos)++; +- if (vma && (vma != tail_vma) && vma->vm_next) +- return vma->vm_next; +- vma_stop(priv, vma); +- return (vma != tail_vma)? tail_vma: NULL; +-} +- +-static void m_stop(struct seq_file *m, void *v) +-{ +- struct proc_maps_private *priv = m->private; +- struct vm_area_struct *vma = v; +- +- vma_stop(priv, vma); +- if (priv->task) +- put_task_struct(priv->task); +-} +- +-static struct seq_operations proc_pid_maps_op = { +- .start = m_start, +- .next = m_next, +- .stop = m_stop, +- .show = show_map +-}; +- +-static struct seq_operations proc_pid_smaps_op = { +- .start = m_start, +- .next = m_next, +- .stop = m_stop, +- .show = show_smap +-}; +- +-static int do_maps_open(struct inode *inode, struct file *file, +- struct seq_operations *ops) +-{ +- struct proc_maps_private *priv; +- int ret = -ENOMEM; +- priv = kzalloc(sizeof(*priv), GFP_KERNEL); +- if (priv) { +- priv->pid = proc_pid(inode); +- ret = seq_open(file, ops); +- if (!ret) { +- struct seq_file *m = file->private_data; +- m->private = priv; +- } else { +- kfree(priv); +- } +- } +- return ret; +-} +- +-static int maps_open(struct inode *inode, struct file *file) +-{ +- return do_maps_open(inode, file, &proc_pid_maps_op); +-} +- +-const struct file_operations proc_maps_operations = { +- .open = maps_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = seq_release_private, ++const struct file_operations proc_clear_refs_operations = { ++ .write = clear_refs_write, + }; ++#endif + + #ifdef CONFIG_NUMA + extern int show_numa_map(struct seq_file *m, void *v); +@@ -547,14 +531,211 @@ + }; + #endif + +-static int smaps_open(struct inode *inode, struct file *file) ++#ifdef CONFIG_PROC_PAGEMAP ++struct pagemapread { ++ struct mm_struct *mm; ++ unsigned long next; ++ unsigned long *buf; ++ pte_t *ptebuf; ++ unsigned long pos; ++ size_t count; ++ int index; ++ char __user *out; ++}; ++ ++static int flush_pagemap(struct pagemapread *pm) + { +- return do_maps_open(inode, file, &proc_pid_smaps_op); ++ int n = min(pm->count, pm->index * sizeof(unsigned long)); ++ if (copy_to_user(pm->out, pm->buf, n)) ++ return -EFAULT; ++ pm->out += n; ++ pm->pos += n; ++ pm->count -= n; ++ pm->index = 0; ++ cond_resched(); ++ return 0; + } + +-const struct file_operations proc_smaps_operations = { +- .open = smaps_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = seq_release_private, ++static int add_to_pagemap(unsigned long addr, unsigned long pfn, ++ struct pagemapread *pm) ++{ ++ pm->buf[pm->index++] = pfn; ++ pm->next = addr + PAGE_SIZE; ++ if (pm->index * sizeof(unsigned long) >= PAGE_SIZE || ++ pm->index * sizeof(unsigned long) >= pm->count) ++ return flush_pagemap(pm); ++ return 0; ++} ++ ++static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ++ void *private) ++{ ++ struct pagemapread *pm = private; ++ pte_t *pte; ++ int err; ++ ++ pte = pte_offset_map(pmd, addr); ++ ++#ifdef CONFIG_HIGHPTE ++ /* copy PTE 
directory to temporary buffer and unmap it */ ++ memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte); ++ pte_unmap(pte); ++ pte = pm->ptebuf; ++#endif ++ ++ for (; addr != end; pte++, addr += PAGE_SIZE) { ++ if (addr < pm->next) ++ continue; ++ if (!pte_present(*pte)) ++ err = add_to_pagemap(addr, -1, pm); ++ else ++ err = add_to_pagemap(addr, pte_pfn(*pte), pm); ++ if (err) ++ return err; ++ } ++ ++#ifndef CONFIG_HIGHPTE ++ pte_unmap(pte - 1); ++#endif ++ ++ return 0; ++} ++ ++static int pagemap_fill(struct pagemapread *pm, unsigned long end) ++{ ++ int ret; ++ ++ while (pm->next != end) { ++ ret = add_to_pagemap(pm->next, -1UL, pm); ++ if (ret) ++ return ret; ++ } ++ return 0; ++} ++ ++static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range }; ++ ++/* ++ * /proc/pid/pagemap - an array mapping virtual pages to pfns ++ * ++ * For each page in the address space, this file contains one long ++ * representing the corresponding physical page frame number (PFN) or ++ * -1 if the page isn't present. This allows determining precisely ++ * which pages are mapped and comparing mapped pages between ++ * processes. ++ * ++ * Efficient users of this interface will use /proc/pid/maps to ++ * determine which areas of memory are actually mapped and llseek to ++ * skip over unmapped regions. ++ * ++ * The first 4 bytes of this file form a simple header: ++ * ++ * first byte: 0 for big endian, 1 for little ++ * second byte: page shift (eg 12 for 4096 byte pages) ++ * third byte: entry size in bytes (currently either 4 or 8) ++ * fourth byte: header size ++ */ ++static ssize_t pagemap_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); ++ unsigned long src = *ppos; ++ unsigned long *page; ++ unsigned long addr, end, vend, svpfn, evpfn; ++ struct mm_struct *mm; ++ struct vm_area_struct *vma; ++ struct pagemapread pm; ++ int ret = -ESRCH; ++ ++ if (!task) ++ goto out_no_task; ++ ++ ret = -EACCES; ++ if (!ptrace_may_attach(task)) ++ goto out; ++ ++ ret = -EIO; ++ svpfn = src / sizeof(unsigned long) - 1; ++ addr = PAGE_SIZE * svpfn; ++ if ((svpfn + 1) * sizeof(unsigned long) != src) ++ goto out; ++ evpfn = min((src + count) / sizeof(unsigned long), ++ ((~0UL) >> PAGE_SHIFT) + 1); ++ count = (evpfn - svpfn) * sizeof(unsigned long); ++ end = PAGE_SIZE * evpfn; ++ ++ ret = -ENOMEM; ++ page = kzalloc(PAGE_SIZE, GFP_USER); ++ if (!page) ++ goto out; ++ ++#ifdef CONFIG_HIGHPTE ++ pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER); ++ if (!pm.ptebuf) ++ goto out_free; ++#endif ++ ++ ret = 0; ++ mm = get_task_mm(task); ++ if (!mm) ++ goto out_freepte; ++ ++ pm.mm = mm; ++ pm.next = addr; ++ pm.buf = page; ++ pm.pos = src; ++ pm.count = count; ++ pm.index = 0; ++ pm.out = buf; ++ ++ if (svpfn == -1) { ++ add_to_pagemap(pm.next, 0, &pm); ++ ((char *)page)[0] = (ntohl(1) != 1); ++ ((char *)page)[1] = PAGE_SHIFT; ++ ((char *)page)[2] = sizeof(unsigned long); ++ ((char *)page)[3] = sizeof(unsigned long); ++ } ++ ++ down_read(&mm->mmap_sem); ++ vma = find_vma(mm, pm.next); ++ while (pm.count > 0 && vma) { ++ if (!ptrace_may_attach(task)) { ++ ret = -EIO; ++ goto out_mm; ++ } ++ vend = min(vma->vm_start - 1, end - 1) + 1; ++ ret = pagemap_fill(&pm, vend); ++ if (ret || !pm.count) ++ break; ++ vend = min(vma->vm_end - 1, end - 1) + 1; ++ ret = walk_page_range(mm, vma->vm_start, vend, ++ &pagemap_walk, &pm); ++ vma = vma->vm_next; ++ } ++ up_read(&mm->mmap_sem); ++ ++ ret = pagemap_fill(&pm, end); ++ 
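++ /*
++ * Pad the tail of the requested range with "not present" (-1)
++ * entries, then report back how far the read actually got.
++ */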
++ *ppos = pm.pos; ++ if (!ret) ++ ret = pm.pos - src; ++ ++out_mm: ++ mmput(mm); ++out_freepte: ++#ifdef CONFIG_HIGHPTE ++ kfree(pm.ptebuf); ++out_free: ++#endif ++ kfree(page); ++out: ++ put_task_struct(task); ++out_no_task: ++ return ret; ++} ++ ++const struct file_operations proc_pagemap_operations = { ++ .llseek = mem_lseek, /* borrow this */ ++ .read = pagemap_read, + }; ++#endif +diff -Nurb linux-2.6.22-570/fs/ramfs/inode.c linux-2.6.22-591/fs/ramfs/inode.c +--- linux-2.6.22-570/fs/ramfs/inode.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/ramfs/inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -60,6 +60,7 @@ + inode->i_blocks = 0; + inode->i_mapping->a_ops = &ramfs_aops; + inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; ++ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: +diff -Nurb linux-2.6.22-570/fs/revoke.c linux-2.6.22-591/fs/revoke.c +--- linux-2.6.22-570/fs/revoke.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/revoke.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,777 @@ ++/* ++ * fs/revoke.c - Invalidate all current open file descriptors of an inode. ++ * ++ * Copyright (C) 2006-2007 Pekka Enberg ++ * ++ * This file is released under the GPLv2. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/** ++ * fileset - an array of file pointers. ++ * @files: the array of file pointers ++ * @nr: number of elements in the array ++ * @end: index to next unused file pointer ++ */ ++struct fileset { ++ struct file **files; ++ unsigned long nr; ++ unsigned long end; ++}; ++ ++/** ++ * revoke_details - details of the revoke operation ++ * @inode: invalidate open file descriptors of this inode ++ * @fset: set of files that point to a revoked inode ++ * @restore_start: index to the first file pointer that is currently in ++ * use by a file descriptor but the real file has not ++ * been revoked ++ */ ++struct revoke_details { ++ struct fileset *fset; ++ unsigned long restore_start; ++}; ++ ++static struct kmem_cache *revokefs_inode_cache; ++ ++static inline bool fset_is_full(struct fileset *set) ++{ ++ return set->nr == set->end; ++} ++ ++static inline struct file *fset_get_filp(struct fileset *set) ++{ ++ return set->files[set->end++]; ++} ++ ++static struct fileset *alloc_fset(unsigned long size) ++{ ++ struct fileset *fset; ++ ++ fset = kzalloc(sizeof *fset, GFP_KERNEL); ++ if (!fset) ++ return NULL; ++ ++ fset->files = kcalloc(size, sizeof(struct file *), GFP_KERNEL); ++ if (!fset->files) { ++ kfree(fset); ++ return NULL; ++ } ++ fset->nr = size; ++ return fset; ++} ++ ++static void free_fset(struct fileset *fset) ++{ ++ int i; ++ ++ for (i = fset->end; i < fset->nr; i++) ++ fput(fset->files[i]); ++ ++ kfree(fset->files); ++ kfree(fset); ++} ++ ++/* ++ * Revoked file descriptors point to inodes in the revokefs filesystem. 
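++ * Once a descriptor has been switched over to one of these inodes,
++ * nothing done through it can reach the original file any more.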
++ */ ++static struct vfsmount *revokefs_mnt; ++ ++static struct file *get_revoked_file(void) ++{ ++ struct dentry *dentry; ++ struct inode *inode; ++ struct file *filp; ++ struct qstr name; ++ ++ filp = get_empty_filp(); ++ if (!filp) ++ goto err; ++ ++ inode = new_inode(revokefs_mnt->mnt_sb); ++ if (!inode) ++ goto err_inode; ++ ++ name.name = "revoked_file"; ++ name.len = strlen(name.name); ++ dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name); ++ if (!dentry) ++ goto err_dentry; ++ ++ d_instantiate(dentry, inode); ++ ++ filp->f_mapping = inode->i_mapping; ++ filp->f_dentry = dget(dentry); ++ filp->f_vfsmnt = mntget(revokefs_mnt); ++ filp->f_op = fops_get(inode->i_fop); ++ filp->f_pos = 0; ++ ++ return filp; ++ ++ err_dentry: ++ iput(inode); ++ err_inode: ++ fput(filp); ++ err: ++ return NULL; ++} ++ ++static inline bool can_revoke_file(struct file *file, struct inode *inode, ++ struct file *to_exclude) ++{ ++ if (!file || file == to_exclude) ++ return false; ++ ++ return file->f_dentry->d_inode == inode; ++} ++ ++/* ++ * LOCKING: task_lock(owner) ++ */ ++static int revoke_fds(struct task_struct *owner, ++ struct inode *inode, ++ struct file *to_exclude, struct fileset *fset) ++{ ++ struct files_struct *files; ++ struct fdtable *fdt; ++ unsigned int fd; ++ int err = 0; ++ ++ files = get_files_struct(owner); ++ if (!files) ++ goto out; ++ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ ++ for (fd = 0; fd < fdt->max_fds; fd++) { ++ struct revokefs_inode_info *info; ++ struct file *filp, *new_filp; ++ struct inode *new_inode; ++ ++ filp = fcheck_files(files, fd); ++ if (!can_revoke_file(filp, inode, to_exclude)) ++ continue; ++ ++ if (!filp->f_op->revoke) { ++ err = -EOPNOTSUPP; ++ goto failed; ++ } ++ ++ if (fset_is_full(fset)) { ++ err = -ENOMEM; ++ goto failed; ++ } ++ ++ new_filp = fset_get_filp(fset); ++ ++ /* ++ * Replace original struct file pointer with a pointer to ++ * a 'revoked file.' After this point, we don't need to worry ++ * about racing with sys_close or sys_dup. ++ */ ++ rcu_assign_pointer(fdt->fd[fd], new_filp); ++ ++ /* ++ * Hold on to task until we can take down the file and its ++ * mmap. 
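++ * The reference is dropped again via put_task_struct() in
++ * revoke_files() or restore_file().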
++ */ ++ get_task_struct(owner); ++ ++ new_inode = new_filp->f_dentry->d_inode; ++ make_revoked_inode(new_inode, inode->i_mode & S_IFMT); ++ ++ info = revokefs_i(new_inode); ++ info->fd = fd; ++ info->file = filp; ++ info->owner = owner; ++ } ++ failed: ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++ out: ++ return err; ++} ++ ++static inline bool can_revoke_vma(struct vm_area_struct *vma, ++ struct inode *inode, struct file *to_exclude) ++{ ++ struct file *file = vma->vm_file; ++ ++ if (vma->vm_flags & VM_REVOKED) ++ return false; ++ ++ if (!file || file == to_exclude) ++ return false; ++ ++ return file->f_path.dentry->d_inode == inode; ++} ++ ++static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode, ++ struct file *to_exclude) ++{ ++ struct mm_struct *mm = tsk->mm; ++ struct vm_area_struct *vma; ++ int err = 0; ++ ++ down_read(&mm->mmap_sem); ++ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { ++ int ret; ++ ++ if (vma->vm_flags & VM_SHARED) ++ continue; ++ ++ if (!can_revoke_vma(vma, inode, to_exclude)) ++ continue; ++ ++ ret = get_user_pages(tsk, tsk->mm, vma->vm_start, ++ vma_pages(vma), 1, 1, NULL, NULL); ++ if (ret < 0) { ++ err = ret; ++ break; ++ } ++ ++ unlink_file_vma(vma); ++ fput(vma->vm_file); ++ vma->vm_file = NULL; ++ } ++ up_read(&mm->mmap_sem); ++ return err; ++} ++ ++static int revoke_break_cow(struct fileset *fset, struct inode *inode, ++ struct file *to_exclude) ++{ ++ unsigned long i; ++ int err = 0; ++ ++ for (i = 0; i < fset->end; i++) { ++ struct revokefs_inode_info *info; ++ struct file *this; ++ ++ this = fset->files[i]; ++ info = revokefs_i(this->f_dentry->d_inode); ++ ++ err = __revoke_break_cow(info->owner, inode, to_exclude); ++ if (err) ++ break; ++ } ++ return err; ++} ++ ++/* ++ * LOCKING: down_write(&mm->mmap_sem) ++ * -> spin_lock(&mapping->i_mmap_lock) ++ */ ++static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details) ++{ ++ unsigned long restart_addr, start_addr, end_addr; ++ int need_break; ++ ++ start_addr = vma->vm_start; ++ end_addr = vma->vm_end; ++ ++ again: ++ restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, ++ details); ++ ++ need_break = need_resched() || need_lockbreak(details->i_mmap_lock); ++ if (need_break) ++ goto out_need_break; ++ ++ if (restart_addr < end_addr) { ++ start_addr = restart_addr; ++ goto again; ++ } ++ vma->vm_flags |= VM_REVOKED; ++ return 0; ++ ++ out_need_break: ++ spin_unlock(details->i_mmap_lock); ++ cond_resched(); ++ spin_lock(details->i_mmap_lock); ++ return -EINTR; ++} ++ ++/* ++ * LOCKING: spin_lock(&mapping->i_mmap_lock) ++ */ ++static int revoke_mm(struct mm_struct *mm, struct address_space *mapping, ++ struct file *to_exclude) ++{ ++ struct vm_area_struct *vma; ++ struct zap_details details; ++ int err = 0; ++ ++ details.i_mmap_lock = &mapping->i_mmap_lock; ++ ++ /* ++ * If ->mmap_sem is under contention, we continue scanning other ++ * mms and try again later. 
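++ * (An -EAGAIN return is what makes revoke_mapping_tree() and
++ * revoke_mapping_list() set try_again and rescan.)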
++ */ ++ if (!down_write_trylock(&mm->mmap_sem)) { ++ err = -EAGAIN; ++ goto out; ++ } ++ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { ++ if (!(vma->vm_flags & VM_SHARED)) ++ continue; ++ ++ if (!can_revoke_vma(vma, mapping->host, to_exclude)) ++ continue; ++ ++ err = revoke_vma(vma, &details); ++ if (err) ++ break; ++ ++ __unlink_file_vma(vma); ++ fput(vma->vm_file); ++ vma->vm_file = NULL; ++ } ++ up_write(&mm->mmap_sem); ++ out: ++ return err; ++} ++ ++/* ++ * LOCKING: spin_lock(&mapping->i_mmap_lock) ++ */ ++static void revoke_mapping_tree(struct address_space *mapping, ++ struct file *to_exclude) ++{ ++ struct vm_area_struct *vma; ++ struct prio_tree_iter iter; ++ int try_again = 0; ++ ++ restart: ++ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) { ++ int err; ++ ++ if (!(vma->vm_flags & VM_SHARED)) ++ continue; ++ ++ if (likely(!can_revoke_vma(vma, mapping->host, to_exclude))) ++ continue; ++ ++ err = revoke_mm(vma->vm_mm, mapping, to_exclude); ++ if (err == -EAGAIN) ++ try_again = 1; ++ ++ goto restart; ++ } ++ if (try_again) { ++ cond_resched(); ++ goto restart; ++ } ++} ++ ++/* ++ * LOCKING: spin_lock(&mapping->i_mmap_lock) ++ */ ++static void revoke_mapping_list(struct address_space *mapping, ++ struct file *to_exclude) ++{ ++ struct vm_area_struct *vma; ++ int try_again = 0; ++ ++ restart: ++ list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) { ++ int err; ++ ++ if (likely(!can_revoke_vma(vma, mapping->host, to_exclude))) ++ continue; ++ ++ err = revoke_mm(vma->vm_mm, mapping, to_exclude); ++ if (err == -EAGAIN) { ++ try_again = 1; ++ continue; ++ } ++ if (err == -EINTR) ++ goto restart; ++ } ++ if (try_again) { ++ cond_resched(); ++ goto restart; ++ } ++} ++ ++static void revoke_mapping(struct address_space *mapping, struct file *to_exclude) ++{ ++ spin_lock(&mapping->i_mmap_lock); ++ if (unlikely(!prio_tree_empty(&mapping->i_mmap))) ++ revoke_mapping_tree(mapping, to_exclude); ++ if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) ++ revoke_mapping_list(mapping, to_exclude); ++ spin_unlock(&mapping->i_mmap_lock); ++} ++ ++static void restore_file(struct revokefs_inode_info *info) ++{ ++ struct files_struct *files; ++ ++ files = get_files_struct(info->owner); ++ if (files) { ++ struct fdtable *fdt; ++ struct file *filp; ++ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ ++ filp = fdt->fd[info->fd]; ++ if (filp) ++ fput(filp); ++ ++ rcu_assign_pointer(fdt->fd[info->fd], info->file); ++ FD_SET(info->fd, fdt->close_on_exec); ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++ } ++ put_task_struct(info->owner); ++ info->owner = NULL; /* To avoid double-restore. */ ++} ++ ++static void restore_files(struct revoke_details *details) ++{ ++ unsigned long i; ++ ++ for (i = details->restore_start; i < details->fset->end; i++) { ++ struct revokefs_inode_info *info; ++ struct file *filp; ++ ++ filp = details->fset->files[i]; ++ info = revokefs_i(filp->f_dentry->d_inode); ++ ++ restore_file(info); ++ } ++} ++ ++static int revoke_files(struct revoke_details *details) ++{ ++ unsigned long i; ++ int err = 0; ++ ++ for (i = 0; i < details->fset->end; i++) { ++ struct revokefs_inode_info *info; ++ struct file *this, *filp; ++ struct inode *inode; ++ ++ this = details->fset->files[i]; ++ inode = this->f_dentry->d_inode; ++ info = revokefs_i(inode); ++ ++ /* ++ * Increase count before attempting to close file as ++ * a partially closed file can no longer be restored.
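
revoke_vma() in the previous hunk and the two scanners here share one discipline: never hold i_mmap_lock or mmap_sem across unbounded work; instead, back out with -EINTR or -EAGAIN and restart the scan from the top. A stripped-down pthread analogue of that restartable, lock-breaking scan (others_waiting() and process_chunk() are stand-ins for need_lockbreak()/need_resched() and zap_page_range()):

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-ins: whether someone else wants the lock, and one bounded
 * chunk of work (zap_page_range() in the real code). */
static int others_waiting(void) { return 0; }
static long process_chunk(long pos) { return pos + 16; }

/* Do bounded work under the lock; whenever contention is detected,
 * drop the lock, yield (cond_resched() analogue), re-take it and
 * restart from the beginning, like the -EINTR path above. */
static void scan_range(long start, long end)
{
        long pos = start;

        pthread_mutex_lock(&map_lock);
        while (pos < end) {
                pos = process_chunk(pos);
                if (others_waiting()) {
                        pthread_mutex_unlock(&map_lock);
                        sched_yield();
                        pthread_mutex_lock(&map_lock);
                        pos = start;    /* restart the scan */
                }
        }
        pthread_mutex_unlock(&map_lock);
}

int main(void)
{
        scan_range(0, 64);
        puts("scan complete");
        return 0;
}
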
++ */ ++ details->restore_start++; ++ filp = info->file; ++ err = filp->f_op->revoke(filp, inode->i_mapping); ++ put_task_struct(info->owner); ++ info->owner = NULL; /* To avoid restoring closed file. */ ++ if (err) ++ goto out; ++ } ++ out: ++ return err; ++} ++ ++/* ++ * Returns the maximum number of file descriptors pointing to an inode. ++ * ++ * LOCKING: read_lock(&tasklist_lock) ++ */ ++static unsigned long inode_fds(struct inode *inode, struct file *to_exclude) ++{ ++ struct task_struct *g, *p; ++ unsigned long nr_fds = 0; ++ ++ do_each_thread(g, p) { ++ struct files_struct *files; ++ struct fdtable *fdt; ++ unsigned int fd; ++ ++ files = get_files_struct(p); ++ if (!files) ++ continue; ++ ++ spin_lock(&files->file_lock); ++ fdt = files_fdtable(files); ++ for (fd = 0; fd < fdt->max_fds; fd++) { ++ struct file *file; ++ ++ file = fcheck_files(files, fd); ++ if (can_revoke_file(file, inode, to_exclude)) { ++ nr_fds += fdt->max_fds; ++ break; ++ } ++ } ++ spin_unlock(&files->file_lock); ++ put_files_struct(files); ++ } ++ while_each_thread(g, p); ++ return nr_fds; ++} ++ ++static struct fileset *__alloc_revoke_fset(unsigned long size) ++{ ++ struct fileset *fset; ++ int i; ++ ++ fset = alloc_fset(size); ++ if (!fset) ++ return NULL; ++ ++ for (i = 0; i < fset->nr; i++) { ++ struct file *filp; ++ ++ filp = get_revoked_file(); ++ if (!filp) ++ goto err; ++ ++ fset->files[i] = filp; ++ } ++ return fset; ++ err: ++ free_fset(fset); ++ return NULL; ++} ++ ++static struct fileset *alloc_revoke_fset(struct inode *inode, struct file *to_exclude) ++{ ++ unsigned long nr_fds; ++ ++ read_lock(&tasklist_lock); ++ nr_fds = inode_fds(inode, to_exclude); ++ read_unlock(&tasklist_lock); ++ ++ return __alloc_revoke_fset(nr_fds); ++} ++ ++static int do_revoke(struct inode *inode, struct file *to_exclude) ++{ ++ struct revoke_details details; ++ struct fileset *fset = NULL; ++ struct task_struct *g, *p; ++ int err = 0; ++ ++ if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) { ++ err = -EPERM; ++ goto out; ++ } ++ ++ retry: ++ if (signal_pending(current)) { ++ err = -ERESTARTSYS; ++ goto out; ++ } ++ ++ /* ++ * Pre-allocate memory because the first pass is done under ++ * tasklist_lock. ++ */ ++ fset = alloc_revoke_fset(inode, to_exclude); ++ if (!fset) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ read_lock(&tasklist_lock); ++ ++ /* ++ * If someone forked while we were allocating memory, try again. ++ */ ++ if (inode_fds(inode, to_exclude) > fset->nr) { ++ read_unlock(&tasklist_lock); ++ free_fset(fset); ++ goto retry; ++ } ++ ++ details.fset = fset; ++ details.restore_start = 0; ++ ++ /* ++ * First revoke the descriptors. After we are done, no one can start ++ * new operations on them. ++ */ ++ do_each_thread(g, p) { ++ err = revoke_fds(p, inode, to_exclude, fset); ++ if (err) ++ goto exit_loop; ++ } ++ while_each_thread(g, p); ++ exit_loop: ++ read_unlock(&tasklist_lock); ++ ++ if (err) ++ goto out_restore; ++ ++ /* ++ * Take down shared memory mappings. ++ */ ++ revoke_mapping(inode->i_mapping, to_exclude); ++ ++ /* ++ * Break COW for private mappings. ++ */ ++ err = revoke_break_cow(fset, inode, to_exclude); ++ if (err) ++ goto out_restore; ++ ++ /* ++ * Now, revoke the files for good. 
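
The retry loop in do_revoke() is a classic allocate-outside-the-lock pattern: size the file set, allocate with tasklist_lock dropped, then re-count under the lock and start over if a fork grew the population in the meantime. The same shape in miniature, with a pthread mutex standing in for tasklist_lock (grab_snapshot_buffer() and item_count are invented for the sketch):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static size_t item_count = 3;           /* protected by list_lock */

/* Size the buffer outside the lock, re-check under the lock, and
 * retry if the population grew meanwhile. Returns with list_lock
 * held, like the tasklist_lock in do_revoke(). */
static void **grab_snapshot_buffer(size_t *np)
{
        void **buf = NULL;
        size_t n = 0;

        for (;;) {
                pthread_mutex_lock(&list_lock);
                if (buf && item_count <= n) {
                        *np = n;
                        return buf;     /* caller works, then unlocks */
                }
                n = item_count;
                pthread_mutex_unlock(&list_lock);

                free(buf);
                buf = calloc(n ? n : 1, sizeof(*buf));
                if (!buf)
                        return NULL;
        }
}

int main(void)
{
        size_t n;
        void **buf = grab_snapshot_buffer(&n);

        if (buf) {
                printf("room for %zu items\n", n);
                pthread_mutex_unlock(&list_lock);
                free(buf);
        }
        return 0;
}
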
++ */ ++ err = revoke_files(&details); ++ if (err) ++ goto out_restore; ++ ++ out_free_table: ++ free_fset(fset); ++ out: ++ return err; ++ ++ out_restore: ++ restore_files(&details); ++ goto out_free_table; ++} ++ ++asmlinkage long sys_revokeat(int dfd, const char __user * filename) ++{ ++ struct nameidata nd; ++ int err; ++ ++ err = __user_walk_fd(dfd, filename, 0, &nd); ++ if (!err) { ++ err = do_revoke(nd.dentry->d_inode, NULL); ++ path_release(&nd); ++ } ++ return err; ++} ++ ++asmlinkage long sys_frevoke(unsigned int fd) ++{ ++ struct file *file = fget(fd); ++ int err = -EBADF; ++ ++ if (file) { ++ err = do_revoke(file->f_dentry->d_inode, file); ++ fput(file); ++ } ++ return err; ++} ++ ++int generic_file_revoke(struct file *file, struct address_space *new_mapping) ++{ ++ struct address_space *mapping = file->f_mapping; ++ int err; ++ ++ /* ++ * Flush pending writes. ++ */ ++ err = do_fsync(file, 1); ++ if (err) ++ goto out; ++ ++ file->f_mapping = new_mapping; ++ ++ /* ++ * Make pending reads fail. ++ */ ++ err = invalidate_inode_pages2(mapping); ++ ++ out: ++ return err; ++} ++EXPORT_SYMBOL(generic_file_revoke); ++ ++/* ++ * Filesystem for revoked files. ++ */ ++ ++static struct inode *revokefs_alloc_inode(struct super_block *sb) ++{ ++ struct revokefs_inode_info *info; ++ ++ info = kmem_cache_alloc(revokefs_inode_cache, GFP_KERNEL); ++ if (!info) ++ return NULL; ++ ++ return &info->vfs_inode; ++} ++ ++static void revokefs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(revokefs_inode_cache, revokefs_i(inode)); ++} ++ ++static struct super_operations revokefs_super_ops = { ++ .alloc_inode = revokefs_alloc_inode, ++ .destroy_inode = revokefs_destroy_inode, ++ .drop_inode = generic_delete_inode, ++}; ++ ++static int revokefs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, void *data, ++ struct vfsmount *mnt) ++{ ++ return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops, ++ REVOKEFS_MAGIC, mnt); ++} ++ ++static struct file_system_type revokefs_fs_type = { ++ .name = "revokefs", ++ .get_sb = revokefs_get_sb, ++ .kill_sb = kill_anon_super ++}; ++ ++static void revokefs_init_inode(void *obj, struct kmem_cache *cache, ++ unsigned long flags) ++{ ++ struct revokefs_inode_info *info = obj; ++ ++ info->owner = NULL; ++ inode_init_once(&info->vfs_inode); ++} ++ ++static int __init revokefs_init(void) ++{ ++ int err = -ENOMEM; ++ ++ revokefs_inode_cache = ++ kmem_cache_create("revokefs_inode_cache", ++ sizeof(struct revokefs_inode_info), ++ 0, ++ (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | ++ SLAB_MEM_SPREAD), revokefs_init_inode, NULL); ++ if (!revokefs_inode_cache) ++ goto out; ++ ++ err = register_filesystem(&revokefs_fs_type); ++ if (err) ++ goto err_register; ++ ++ revokefs_mnt = kern_mount(&revokefs_fs_type); ++ if (IS_ERR(revokefs_mnt)) { ++ err = PTR_ERR(revokefs_mnt); ++ goto err_mnt; ++ } ++ out: ++ return err; ++ err_mnt: ++ unregister_filesystem(&revokefs_fs_type); ++ err_register: ++ kmem_cache_destroy(revokefs_inode_cache); ++ return err; ++} ++ ++late_initcall(revokefs_init); +diff -Nurb linux-2.6.22-570/fs/revoked_inode.c linux-2.6.22-591/fs/revoked_inode.c +--- linux-2.6.22-570/fs/revoked_inode.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/revoked_inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,417 @@ ++/* ++ * fs/revoked_inode.c ++ * ++ * Copyright (C) 2007 Pekka Enberg ++ * ++ * Provide stub functions for revoked inodes. 
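
For reference, this is roughly how the new entry points would be driven from userspace. Note the syscall number is whatever the rest of this patch wires into the arch syscall table; 327 below is a placeholder, not an upstream constant:

#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Placeholder number; the real value comes from this patch's
 * syscall table changes, not from upstream headers. */
#ifndef __NR_revokeat
#define __NR_revokeat 327
#endif

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <path>\n", argv[0]);
                return 1;
        }
        /* Revoke every other descriptor and mapping of the path. */
        if (syscall(__NR_revokeat, AT_FDCWD, argv[1])) {
                perror("revokeat");
                return 1;
        }
        puts("all other descriptors and mappings revoked");
        return 0;
}
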
Based on fs/bad_inode.c which is ++ * ++ * Copyright (C) 1997 Stephen Tweedie ++ * ++ * This file is released under the GPLv2. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_read(struct file *filp, char __user * buf, ++ size_t size, loff_t * ppos) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_special_file_read(struct file *filp, char __user * buf, ++ size_t size, loff_t * ppos) ++{ ++ return 0; ++} ++ ++static ssize_t revoked_file_write(struct file *filp, const char __user * buf, ++ size_t siz, loff_t * ppos) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_aio_read(struct kiocb *iocb, ++ const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_aio_write(struct kiocb *iocb, ++ const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_readdir(struct file *filp, void *dirent, ++ filldir_t filldir) ++{ ++ return -EBADF; ++} ++ ++static unsigned int revoked_file_poll(struct file *filp, poll_table * wait) ++{ ++ return POLLERR; ++} ++ ++static int revoked_file_ioctl(struct inode *inode, struct file *filp, ++ unsigned int cmd, unsigned long arg) ++{ ++ return -EBADF; ++} ++ ++static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd, ++ unsigned long arg) ++{ ++ return -EBADF; ++} ++ ++static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_open(struct inode *inode, struct file *filp) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_flush(struct file *file, fl_owner_t id) ++{ ++ return filp_close(file, id); ++} ++ ++static int revoked_file_release(struct inode *inode, struct file *filp) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_fsync(struct file *file, struct dentry *dentry, ++ int datasync) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_fasync(int fd, struct file *filp, int on) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos, ++ size_t count, read_actor_t actor, ++ void *target) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_sendpage(struct file *file, struct page *page, ++ int off, size_t len, loff_t * pos, ++ int more) ++{ ++ return -EBADF; ++} ++ ++static unsigned long revoked_file_get_unmapped_area(struct file *file, ++ unsigned long addr, ++ unsigned long len, ++ unsigned long pgoff, ++ unsigned long flags) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_check_flags(int flags) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_dir_notify(struct file *file, unsigned long arg) ++{ ++ return -EBADF; ++} ++ ++static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe, ++ struct file *out, loff_t * ppos, ++ size_t len, unsigned int flags) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_file_splice_read(struct file 
*in, loff_t * ppos, ++ struct pipe_inode_info *pipe, ++ size_t len, unsigned int flags) ++{ ++ return -EBADF; ++} ++ ++static const struct file_operations revoked_file_ops = { ++ .llseek = revoked_file_llseek, ++ .read = revoked_file_read, ++ .write = revoked_file_write, ++ .aio_read = revoked_file_aio_read, ++ .aio_write = revoked_file_aio_write, ++ .readdir = revoked_file_readdir, ++ .poll = revoked_file_poll, ++ .ioctl = revoked_file_ioctl, ++ .unlocked_ioctl = revoked_file_unlocked_ioctl, ++ .compat_ioctl = revoked_file_compat_ioctl, ++ .mmap = revoked_file_mmap, ++ .open = revoked_file_open, ++ .flush = revoked_file_flush, ++ .release = revoked_file_release, ++ .fsync = revoked_file_fsync, ++ .aio_fsync = revoked_file_aio_fsync, ++ .fasync = revoked_file_fasync, ++ .lock = revoked_file_lock, ++ .sendfile = revoked_file_sendfile, ++ .sendpage = revoked_file_sendpage, ++ .get_unmapped_area = revoked_file_get_unmapped_area, ++ .check_flags = revoked_file_check_flags, ++ .dir_notify = revoked_file_dir_notify, ++ .flock = revoked_file_flock, ++ .splice_write = revoked_file_splice_write, ++ .splice_read = revoked_file_splice_read, ++}; ++ ++static const struct file_operations revoked_special_file_ops = { ++ .llseek = revoked_file_llseek, ++ .read = revoked_special_file_read, ++ .write = revoked_file_write, ++ .aio_read = revoked_file_aio_read, ++ .aio_write = revoked_file_aio_write, ++ .readdir = revoked_file_readdir, ++ .poll = revoked_file_poll, ++ .ioctl = revoked_file_ioctl, ++ .unlocked_ioctl = revoked_file_unlocked_ioctl, ++ .compat_ioctl = revoked_file_compat_ioctl, ++ .mmap = revoked_file_mmap, ++ .open = revoked_file_open, ++ .flush = revoked_file_flush, ++ .release = revoked_file_release, ++ .fsync = revoked_file_fsync, ++ .aio_fsync = revoked_file_aio_fsync, ++ .fasync = revoked_file_fasync, ++ .lock = revoked_file_lock, ++ .sendfile = revoked_file_sendfile, ++ .sendpage = revoked_file_sendpage, ++ .get_unmapped_area = revoked_file_get_unmapped_area, ++ .check_flags = revoked_file_check_flags, ++ .dir_notify = revoked_file_dir_notify, ++ .flock = revoked_file_flock, ++ .splice_write = revoked_file_splice_write, ++ .splice_read = revoked_file_splice_read, ++}; ++ ++static int revoked_inode_create(struct inode *dir, struct dentry *dentry, ++ int mode, struct nameidata *nd) ++{ ++ return -EBADF; ++} ++ ++static struct dentry *revoked_inode_lookup(struct inode *dir, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ return ERR_PTR(-EBADF); ++} ++ ++static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *dentry) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry, ++ int mode) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry, ++ int mode, dev_t rdev) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct inode *new_dir, ++ struct dentry *new_dentry) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer, ++ int buflen) ++{ ++ return -EBADF; ++} ++ ++static int 
revoked_inode_permission(struct inode *inode, int mask, ++ struct nameidata *nd) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name, ++ void *buffer, size_t size) ++{ ++ return -EBADF; ++} ++ ++static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer, ++ size_t buffer_size) ++{ ++ return -EBADF; ++} ++ ++static int revoked_inode_removexattr(struct dentry *dentry, const char *name) ++{ ++ return -EBADF; ++} ++ ++static struct inode_operations revoked_inode_ops = { ++ .create = revoked_inode_create, ++ .lookup = revoked_inode_lookup, ++ .link = revoked_inode_link, ++ .unlink = revoked_inode_unlink, ++ .symlink = revoked_inode_symlink, ++ .mkdir = revoked_inode_mkdir, ++ .rmdir = revoked_inode_rmdir, ++ .mknod = revoked_inode_mknod, ++ .rename = revoked_inode_rename, ++ .readlink = revoked_inode_readlink, ++ /* follow_link must be no-op, otherwise unmounting this inode ++ won't work */ ++ /* put_link returns void */ ++ /* truncate returns void */ ++ .permission = revoked_inode_permission, ++ .getattr = revoked_inode_getattr, ++ .setattr = revoked_inode_setattr, ++ .setxattr = revoked_inode_setxattr, ++ .getxattr = revoked_inode_getxattr, ++ .listxattr = revoked_inode_listxattr, ++ .removexattr = revoked_inode_removexattr, ++ /* truncate_range returns void */ ++}; ++ ++static int revoked_readpage(struct file *file, struct page *page) ++{ ++ return -EIO; ++} ++ ++static int revoked_writepage(struct page *page, struct writeback_control *wbc) ++{ ++ return -EIO; ++} ++ ++static int revoked_prepare_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ return -EIO; ++} ++ ++static int revoked_commit_write(struct file *file, struct page *page, ++ unsigned from, unsigned to) ++{ ++ return -EIO; ++} ++ ++static ssize_t revoked_direct_IO(int rw, struct kiocb *iocb, ++ const struct iovec *iov, loff_t offset, ++ unsigned long nr_segs) ++{ ++ return -EIO; ++} ++ ++static const struct address_space_operations revoked_aops = { ++ .readpage = revoked_readpage, ++ .writepage = revoked_writepage, ++ .prepare_write = revoked_prepare_write, ++ .commit_write = revoked_commit_write, ++ .direct_IO = revoked_direct_IO, ++}; ++ ++void make_revoked_inode(struct inode *inode, int mode) ++{ ++ remove_inode_hash(inode); ++ ++ inode->i_mode = mode; ++ inode->i_atime = inode->i_mtime = inode->i_ctime = ++ current_fs_time(inode->i_sb); ++ inode->i_op = &revoked_inode_ops; ++ ++ if (special_file(mode)) ++ inode->i_fop = &revoked_special_file_ops; ++ else ++ inode->i_fop = &revoked_file_ops; ++ ++ inode->i_mapping->a_ops = &revoked_aops; ++} +diff -Nurb linux-2.6.22-570/fs/splice.c linux-2.6.22-591/fs/splice.c +--- linux-2.6.22-570/fs/splice.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/fs/splice.c 2007-12-21 15:36:14.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + struct partial_page { + unsigned int offset; +@@ -932,6 +933,10 @@ + if (unlikely(ret < 0)) + return ret; + ++ ret = security_file_permission(out, MAY_WRITE); ++ if (unlikely(ret < 0)) ++ return ret; ++ + return 
out->f_op->splice_write(pipe, out, ppos, len, flags); + } + +@@ -954,6 +959,10 @@ + if (unlikely(ret < 0)) + return ret; + ++ ret = security_file_permission(in, MAY_READ); ++ if (unlikely(ret < 0)) ++ return ret; ++ + return in->f_op->splice_read(in, ppos, pipe, len, flags); + } + +@@ -1272,6 +1281,7 @@ + static long do_vmsplice(struct file *file, const struct iovec __user *iov, + unsigned long nr_segs, unsigned int flags) + { ++ long err; + struct pipe_inode_info *pipe; + struct page *pages[PIPE_BUFFERS]; + struct partial_page partial[PIPE_BUFFERS]; +@@ -1290,6 +1300,10 @@ + else if (unlikely(!nr_segs)) + return 0; + ++ err = security_file_permission(file, MAY_WRITE); ++ if (unlikely(err < 0)) ++ return err; ++ + spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial, + flags & SPLICE_F_GIFT); + if (spd.nr_pages <= 0) +diff -Nurb linux-2.6.22-570/fs/stack.c linux-2.6.22-591/fs/stack.c +--- linux-2.6.22-570/fs/stack.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/stack.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,20 @@ ++/* ++ * Copyright (c) 2006-2007 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2007 Stony Brook University ++ * Copyright (c) 2006-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #include + #include + #include + +-/* does _NOT_ require i_mutex to be held. ++/* ++ * does _NOT_ require i_mutex to be held. + * + * This function cannot be inlined since i_size_{read,write} is rather + * heavy-weight on 32-bit systems +@@ -14,7 +26,8 @@ + } + EXPORT_SYMBOL_GPL(fsstack_copy_inode_size); + +-/* copy all attributes; get_nlinks is optional way to override the i_nlink ++/* ++ * copy all attributes; get_nlinks is optional way to override the i_nlink + * copying + */ + void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, +diff -Nurb linux-2.6.22-570/fs/sync.c linux-2.6.22-591/fs/sync.c +--- linux-2.6.22-570/fs/sync.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sync.c 2007-12-21 15:36:14.000000000 -0500 +@@ -174,6 +174,9 @@ + * already-instantiated disk blocks, there are no guarantees here that the data + * will be available after a crash. 
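
The hooks added above put an LSM check in front of both halves of a splice. A minimal user of the path they now mediate, copying a file through a pipe with splice(2):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        int pfd[2];
        int in, out;

        if (argc != 3 || pipe(pfd) < 0)
                return 1;
        in = open(argv[1], O_RDONLY);
        out = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (in < 0 || out < 0)
                return 1;

        for (;;) {
                /* read side: the new MAY_READ check runs here */
                ssize_t n = splice(in, NULL, pfd[1], NULL, 65536, 0);
                if (n <= 0)
                        break;
                /* write side: the new MAY_WRITE check runs here */
                splice(pfd[0], NULL, out, NULL, (size_t)n, 0);
        }
        return 0;
}
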
+ */ ++/* It would be nice if people remember that not all the world's an i386 ++ when they introduce new system calls */ ++ + asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags) + { +Files linux-2.6.22-570/fs/sysfs/.symlink.c.swp and linux-2.6.22-591/fs/sysfs/.symlink.c.swp differ +diff -Nurb linux-2.6.22-570/fs/sysfs/bin.c linux-2.6.22-591/fs/sysfs/bin.c +--- linux-2.6.22-570/fs/sysfs/bin.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/bin.c 2007-12-22 02:12:47.000000000 -0500 +@@ -20,29 +20,41 @@ + + #include "sysfs.h" + ++struct bin_buffer { ++ struct mutex mutex; ++ void *buffer; ++ int mmapped; ++}; ++ + static int + fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count) + { +- struct bin_attribute * attr = to_bin_attr(dentry); +- struct kobject * kobj = to_kobj(dentry->d_parent); ++ struct sysfs_dirent *attr_sd = dentry->d_fsdata; ++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; ++ int rc; ++ ++ /* need attr_sd for attr, its parent for kobj */ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; ++ ++ rc = -EIO; ++ if (attr->read) ++ rc = attr->read(kobj, attr, buffer, off, count); + +- if (!attr->read) +- return -EIO; ++ sysfs_put_active_two(attr_sd); + +- return attr->read(kobj, buffer, off, count); ++ return rc; + } + + static ssize_t +-read(struct file * file, char __user * userbuf, size_t count, loff_t * off) ++read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) + { +- char *buffer = file->private_data; ++ struct bin_buffer *bb = file->private_data; + struct dentry *dentry = file->f_path.dentry; + int size = dentry->d_inode->i_size; + loff_t offs = *off; +- int ret; +- +- if (count > PAGE_SIZE) +- count = PAGE_SIZE; ++ int count = min_t(size_t, bytes, PAGE_SIZE); + + if (size) { + if (offs > size) +@@ -51,43 +63,56 @@ + count = size - offs; + } + +- ret = fill_read(dentry, buffer, offs, count); +- if (ret < 0) +- return ret; +- count = ret; ++ mutex_lock(&bb->mutex); + +- if (copy_to_user(userbuf, buffer, count)) +- return -EFAULT; ++ count = fill_read(dentry, bb->buffer, offs, count); ++ if (count < 0) ++ goto out_unlock; ++ ++ if (copy_to_user(userbuf, bb->buffer, count)) { ++ count = -EFAULT; ++ goto out_unlock; ++ } + +- pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count); ++ pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); + + *off = offs + count; + ++ out_unlock: ++ mutex_unlock(&bb->mutex); + return count; + } + + static int + flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count) + { +- struct bin_attribute *attr = to_bin_attr(dentry); +- struct kobject *kobj = to_kobj(dentry->d_parent); ++ struct sysfs_dirent *attr_sd = dentry->d_fsdata; ++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; ++ int rc; + +- if (!attr->write) +- return -EIO; ++ /* need attr_sd for attr, its parent for kobj */ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; + +- return attr->write(kobj, buffer, offset, count); ++ rc = -EIO; ++ if (attr->write) ++ rc = attr->write(kobj, attr, buffer, offset, count); ++ ++ sysfs_put_active_two(attr_sd); ++ ++ return rc; + } + +-static ssize_t write(struct file * file, const char __user * userbuf, +- size_t count, loff_t * off) ++static ssize_t write(struct file *file, const char __user *userbuf, ++ size_t bytes, loff_t *off) 
+ { +- char *buffer = file->private_data; ++ struct bin_buffer *bb = file->private_data; + struct dentry *dentry = file->f_path.dentry; + int size = dentry->d_inode->i_size; + loff_t offs = *off; ++ int count = min_t(size_t, bytes, PAGE_SIZE); + +- if (count > PAGE_SIZE) +- count = PAGE_SIZE; + if (size) { + if (offs > size) + return 0; +@@ -95,72 +120,100 @@ + count = size - offs; + } + +- if (copy_from_user(buffer, userbuf, count)) +- return -EFAULT; ++ mutex_lock(&bb->mutex); + +- count = flush_write(dentry, buffer, offs, count); ++ if (copy_from_user(bb->buffer, userbuf, count)) { ++ count = -EFAULT; ++ goto out_unlock; ++ } ++ ++ count = flush_write(dentry, bb->buffer, offs, count); + if (count > 0) + *off = offs + count; ++ ++ out_unlock: ++ mutex_unlock(&bb->mutex); + return count; + } + + static int mmap(struct file *file, struct vm_area_struct *vma) + { +- struct dentry *dentry = file->f_path.dentry; +- struct bin_attribute *attr = to_bin_attr(dentry); +- struct kobject *kobj = to_kobj(dentry->d_parent); ++ struct bin_buffer *bb = file->private_data; ++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; ++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; ++ int rc; ++ ++ mutex_lock(&bb->mutex); ++ ++ /* need attr_sd for attr, its parent for kobj */ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; ++ ++ rc = -EINVAL; ++ if (attr->mmap) ++ rc = attr->mmap(kobj, attr, vma); ++ ++ if (rc == 0 && !bb->mmapped) ++ bb->mmapped = 1; ++ else ++ sysfs_put_active_two(attr_sd); + +- if (!attr->mmap) +- return -EINVAL; ++ mutex_unlock(&bb->mutex); + +- return attr->mmap(kobj, attr, vma); ++ return rc; + } + + static int open(struct inode * inode, struct file * file) + { +- struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); +- struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); +- int error = -EINVAL; +- +- if (!kobj || !attr) +- goto Done; +- +- /* Grab the module reference for this attribute if we have one */ +- error = -ENODEV; +- if (!try_module_get(attr->attr.owner)) +- goto Done; ++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; ++ struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr; ++ struct bin_buffer *bb = NULL; ++ int error; ++ ++ /* need attr_sd for attr */ ++ if (!sysfs_get_active(attr_sd)) ++ return -ENODEV; + + error = -EACCES; + if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) +- goto Error; ++ goto err_out; + if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) +- goto Error; ++ goto err_out; + + error = -ENOMEM; +- file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); +- if (!file->private_data) +- goto Error; +- +- error = 0; +- goto Done; +- +- Error: +- module_put(attr->attr.owner); +- Done: +- if (error) +- kobject_put(kobj); ++ bb = kzalloc(sizeof(*bb), GFP_KERNEL); ++ if (!bb) ++ goto err_out; ++ ++ bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!bb->buffer) ++ goto err_out; ++ ++ mutex_init(&bb->mutex); ++ file->private_data = bb; ++ ++ /* open succeeded, put active reference and pin attr_sd */ ++ sysfs_put_active(attr_sd); ++ sysfs_get(attr_sd); ++ return 0; ++ ++ err_out: ++ sysfs_put_active(attr_sd); ++ kfree(bb); + return error; + } + + static int release(struct inode * inode, struct file * file) + { +- struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent); +- struct bin_attribute * attr = to_bin_attr(file->f_path.dentry); +- u8 * buffer = file->private_data; +- +- 
kobject_put(kobj); +- module_put(attr->attr.owner); +- kfree(buffer); ++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; ++ struct bin_buffer *bb = file->private_data; ++ ++ if (bb->mmapped) ++ sysfs_put_active_two(attr_sd); ++ sysfs_put(attr_sd); ++ kfree(bb->buffer); ++ kfree(bb); + return 0; + } + +@@ -181,9 +234,9 @@ + + int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) + { +- BUG_ON(!kobj || !kobj->dentry || !attr); ++ BUG_ON(!kobj || !kobj->sd || !attr); + +- return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR); ++ return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); + } + + +@@ -195,7 +248,7 @@ + + void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) + { +- if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) { ++ if (sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name) < 0) { + printk(KERN_ERR "%s: " + "bad dentry or inode or no such file: \"%s\"\n", + __FUNCTION__, attr->attr.name); +diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c linux-2.6.22-591/fs/sysfs/dir.c +--- linux-2.6.22-570/fs/sysfs/dir.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/dir.c 2007-12-23 01:58:30.000000000 -0500 +@@ -9,21 +9,442 @@ + #include + #include + #include ++#include ++#include + #include + #include "sysfs.h" + +-DECLARE_RWSEM(sysfs_rename_sem); +-spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED; ++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd); ++ ++DEFINE_MUTEX(sysfs_mutex); ++spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; ++ ++static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; ++static DEFINE_IDA(sysfs_ino_ida); ++ ++static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target) ++{ ++ /* Find the shadow directory for the specified tag */ ++ struct sysfs_dirent *sd; ++ ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { ++ if (sd->s_name != target) ++ continue; ++ break; ++ } ++ return sd; ++} ++ ++static const void *find_shadow_tag(struct kobject *kobj) ++{ ++ /* Find the tag the current kobj is cached with */ ++ return kobj->sd->s_parent->s_name; ++} ++ ++/** ++ * sysfs_link_sibling - link sysfs_dirent into sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Link @sd into its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sysfs_mutex) ++ */ ++ ++/** ++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Unlink @sd from its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sysfs_mutex) ++ */ ++ ++void sysfs_link_sibling(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent *parent_sd = sd->s_parent; ++ ++ BUG_ON(sd->s_sibling); ++ sd->s_sibling = parent_sd->s_children; ++ parent_sd->s_children = sd; ++} ++/** ++ * sysfs_get_dentry - get dentry for the given sysfs_dirent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get dentry for @sd. Dentry is looked up if currently not ++ * present. This function climbs sysfs_dirent tree till it ++ * reaches a sysfs_dirent with valid dentry attached and descends ++ * down from there looking up dentry for each step. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep) ++ * ++ * RETURNS: ++ * Pointer to found dentry on success, ERR_PTR() value on error. 
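
The sibling-list helpers in this hunk (sysfs_link_sibling() above, sysfs_unlink_sibling() a little further down) keep children on a plain singly linked list; the unlink walks a struct sysfs_dirent ** so the head needs no special case. The idiom in isolation, with toy types:

#include <stddef.h>
#include <stdio.h>

struct node { int id; struct node *sibling; };

/* Pointer-to-pointer unlink: *pos is either the head pointer or
 * some node's sibling field, so removing the head is no different
 * from removing any other element. */
static void unlink_node(struct node **head, struct node *victim)
{
        struct node **pos;

        for (pos = head; *pos; pos = &(*pos)->sibling) {
                if (*pos == victim) {
                        *pos = victim->sibling;
                        victim->sibling = NULL;
                        break;
                }
        }
}

int main(void)
{
        struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        struct node *head = &a;

        unlink_node(&head, &a);         /* works for the head too */
        printf("new head: %d\n", head->id);     /* 2 */
        return 0;
}
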
++ */ ++struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent *cur; ++ struct dentry *parent_dentry, *dentry; ++ int i, depth; ++ ++ /* Find the first parent which has valid s_dentry and get the ++ * dentry. ++ */ ++ mutex_lock(&sysfs_mutex); ++ restart0: ++ spin_lock(&sysfs_assoc_lock); ++ restart1: ++ spin_lock(&dcache_lock); ++ ++ dentry = NULL; ++ depth = 0; ++ cur = sd; ++ while (!cur->s_dentry || !cur->s_dentry->d_inode) { ++ if (cur->s_flags & SYSFS_FLAG_REMOVED) { ++ dentry = ERR_PTR(-ENOENT); ++ depth = 0; ++ break; ++ } ++ cur = cur->s_parent; ++ depth++; ++ } ++ if (!IS_ERR(dentry)) ++ dentry = dget_locked(cur->s_dentry); ++ ++ spin_unlock(&dcache_lock); ++ spin_unlock(&sysfs_assoc_lock); ++ ++ /* from the found dentry, look up depth times */ ++ while (depth--) { ++ /* find and get depth'th ancestor */ ++ for (cur = sd, i = 0; cur && i < depth; i++) ++ cur = cur->s_parent; ++ ++ /* This can happen if tree structure was modified due ++ * to move/rename. Restart. ++ */ ++ if (i != depth) { ++ dput(dentry); ++ goto restart0; ++ } ++ ++ sysfs_get(cur); ++ ++ mutex_unlock(&sysfs_mutex); ++ ++ /* look it up */ ++ parent_dentry = dentry; ++ dentry = lookup_one_len_kern(cur->s_name, parent_dentry, ++ strlen(cur->s_name)); ++ dput(parent_dentry); ++ ++ if (IS_ERR(dentry)) { ++ sysfs_put(cur); ++ return dentry; ++ } ++ ++ mutex_lock(&sysfs_mutex); ++ spin_lock(&sysfs_assoc_lock); ++ ++ /* This, again, can happen if tree structure has ++ * changed and we looked up the wrong thing. Restart. ++ */ ++ if (cur->s_dentry != dentry) { ++ dput(dentry); ++ sysfs_put(cur); ++ goto restart1; ++ } ++ ++ spin_unlock(&sysfs_assoc_lock); ++ ++ sysfs_put(cur); ++ } ++ ++ mutex_unlock(&sysfs_mutex); ++ return dentry; ++} ++ ++/** ++ * sysfs_link_sibling - link sysfs_dirent into sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Link @sd into its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) ++ */ ++ ++/** ++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Unlink @sd from its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) ++ */ ++void sysfs_unlink_sibling(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent **pos; ++ ++ for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { ++ if (*pos == sd) { ++ *pos = sd->s_sibling; ++ sd->s_sibling = NULL; ++ break; ++ } ++ } ++} ++ ++/** ++ * sysfs_get_dentry - get dentry for the given sysfs_dirent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get dentry for @sd. Dentry is looked up if currently not ++ * present. This function climbs sysfs_dirent tree till it ++ * reaches a sysfs_dirent with valid dentry attached and descends ++ * down from there looking up dentry for each step. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep) ++ * ++ * RETURNS: ++ * Pointer to found dentry on success, ERR_PTR() value on error. ++ */ ++ ++/** ++ * sysfs_get_active - get an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to get an active reference to ++ * ++ * Get an active reference of @sd. This function is noop if @sd ++ * is NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++/** ++ * sysfs_put_active - put an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to put an active reference to ++ * ++ * Put an active reference to @sd. 
This function is noop if @sd ++ * is NULL. ++ */ ++void sysfs_put_active(struct sysfs_dirent *sd) ++{ ++ struct completion *cmpl; ++ int v; ++ ++ if (unlikely(!sd)) ++ return; ++ ++ v = atomic_dec_return(&sd->s_active); ++ if (likely(v != SD_DEACTIVATED_BIAS)) ++ return; ++ ++ /* atomic_dec_return() is a mb(), we'll always see the updated ++ * sd->s_sibling. ++ */ ++ cmpl = (void *)sd->s_sibling; ++ complete(cmpl); ++} ++ ++/** ++ * sysfs_get_active_two - get active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get active reference to @sd and its parent. Parent's active ++ * reference is grabbed first. This function is noop if @sd is ++ * NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd) ++{ ++ if (sd) { ++ if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent))) ++ return NULL; ++ if (unlikely(!sysfs_get_active(sd))) { ++ sysfs_put_active(sd->s_parent); ++ return NULL; ++ } ++ } ++ return sd; ++} ++ ++/** ++ * sysfs_put_active_two - put active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Put active references to @sd and its parent. This function is ++ * noop if @sd is NULL. ++ */ ++void sysfs_put_active_two(struct sysfs_dirent *sd) ++{ ++ if (sd) { ++ sysfs_put_active(sd); ++ sysfs_put_active(sd->s_parent); ++ } ++} ++ ++/** ++ * sysfs_deactivate - deactivate sysfs_dirent ++ * @sd: sysfs_dirent to deactivate ++ * ++ * Deny new active references and drain existing ones. ++ */ ++static void sysfs_deactivate(struct sysfs_dirent *sd) ++{ ++ DECLARE_COMPLETION_ONSTACK(wait); ++ int v; ++ ++ BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); ++ sd->s_sibling = (void *)&wait; ++ ++ /* atomic_add_return() is a mb(), put_active() will always see ++ * the updated sd->s_sibling. ++ */ ++ v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); ++ ++ if (v != SD_DEACTIVATED_BIAS) ++ wait_for_completion(&wait); ++ ++ sd->s_sibling = NULL; ++} ++ ++/** ++ * sysfs_get_active - get an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to get an active reference to ++ * ++ * Get an active reference of @sd. This function is noop if @sd ++ * is NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) ++{ ++ if (unlikely(!sd)) ++ return NULL; ++ ++ while (1) { ++ int v, t; ++ ++ v = atomic_read(&sd->s_active); ++ if (unlikely(v < 0)) ++ return NULL; ++ ++ t = atomic_cmpxchg(&sd->s_active, v, v + 1); ++ if (likely(t == v)) ++ return sd; ++ if (t < 0) ++ return NULL; ++ ++ cpu_relax(); ++ } ++} ++/** ++ * sysfs_put_active - put an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to put an active reference to ++ * ++ * Put an active reference to @sd. This function is noop if @sd ++ * is NULL. ++ */ ++ ++/** ++ * sysfs_get_active_two - get active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get active reference to @sd and its parent. Parent's active ++ * reference is grabbed first. This function is noop if @sd is ++ * NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++ ++/** ++ * sysfs_put_active_two - put active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Put active references to @sd and its parent. This function is ++ * noop if @sd is NULL. 
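
sysfs_get_active()/sysfs_put_active()/sysfs_deactivate() form a drainable reference count: gets are an optimistic cmpxchg loop that refuses once the count has been pushed negative, and the put that lands exactly on the bias wakes the deactivator. A compact C11 model of the scheme (DEACTIVATED_BIAS mirrors SD_DEACTIVATED_BIAS; the completion-based waiting is reduced to printouts):

#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define DEACTIVATED_BIAS INT_MIN        /* SD_DEACTIVATED_BIAS analogue */

static atomic_int active;

/* sysfs_get_active() shape: optimistic CAS loop that fails once
 * the counter has been pushed negative by deactivation. */
static bool try_get(void)
{
        int v = atomic_load(&active);

        while (v >= 0) {
                if (atomic_compare_exchange_weak(&active, &v, v + 1))
                        return true;
                /* v was reloaded by the failed CAS; sign re-checked */
        }
        return false;
}

/* sysfs_put_active() shape: the put that brings the count down to
 * the bias is the last in-flight user and wakes the deactivator. */
static void put_ref(void)
{
        if (atomic_fetch_sub(&active, 1) - 1 == DEACTIVATED_BIAS)
                puts("drained: complete() the waiter");
}

/* sysfs_deactivate() shape: add the bias so try_get() fails, then
 * (in the real code) wait_for_completion() until users drain. */
static void deactivate(void)
{
        if (atomic_fetch_add(&active, DEACTIVATED_BIAS) != 0)
                puts("users in flight: would wait_for_completion()");
}

int main(void)
{
        try_get();              /* one active user */
        deactivate();           /* new gets now fail */
        printf("get after deactivate: %d\n", try_get());
        put_ref();              /* last user drains the count */
        return 0;
}
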
++ */ ++ ++/** ++ * sysfs_deactivate - deactivate sysfs_dirent ++ * @sd: sysfs_dirent to deactivate ++ * ++ * Deny new active references and drain existing ones. s_active ++ * will be unlocked when the sysfs_dirent is released. ++ */ ++ ++static int sysfs_alloc_ino(ino_t *pino) ++{ ++ int ino, rc; ++ ++ retry: ++ spin_lock(&sysfs_ino_lock); ++ rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); ++ spin_unlock(&sysfs_ino_lock); ++ ++ if (rc == -EAGAIN) { ++ if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) ++ goto retry; ++ rc = -ENOMEM; ++ } ++ ++ *pino = ino; ++ return rc; ++} ++ ++static void sysfs_free_ino(ino_t ino) ++{ ++ spin_lock(&sysfs_ino_lock); ++ ida_remove(&sysfs_ino_ida, ino); ++ spin_unlock(&sysfs_ino_lock); ++} ++ ++void release_sysfs_dirent(struct sysfs_dirent * sd) ++{ ++ struct sysfs_dirent *parent_sd; ++ ++ repeat: ++ /* Moving/renaming is always done while holding reference. ++ * sd->s_parent won't change beneath us. ++ */ ++ parent_sd = sd->s_parent; ++ ++ if (sysfs_type(sd) == SYSFS_KOBJ_LINK) ++ sysfs_put(sd->s_elem.symlink.target_sd); ++ if (sysfs_type(sd) & SYSFS_COPY_NAME) ++ kfree(sd->s_name); ++ kfree(sd->s_iattr); ++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR) ++ sysfs_free_ino(sd->s_ino); ++ kmem_cache_free(sysfs_dir_cachep, sd); ++ ++ sd = parent_sd; ++ if (sd && atomic_dec_and_test(&sd->s_count)) ++ goto repeat; ++} + + static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) + { + struct sysfs_dirent * sd = dentry->d_fsdata; + + if (sd) { +- /* sd->s_dentry is protected with sysfs_lock. This +- * allows sysfs_drop_dentry() to dereference it. ++ /* sd->s_dentry is protected with sysfs_assoc_lock. ++ * This allows sysfs_drop_dentry() to dereference it. + */ +- spin_lock(&sysfs_lock); ++ spin_lock(&sysfs_assoc_lock); + + /* The dentry might have been deleted or another + * lookup could have happened updating sd->s_dentry to +@@ -32,7 +453,7 @@ + */ + if (sd->s_dentry == dentry) + sd->s_dentry = NULL; +- spin_unlock(&sysfs_lock); ++ spin_unlock(&sysfs_assoc_lock); + sysfs_put(sd); + } + iput(inode); +@@ -42,344 +463,594 @@ + .d_iput = sysfs_d_iput, + }; + +-static unsigned int sysfs_inode_counter; +-ino_t sysfs_get_inum(void) ++struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) + { +- if (unlikely(sysfs_inode_counter < 3)) +- sysfs_inode_counter = 3; +- return sysfs_inode_counter++; +-} ++ char *dup_name = NULL; ++ struct sysfs_dirent *sd = NULL; + +-/* +- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent +- */ +-static struct sysfs_dirent * __sysfs_new_dirent(void * element) +-{ +- struct sysfs_dirent * sd; ++ if (type & SYSFS_COPY_NAME) { ++ name = dup_name = kstrdup(name, GFP_KERNEL); ++ if (!name) ++ goto err_out; ++ } + + sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); + if (!sd) +- return NULL; ++ goto err_out; ++ ++ if (sysfs_alloc_ino(&sd->s_ino)) ++ goto err_out; + +- sd->s_ino = sysfs_get_inum(); + atomic_set(&sd->s_count, 1); ++ atomic_set(&sd->s_active, 0); + atomic_set(&sd->s_event, 1); +- INIT_LIST_HEAD(&sd->s_children); +- INIT_LIST_HEAD(&sd->s_sibling); +- sd->s_element = element; ++ ++ sd->s_name = name; ++ sd->s_mode = mode; ++ sd->s_flags = type; + + return sd; ++ ++ err_out: ++ kfree(dup_name); ++ kmem_cache_free(sysfs_dir_cachep, sd); ++ return NULL; + } + +-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd, +- struct sysfs_dirent *sd) ++/** ++ * sysfs_attach_dentry - associate sysfs_dirent with dentry ++ * @sd: target sysfs_dirent ++ * @dentry: dentry to associate ++ * 
++ * Associate @sd with @dentry. This is protected by ++ * sysfs_assoc_lock to avoid race with sysfs_d_iput(). ++ * ++ * LOCKING: ++ * mutex_lock(sysfs_mutex) ++ */ ++static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry) + { +- if (sd) +- list_add(&sd->s_sibling, &parent_sd->s_children); ++ dentry->d_op = &sysfs_dentry_ops; ++ dentry->d_fsdata = sysfs_get(sd); ++ ++ /* protect sd->s_dentry against sysfs_d_iput */ ++ spin_lock(&sysfs_assoc_lock); ++ sd->s_dentry = dentry; ++ spin_unlock(&sysfs_assoc_lock); ++ ++ if (dentry->d_flags & DCACHE_UNHASHED) ++ d_rehash(dentry); + } + +-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd, +- void * element) ++static int sysfs_ilookup_test(struct inode *inode, void *arg) + { +- struct sysfs_dirent *sd; +- sd = __sysfs_new_dirent(element); +- __sysfs_list_dirent(parent_sd, sd); +- return sd; ++ struct sysfs_dirent *sd = arg; ++ return inode->i_ino == sd->s_ino; + } + +-/* ++/** ++ * sysfs_addrm_start - prepare for sysfs_dirent add/remove ++ * @acxt: pointer to sysfs_addrm_cxt to be used ++ * @parent_sd: parent sysfs_dirent + * +- * Return -EEXIST if there is already a sysfs element with the same name for +- * the same parent. ++ * This function is called when the caller is about to add or ++ * remove sysfs_dirent under @parent_sd. This function acquires ++ * sysfs_mutex, grabs inode for @parent_sd if available and lock ++ * i_mutex of it. @acxt is used to keep and pass context to ++ * other addrm functions. + * +- * called with parent inode's i_mutex held ++ * LOCKING: ++ * Kernel thread context (may sleep). sysfs_mutex is locked on ++ * return. i_mutex of parent inode is locked on return if ++ * available. + */ +-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd, +- const unsigned char *new) ++void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *parent_sd) + { +- struct sysfs_dirent * sd; ++ struct inode *inode; + +- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { +- if (sd->s_element) { +- const unsigned char *existing = sysfs_get_name(sd); +- if (strcmp(existing, new)) +- continue; +- else +- return -EEXIST; +- } ++ memset(acxt, 0, sizeof(*acxt)); ++ acxt->parent_sd = parent_sd; ++ ++ /* Lookup parent inode. inode initialization and I_NEW ++ * clearing are protected by sysfs_mutex. By grabbing it and ++ * looking up with _nowait variant, inode state can be ++ * determined reliably. ++ */ ++ mutex_lock(&sysfs_mutex); ++ ++ inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, ++ parent_sd); ++ ++ if (inode && !(inode->i_state & I_NEW)) { ++ /* parent inode available */ ++ acxt->parent_inode = inode; ++ ++ /* sysfs_mutex is below i_mutex in lock hierarchy. ++ * First, trylock i_mutex. If fails, unlock ++ * sysfs_mutex and lock them in order. ++ */ ++ if (!mutex_trylock(&inode->i_mutex)) { ++ mutex_unlock(&sysfs_mutex); ++ mutex_lock(&inode->i_mutex); ++ mutex_lock(&sysfs_mutex); + } ++ } else ++ iput(inode); ++} + +- return 0; ++/** ++ * sysfs_add_one - add sysfs_dirent to parent ++ * @acxt: addrm context to use ++ * @sd: sysfs_dirent to be added ++ * ++ * Get @acxt->parent_sd and set sd->s_parent to it and increment ++ * nlink of parent inode if @sd is a directory. @sd is NOT ++ * linked into the children list of the parent. The caller ++ * should invoke sysfs_link_sibling() after this function ++ * completes if @sd needs to be on the children list. 
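
sysfs_addrm_start() above contains a small lock-ordering dance worth calling out: sysfs_mutex ranks below i_mutex, yet it is already held when the parent's i_mutex turns out to be needed, so the code trylocks and, on failure, drops and re-acquires both in the legal order. The pattern in miniature (lock_a and lock_b play i_mutex and sysfs_mutex):

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* "i_mutex" */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* "sysfs_mutex" */

/* We hold B (the lower-ranked lock) and discover we need A too.
 * Trylock A; if that fails, drop B and take both in legal order
 * instead of risking an A-vs-B deadlock. */
static void lock_both_from_b(void)
{
        pthread_mutex_lock(&lock_b);
        if (pthread_mutex_trylock(&lock_a) != 0) {
                pthread_mutex_unlock(&lock_b);
                pthread_mutex_lock(&lock_a);
                pthread_mutex_lock(&lock_b);
        }
        /* both locks held here; do the add/remove work */
        pthread_mutex_unlock(&lock_a);
        pthread_mutex_unlock(&lock_b);
}

int main(void)
{
        lock_both_from_b();
        return 0;
}
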
++ * This function should be called between calls to ++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be ++ * passed the same @acxt as passed to sysfs_addrm_start(). ++ * ++ * LOCKING: ++ * Determined by sysfs_addrm_start(). ++ */ ++void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) ++{ ++ sd->s_parent = sysfs_get(acxt->parent_sd); ++ ++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) ++ inc_nlink(acxt->parent_inode); ++ ++ acxt->cnt++; ++} ++ ++/** ++ * sysfs_remove_one - remove sysfs_dirent from parent ++ * @acxt: addrm context to use ++ * @sd: sysfs_dirent to be removed ++ * ++ * Mark @sd removed and drop nlink of parent inode if @sd is a ++ * directory. @sd is NOT unlinked from the children list of the ++ * parent. The caller is responsible for removing @sd from the ++ * children list before calling this function. ++ * ++ * This function should be called between calls to ++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be ++ * passed the same @acxt as passed to sysfs_addrm_start(). ++ * ++ * LOCKING: ++ * Determined by sysfs_addrm_start(). ++ */ ++void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) ++{ ++ BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); ++ ++ sd->s_flags |= SYSFS_FLAG_REMOVED; ++ sd->s_sibling = acxt->removed; ++ acxt->removed = sd; ++ ++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) ++ drop_nlink(acxt->parent_inode); ++ ++ acxt->cnt++; ++} + -static struct sysfs_dirent * -__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type) ++/** ++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent ++ * @sd: target sysfs_dirent ++ * ++ * Drop dentry for @sd. @sd must have been unlinked from its ++ * parent on entry to this function such that it can't be looked ++ * up anymore. ++ * ++ * @sd->s_dentry which is protected with sysfs_assoc_lock points ++ * to the currently associated dentry but we're not holding a ++ * reference to it and racing with dput(). Grab dcache_lock and ++ * verify dentry before dropping it. If @sd->s_dentry is NULL or ++ * dput() beats us, no need to bother. ++ */ ++static void sysfs_drop_dentry(struct sysfs_dirent *sd) + { +- struct sysfs_dirent * sd; ++ struct dentry *dentry = NULL; ++ struct inode *inode; + +- sd = __sysfs_new_dirent(element); +- if (!sd) +- goto out; ++ /* We're not holding a reference to ->s_dentry dentry but the ++ * field will stay valid as long as sysfs_assoc_lock is held.
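
Note how sysfs_remove_one() only flags the victim and chains it onto acxt->removed; the expensive teardown (dentry drop, deactivation, final put) happens in sysfs_addrm_finish() after the locks are gone. The deferred-teardown shape on its own, with invented toy types:

#include <stddef.h>
#include <stdio.h>

struct node { int id; int removed; struct node *next_removed; };

struct rm_ctx { struct node *removed; };

/* Called with the tree lock held: cheap flag-and-chain only. */
static void remove_one(struct rm_ctx *ctx, struct node *n)
{
        n->removed = 1;
        n->next_removed = ctx->removed;
        ctx->removed = n;
}

/* Called after the tree lock is released: the expensive part. */
static void rm_finish(struct rm_ctx *ctx)
{
        while (ctx->removed) {
                struct node *n = ctx->removed;

                ctx->removed = n->next_removed;
                n->next_removed = NULL;
                printf("tearing down node %d\n", n->id);
        }
}

int main(void)
{
        struct node a = { 1, 0, NULL }, b = { 2, 0, NULL };
        struct rm_ctx ctx = { NULL };

        remove_one(&ctx, &a);
        remove_one(&ctx, &b);
        rm_finish(&ctx);
        return 0;
}
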
++ */ ++ spin_lock(&sysfs_assoc_lock); ++ spin_lock(&dcache_lock); + +- sd->s_mode = mode; +- sd->s_type = type; +- sd->s_dentry = dentry; +- if (dentry) { +- dentry->d_fsdata = sysfs_get(sd); +- dentry->d_op = &sysfs_dentry_ops; +- } ++ /* drop dentry if it's there and dput() didn't kill it yet */ ++ if (sd->s_dentry && sd->s_dentry->d_inode) { ++ dentry = dget_locked(sd->s_dentry); ++ spin_lock(&dentry->d_lock); ++ __d_drop(dentry); ++ spin_unlock(&dentry->d_lock); ++ } ++ ++ spin_unlock(&dcache_lock); ++ spin_unlock(&sysfs_assoc_lock); ++ ++ /* dentries for shadowed directories are pinned, unpin */ ++ if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) || ++ (sd->s_flags & SYSFS_FLAG_SHADOWED)) ++ dput(dentry); ++ dput(dentry); + +-out: +- return sd; ++ /* adjust nlink and update timestamp */ ++ inode = ilookup(sysfs_sb, sd->s_ino); ++ if (inode) { ++ mutex_lock(&inode->i_mutex); ++ ++ inode->i_ctime = CURRENT_TIME; ++ drop_nlink(inode); ++ if (sysfs_type(sd) == SYSFS_DIR) ++ drop_nlink(inode); ++ ++ mutex_unlock(&inode->i_mutex); ++ iput(inode); ++ } + } + +-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry, +- void * element, umode_t mode, int type) ++/** ++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent ++ * @sd: target sysfs_dirent ++ * ++ * Drop dentry for @sd. @sd must have been unlinked from its ++ * parent on entry to this function such that it can't be looked ++ * up anymore. ++ * ++ * @sd->s_dentry which is protected with sysfs_assoc_lock points ++ * to the currently associated dentry but we're not holding a ++ * reference to it and racing with dput(). Grab dcache_lock and ++ * verify dentry before dropping it. If @sd->s_dentry is NULL or ++ * dput() beats us, no need to bother. ++ */ ++ ++ ++/** ++ * sysfs_addrm_finish - finish up sysfs_dirent add/remove ++ * @acxt: addrm context to finish up ++ * ++ * Finish up sysfs_dirent add/remove. Resources acquired by ++ * sysfs_addrm_start() are released and removed sysfs_dirents are ++ * cleaned up. Timestamps on the parent inode are updated. ++ * ++ * LOCKING: ++ * All mutexes acquired by sysfs_addrm_start() are released. ++ * ++ * RETURNS: ++ * Number of added/removed sysfs_dirents since sysfs_addrm_start(). ++ */ ++int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) + { +- struct sysfs_dirent *sd; ++ /* release resources acquired by sysfs_addrm_start() */ ++ mutex_unlock(&sysfs_mutex); ++ if (acxt->parent_inode) { ++ struct inode *inode = acxt->parent_inode; ++ ++ /* if added/removed, update timestamps on the parent */ ++ if (acxt->cnt) ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; + +- sd = __sysfs_make_dirent(dentry, element, mode, type); +- __sysfs_list_dirent(parent_sd, sd); ++ mutex_unlock(&inode->i_mutex); ++ iput(inode); ++ } + +- return sd ? 0 : -ENOMEM; ++ /* kill removed sysfs_dirents */ ++ while (acxt->removed) { ++ struct sysfs_dirent *sd = acxt->removed; ++ ++ acxt->removed = sd->s_sibling; ++ sd->s_sibling = NULL; ++ ++ sysfs_prune_shadow_sd(sd->s_parent); ++ sysfs_drop_dentry(sd); ++ sysfs_deactivate(sd); ++ sysfs_put(sd); ++ } ++ ++ return acxt->cnt; + } + +-static int init_dir(struct inode * inode) ++/** ++ * sysfs_find_dirent - find sysfs_dirent with the given name ++ * @parent_sd: sysfs_dirent to search under ++ * @name: name to look for ++ * ++ * Look for sysfs_dirent with name @name under @parent_sd. ++ * ++ * LOCKING: ++ * mutex_lock(sysfs_mutex) ++ * ++ * RETURNS: ++ * Pointer to sysfs_dirent if found, NULL if not. 
++ */ ++struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name) + { +- inode->i_op = &sysfs_dir_inode_operations; +- inode->i_fop = &sysfs_dir_operations; ++ struct sysfs_dirent *sd; + +- /* directory inodes start off with i_nlink == 2 (for "." entry) */ +- inc_nlink(inode); +- return 0; ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) ++ if (sysfs_type(sd) && !strcmp(sd->s_name, name)) ++ return sd; ++ return NULL; + } + +-static int init_file(struct inode * inode) ++/** ++ * sysfs_get_dirent - find and get sysfs_dirent with the given name ++ * @parent_sd: sysfs_dirent to search under ++ * @name: name to look for ++ * ++ * Look for sysfs_dirent with name @name under @parent_sd and get ++ * it if found. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep). Grabs sysfs_mutex. ++ * ++ * RETURNS: ++ * Pointer to sysfs_dirent if found, NULL if not. ++ */ ++struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name) + { +- inode->i_size = PAGE_SIZE; +- inode->i_fop = &sysfs_file_operations; +- return 0; +-} ++ struct sysfs_dirent *sd; + +-static int init_symlink(struct inode * inode) +-{ +- inode->i_op = &sysfs_symlink_inode_operations; +- return 0; ++ mutex_lock(&sysfs_mutex); ++ sd = sysfs_find_dirent(parent_sd, name); ++ sysfs_get(sd); ++ mutex_unlock(&sysfs_mutex); ++ ++ return sd; + } + +-static int create_dir(struct kobject * k, struct dentry * p, +- const char * n, struct dentry ** d) ++static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, ++ const char *name, struct sysfs_dirent **p_sd) + { +- int error; + umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent *sd; ++ int err; + +- mutex_lock(&p->d_inode->i_mutex); +- *d = lookup_one_len(n, p, strlen(n)); +- if (!IS_ERR(*d)) { +- if (sysfs_dirent_exist(p->d_fsdata, n)) +- error = -EEXIST; +- else +- error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, +- SYSFS_DIR); +- if (!error) { +- error = sysfs_create(*d, mode, init_dir); +- if (!error) { +- inc_nlink(p->d_inode); +- (*d)->d_op = &sysfs_dentry_ops; +- d_rehash(*d); +- } ++ /* allocate */ ++ sd = sysfs_new_dirent(name, mode, SYSFS_DIR); ++ if (!sd) ++ return -ENOMEM; ++ sd->s_elem.dir.kobj = kobj; ++ ++ /* link in */ ++ sysfs_addrm_start(&acxt, parent_sd); ++ err = -ENOENT; ++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ err = -EEXIST; ++ if (!sysfs_find_dirent(acxt.parent_sd, name)) { ++ sysfs_add_one(&acxt, sd); ++ sysfs_link_sibling(sd); ++ err = 0; ++ } ++addrm_finish: ++ if (sysfs_addrm_finish(&acxt)) { ++ *p_sd = sd; ++ return 0; + } +- if (error && (error != -EEXIST)) { +- struct sysfs_dirent *sd = (*d)->d_fsdata; +- if (sd) { +- list_del_init(&sd->s_sibling); ++ + sysfs_put(sd); +- } +- d_drop(*d); +- } +- dput(*d); +- } else +- error = PTR_ERR(*d); +- mutex_unlock(&p->d_inode->i_mutex); +- return error; ++ return err; + } + +- +-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d) ++int sysfs_create_subdir(struct kobject *kobj, const char *name, ++ struct sysfs_dirent **p_sd) + { +- return create_dir(k,k->dentry,n,d); ++ return create_dir(kobj, kobj->sd, name, p_sd); + } + + /** + * sysfs_create_dir - create a directory for an object. + * @kobj: object we're creating directory for. +- * @shadow_parent: parent parent object. 
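
sysfs_get_dirent() above is careful to do the lookup and the reference bump under the same sysfs_mutex hold; doing them separately would let the dirent be freed in between. The same find-and-get shape in a self-contained toy (entry, children and get_entry() are invented names):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>

struct entry { const char *name; atomic_int refcount; struct entry *next; };

static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry leaf = { "leaf", 1, NULL };
static struct entry *children = &leaf;  /* protected by tree_lock */

/* Lookup and reference bump under one lock hold, so the entry
 * cannot be freed between the find and the get. */
static struct entry *get_entry(const char *name)
{
        struct entry *e;

        pthread_mutex_lock(&tree_lock);
        for (e = children; e; e = e->next)
                if (strcmp(e->name, name) == 0) {
                        atomic_fetch_add(&e->refcount, 1);
                        break;
                }
        pthread_mutex_unlock(&tree_lock);
        return e;
}

int main(void)
{
        struct entry *e = get_entry("leaf");

        if (e)
                printf("%s refcount now %d\n", e->name,
                       atomic_load(&e->refcount));
        return 0;
}
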
+ */ +- +-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent) ++int sysfs_create_dir(struct kobject * kobj) + { +- struct dentry * dentry = NULL; +- struct dentry * parent; ++ struct sysfs_dirent *parent_sd, *sd; + int error = 0; + + BUG_ON(!kobj); + +- if (shadow_parent) +- parent = shadow_parent; +- else if (kobj->parent) +- parent = kobj->parent->dentry; ++ if (kobj->parent) ++ parent_sd = kobj->parent->sd; + else if (sysfs_mount && sysfs_mount->mnt_sb) +- parent = sysfs_mount->mnt_sb->s_root; ++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; + else + return -EFAULT; + +- error = create_dir(kobj,parent,kobject_name(kobj),&dentry); ++ error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); + if (!error) +- kobj->dentry = dentry; ++ kobj->sd = sd; + return error; + } + +-/* attaches attribute's sysfs_dirent to the dentry corresponding to the +- * attribute file +- */ +-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry) ++static int sysfs_count_nlink(struct sysfs_dirent *sd) + { +- struct attribute * attr = NULL; +- struct bin_attribute * bin_attr = NULL; +- int (* init) (struct inode *) = NULL; +- int error = 0; ++ struct sysfs_dirent *child; ++ int nr = 0; + +- if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) { +- bin_attr = sd->s_element; +- attr = &bin_attr->attr; +- } else { +- attr = sd->s_element; +- init = init_file; +- } ++ for (child = sd->s_children; child; child = child->s_sibling) ++ if (sysfs_type(child) == SYSFS_DIR) ++ nr++; ++ return nr + 2; ++} + +- dentry->d_fsdata = sysfs_get(sd); +- /* protect sd->s_dentry against sysfs_d_iput */ +- spin_lock(&sysfs_lock); +- sd->s_dentry = dentry; +- spin_unlock(&sysfs_lock); +- error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init); +- if (error) { +- sysfs_put(sd); +- return error; +- } ++static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; ++ struct sysfs_dirent * sd; ++ struct bin_attribute *bin_attr; ++ struct inode *inode; ++ int found = 0; + +- if (bin_attr) { +- dentry->d_inode->i_size = bin_attr->size; +- dentry->d_inode->i_fop = &bin_fops; ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { ++ if (sysfs_type(sd) && ++ !strcmp(sd->s_name, dentry->d_name.name)) { ++ found = 1; ++ break; ++ } + } +- dentry->d_op = &sysfs_dentry_ops; +- d_rehash(dentry); + +- return 0; +-} ++ /* no such entry */ ++ if (!found) ++ return NULL; + +-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry) +-{ +- int err = 0; ++ /* attach dentry and inode */ ++ inode = sysfs_get_inode(sd); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ++ mutex_lock(&sysfs_mutex); ++ ++ if (inode->i_state & I_NEW) { ++ /* initialize inode according to type */ ++ switch (sysfs_type(sd)) { ++ case SYSFS_DIR: ++ inode->i_op = &sysfs_dir_inode_operations; ++ inode->i_fop = &sysfs_dir_operations; ++ inode->i_nlink = sysfs_count_nlink(sd); ++ break; ++ case SYSFS_KOBJ_ATTR: ++ inode->i_size = PAGE_SIZE; ++ inode->i_fop = &sysfs_file_operations; ++ break; ++ case SYSFS_KOBJ_BIN_ATTR: ++ bin_attr = sd->s_elem.bin_attr.bin_attr; ++ inode->i_size = bin_attr->size; ++ inode->i_fop = &bin_fops; ++ break; ++ case SYSFS_KOBJ_LINK: ++ inode->i_op = &sysfs_symlink_inode_operations; ++ break; ++ default: ++ BUG(); ++ } ++ } + +- dentry->d_fsdata = sysfs_get(sd); +- /* protect sd->s_dentry against sysfs_d_iput */ +- spin_lock(&sysfs_lock); +- sd->s_dentry = dentry; +- 
spin_unlock(&sysfs_lock); +- err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink); +- if (!err) { +- dentry->d_op = &sysfs_dentry_ops; +- d_rehash(dentry); +- } else +- sysfs_put(sd); ++ sysfs_instantiate(dentry, inode); ++ sysfs_attach_dentry(sd, dentry); + +- return err; ++ mutex_unlock(&sysfs_mutex); ++ ++ return NULL; + } + +-static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, +- struct nameidata *nd) ++static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd) + { +- struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; +- struct sysfs_dirent * sd; +- int err = 0; ++ struct sysfs_dirent *sd; ++ struct dentry *dest; + +- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { +- if (sd->s_type & SYSFS_NOT_PINNED) { +- const unsigned char * name = sysfs_get_name(sd); ++ sd = dentry->d_fsdata; ++ dest = NULL; ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const struct shadow_dir_operations *shadow_ops; ++ const void *tag; + +- if (strcmp(name, dentry->d_name.name)) +- continue; ++ mutex_lock(&sysfs_mutex); + +- if (sd->s_type & SYSFS_KOBJ_LINK) +- err = sysfs_attach_link(sd, dentry); +- else +- err = sysfs_attach_attr(sd, dentry); +- break; +- } ++ shadow_ops = dentry->d_inode->i_private; ++ tag = shadow_ops->current_tag(); ++ ++ sd = find_shadow_sd(sd, tag); ++ if (sd) ++ dest = sd->s_dentry; ++ dget(dest); ++ ++ mutex_unlock(&sysfs_mutex); + } ++ if (!dest) ++ dest = dget(dentry); ++ dput(nd->dentry); ++ nd->dentry = dest; + +- return ERR_PTR(err); ++ return NULL; + } + ++ + const struct inode_operations sysfs_dir_inode_operations = { + .lookup = sysfs_lookup, + .setattr = sysfs_setattr, ++ .follow_link = sysfs_shadow_follow_link, + }; + +-static void remove_dir(struct dentry * d) ++static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) + { +- struct dentry * parent = dget(d->d_parent); +- struct sysfs_dirent * sd; ++ sysfs_unlink_sibling(sd); ++ sysfs_remove_one(acxt, sd); ++} + +- mutex_lock(&parent->d_inode->i_mutex); +- d_delete(d); +- sd = d->d_fsdata; +- list_del_init(&sd->s_sibling); +- sysfs_put(sd); +- if (d->d_inode) +- simple_rmdir(parent->d_inode,d); ++static void remove_dir(struct sysfs_dirent *sd) ++{ ++ struct sysfs_addrm_cxt acxt; + +- pr_debug(" o %s removing done (%d)\n",d->d_name.name, +- atomic_read(&d->d_count)); ++ sysfs_addrm_start(&acxt, sd->s_parent); ++ __remove_dir(&acxt, sd); ++ sysfs_addrm_finish(&acxt); ++} + +- mutex_unlock(&parent->d_inode->i_mutex); +- dput(parent); ++void sysfs_remove_subdir(struct sysfs_dirent *sd) ++{ ++ remove_dir(sd); + } + +-void sysfs_remove_subdir(struct dentry * d) ++static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *dir_sd) + { +- remove_dir(d); ++ struct sysfs_dirent **pos; ++ ++ pos = &dir_sd->s_children; ++ while (*pos) { ++ struct sysfs_dirent *sd = *pos; ++ ++ if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { ++ *pos = sd->s_sibling; ++ sd->s_sibling = NULL; ++ sysfs_remove_one(acxt, sd); ++ } else ++ pos = &(*pos)->s_sibling; ++ } + } + ++static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt, ++ struct sysfs_dirent *dir_sd) ++{ ++ struct sysfs_dirent **pos; ++ ++ pos = &dir_sd->s_children; ++ while (*pos) { ++ struct sysfs_dirent *sd = *pos; ++ ++ sysfs_empty_dir(acxt, sd); ++ __remove_dir(acxt, sd); ++ } ++} + +-static void __sysfs_remove_dir(struct dentry *dentry) ++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) + { +- struct sysfs_dirent * parent_sd; +- struct 
sysfs_dirent * sd, * tmp; ++ struct sysfs_addrm_cxt acxt; + +- dget(dentry); +- if (!dentry) ++ if (!dir_sd) + return; + +- pr_debug("sysfs %s: removing dir\n",dentry->d_name.name); +- mutex_lock(&dentry->d_inode->i_mutex); +- parent_sd = dentry->d_fsdata; +- list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) { +- if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED)) +- continue; +- list_del_init(&sd->s_sibling); +- sysfs_drop_dentry(sd, dentry); +- sysfs_put(sd); +- } +- mutex_unlock(&dentry->d_inode->i_mutex); ++ pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); ++ sysfs_addrm_start(&acxt, dir_sd); ++ if (sysfs_type(dir_sd) == SYSFS_DIR) ++ sysfs_empty_dir(&acxt, dir_sd); ++ else ++ sysfs_remove_shadows(&acxt, dir_sd); ++ sysfs_addrm_finish(&acxt); + +- remove_dir(dentry); +- /** +- * Drop reference from dget() on entrance. +- */ +- dput(dentry); ++ remove_dir(dir_sd); + } + + /** +@@ -393,102 +1064,154 @@ + + void sysfs_remove_dir(struct kobject * kobj) + { +- __sysfs_remove_dir(kobj->dentry); +- kobj->dentry = NULL; ++ struct sysfs_dirent *sd = kobj->sd; ++ ++ spin_lock(&sysfs_assoc_lock); ++ kobj->sd = NULL; ++ spin_unlock(&sysfs_assoc_lock); ++ ++ __sysfs_remove_dir(sd); + } + +-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent, +- const char *new_name) ++int sysfs_rename_dir(struct kobject * kobj, const char *new_name) + { +- int error = 0; +- struct dentry * new_dentry; ++ struct dentry *old_dentry, *new_dentry, *parent; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent *sd; ++ const char *dup_name; ++ int error; + +- if (!new_parent) +- return -EFAULT; ++ dup_name = NULL; ++ new_dentry = NULL; + +- down_write(&sysfs_rename_sem); +- mutex_lock(&new_parent->d_inode->i_mutex); ++ sd = kobj->sd; ++ sysfs_addrm_start(&acxt, sd->s_parent); ++ error = -ENOENT; ++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ error = -EEXIST; ++ if (sysfs_find_dirent(acxt.parent_sd, new_name)) ++ goto addrm_finish; + +- new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name)); +- if (!IS_ERR(new_dentry)) { +- /* By allowing two different directories with the +- * same d_parent we allow this routine to move +- * between different shadows of the same directory +- */ +- if (kobj->dentry->d_parent->d_inode != new_parent->d_inode) +- return -EINVAL; +- else if (new_dentry->d_parent->d_inode != new_parent->d_inode) + error = -EINVAL; +- else if (new_dentry == kobj->dentry) ++ if ((sd->s_parent == acxt.parent_sd) && ++ (strcmp(new_name, sd->s_name) == 0)) ++ goto addrm_finish; ++ ++ old_dentry = sd->s_dentry; ++ parent = acxt.parent_sd->s_dentry; ++ if (old_dentry) { ++ old_dentry = sd->s_dentry; ++ parent = acxt.parent_sd->s_dentry; ++ new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); ++ if (IS_ERR(new_dentry)) { ++ error = PTR_ERR(new_dentry); ++ goto addrm_finish; ++ } ++ + error = -EINVAL; +- else if (!new_dentry->d_inode) { ++ if (old_dentry == new_dentry) ++ goto addrm_finish; ++ } ++ ++ /* rename kobject and sysfs_dirent */ ++ error = -ENOMEM; ++ new_name = dup_name = kstrdup(new_name, GFP_KERNEL); ++ if (!new_name) ++ goto addrm_finish; ++ + error = kobject_set_name(kobj, "%s", new_name); +- if (!error) { +- struct sysfs_dirent *sd, *parent_sd; ++ if (error) ++ goto addrm_finish; + +- d_add(new_dentry, NULL); +- d_move(kobj->dentry, new_dentry); ++ dup_name = sd->s_name; ++ sd->s_name = new_name; + +- sd = kobj->dentry->d_fsdata; +- parent_sd = new_parent->d_fsdata; ++ /* move under the new 
parent */ ++ sysfs_unlink_sibling(sd); ++ sysfs_get(acxt.parent_sd); ++ sysfs_put(sd->s_parent); ++ sd->s_parent = acxt.parent_sd; ++ sysfs_link_sibling(sd); + +- list_del_init(&sd->s_sibling); +- list_add(&sd->s_sibling, &parent_sd->s_children); +- } +- else +- d_drop(new_dentry); +- } else +- error = -EEXIST; +- dput(new_dentry); ++ if (new_dentry) { ++ d_add(new_dentry, NULL); ++ d_move(old_dentry, new_dentry); + } +- mutex_unlock(&new_parent->d_inode->i_mutex); +- up_write(&sysfs_rename_sem); ++ error = 0; ++addrm_finish: ++ sysfs_addrm_finish(&acxt); + ++ kfree(dup_name); ++ dput(new_dentry); + return error; + } + +-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent) ++int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) + { +- struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry; +- struct sysfs_dirent *new_parent_sd, *sd; ++ struct sysfs_dirent *sd = kobj->sd; ++ struct sysfs_dirent *new_parent_sd; ++ struct dentry *old_parent, *new_parent = NULL; ++ struct dentry *old_dentry = NULL, *new_dentry = NULL; + int error; + +- old_parent_dentry = kobj->parent ? +- kobj->parent->dentry : sysfs_mount->mnt_sb->s_root; +- new_parent_dentry = new_parent ? +- new_parent->dentry : sysfs_mount->mnt_sb->s_root; ++ BUG_ON(!sd->s_parent); ++ new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; ++ ++ /* get dentries */ ++ old_dentry = sysfs_get_dentry(sd); ++ if (IS_ERR(old_dentry)) { ++ error = PTR_ERR(old_dentry); ++ goto out_dput; ++ } ++ old_parent = sd->s_parent->s_dentry; ++ ++ new_parent = sysfs_get_dentry(new_parent_sd); ++ if (IS_ERR(new_parent)) { ++ error = PTR_ERR(new_parent); ++ goto out_dput; ++ } + +- if (old_parent_dentry->d_inode == new_parent_dentry->d_inode) +- return 0; /* nothing to move */ ++ if (old_parent->d_inode == new_parent->d_inode) { ++ error = 0; ++ goto out_dput; /* nothing to move */ ++ } + again: +- mutex_lock(&old_parent_dentry->d_inode->i_mutex); +- if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) { +- mutex_unlock(&old_parent_dentry->d_inode->i_mutex); ++ mutex_lock(&old_parent->d_inode->i_mutex); ++ if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { ++ mutex_unlock(&old_parent->d_inode->i_mutex); + goto again; + } + +- new_parent_sd = new_parent_dentry->d_fsdata; +- sd = kobj->dentry->d_fsdata; +- +- new_dentry = lookup_one_len(kobj->name, new_parent_dentry, +- strlen(kobj->name)); ++ new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); + if (IS_ERR(new_dentry)) { + error = PTR_ERR(new_dentry); +- goto out; ++ goto out_unlock; + } else + error = 0; + d_add(new_dentry, NULL); +- d_move(kobj->dentry, new_dentry); ++ d_move(sd->s_dentry, new_dentry); + dput(new_dentry); + + /* Remove from old parent's list and insert into new parent's list. 
*/ +- list_del_init(&sd->s_sibling); +- list_add(&sd->s_sibling, &new_parent_sd->s_children); ++ mutex_lock(&sysfs_mutex); + +-out: +- mutex_unlock(&new_parent_dentry->d_inode->i_mutex); +- mutex_unlock(&old_parent_dentry->d_inode->i_mutex); ++ sysfs_unlink_sibling(sd); ++ sysfs_get(new_parent_sd); ++ sysfs_put(sd->s_parent); ++ sd->s_parent = new_parent_sd; ++ sysfs_link_sibling(sd); ++ ++ mutex_unlock(&sysfs_mutex); + ++ out_unlock: ++ mutex_unlock(&new_parent->d_inode->i_mutex); ++ mutex_unlock(&old_parent->d_inode->i_mutex); ++ out_dput: ++ dput(new_parent); ++ dput(old_dentry); ++ dput(new_dentry); + return error; + } + +@@ -496,23 +1219,27 @@ + { + struct dentry * dentry = file->f_path.dentry; + struct sysfs_dirent * parent_sd = dentry->d_fsdata; ++ struct sysfs_dirent * sd; + +- mutex_lock(&dentry->d_inode->i_mutex); +- file->private_data = sysfs_new_dirent(parent_sd, NULL); +- mutex_unlock(&dentry->d_inode->i_mutex); +- +- return file->private_data ? 0 : -ENOMEM; ++ sd = sysfs_new_dirent("_DIR_", 0, 0); ++ if (sd) { ++ mutex_lock(&sysfs_mutex); ++ sd->s_parent = sysfs_get(parent_sd); ++ sysfs_link_sibling(sd); ++ mutex_unlock(&sysfs_mutex); ++ } + ++ file->private_data = sd; ++ return sd ? 0 : -ENOMEM; + } + + static int sysfs_dir_close(struct inode *inode, struct file *file) + { +- struct dentry * dentry = file->f_path.dentry; + struct sysfs_dirent * cursor = file->private_data; + +- mutex_lock(&dentry->d_inode->i_mutex); +- list_del_init(&cursor->s_sibling); +- mutex_unlock(&dentry->d_inode->i_mutex); ++ mutex_lock(&sysfs_mutex); ++ sysfs_unlink_sibling(cursor); ++ mutex_unlock(&sysfs_mutex); + + release_sysfs_dirent(cursor); + +@@ -530,7 +1257,7 @@ + struct dentry *dentry = filp->f_path.dentry; + struct sysfs_dirent * parent_sd = dentry->d_fsdata; + struct sysfs_dirent *cursor = filp->private_data; +- struct list_head *p, *q = &cursor->s_sibling; ++ struct sysfs_dirent **pos; + ino_t ino; + int i = filp->f_pos; + +@@ -543,38 +1270,55 @@ + i++; + /* fallthrough */ + case 1: +- ino = parent_ino(dentry); ++ if (parent_sd->s_parent) ++ ino = parent_sd->s_parent->s_ino; ++ else ++ ino = parent_sd->s_ino; + if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) + break; + filp->f_pos++; + i++; + /* fallthrough */ + default: ++ /* If I am the shadow master return nothing. 
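++		 * (A shadowed parent's children are the per-tag shadow ++		 * directories, whose s_name fields are tag pointers rather ++		 * than real strings, so there is nothing meaningful to list.)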
*/ ++ if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED) ++ return 0; ++ ++ mutex_lock(&sysfs_mutex); ++ pos = &parent_sd->s_children; ++ while (*pos != cursor) ++ pos = &(*pos)->s_sibling; ++ ++ /* unlink cursor */ ++ *pos = cursor->s_sibling; ++ + if (filp->f_pos == 2) +- list_move(q, &parent_sd->s_children); ++ pos = &parent_sd->s_children; + +- for (p=q->next; p!= &parent_sd->s_children; p=p->next) { +- struct sysfs_dirent *next; ++ for ( ; *pos; pos = &(*pos)->s_sibling) { ++ struct sysfs_dirent *next = *pos; + const char * name; + int len; + +- next = list_entry(p, struct sysfs_dirent, +- s_sibling); +- if (!next->s_element) ++ if (!sysfs_type(next)) + continue; + +- name = sysfs_get_name(next); ++ name = next->s_name; + len = strlen(name); + ino = next->s_ino; + + if (filldir(dirent, name, len, filp->f_pos, ino, + dt_type(next)) < 0) +- return 0; ++ break; + +- list_move(q, p); +- p = q; + filp->f_pos++; + } ++ ++ /* put cursor back in */ ++ cursor->s_sibling = *pos; ++ *pos = cursor; ++ ++ mutex_unlock(&sysfs_mutex); + } + return 0; + } +@@ -583,7 +1327,6 @@ + { + struct dentry * dentry = file->f_path.dentry; + +- mutex_lock(&dentry->d_inode->i_mutex); + switch (origin) { + case 1: + offset += file->f_pos; +@@ -591,127 +1334,224 @@ + if (offset >= 0) + break; + default: +- mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); + return -EINVAL; + } + if (offset != file->f_pos) { ++ mutex_lock(&sysfs_mutex); ++ + file->f_pos = offset; + if (file->f_pos >= 2) { + struct sysfs_dirent *sd = dentry->d_fsdata; + struct sysfs_dirent *cursor = file->private_data; +- struct list_head *p; ++ struct sysfs_dirent **pos; + loff_t n = file->f_pos - 2; + +- list_del(&cursor->s_sibling); +- p = sd->s_children.next; +- while (n && p != &sd->s_children) { +- struct sysfs_dirent *next; +- next = list_entry(p, struct sysfs_dirent, +- s_sibling); +- if (next->s_element) ++ sysfs_unlink_sibling(cursor); ++ ++ pos = &sd->s_children; ++ while (n && *pos) { ++ struct sysfs_dirent *next = *pos; ++ if (sysfs_type(next)) + n--; +- p = p->next; ++ pos = &(*pos)->s_sibling; + } +- list_add_tail(&cursor->s_sibling, p); ++ ++ cursor->s_sibling = *pos; ++ *pos = cursor; + } ++ ++ mutex_unlock(&sysfs_mutex); + } +- mutex_unlock(&dentry->d_inode->i_mutex); ++ + return offset; + } + ++const struct file_operations sysfs_dir_operations = { ++ .open = sysfs_dir_open, ++ .release = sysfs_dir_close, ++ .llseek = sysfs_dir_lseek, ++ .read = generic_read_dir, ++ .readdir = sysfs_readdir, ++}; + +-/** +- * sysfs_make_shadowed_dir - Setup so a directory can be shadowed +- * @kobj: object we're creating shadow of. +- */ + +-int sysfs_make_shadowed_dir(struct kobject *kobj, +- void * (*follow_link)(struct dentry *, struct nameidata *)) ++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd) + { +- struct inode *inode; +- struct inode_operations *i_op; ++ struct sysfs_addrm_cxt acxt; + +- inode = kobj->dentry->d_inode; +- if (inode->i_op != &sysfs_dir_inode_operations) +- return -EINVAL; ++ /* If a shadow directory goes empty remove it. */ ++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR) ++ return; + +- i_op = kmalloc(sizeof(*i_op), GFP_KERNEL); +- if (!i_op) +- return -ENOMEM; ++ if (sd->s_children) ++ return; + +- memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op)); +- i_op->follow_link = follow_link; ++ sysfs_addrm_start(&acxt, sd->s_parent); + +- /* Locking of inode->i_op? +- * Since setting i_op is a single word write and they +- * are atomic we should be ok here. 
+- */ +- inode->i_op = i_op; +- return 0; +-} ++ if (sd->s_flags & SYSFS_FLAG_REMOVED) ++ goto addrm_finish; + +-/** +- * sysfs_create_shadow_dir - create a shadow directory for an object. +- * @kobj: object we're creating directory for. +- * +- * sysfs_make_shadowed_dir must already have been called on this +- * directory. +- */ ++ if (sd->s_children) ++ goto addrm_finish; + +-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj) ++ __remove_dir(&acxt, sd); ++addrm_finish: ++ sysfs_addrm_finish(&acxt); ++} ++ ++static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag) + { +- struct sysfs_dirent *sd; +- struct dentry *parent, *dir, *shadow; ++ struct sysfs_dirent *sd = NULL; ++ struct dentry *dir, *shadow; + struct inode *inode; + +- dir = kobj->dentry; ++ dir = parent_sd->s_dentry; + inode = dir->d_inode; +- parent = dir->d_parent; +- shadow = ERR_PTR(-EINVAL); +- if (!sysfs_is_shadowed_inode(inode)) +- goto out; + +- shadow = d_alloc(parent, &dir->d_name); ++ shadow = d_alloc(dir->d_parent, &dir->d_name); + if (!shadow) +- goto nomem; ++ goto out; ++ ++ /* Since the shadow directory is reachable make it look ++ * like it is actually hashed. ++ */ ++ shadow->d_hash.pprev = &shadow->d_hash.next; ++ shadow->d_hash.next = NULL; ++ shadow->d_flags &= ~DCACHE_UNHASHED; + +- sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR); ++ sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR); + if (!sd) +- goto nomem; ++ goto error; + +- d_instantiate(shadow, igrab(inode)); +- inc_nlink(inode); +- inc_nlink(parent->d_inode); +- shadow->d_op = &sysfs_dentry_ops; ++ sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj; ++ sd->s_parent = sysfs_get(parent_sd); + +- dget(shadow); /* Extra count - pin the dentry in core */ ++ /* Use the inode number of the parent we are shadowing */ ++ sysfs_free_ino(sd->s_ino); ++ sd->s_ino = parent_sd->s_ino; + ++ inc_nlink(inode); ++ inc_nlink(dir->d_parent->d_inode); ++ ++ sysfs_link_sibling(sd); ++ __iget(inode); ++ sysfs_instantiate(shadow, inode); ++ sysfs_attach_dentry(sd, shadow); + out: +- return shadow; +-nomem: ++ return sd; ++error: + dput(shadow); +- shadow = ERR_PTR(-ENOMEM); + goto out; + } + ++int sysfs_resolve_for_create(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd) ++{ ++ const struct shadow_dir_operations *shadow_ops; ++ struct sysfs_dirent *sd, *shadow_sd; ++ ++ sd = *parent_sd; ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ sd = sd->s_parent; ++ ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const void *tag; ++ ++ shadow_ops = sd->s_dentry->d_inode->i_private; ++ tag = shadow_ops->kobject_tag(kobj); ++ ++ shadow_sd = find_shadow_sd(sd, tag); ++ if (!shadow_sd) ++ shadow_sd = add_shadow_sd(sd, tag); ++ sd = shadow_sd; ++ } ++ if (sd) { ++ *parent_sd = sd; ++ return 1; ++ } ++ return 0; ++} ++ ++int sysfs_resolve_for_remove(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd) ++{ ++ struct sysfs_dirent *sd; ++ /* If dentry is a shadow directory find the shadow that is ++ * stored under the same tag as kobj. This allows removal ++ * of dirents to function properly even if the value of ++ * kobject_tag() has changed since we initially created ++ * the dirents associated with kobj.
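++	 * (A kobject whose tag has changed is expected to be renamed; ++	 * until then its dirents stay filed under the old tag, which is ++	 * what find_shadow_tag() recovers here.)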
++ */ ++ ++ sd = *parent_sd; ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ sd = sd->s_parent; ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const void *tag; ++ ++ tag = find_shadow_tag(kobj); ++ sd = find_shadow_sd(sd, tag); ++ } ++ if (sd) { ++ *parent_sd = sd; ++ return 1; ++ } ++ return 0; ++} ++ + /** +- * sysfs_remove_shadow_dir - remove an object's directory. +- * @shadow: dentry of shadow directory ++ * sysfs_enable_shadowing - Automatically create shadows of a directory ++ * @kobj: object to automatically shadow + * +- * The only thing special about this is that we remove any files in +- * the directory before we remove the directory, and we've inlined +- * what used to be sysfs_rmdir() below, instead of calling separately. ++ * Once shadowing has been enabled on a directory the contents ++ * of the directory become dependent upon context. ++ * ++ * shadow_ops->current_tag() returns the context for the current ++ * process. ++ * ++ * shadow_ops->kobject_tag() returns the context that a given kobj ++ * resides in. ++ * ++ * Using those methods the sysfs code on shadowed directories ++ * carefully stores the files so that when we lookup files ++ * we get the proper answer for our context. ++ * ++ * If the context of a kobject is changed it is expected that ++ * the kobject will be renamed so the appropriate sysfs data structures ++ * can be updated. + */ +- +-void sysfs_remove_shadow_dir(struct dentry *shadow) ++int sysfs_enable_shadowing(struct kobject *kobj, ++ const struct shadow_dir_operations *shadow_ops) + { +- __sysfs_remove_dir(shadow); ++ struct sysfs_dirent *sd; ++ struct dentry *dentry; ++ int err; ++ ++ /* Find the dentry for the shadowed directory and ++ * increase its count. ++ */ ++ err = -ENOENT; ++ sd = kobj->sd; ++ dentry = sysfs_get_dentry(sd); ++ if (!dentry) ++ goto out; ++ ++ mutex_lock(&sysfs_mutex); ++ err = -EINVAL; ++ /* We can only enable shadowing on empty directories ++ * where shadowing is not already enabled. ++ */ ++ if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) && ++ !(sd->s_flags & SYSFS_FLAG_REMOVED) && ++ !(sd->s_flags & SYSFS_FLAG_SHADOWED)) { ++ sd->s_flags |= SYSFS_FLAG_SHADOWED; ++ dentry->d_inode->i_private = (void *)shadow_ops; ++ err = 0; ++ } ++ mutex_unlock(&sysfs_mutex); ++out: ++ if (err) ++ dput(dentry); ++ return err; + } + +-const struct file_operations sysfs_dir_operations = { +- .open = sysfs_dir_open, +- .release = sysfs_dir_close, +- .llseek = sysfs_dir_lseek, +- .read = generic_read_dir, +- .readdir = sysfs_readdir, +-}; +diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c.orig linux-2.6.22-591/fs/sysfs/dir.c.orig +--- linux-2.6.22-570/fs/sysfs/dir.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/sysfs/dir.c.orig 2007-12-22 20:43:14.000000000 -0500 +@@ -0,0 +1,1558 @@ ++/* ++ * dir.c - Operations for sysfs directories.
++ */ ++ ++#undef DEBUG ++ ++#include <linux/fs.h> ++#include <linux/mount.h> ++#include <linux/module.h> ++#include <linux/kobject.h> ++#include <linux/namei.h> ++#include <linux/idr.h> ++#include <linux/completion.h> ++#include <asm/semaphore.h> ++#include "sysfs.h" ++ ++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd); ++ ++DEFINE_MUTEX(sysfs_mutex); ++spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED; ++ ++static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED; ++static DEFINE_IDA(sysfs_ino_ida); ++ ++static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target) ++{ ++ /* Find the shadow directory for the specified tag */ ++ struct sysfs_dirent *sd; ++ ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { ++ if (sd->s_name != target) ++ continue; ++ break; ++ } ++ return sd; ++} ++ ++static const void *find_shadow_tag(struct kobject *kobj) ++{ ++ /* Find the tag the current kobj is cached with */ ++ return kobj->sd->s_parent->s_name; ++} ++ ++/** ++ * sysfs_link_sibling - link sysfs_dirent into sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Link @sd into its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sysfs_mutex) ++ */ ++ ++/** ++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Unlink @sd from its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sysfs_mutex) ++ */ ++ ++ ++/** ++ * sysfs_get_dentry - get dentry for the given sysfs_dirent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get dentry for @sd. Dentry is looked up if currently not ++ * present. This function climbs sysfs_dirent tree till it ++ * reaches a sysfs_dirent with valid dentry attached and descends ++ * down from there looking up dentry for each step. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep) ++ * ++ * RETURNS: ++ * Pointer to found dentry on success, ERR_PTR() value on error. ++ */ ++struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent *cur; ++ struct dentry *parent_dentry, *dentry; ++ int i, depth; ++ ++ /* Find the first parent which has valid s_dentry and get the ++ * dentry. ++ */ ++ mutex_lock(&sysfs_mutex); ++ restart0: ++ spin_lock(&sysfs_assoc_lock); ++ restart1: ++ spin_lock(&dcache_lock); ++ ++ dentry = NULL; ++ depth = 0; ++ cur = sd; ++ while (!cur->s_dentry || !cur->s_dentry->d_inode) { ++ if (cur->s_flags & SYSFS_FLAG_REMOVED) { ++ dentry = ERR_PTR(-ENOENT); ++ depth = 0; ++ break; ++ } ++ cur = cur->s_parent; ++ depth++; ++ } ++ if (!IS_ERR(dentry)) ++ dentry = dget_locked(cur->s_dentry); ++ ++ spin_unlock(&dcache_lock); ++ spin_unlock(&sysfs_assoc_lock); ++ ++ /* from the found dentry, look up depth times */ ++ while (depth--) { ++ /* find and get depth'th ancestor */ ++ for (cur = sd, i = 0; cur && i < depth; i++) ++ cur = cur->s_parent; ++ ++ /* This can happen if tree structure was modified due ++ * to move/rename. Restart. ++ */ ++ if (i != depth) { ++ dput(dentry); ++ goto restart0; ++ } ++ ++ sysfs_get(cur); ++ ++ mutex_unlock(&sysfs_mutex); ++ ++ /* look it up */ ++ parent_dentry = dentry; ++ dentry = lookup_one_len_kern(cur->s_name, parent_dentry, ++ strlen(cur->s_name)); ++ dput(parent_dentry); ++ ++ if (IS_ERR(dentry)) { ++ sysfs_put(cur); ++ return dentry; ++ } ++ ++ mutex_lock(&sysfs_mutex); ++ spin_lock(&sysfs_assoc_lock); ++ ++ /* This, again, can happen if tree structure has ++ * changed and we looked up the wrong thing. Restart.
++ */ ++ if (cur->s_dentry != dentry) { ++ dput(dentry); ++ sysfs_put(cur); ++ goto restart1; ++ } ++ ++ spin_unlock(&sysfs_assoc_lock); ++ ++ sysfs_put(cur); ++ } ++ ++ mutex_unlock(&sysfs_mutex); ++ return dentry; ++} ++ ++/** ++ * sysfs_link_sibling - link sysfs_dirent into sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Link @sd into its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) ++ */ ++void sysfs_link_sibling(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent *parent_sd = sd->s_parent; ++ ++ BUG_ON(sd->s_sibling); ++ sd->s_sibling = parent_sd->s_children; ++ parent_sd->s_children = sd; ++} ++ ++/** ++ * sysfs_unlink_sibling - unlink sysfs_dirent from sibling list ++ * @sd: sysfs_dirent of interest ++ * ++ * Unlink @sd from its sibling list which starts from ++ * sd->s_parent->s_children. ++ * ++ * Locking: ++ * mutex_lock(sd->s_parent->dentry->d_inode->i_mutex) ++ */ ++void sysfs_unlink_sibling(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent **pos; ++ ++ for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) { ++ if (*pos == sd) { ++ *pos = sd->s_sibling; ++ sd->s_sibling = NULL; ++ break; ++ } ++ } ++} ++ ++/** ++ * sysfs_get_dentry - get dentry for the given sysfs_dirent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get dentry for @sd. Dentry is looked up if currently not ++ * present. This function climbs sysfs_dirent tree till it ++ * reaches a sysfs_dirent with valid dentry attached and descends ++ * down from there looking up dentry for each step. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep) ++ * ++ * RETURNS: ++ * Pointer to found dentry on success, ERR_PTR() value on error. ++ */ ++ ++/** ++ * sysfs_get_active - get an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to get an active reference to ++ * ++ * Get an active reference of @sd. This function is noop if @sd ++ * is NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++/** ++ * sysfs_put_active - put an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to put an active reference to ++ * ++ * Put an active reference to @sd. This function is noop if @sd ++ * is NULL. ++ */ ++void sysfs_put_active(struct sysfs_dirent *sd) ++{ ++ struct completion *cmpl; ++ int v; ++ ++ if (unlikely(!sd)) ++ return; ++ ++ v = atomic_dec_return(&sd->s_active); ++ if (likely(v != SD_DEACTIVATED_BIAS)) ++ return; ++ ++ /* atomic_dec_return() is a mb(), we'll always see the updated ++ * sd->s_sibling. ++ */ ++ cmpl = (void *)sd->s_sibling; ++ complete(cmpl); ++} ++ ++/** ++ * sysfs_get_active_two - get active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get active reference to @sd and its parent. Parent's active ++ * reference is grabbed first. This function is noop if @sd is ++ * NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd) ++{ ++ if (sd) { ++ if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent))) ++ return NULL; ++ if (unlikely(!sysfs_get_active(sd))) { ++ sysfs_put_active(sd->s_parent); ++ return NULL; ++ } ++ } ++ return sd; ++} ++ ++/** ++ * sysfs_put_active_two - put active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Put active references to @sd and its parent. This function is ++ * noop if @sd is NULL. 
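++ * ++ * A minimal pairing sketch (hypothetical caller): ++ * ++ *	if (sysfs_get_active_two(sd)) { ++ *		... access sd and sd->s_parent ... ++ *		sysfs_put_active_two(sd); ++ *	}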
++ */ ++void sysfs_put_active_two(struct sysfs_dirent *sd) ++{ ++ if (sd) { ++ sysfs_put_active(sd); ++ sysfs_put_active(sd->s_parent); ++ } ++} ++ ++/** ++ * sysfs_deactivate - deactivate sysfs_dirent ++ * @sd: sysfs_dirent to deactivate ++ * ++ * Deny new active references and drain existing ones. ++ */ ++static void sysfs_deactivate(struct sysfs_dirent *sd) ++{ ++ DECLARE_COMPLETION_ONSTACK(wait); ++ int v; ++ ++ BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED)); ++ sd->s_sibling = (void *)&wait; ++ ++ /* atomic_add_return() is a mb(), put_active() will always see ++ * the updated sd->s_sibling. ++ */ ++ v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); ++ ++ if (v != SD_DEACTIVATED_BIAS) ++ wait_for_completion(&wait); ++ ++ sd->s_sibling = NULL; ++} ++ ++/** ++ * sysfs_get_active - get an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to get an active reference to ++ * ++ * Get an active reference of @sd. This function is noop if @sd ++ * is NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) ++{ ++ if (unlikely(!sd)) ++ return NULL; ++ ++ while (1) { ++ int v, t; ++ ++ v = atomic_read(&sd->s_active); ++ if (unlikely(v < 0)) ++ return NULL; ++ ++ t = atomic_cmpxchg(&sd->s_active, v, v + 1); ++ if (likely(t == v)) ++ return sd; ++ if (t < 0) ++ return NULL; ++ ++ cpu_relax(); ++ } ++} ++/** ++ * sysfs_put_active - put an active reference to sysfs_dirent ++ * @sd: sysfs_dirent to put an active reference to ++ * ++ * Put an active reference to @sd. This function is noop if @sd ++ * is NULL. ++ */ ++ ++/** ++ * sysfs_get_active_two - get active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Get active reference to @sd and its parent. Parent's active ++ * reference is grabbed first. This function is noop if @sd is ++ * NULL. ++ * ++ * RETURNS: ++ * Pointer to @sd on success, NULL on failure. ++ */ ++ ++/** ++ * sysfs_put_active_two - put active references to sysfs_dirent and parent ++ * @sd: sysfs_dirent of interest ++ * ++ * Put active references to @sd and its parent. This function is ++ * noop if @sd is NULL. ++ */ ++ ++/** ++ * sysfs_deactivate - deactivate sysfs_dirent ++ * @sd: sysfs_dirent to deactivate ++ * ++ * Deny new active references and drain existing ones. s_active ++ * will be unlocked when the sysfs_dirent is released. ++ */ ++ ++static int sysfs_alloc_ino(ino_t *pino) ++{ ++ int ino, rc; ++ ++ retry: ++ spin_lock(&sysfs_ino_lock); ++ rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); ++ spin_unlock(&sysfs_ino_lock); ++ ++ if (rc == -EAGAIN) { ++ if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) ++ goto retry; ++ rc = -ENOMEM; ++ } ++ ++ *pino = ino; ++ return rc; ++} ++ ++static void sysfs_free_ino(ino_t ino) ++{ ++ spin_lock(&sysfs_ino_lock); ++ ida_remove(&sysfs_ino_ida, ino); ++ spin_unlock(&sysfs_ino_lock); ++} ++ ++void release_sysfs_dirent(struct sysfs_dirent * sd) ++{ ++ struct sysfs_dirent *parent_sd; ++ ++ repeat: ++ /* Moving/renaming is always done while holding reference. ++ * sd->s_parent won't change beneath us. 
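++	 * (The repeat loop releases each ancestor iteratively rather ++	 * than recursively, so dropping the last reference to a deep ++	 * subtree cannot overflow the kernel stack.)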
++ */ ++ parent_sd = sd->s_parent; ++ ++ if (sysfs_type(sd) == SYSFS_KOBJ_LINK) ++ sysfs_put(sd->s_elem.symlink.target_sd); ++ if (sysfs_type(sd) & SYSFS_COPY_NAME) ++ kfree(sd->s_name); ++ kfree(sd->s_iattr); ++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR) ++ sysfs_free_ino(sd->s_ino); ++ kmem_cache_free(sysfs_dir_cachep, sd); ++ ++ sd = parent_sd; ++ if (sd && atomic_dec_and_test(&sd->s_count)) ++ goto repeat; ++} ++ ++static void sysfs_d_iput(struct dentry * dentry, struct inode * inode) ++{ ++ struct sysfs_dirent * sd = dentry->d_fsdata; ++ ++ if (sd) { ++ /* sd->s_dentry is protected with sysfs_assoc_lock. ++ * This allows sysfs_drop_dentry() to dereference it. ++ */ ++ spin_lock(&sysfs_assoc_lock); ++ ++ /* The dentry might have been deleted or another ++ * lookup could have happened updating sd->s_dentry to ++ * point the new dentry. Ignore if it isn't pointing ++ * to this dentry. ++ */ ++ if (sd->s_dentry == dentry) ++ sd->s_dentry = NULL; ++ spin_unlock(&sysfs_assoc_lock); ++ sysfs_put(sd); ++ } ++ iput(inode); ++} ++ ++static struct dentry_operations sysfs_dentry_ops = { ++ .d_iput = sysfs_d_iput, ++}; ++ ++struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) ++{ ++ char *dup_name = NULL; ++ struct sysfs_dirent *sd = NULL; ++ ++ if (type & SYSFS_COPY_NAME) { ++ name = dup_name = kstrdup(name, GFP_KERNEL); ++ if (!name) ++ goto err_out; ++ } ++ ++ sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); ++ if (!sd) ++ goto err_out; ++ ++ if (sysfs_alloc_ino(&sd->s_ino)) ++ goto err_out; ++ ++ atomic_set(&sd->s_count, 1); ++ atomic_set(&sd->s_active, 0); ++ atomic_set(&sd->s_event, 1); ++ ++ sd->s_name = name; ++ sd->s_mode = mode; ++ sd->s_flags = type; ++ ++ return sd; ++ ++ err_out: ++ kfree(dup_name); ++ kmem_cache_free(sysfs_dir_cachep, sd); ++ return NULL; ++} ++ ++/** ++ * sysfs_attach_dentry - associate sysfs_dirent with dentry ++ * @sd: target sysfs_dirent ++ * @dentry: dentry to associate ++ * ++ * Associate @sd with @dentry. This is protected by ++ * sysfs_assoc_lock to avoid race with sysfs_d_iput(). ++ * ++ * LOCKING: ++ * mutex_lock(sysfs_mutex) ++ */ ++static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry) ++{ ++ dentry->d_op = &sysfs_dentry_ops; ++ dentry->d_fsdata = sysfs_get(sd); ++ ++ /* protect sd->s_dentry against sysfs_d_iput */ ++ spin_lock(&sysfs_assoc_lock); ++ sd->s_dentry = dentry; ++ spin_unlock(&sysfs_assoc_lock); ++ ++ if (dentry->d_flags & DCACHE_UNHASHED) ++ d_rehash(dentry); ++} ++ ++static int sysfs_ilookup_test(struct inode *inode, void *arg) ++{ ++ struct sysfs_dirent *sd = arg; ++ return inode->i_ino == sd->s_ino; ++} ++ ++/** ++ * sysfs_addrm_start - prepare for sysfs_dirent add/remove ++ * @acxt: pointer to sysfs_addrm_cxt to be used ++ * @parent_sd: parent sysfs_dirent ++ * ++ * This function is called when the caller is about to add or ++ * remove sysfs_dirent under @parent_sd. This function acquires ++ * sysfs_mutex, grabs inode for @parent_sd if available and lock ++ * i_mutex of it. @acxt is used to keep and pass context to ++ * other addrm functions. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep). sysfs_mutex is locked on ++ * return. i_mutex of parent inode is locked on return if ++ * available. ++ */ ++void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *parent_sd) ++{ ++ struct inode *inode; ++ ++ memset(acxt, 0, sizeof(*acxt)); ++ acxt->parent_sd = parent_sd; ++ ++ /* Lookup parent inode. 
inode initialization and I_NEW ++ * clearing are protected by sysfs_mutex. By grabbing it and ++ * looking up with _nowait variant, inode state can be ++ * determined reliably. ++ */ ++ mutex_lock(&sysfs_mutex); ++ ++ inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, ++ parent_sd); ++ ++ if (inode && !(inode->i_state & I_NEW)) { ++ /* parent inode available */ ++ acxt->parent_inode = inode; ++ ++ /* sysfs_mutex is below i_mutex in lock hierarchy. ++ * First, trylock i_mutex. If that fails, unlock ++ * sysfs_mutex and lock them in order. ++ */ ++ if (!mutex_trylock(&inode->i_mutex)) { ++ mutex_unlock(&sysfs_mutex); ++ mutex_lock(&inode->i_mutex); ++ mutex_lock(&sysfs_mutex); ++ } ++ } else ++ iput(inode); ++} ++ ++/** ++ * sysfs_add_one - add sysfs_dirent to parent ++ * @acxt: addrm context to use ++ * @sd: sysfs_dirent to be added ++ * ++ * Get @acxt->parent_sd and set sd->s_parent to it and increment ++ * nlink of parent inode if @sd is a directory. @sd is NOT ++ * linked into the children list of the parent. The caller ++ * should invoke sysfs_link_sibling() after this function ++ * completes if @sd needs to be on the children list. ++ * ++ * This function should be called between calls to ++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be ++ * passed the same @acxt as passed to sysfs_addrm_start(). ++ * ++ * LOCKING: ++ * Determined by sysfs_addrm_start(). ++ */ ++void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) ++{ ++ sd->s_parent = sysfs_get(acxt->parent_sd); ++ ++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) ++ inc_nlink(acxt->parent_inode); ++ ++ acxt->cnt++; ++} ++ ++/** ++ * sysfs_remove_one - remove sysfs_dirent from parent ++ * @acxt: addrm context to use ++ * @sd: sysfs_dirent to be removed ++ * ++ * Mark @sd removed and drop nlink of parent inode if @sd is a ++ * directory. @sd is NOT unlinked from the children list of the ++ * parent. The caller is responsible for removing @sd from the ++ * children list before calling this function. ++ * ++ * This function should be called between calls to ++ * sysfs_addrm_start() and sysfs_addrm_finish() and should be ++ * passed the same @acxt as passed to sysfs_addrm_start(). ++ * ++ * LOCKING: ++ * Determined by sysfs_addrm_start(). ++ */ ++void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) ++{ ++ BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED)); ++ ++ sd->s_flags |= SYSFS_FLAG_REMOVED; ++ sd->s_sibling = acxt->removed; ++ acxt->removed = sd; ++ ++ if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode) ++ drop_nlink(acxt->parent_inode); ++ ++ acxt->cnt++; ++} ++ ++/** ++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent ++ * @sd: target sysfs_dirent ++ * ++ * Drop dentry for @sd. @sd must have been unlinked from its ++ * parent on entry to this function such that it can't be looked ++ * up anymore. ++ * ++ * @sd->s_dentry which is protected with sysfs_assoc_lock points ++ * to the currently associated dentry but we're not holding a ++ * reference to it and racing with dput(). Grab dcache_lock and ++ * verify dentry before dropping it. If @sd->s_dentry is NULL or ++ * dput() beats us, no need to bother. ++ */ ++static void sysfs_drop_dentry(struct sysfs_dirent *sd) ++{ ++ struct dentry *dentry = NULL; ++ struct inode *inode; ++ ++ /* We're not holding a reference to ->s_dentry dentry but the ++ * field will stay valid as long as sysfs_assoc_lock is held.
++ */ ++ spin_lock(&sysfs_assoc_lock); ++ spin_lock(&dcache_lock); ++ ++ /* drop dentry if it's there and dput() didn't kill it yet */ ++ if (sd->s_dentry && sd->s_dentry->d_inode) { ++ dentry = dget_locked(sd->s_dentry); ++ spin_lock(&dentry->d_lock); ++ __d_drop(dentry); ++ spin_unlock(&dentry->d_lock); ++ } ++ ++ spin_unlock(&dcache_lock); ++ spin_unlock(&sysfs_assoc_lock); ++ ++ /* dentries for shadowed directories are pinned, unpin */ ++ if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) || ++ (sd->s_flags & SYSFS_FLAG_SHADOWED)) ++ dput(dentry); ++ dput(dentry); ++ ++ /* adjust nlink and update timestamp */ ++ inode = ilookup(sysfs_sb, sd->s_ino); ++ if (inode) { ++ mutex_lock(&inode->i_mutex); ++ ++ inode->i_ctime = CURRENT_TIME; ++ drop_nlink(inode); ++ if (sysfs_type(sd) == SYSFS_DIR) ++ drop_nlink(inode); ++ ++ mutex_unlock(&inode->i_mutex); ++ iput(inode); ++ } ++} ++ ++/** ++ * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent ++ * @sd: target sysfs_dirent ++ * ++ * Drop dentry for @sd. @sd must have been unlinked from its ++ * parent on entry to this function such that it can't be looked ++ * up anymore. ++ * ++ * @sd->s_dentry which is protected with sysfs_assoc_lock points ++ * to the currently associated dentry but we're not holding a ++ * reference to it and racing with dput(). Grab dcache_lock and ++ * verify dentry before dropping it. If @sd->s_dentry is NULL or ++ * dput() beats us, no need to bother. ++ */ ++ ++ ++/** ++ * sysfs_addrm_finish - finish up sysfs_dirent add/remove ++ * @acxt: addrm context to finish up ++ * ++ * Finish up sysfs_dirent add/remove. Resources acquired by ++ * sysfs_addrm_start() are released and removed sysfs_dirents are ++ * cleaned up. Timestamps on the parent inode are updated. ++ * ++ * LOCKING: ++ * All mutexes acquired by sysfs_addrm_start() are released. ++ * ++ * RETURNS: ++ * Number of added/removed sysfs_dirents since sysfs_addrm_start(). ++ */ ++int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) ++{ ++ /* release resources acquired by sysfs_addrm_start() */ ++ mutex_unlock(&sysfs_mutex); ++ if (acxt->parent_inode) { ++ struct inode *inode = acxt->parent_inode; ++ ++ /* if added/removed, update timestamps on the parent */ ++ if (acxt->cnt) ++ inode->i_ctime = inode->i_mtime = CURRENT_TIME; ++ ++ mutex_unlock(&inode->i_mutex); ++ iput(inode); ++ } ++ ++ /* kill removed sysfs_dirents */ ++ while (acxt->removed) { ++ struct sysfs_dirent *sd = acxt->removed; ++ ++ acxt->removed = sd->s_sibling; ++ sd->s_sibling = NULL; ++ ++ sysfs_prune_shadow_sd(sd->s_parent); ++ sysfs_drop_dentry(sd); ++ sysfs_deactivate(sd); ++ sysfs_put(sd); ++ } ++ ++ return acxt->cnt; ++} ++ ++/** ++ * sysfs_find_dirent - find sysfs_dirent with the given name ++ * @parent_sd: sysfs_dirent to search under ++ * @name: name to look for ++ * ++ * Look for sysfs_dirent with name @name under @parent_sd. ++ * ++ * LOCKING: ++ * mutex_lock(sysfs_mutex) ++ * ++ * RETURNS: ++ * Pointer to sysfs_dirent if found, NULL if not. ++ */ ++struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name) ++{ ++ struct sysfs_dirent *sd; ++ ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) ++ if (sysfs_type(sd) && !strcmp(sd->s_name, name)) ++ return sd; ++ return NULL; ++} ++ ++/** ++ * sysfs_get_dirent - find and get sysfs_dirent with the given name ++ * @parent_sd: sysfs_dirent to search under ++ * @name: name to look for ++ * ++ * Look for sysfs_dirent with name @name under @parent_sd and get ++ * it if found. 
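++ * ++ * A minimal usage sketch (hypothetical caller; "foo" is a made-up ++ * name), pairing the lookup with sysfs_put(): ++ * ++ *	sd = sysfs_get_dirent(parent_sd, "foo"); ++ *	if (sd) { ++ *		... use sd ... ++ *		sysfs_put(sd); ++ *	}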
++ * ++ * LOCKING: ++ * Kernel thread context (may sleep). Grabs sysfs_mutex. ++ * ++ * RETURNS: ++ * Pointer to sysfs_dirent if found, NULL if not. ++ */ ++struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name) ++{ ++ struct sysfs_dirent *sd; ++ ++ mutex_lock(&sysfs_mutex); ++ sd = sysfs_find_dirent(parent_sd, name); ++ sysfs_get(sd); ++ mutex_unlock(&sysfs_mutex); ++ ++ return sd; ++} ++ ++static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, ++ const char *name, struct sysfs_dirent **p_sd) ++{ ++ umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent *sd; ++ int err; ++ ++ /* allocate */ ++ sd = sysfs_new_dirent(name, mode, SYSFS_DIR); ++ if (!sd) ++ return -ENOMEM; ++ sd->s_elem.dir.kobj = kobj; ++ ++ /* link in */ ++ sysfs_addrm_start(&acxt, parent_sd); ++ err = -ENOENT; ++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ err = -EEXIST; ++ if (!sysfs_find_dirent(acxt.parent_sd, name)) { ++ sysfs_add_one(&acxt, sd); ++ sysfs_link_sibling(sd); ++ err = 0; ++ } ++addrm_finish: ++ if (sysfs_addrm_finish(&acxt)) { ++ *p_sd = sd; ++ return 0; ++ } ++ ++ sysfs_put(sd); ++ return err; ++} ++ ++int sysfs_create_subdir(struct kobject *kobj, const char *name, ++ struct sysfs_dirent **p_sd) ++{ ++ return create_dir(kobj, kobj->sd, name, p_sd); ++} ++ ++/** ++ * sysfs_create_dir - create a directory for an object. ++ * @kobj: object we're creating directory for. ++ */ ++int sysfs_create_dir(struct kobject * kobj) ++{ ++ struct sysfs_dirent *parent_sd, *sd; ++ int error = 0; ++ ++ BUG_ON(!kobj); ++ ++ if (kobj->parent) ++ parent_sd = kobj->parent->sd; ++ else if (sysfs_mount && sysfs_mount->mnt_sb) ++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; ++ else ++ return -EFAULT; ++ ++ error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd); ++ if (!error) ++ kobj->sd = sd; ++ return error; ++} ++ ++static int sysfs_count_nlink(struct sysfs_dirent *sd) ++{ ++ struct sysfs_dirent *child; ++ int nr = 0; ++ ++ for (child = sd->s_children; child; child = child->s_sibling) ++ if (sysfs_type(child) == SYSFS_DIR) ++ nr++; ++ return nr + 2; ++} ++ ++static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; ++ struct sysfs_dirent * sd; ++ struct bin_attribute *bin_attr; ++ struct inode *inode; ++ int found = 0; ++ ++ for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { ++ if (sysfs_type(sd) && ++ !strcmp(sd->s_name, dentry->d_name.name)) { ++ found = 1; ++ break; ++ } ++ } ++ ++ /* no such entry */ ++ if (!found) ++ return NULL; ++ ++ /* attach dentry and inode */ ++ inode = sysfs_get_inode(sd); ++ if (!inode) ++ return ERR_PTR(-ENOMEM); ++ ++ mutex_lock(&sysfs_mutex); ++ ++ if (inode->i_state & I_NEW) { ++ /* initialize inode according to type */ ++ switch (sysfs_type(sd)) { ++ case SYSFS_DIR: ++ inode->i_op = &sysfs_dir_inode_operations; ++ inode->i_fop = &sysfs_dir_operations; ++ inode->i_nlink = sysfs_count_nlink(sd); ++ break; ++ case SYSFS_KOBJ_ATTR: ++ inode->i_size = PAGE_SIZE; ++ inode->i_fop = &sysfs_file_operations; ++ break; ++ case SYSFS_KOBJ_BIN_ATTR: ++ bin_attr = sd->s_elem.bin_attr.bin_attr; ++ inode->i_size = bin_attr->size; ++ inode->i_fop = &bin_fops; ++ break; ++ case SYSFS_KOBJ_LINK: ++ inode->i_op = &sysfs_symlink_inode_operations; ++ break; ++ default: ++ BUG(); ++ } ++ } ++ ++ sysfs_instantiate(dentry, inode); 
++ sysfs_attach_dentry(sd, dentry); ++ ++ mutex_unlock(&sysfs_mutex); ++ ++ return NULL; ++} ++ ++static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct sysfs_dirent *sd; ++ struct dentry *dest; ++ ++ sd = dentry->d_fsdata; ++ dest = NULL; ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const struct shadow_dir_operations *shadow_ops; ++ const void *tag; ++ ++ mutex_lock(&sysfs_mutex); ++ ++ shadow_ops = dentry->d_inode->i_private; ++ tag = shadow_ops->current_tag(); ++ ++ sd = find_shadow_sd(sd, tag); ++ if (sd) ++ dest = sd->s_dentry; ++ dget(dest); ++ ++ mutex_unlock(&sysfs_mutex); ++ } ++ if (!dest) ++ dest = dget(dentry); ++ dput(nd->dentry); ++ nd->dentry = dest; ++ ++ return NULL; ++} ++ ++ ++const struct inode_operations sysfs_dir_inode_operations = { ++ .lookup = sysfs_lookup, ++ .setattr = sysfs_setattr, ++ .follow_link = sysfs_shadow_follow_link, ++}; ++ ++static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) ++{ ++ sysfs_unlink_sibling(sd); ++ sysfs_remove_one(acxt, sd); ++} ++ ++static void remove_dir(struct sysfs_dirent *sd) ++{ ++ struct sysfs_addrm_cxt acxt; ++ ++ sysfs_addrm_start(&acxt, sd->s_parent); ++ __remove_dir(&acxt, sd); ++ sysfs_addrm_finish(&acxt); ++} ++ ++void sysfs_remove_subdir(struct sysfs_dirent *sd) ++{ ++ remove_dir(sd); ++} ++ ++static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *dir_sd) ++{ ++ struct sysfs_dirent **pos; ++ ++ pos = &dir_sd->s_children; ++ while (*pos) { ++ struct sysfs_dirent *sd = *pos; ++ ++ if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) { ++ *pos = sd->s_sibling; ++ sd->s_sibling = NULL; ++ sysfs_remove_one(acxt, sd); ++ } else ++ pos = &(*pos)->s_sibling; ++ } ++} ++ ++static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt, ++ struct sysfs_dirent *dir_sd) ++{ ++ struct sysfs_dirent **pos; ++ ++ pos = &dir_sd->s_children; ++ while (*pos) { ++ struct sysfs_dirent *sd = *pos; ++ ++ sysfs_empty_dir(acxt, sd); ++ __remove_dir(acxt, sd); ++ } ++} ++ ++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) ++{ ++ struct sysfs_addrm_cxt acxt; ++ ++ if (!dir_sd) ++ return; ++ ++ pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); ++ sysfs_addrm_start(&acxt, dir_sd); ++ if (sysfs_type(dir_sd) == SYSFS_DIR) ++ sysfs_empty_dir(&acxt, dir_sd); ++ else ++ sysfs_remove_shadows(&acxt, dir_sd); ++ sysfs_addrm_finish(&acxt); ++ ++ remove_dir(dir_sd); ++} ++ ++/** ++ * sysfs_remove_dir - remove an object's directory. ++ * @kobj: object. ++ * ++ * The only thing special about this is that we remove any files in ++ * the directory before we remove the directory, and we've inlined ++ * what used to be sysfs_rmdir() below, instead of calling separately. 
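++ * ++ * Note that kobj->sd is cleared under sysfs_assoc_lock before the ++ * tree is torn down, so other paths stop seeing the directory ++ * through the kobject before removal begins.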
++ */ ++ ++void sysfs_remove_dir(struct kobject * kobj) ++{ ++ struct sysfs_dirent *sd = kobj->sd; ++ ++ spin_lock(&sysfs_assoc_lock); ++ kobj->sd = NULL; ++ spin_unlock(&sysfs_assoc_lock); ++ ++ __sysfs_remove_dir(sd); ++} ++ ++int sysfs_rename_dir(struct kobject * kobj, const char *new_name) ++{ ++ struct dentry *old_dentry, *new_dentry, *parent; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent *sd; ++ const char *dup_name; ++ int error; ++ ++ dup_name = NULL; ++ new_dentry = NULL; ++ ++ sd = kobj->sd; ++ sysfs_addrm_start(&acxt, sd->s_parent); ++ error = -ENOENT; ++ if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ error = -EEXIST; ++ if (sysfs_find_dirent(acxt.parent_sd, new_name)) ++ goto addrm_finish; ++ ++ error = -EINVAL; ++ if ((sd->s_parent == acxt.parent_sd) && ++ (strcmp(new_name, sd->s_name) == 0)) ++ goto addrm_finish; ++ ++ old_dentry = sd->s_dentry; ++ parent = acxt.parent_sd->s_dentry; ++ if (old_dentry) { ++ old_dentry = sd->s_dentry; ++ parent = acxt.parent_sd->s_dentry; ++ new_dentry = lookup_one_len(new_name, parent, strlen(new_name)); ++ if (IS_ERR(new_dentry)) { ++ error = PTR_ERR(new_dentry); ++ goto addrm_finish; ++ } ++ ++ error = -EINVAL; ++ if (old_dentry == new_dentry) ++ goto addrm_finish; ++ } ++ ++ /* rename kobject and sysfs_dirent */ ++ error = -ENOMEM; ++ new_name = dup_name = kstrdup(new_name, GFP_KERNEL); ++ if (!new_name) ++ goto addrm_finish; ++ ++ error = kobject_set_name(kobj, "%s", new_name); ++ if (error) ++ goto addrm_finish; ++ ++ dup_name = sd->s_name; ++ sd->s_name = new_name; ++ ++ /* move under the new parent */ ++ sysfs_unlink_sibling(sd); ++ sysfs_get(acxt.parent_sd); ++ sysfs_put(sd->s_parent); ++ sd->s_parent = acxt.parent_sd; ++ sysfs_link_sibling(sd); ++ ++ if (new_dentry) { ++ d_add(new_dentry, NULL); ++ d_move(old_dentry, new_dentry); ++ } ++ error = 0; ++addrm_finish: ++ sysfs_addrm_finish(&acxt); ++ ++ kfree(dup_name); ++ dput(new_dentry); ++ return error; ++} ++ ++int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) ++{ ++ struct sysfs_dirent *sd = kobj->sd; ++ struct sysfs_dirent *new_parent_sd; ++ struct dentry *old_parent, *new_parent = NULL; ++ struct dentry *old_dentry = NULL, *new_dentry = NULL; ++ int error; ++ ++ BUG_ON(!sd->s_parent); ++ new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; ++ ++ /* get dentries */ ++ old_dentry = sysfs_get_dentry(sd); ++ if (IS_ERR(old_dentry)) { ++ error = PTR_ERR(old_dentry); ++ goto out_dput; ++ } ++ old_parent = sd->s_parent->s_dentry; ++ ++ new_parent = sysfs_get_dentry(new_parent_sd); ++ if (IS_ERR(new_parent)) { ++ error = PTR_ERR(new_parent); ++ goto out_dput; ++ } ++ ++ if (old_parent->d_inode == new_parent->d_inode) { ++ error = 0; ++ goto out_dput; /* nothing to move */ ++ } ++again: ++ mutex_lock(&old_parent->d_inode->i_mutex); ++ if (!mutex_trylock(&new_parent->d_inode->i_mutex)) { ++ mutex_unlock(&old_parent->d_inode->i_mutex); ++ goto again; ++ } ++ ++ new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name)); ++ if (IS_ERR(new_dentry)) { ++ error = PTR_ERR(new_dentry); ++ goto out_unlock; ++ } else ++ error = 0; ++ d_add(new_dentry, NULL); ++ d_move(sd->s_dentry, new_dentry); ++ dput(new_dentry); ++ ++ /* Remove from old parent's list and insert into new parent's list. 
*/ ++ mutex_lock(&sysfs_mutex); ++ ++ sysfs_unlink_sibling(sd); ++ sysfs_get(new_parent_sd); ++ sysfs_put(sd->s_parent); ++ sd->s_parent = new_parent_sd; ++ sysfs_link_sibling(sd); ++ ++ mutex_unlock(&sysfs_mutex); ++ ++ out_unlock: ++ mutex_unlock(&new_parent->d_inode->i_mutex); ++ mutex_unlock(&old_parent->d_inode->i_mutex); ++ out_dput: ++ dput(new_parent); ++ dput(old_dentry); ++ dput(new_dentry); ++ return error; ++} ++ ++static int sysfs_dir_open(struct inode *inode, struct file *file) ++{ ++ struct dentry * dentry = file->f_path.dentry; ++ struct sysfs_dirent * parent_sd = dentry->d_fsdata; ++ struct sysfs_dirent * sd; ++ ++ sd = sysfs_new_dirent("_DIR_", 0, 0); ++ if (sd) { ++ mutex_lock(&sysfs_mutex); ++ sd->s_parent = sysfs_get(parent_sd); ++ sysfs_link_sibling(sd); ++ mutex_unlock(&sysfs_mutex); ++ } ++ ++ file->private_data = sd; ++ return sd ? 0 : -ENOMEM; ++} ++ ++static int sysfs_dir_close(struct inode *inode, struct file *file) ++{ ++ struct sysfs_dirent * cursor = file->private_data; ++ ++ mutex_lock(&sysfs_mutex); ++ sysfs_unlink_sibling(cursor); ++ mutex_unlock(&sysfs_mutex); ++ ++ release_sysfs_dirent(cursor); ++ ++ return 0; ++} ++ ++/* Relationship between s_mode and the DT_xxx types */ ++static inline unsigned char dt_type(struct sysfs_dirent *sd) ++{ ++ return (sd->s_mode >> 12) & 15; ++} ++ ++static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) ++{ ++ struct dentry *dentry = filp->f_path.dentry; ++ struct sysfs_dirent * parent_sd = dentry->d_fsdata; ++ struct sysfs_dirent *cursor = filp->private_data; ++ struct sysfs_dirent **pos; ++ ino_t ino; ++ int i = filp->f_pos; ++ ++ switch (i) { ++ case 0: ++ ino = parent_sd->s_ino; ++ if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) ++ break; ++ filp->f_pos++; ++ i++; ++ /* fallthrough */ ++ case 1: ++ if (parent_sd->s_parent) ++ ino = parent_sd->s_parent->s_ino; ++ else ++ ino = parent_sd->s_ino; ++ if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) ++ break; ++ filp->f_pos++; ++ i++; ++ /* fallthrough */ ++ default: ++ /* If I am the shadow master return nothing. 
*/ ++ if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED) ++ return 0; ++ ++ mutex_lock(&sysfs_mutex); ++ pos = &parent_sd->s_children; ++ while (*pos != cursor) ++ pos = &(*pos)->s_sibling; ++ ++ /* unlink cursor */ ++ *pos = cursor->s_sibling; ++ ++ if (filp->f_pos == 2) ++ pos = &parent_sd->s_children; ++ ++ for ( ; *pos; pos = &(*pos)->s_sibling) { ++ struct sysfs_dirent *next = *pos; ++ const char * name; ++ int len; ++ ++ if (!sysfs_type(next)) ++ continue; ++ ++ name = next->s_name; ++ len = strlen(name); ++ ino = next->s_ino; ++ ++ if (filldir(dirent, name, len, filp->f_pos, ino, ++ dt_type(next)) < 0) ++ break; ++ ++ filp->f_pos++; ++ } ++ ++ /* put cursor back in */ ++ cursor->s_sibling = *pos; ++ *pos = cursor; ++ ++ mutex_unlock(&sysfs_mutex); ++ } ++ return 0; ++} ++ ++static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin) ++{ ++ struct dentry * dentry = file->f_path.dentry; ++ ++ switch (origin) { ++ case 1: ++ offset += file->f_pos; ++ case 0: ++ if (offset >= 0) ++ break; ++ default: ++ return -EINVAL; ++ } ++ if (offset != file->f_pos) { ++ mutex_lock(&sysfs_mutex); ++ ++ file->f_pos = offset; ++ if (file->f_pos >= 2) { ++ struct sysfs_dirent *sd = dentry->d_fsdata; ++ struct sysfs_dirent *cursor = file->private_data; ++ struct sysfs_dirent **pos; ++ loff_t n = file->f_pos - 2; ++ ++ sysfs_unlink_sibling(cursor); ++ ++ pos = &sd->s_children; ++ while (n && *pos) { ++ struct sysfs_dirent *next = *pos; ++ if (sysfs_type(next)) ++ n--; ++ pos = &(*pos)->s_sibling; ++ } ++ ++ cursor->s_sibling = *pos; ++ *pos = cursor; ++ } ++ ++ mutex_unlock(&sysfs_mutex); ++ } ++ ++ return offset; ++} ++ ++const struct file_operations sysfs_dir_operations = { ++ .open = sysfs_dir_open, ++ .release = sysfs_dir_close, ++ .llseek = sysfs_dir_lseek, ++ .read = generic_read_dir, ++ .readdir = sysfs_readdir, ++}; ++ ++ ++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd) ++{ ++ struct sysfs_addrm_cxt acxt; ++ ++ /* If a shadow directory goes empty remove it. */ ++ if (sysfs_type(sd) != SYSFS_SHADOW_DIR) ++ return; ++ ++ if (sd->s_children) ++ return; ++ ++ sysfs_addrm_start(&acxt, sd->s_parent); ++ ++ if (sd->s_flags & SYSFS_FLAG_REMOVED) ++ goto addrm_finish; ++ ++ if (sd->s_children) ++ goto addrm_finish; ++ ++ __remove_dir(&acxt, sd); ++addrm_finish: ++ sysfs_addrm_finish(&acxt); ++} ++ ++static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag) ++{ ++ struct sysfs_dirent *sd = NULL; ++ struct dentry *dir, *shadow; ++ struct inode *inode; ++ ++ dir = parent_sd->s_dentry; ++ inode = dir->d_inode; ++ ++ shadow = d_alloc(dir->d_parent, &dir->d_name); ++ if (!shadow) ++ goto out; ++ ++ /* Since the shadow directory is reachable make it look ++ * like it is actually hashed. 
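++ * This mimics the effect of d_rehash() without actually inserting the ++ * dentry into the hash table, so an ordinary name lookup can never ++ * return the shadow directly.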
++ */ ++ shadow->d_hash.pprev = &shadow->d_hash.next; ++ shadow->d_hash.next = NULL; ++ shadow->d_flags &= ~DCACHE_UNHASHED; ++ ++ sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR); ++ if (!sd) ++ goto error; ++ ++ sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj; ++ sd->s_parent = sysfs_get(parent_sd); ++ ++ /* Use the inode number of the parent we are shadowing */ ++ sysfs_free_ino(sd->s_ino); ++ sd->s_ino = parent_sd->s_ino; ++ ++ inc_nlink(inode); ++ inc_nlink(dir->d_parent->d_inode); ++ ++ sysfs_link_sibling(sd); ++ __iget(inode); ++ sysfs_instantiate(shadow, inode); ++ sysfs_attach_dentry(sd, shadow); ++out: ++ return sd; ++error: ++ dput(shadow); ++ goto out; ++} ++ ++int sysfs_resolve_for_create(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd) ++{ ++ const struct shadow_dir_operations *shadow_ops; ++ struct sysfs_dirent *sd, *shadow_sd; ++ ++ sd = *parent_sd; ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ sd = sd->s_parent; ++ ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const void *tag; ++ ++ shadow_ops = sd->s_dentry->d_inode->i_private; ++ tag = shadow_ops->kobject_tag(kobj); ++ ++ shadow_sd = find_shadow_sd(sd, tag); ++ if (!shadow_sd) ++ shadow_sd = add_shadow_sd(sd, tag); ++ sd = shadow_sd; ++ } ++ if (sd) { ++ *parent_sd = sd; ++ return 1; ++ } ++ return 0; ++} ++ ++int sysfs_resolve_for_remove(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd) ++{ ++ struct sysfs_dirent *sd; ++ /* If dentry is a shadow directory, find the shadow that is ++ * stored under the same tag as kobj. This allows removal ++ * of dirents to function properly even if the value of ++ * kobject_tag() has changed since we initially created ++ * the dirents associated with kobj. ++ */ ++ ++ sd = *parent_sd; ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ sd = sd->s_parent; ++ if (sd->s_flags & SYSFS_FLAG_SHADOWED) { ++ const void *tag; ++ ++ tag = find_shadow_tag(kobj); ++ sd = find_shadow_sd(sd, tag); ++ } ++ if (sd) { ++ *parent_sd = sd; ++ return 1; ++ } ++ return 0; ++} ++ ++/** ++ * sysfs_enable_shadowing - Automatically create shadows of a directory ++ * @kobj: object to automatically shadow ++ * ++ * Once shadowing has been enabled on a directory the contents ++ * of the directory become dependent upon context. ++ * ++ * shadow_ops->current_tag() returns the context for the current ++ * process. ++ * ++ * shadow_ops->kobject_tag() returns the context that a given kobj ++ * resides in. ++ * ++ * Using those methods the sysfs code on shadowed directories ++ * carefully stores the files so that when we look up files ++ * we get the proper answer for our context. ++ * ++ * If the context of a kobject is changed it is expected that ++ * the kobject will be renamed so the appropriate sysfs data structures ++ * can be updated. ++ */ ++int sysfs_enable_shadowing(struct kobject *kobj, ++ const struct shadow_dir_operations *shadow_ops) ++{ ++ struct sysfs_dirent *sd; ++ struct dentry *dentry; ++ int err; ++ ++ /* Find the dentry for the shadowed directory and ++ * increase its count. ++ */ ++ err = -ENOENT; ++ sd = kobj->sd; ++ dentry = sysfs_get_dentry(sd); ++ if (!dentry) ++ goto out; ++ ++ mutex_lock(&sysfs_mutex); ++ err = -EINVAL; ++ /* We can only enable shadowing on empty directories ++ * where shadowing is not already enabled.
++ */ ++ if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) && ++ !(sd->s_flags & SYSFS_FLAG_REMOVED) && ++ !(sd->s_flags & SYSFS_FLAG_SHADOWED)) { ++ sd->s_flags |= SYSFS_FLAG_SHADOWED; ++ dentry->d_inode->i_private = (void *)shadow_ops; ++ err = 0; ++ } ++ mutex_unlock(&sysfs_mutex); ++out: ++ if (err) ++ dput(dentry); ++ return err; ++} ++ +diff -Nurb linux-2.6.22-570/fs/sysfs/file.c linux-2.6.22-591/fs/sysfs/file.c +--- linux-2.6.22-570/fs/sysfs/file.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/fs/sysfs/file.c 2007-12-21 15:36:14.000000000 -0500 +@@ -50,29 +50,15 @@ + .store = subsys_attr_store, + }; + +-/** +- * add_to_collection - add buffer to a collection +- * @buffer: buffer to be added +- * @node: inode of set to add to +- */ +- +-static inline void +-add_to_collection(struct sysfs_buffer *buffer, struct inode *node) +-{ +- struct sysfs_buffer_collection *set = node->i_private; +- +- mutex_lock(&node->i_mutex); +- list_add(&buffer->associates, &set->associates); +- mutex_unlock(&node->i_mutex); +-} +- +-static inline void +-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node) +-{ +- mutex_lock(&node->i_mutex); +- list_del(&buffer->associates); +- mutex_unlock(&node->i_mutex); +-} ++struct sysfs_buffer { ++ size_t count; ++ loff_t pos; ++ char * page; ++ struct sysfs_ops * ops; ++ struct semaphore sem; ++ int needs_read_fill; ++ int event; ++}; + + /** + * fill_read_buffer - allocate and fill buffer from object. +@@ -87,9 +73,8 @@ + */ + static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer) + { +- struct sysfs_dirent * sd = dentry->d_fsdata; +- struct attribute * attr = to_attr(dentry); +- struct kobject * kobj = to_kobj(dentry->d_parent); ++ struct sysfs_dirent *attr_sd = dentry->d_fsdata; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + struct sysfs_ops * ops = buffer->ops; + int ret = 0; + ssize_t count; +@@ -99,8 +84,15 @@ + if (!buffer->page) + return -ENOMEM; + +- buffer->event = atomic_read(&sd->s_event); +- count = ops->show(kobj,attr,buffer->page); ++ /* need attr_sd for attr and ops, its parent for kobj */ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; ++ ++ buffer->event = atomic_read(&attr_sd->s_event); ++ count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page); ++ ++ sysfs_put_active_two(attr_sd); ++ + BUG_ON(count > (ssize_t)PAGE_SIZE); + if (count >= 0) { + buffer->needs_read_fill = 0; +@@ -138,9 +130,6 @@ + + down(&buffer->sem); + if (buffer->needs_read_fill) { +- if (buffer->orphaned) +- retval = -ENODEV; +- else + retval = fill_read_buffer(file->f_path.dentry,buffer); + if (retval) + goto out; +@@ -199,11 +188,20 @@ + static int + flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count) + { +- struct attribute * attr = to_attr(dentry); +- struct kobject * kobj = to_kobj(dentry->d_parent); ++ struct sysfs_dirent *attr_sd = dentry->d_fsdata; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + struct sysfs_ops * ops = buffer->ops; ++ int rc; ++ ++ /* need attr_sd for attr and ops, its parent for kobj */ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; ++ ++ rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count); ++ ++ sysfs_put_active_two(attr_sd); + +- return ops->store(kobj,attr,buffer->page,count); ++ return rc; + } + + +@@ -231,37 +229,29 @@ + ssize_t len; + + down(&buffer->sem); +- if (buffer->orphaned) { +- len = -ENODEV; +- goto out; +- } + len = fill_write_buffer(buffer, buf, count); + if (len > 
0) + len = flush_write_buffer(file->f_path.dentry, buffer, len); + if (len > 0) + *ppos += len; +-out: + up(&buffer->sem); + return len; + } + + static int sysfs_open_file(struct inode *inode, struct file *file) + { +- struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent); +- struct attribute * attr = to_attr(file->f_path.dentry); +- struct sysfs_buffer_collection *set; ++ struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + struct sysfs_buffer * buffer; ++ + struct sysfs_ops * ops = NULL; +- int error = 0; ++ int error; + + +- if (!kobj || !attr) +- goto Einval; + +- /* Grab the module reference for this attribute if we have one */ +- if (!try_module_get(attr->owner)) { +- error = -ENODEV; +- goto Done; +- } ++ /* need attr_sd for attr and ops, its parent for kobj */ ++ ++ if (!sysfs_get_active_two(attr_sd)) ++ return -ENODEV; + + /* if the kobject has no ktype, then we assume that it is a subsystem + * itself, and use ops for it. +@@ -277,20 +267,7 @@ + * or the subsystem have no operations. + */ + if (!ops) +- goto Eaccess; +- +- /* make sure we have a collection to add our buffers to */ +- mutex_lock(&inode->i_mutex); +- if (!(set = inode->i_private)) { +- if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) { +- mutex_unlock(&inode->i_mutex); +- error = -ENOMEM; +- goto Done; +- } else { +- INIT_LIST_HEAD(&set->associates); +- } +- } +- mutex_unlock(&inode->i_mutex); ++ goto err_out; + + /* File needs write support. + * The inode's perms must say it's ok, +@@ -299,7 +276,7 @@ + if (file->f_mode & FMODE_WRITE) { + + if (!(inode->i_mode & S_IWUGO) || !ops->store) +- goto Eaccess; ++ goto err_out; + + } + +@@ -309,48 +286,38 @@ + */ + if (file->f_mode & FMODE_READ) { + if (!(inode->i_mode & S_IRUGO) || !ops->show) +- goto Eaccess; ++ goto err_out; + } + + /* No error? Great, allocate a buffer for the file, and store it + * in file->private_data for easy access. + */ ++ error = -ENOMEM; + buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); +- if (buffer) { +- INIT_LIST_HEAD(&buffer->associates); ++ if (!buffer) ++ goto err_out; ++ + init_MUTEX(&buffer->sem); + buffer->needs_read_fill = 1; + buffer->ops = ops; +- add_to_collection(buffer, inode); + file->private_data = buffer; +- } else +- error = -ENOMEM; +- goto Done; + +- Einval: +- error = -EINVAL; +- goto Done; +- Eaccess: +- error = -EACCES; +- module_put(attr->owner); +- Done: +- if (error) +- kobject_put(kobj); ++ /* open succeeded, put active references and pin attr_sd */ ++ sysfs_put_active_two(attr_sd); ++ sysfs_get(attr_sd); ++ return 0; ++ ++ err_out: ++ sysfs_put_active_two(attr_sd); + return error; + } + + static int sysfs_release(struct inode * inode, struct file * filp) + { +- struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); +- struct attribute * attr = to_attr(filp->f_path.dentry); +- struct module * owner = attr->owner; +- struct sysfs_buffer * buffer = filp->private_data; ++ struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; ++ struct sysfs_buffer *buffer = filp->private_data; + +- if (buffer) +- remove_from_collection(buffer, inode); +- kobject_put(kobj); +- /* After this point, attr should not be accessed.
*/ +- module_put(owner); ++ sysfs_put(attr_sd); + + if (buffer) { + if (buffer->page) +@@ -377,57 +344,43 @@ + static unsigned int sysfs_poll(struct file *filp, poll_table *wait) + { + struct sysfs_buffer * buffer = filp->private_data; +- struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent); +- struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata; +- int res = 0; +- +- poll_wait(filp, &kobj->poll, wait); ++ struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; ++ struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj; + +- if (buffer->event != atomic_read(&sd->s_event)) { +- res = POLLERR|POLLPRI; +- buffer->needs_read_fill = 1; +- } ++ /* need parent for the kobj, grab both */ ++ if (!sysfs_get_active_two(attr_sd)) ++ goto trigger; + +- return res; +-} ++ poll_wait(filp, &kobj->poll, wait); + ++ sysfs_put_active_two(attr_sd); + +-static struct dentry *step_down(struct dentry *dir, const char * name) +-{ +- struct dentry * de; ++ if (buffer->event != atomic_read(&attr_sd->s_event)) ++ goto trigger; + +- if (dir == NULL || dir->d_inode == NULL) +- return NULL; ++ return 0; + +- mutex_lock(&dir->d_inode->i_mutex); +- de = lookup_one_len(name, dir, strlen(name)); +- mutex_unlock(&dir->d_inode->i_mutex); +- dput(dir); +- if (IS_ERR(de)) +- return NULL; +- if (de->d_inode == NULL) { +- dput(de); +- return NULL; +- } +- return de; ++ trigger: ++ buffer->needs_read_fill = 1; ++ return POLLERR|POLLPRI; + } + +-void sysfs_notify(struct kobject * k, char *dir, char *attr) ++void sysfs_notify(struct kobject *k, char *dir, char *attr) + { +- struct dentry *de = k->dentry; +- if (de) +- dget(de); +- if (de && dir) +- de = step_down(de, dir); +- if (de && attr) +- de = step_down(de, attr); +- if (de) { +- struct sysfs_dirent * sd = de->d_fsdata; +- if (sd) ++ struct sysfs_dirent *sd = k->sd; ++ ++ mutex_lock(&sysfs_mutex); ++ ++ if (sd && dir) ++ sd = sysfs_find_dirent(sd, dir); ++ if (sd && attr) ++ sd = sysfs_find_dirent(sd, attr); ++ if (sd) { + atomic_inc(&sd->s_event); + wake_up_interruptible(&k->poll); +- dput(de); + } ++ ++ mutex_unlock(&sysfs_mutex); + } + EXPORT_SYMBOL_GPL(sysfs_notify); + +@@ -441,19 +394,30 @@ + }; + + +-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type) ++int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, ++ int type) + { +- struct sysfs_dirent * parent_sd = dir->d_fsdata; + umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG; +- int error = -EEXIST; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent *sd; + +- mutex_lock(&dir->d_inode->i_mutex); +- if (!sysfs_dirent_exist(parent_sd, attr->name)) +- error = sysfs_make_dirent(parent_sd, NULL, (void *)attr, +- mode, type); +- mutex_unlock(&dir->d_inode->i_mutex); ++ sd = sysfs_new_dirent(attr->name, mode, type); ++ if (!sd) ++ return -ENOMEM; ++ sd->s_elem.attr.attr = (void *)attr; + +- return error; ++ sysfs_addrm_start(&acxt, dir_sd); ++ ++ if (!sysfs_find_dirent(dir_sd, attr->name)) { ++ sysfs_add_one(&acxt, sd); ++ sysfs_link_sibling(sd); ++ } ++ ++ if (sysfs_addrm_finish(&acxt)) ++ return 0; ++ ++ sysfs_put(sd); ++ return -EEXIST; + } + + +@@ -465,9 +429,9 @@ + + int sysfs_create_file(struct kobject * kobj, const struct attribute * attr) + { +- BUG_ON(!kobj || !kobj->dentry || !attr); ++ BUG_ON(!kobj || !kobj->sd || !attr); + +- return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR); ++ return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); + + } + +@@ -481,16 +445,16 @@ + int sysfs_add_file_to_group(struct kobject *kobj, + const 
struct attribute *attr, const char *group) + { +- struct dentry *dir; ++ struct sysfs_dirent *dir_sd; + int error; + +- dir = lookup_one_len(group, kobj->dentry, strlen(group)); +- if (IS_ERR(dir)) +- error = PTR_ERR(dir); +- else { +- error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR); +- dput(dir); +- } ++ dir_sd = sysfs_get_dirent(kobj->sd, group); ++ if (!dir_sd) ++ return -ENOENT; ++ ++ error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR); ++ sysfs_put(dir_sd); ++ + return error; + } + EXPORT_SYMBOL_GPL(sysfs_add_file_to_group); +@@ -503,30 +467,31 @@ + */ + int sysfs_update_file(struct kobject * kobj, const struct attribute * attr) + { +- struct dentry * dir = kobj->dentry; +- struct dentry * victim; +- int res = -ENOENT; +- +- mutex_lock(&dir->d_inode->i_mutex); +- victim = lookup_one_len(attr->name, dir, strlen(attr->name)); +- if (!IS_ERR(victim)) { +- /* make sure dentry is really there */ +- if (victim->d_inode && +- (victim->d_parent->d_inode == dir->d_inode)) { +- victim->d_inode->i_mtime = CURRENT_TIME; +- fsnotify_modify(victim); +- res = 0; +- } else +- d_drop(victim); ++ struct sysfs_dirent *victim_sd = NULL; ++ struct dentry *victim = NULL; ++ int rc; ++ ++ rc = -ENOENT; ++ victim_sd = sysfs_get_dirent(kobj->sd, attr->name); ++ if (!victim_sd) ++ goto out; + +- /** +- * Drop the reference acquired from lookup_one_len() above. +- */ +- dput(victim); ++ victim = sysfs_get_dentry(victim_sd); ++ if (IS_ERR(victim)) { ++ rc = PTR_ERR(victim); ++ victim = NULL; ++ goto out; + } +- mutex_unlock(&dir->d_inode->i_mutex); + +- return res; ++ mutex_lock(&victim->d_inode->i_mutex); ++ victim->d_inode->i_mtime = CURRENT_TIME; ++ fsnotify_modify(victim); ++ mutex_unlock(&victim->d_inode->i_mutex); ++ rc = 0; ++ out: ++ dput(victim); ++ sysfs_put(victim_sd); ++ return rc; + } + + +@@ -539,30 +504,34 @@ + */ + int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode) + { +- struct dentry *dir = kobj->dentry; +- struct dentry *victim; ++ struct sysfs_dirent *victim_sd = NULL; ++ struct dentry *victim = NULL; + struct inode * inode; + struct iattr newattrs; +- int res = -ENOENT; ++ int rc; ++ ++ rc = -ENOENT; ++ victim_sd = sysfs_get_dirent(kobj->sd, attr->name); ++ if (!victim_sd) ++ goto out; ++ ++ victim = sysfs_get_dentry(victim_sd); ++ if (IS_ERR(victim)) { ++ rc = PTR_ERR(victim); ++ victim = NULL; ++ goto out; ++ } + +- mutex_lock(&dir->d_inode->i_mutex); +- victim = lookup_one_len(attr->name, dir, strlen(attr->name)); +- if (!IS_ERR(victim)) { +- if (victim->d_inode && +- (victim->d_parent->d_inode == dir->d_inode)) { + inode = victim->d_inode; + mutex_lock(&inode->i_mutex); +- newattrs.ia_mode = (mode & S_IALLUGO) | +- (inode->i_mode & ~S_IALLUGO); ++ newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; +- res = notify_change(victim, &newattrs); ++ rc = notify_change(victim, &newattrs); + mutex_unlock(&inode->i_mutex); +- } ++ out: + dput(victim); +- } +- mutex_unlock(&dir->d_inode->i_mutex); +- +- return res; ++ sysfs_put(victim_sd); ++ return rc; + } + EXPORT_SYMBOL_GPL(sysfs_chmod_file); + +@@ -577,7 +546,7 @@ + + void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr) + { +- sysfs_hash_and_remove(kobj->dentry, attr->name); ++ sysfs_hash_and_remove(kobj, kobj->sd, attr->name); + } + + +@@ -590,12 +559,12 @@ + void sysfs_remove_file_from_group(struct kobject *kobj, + const struct attribute *attr, const char *group) + { +- struct dentry *dir; ++ struct sysfs_dirent *dir_sd; 
+ +- dir = lookup_one_len(group, kobj->dentry, strlen(group)); +- if (!IS_ERR(dir)) { +- sysfs_hash_and_remove(dir, attr->name); +- dput(dir); ++ dir_sd = sysfs_get_dirent(kobj->sd, group); ++ if (dir_sd) { ++ sysfs_hash_and_remove(kobj, dir_sd, attr->name); ++ sysfs_put(dir_sd); + } + } + EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); +diff -Nurb linux-2.6.22-570/fs/sysfs/group.c linux-2.6.22-591/fs/sysfs/group.c +--- linux-2.6.22-570/fs/sysfs/group.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/group.c 2007-12-21 15:36:14.000000000 -0500 +@@ -13,31 +13,29 @@ + #include + #include + #include +-#include + #include + #include "sysfs.h" + + +-static void remove_files(struct dentry * dir, +- const struct attribute_group * grp) ++static void remove_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, ++ const struct attribute_group *grp) + { + struct attribute *const* attr; + + for (attr = grp->attrs; *attr; attr++) +- sysfs_hash_and_remove(dir,(*attr)->name); ++ sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name); + } + +-static int create_files(struct dentry * dir, +- const struct attribute_group * grp) ++static int create_files(struct kobject *kobj, struct sysfs_dirent *dir_sd, ++ const struct attribute_group *grp) + { + struct attribute *const* attr; + int error = 0; + +- for (attr = grp->attrs; *attr && !error; attr++) { +- error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR); +- } ++ for (attr = grp->attrs; *attr && !error; attr++) ++ error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); + if (error) +- remove_files(dir,grp); ++ remove_files(kobj, dir_sd, grp); + return error; + } + +@@ -45,44 +43,44 @@ + int sysfs_create_group(struct kobject * kobj, + const struct attribute_group * grp) + { +- struct dentry * dir; ++ struct sysfs_dirent *sd; + int error; + +- BUG_ON(!kobj || !kobj->dentry); ++ BUG_ON(!kobj || !kobj->sd); + + if (grp->name) { +- error = sysfs_create_subdir(kobj,grp->name,&dir); ++ error = sysfs_create_subdir(kobj, grp->name, &sd); + if (error) + return error; + } else +- dir = kobj->dentry; +- dir = dget(dir); +- if ((error = create_files(dir,grp))) { ++ sd = kobj->sd; ++ sysfs_get(sd); ++ error = create_files(kobj, sd, grp); ++ if (error) { + if (grp->name) +- sysfs_remove_subdir(dir); ++ sysfs_remove_subdir(sd); + } +- dput(dir); ++ sysfs_put(sd); + return error; + } + + void sysfs_remove_group(struct kobject * kobj, + const struct attribute_group * grp) + { +- struct dentry * dir; ++ struct sysfs_dirent *dir_sd = kobj->sd; ++ struct sysfs_dirent *sd; + + if (grp->name) { +- dir = lookup_one_len_kern(grp->name, kobj->dentry, +- strlen(grp->name)); +- BUG_ON(IS_ERR(dir)); +- } +- else +- dir = dget(kobj->dentry); ++ sd = sysfs_get_dirent(dir_sd, grp->name); ++ BUG_ON(!sd); ++ } else ++ sd = sysfs_get(dir_sd); + +- remove_files(dir,grp); ++ remove_files(kobj, sd, grp); + if (grp->name) +- sysfs_remove_subdir(dir); +- /* release the ref. 
taken in this routine */ +- dput(dir); ++ sysfs_remove_subdir(sd); ++ ++ sysfs_put(sd); + } + + +diff -Nurb linux-2.6.22-570/fs/sysfs/inode.c linux-2.6.22-591/fs/sysfs/inode.c +--- linux-2.6.22-570/fs/sysfs/inode.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/inode.c 2007-12-23 01:18:26.000000000 -0500 +@@ -34,16 +34,6 @@ + .setattr = sysfs_setattr, + }; + +-void sysfs_delete_inode(struct inode *inode) +-{ +- /* Free the shadowed directory inode operations */ +- if (sysfs_is_shadowed_inode(inode)) { +- kfree(inode->i_op); +- inode->i_op = NULL; +- } +- return generic_delete_inode(inode); +-} +- + int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) + { + struct inode * inode = dentry->d_inode; +@@ -133,10 +123,8 @@ + */ + static struct lock_class_key sysfs_inode_imutex_key; + +-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd) ++void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) + { +- struct inode * inode = new_inode(sysfs_sb); +- if (inode) { + inode->i_blocks = 0; + inode->i_mapping->a_ops = &sysfs_aops; + inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info; +@@ -151,169 +139,81 @@ + */ + set_inode_attr(inode, sd->s_iattr); + } else +- set_default_inode_attr(inode, mode); +- } +- return inode; +-} +- +-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *)) +-{ +- int error = 0; +- struct inode * inode = NULL; +- if (dentry) { +- if (!dentry->d_inode) { +- struct sysfs_dirent * sd = dentry->d_fsdata; +- if ((inode = sysfs_new_inode(mode, sd))) { +- if (dentry->d_parent && dentry->d_parent->d_inode) { +- struct inode *p_inode = dentry->d_parent->d_inode; +- p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME; +- } +- goto Proceed; +- } +- else +- error = -ENOMEM; +- } else +- error = -EEXIST; +- } else +- error = -ENOENT; +- goto Done; +- +- Proceed: +- if (init) +- error = init(inode); +- if (!error) { +- d_instantiate(dentry, inode); +- if (S_ISDIR(mode)) +- dget(dentry); /* pin only directory dentry in core */ +- } else +- iput(inode); +- Done: +- return error; ++ set_default_inode_attr(inode, sd->s_mode); + } + +-/* +- * Get the name for corresponding element represented by the given sysfs_dirent ++/** ++ * sysfs_get_inode - get inode for sysfs_dirent ++ * @sd: sysfs_dirent to allocate inode for ++ * ++ * Get inode for @sd. If such inode doesn't exist, a new inode ++ * is allocated and basics are initialized. New inode is ++ * returned locked. ++ * ++ * LOCKING: ++ * Kernel thread context (may sleep). ++ * ++ * RETURNS: ++ * Pointer to allocated inode on success, NULL on failure. 
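++ * A freshly allocated inode still has I_NEW set; callers unlock it ++ * with unlock_new_inode(), as sysfs_instantiate() below does.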
+ */ +-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd) ++struct inode * sysfs_get_inode(struct sysfs_dirent *sd) + { +- struct attribute * attr; +- struct bin_attribute * bin_attr; +- struct sysfs_symlink * sl; +- +- BUG_ON(!sd || !sd->s_element); +- +- switch (sd->s_type) { +- case SYSFS_DIR: +- /* Always have a dentry so use that */ +- return sd->s_dentry->d_name.name; +- +- case SYSFS_KOBJ_ATTR: +- attr = sd->s_element; +- return attr->name; +- +- case SYSFS_KOBJ_BIN_ATTR: +- bin_attr = sd->s_element; +- return bin_attr->attr.name; +- +- case SYSFS_KOBJ_LINK: +- sl = sd->s_element; +- return sl->link_name; +- } +- return NULL; +-} ++ struct inode *inode; + +-static inline void orphan_all_buffers(struct inode *node) +-{ +- struct sysfs_buffer_collection *set; +- struct sysfs_buffer *buf; ++ inode = iget_locked(sysfs_sb, sd->s_ino); ++ if (inode && (inode->i_state & I_NEW)) ++ sysfs_init_inode(sd, inode); + +- mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD); +- set = node->i_private; +- if (set) { +- list_for_each_entry(buf, &set->associates, associates) { +- down(&buf->sem); +- buf->orphaned = 1; +- up(&buf->sem); +- } +- } +- mutex_unlock(&node->i_mutex); ++ return inode; + } + +- +-/* +- * Unhashes the dentry corresponding to given sysfs_dirent +- * Called with parent inode's i_mutex held. ++/** ++ * sysfs_instantiate - instantiate dentry ++ * @dentry: dentry to be instantiated ++ * @inode: inode associated with @sd ++ * ++ * Unlock @inode if locked and instantiate @dentry with @inode. ++ * ++ * LOCKING: ++ * None. + */ +-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent) ++void sysfs_instantiate(struct dentry *dentry, struct inode *inode) + { +- struct dentry *dentry = NULL; +- struct inode *inode; ++ BUG_ON(!dentry || dentry->d_inode); + +- /* We're not holding a reference to ->s_dentry dentry but the +- * field will stay valid as long as sysfs_lock is held. 
+- */ +- spin_lock(&sysfs_lock); +- spin_lock(&dcache_lock); ++ if (inode->i_state & I_NEW) ++ unlock_new_inode(inode); + +- /* dget dentry if it's still alive */ +- if (sd->s_dentry && sd->s_dentry->d_inode) +- dentry = dget_locked(sd->s_dentry); +- +- spin_unlock(&dcache_lock); +- spin_unlock(&sysfs_lock); +- +- /* drop dentry */ +- if (dentry) { +- spin_lock(&dcache_lock); +- spin_lock(&dentry->d_lock); +- if (!d_unhashed(dentry) && dentry->d_inode) { +- inode = dentry->d_inode; +- spin_lock(&inode->i_lock); +- __iget(inode); +- spin_unlock(&inode->i_lock); +- dget_locked(dentry); +- __d_drop(dentry); +- spin_unlock(&dentry->d_lock); +- spin_unlock(&dcache_lock); +- simple_unlink(parent->d_inode, dentry); +- orphan_all_buffers(inode); +- iput(inode); +- } else { +- spin_unlock(&dentry->d_lock); +- spin_unlock(&dcache_lock); +- } +- +- dput(dentry); +- } ++ d_instantiate(dentry, inode); + } + +-int sysfs_hash_and_remove(struct dentry * dir, const char * name) ++int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name) + { +- struct sysfs_dirent * sd; +- struct sysfs_dirent * parent_sd; +- int found = 0; ++ struct sysfs_addrm_cxt acxt; ++ struct sysfs_dirent **pos, *sd; + +- if (!dir) ++ if (!dir_sd) + return -ENOENT; + +- if (dir->d_inode == NULL) +- /* no inode means this hasn't been made visible yet */ +- return -ENOENT; + +- parent_sd = dir->d_fsdata; +- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); +- list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { +- if (!sd->s_element) ++ sysfs_addrm_start(&acxt, dir_sd); ++ if (!sysfs_resolve_for_remove(kobj, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ for (pos = &acxt.parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) { ++ sd = *pos; ++ ++ if (!sysfs_type(sd)) + continue; +- if (!strcmp(sysfs_get_name(sd), name)) { +- list_del_init(&sd->s_sibling); +- sysfs_drop_dentry(sd, dir); +- sysfs_put(sd); +- found = 1; ++ if (!strcmp(sd->s_name, name)) { ++ *pos = sd->s_sibling; ++ sd->s_sibling = NULL; ++ sysfs_remove_one(&acxt, sd); + break; + } + } +- mutex_unlock(&dir->d_inode->i_mutex); +- +- return found ? 
0 : -ENOENT; ++addrm_finish: ++ if (sysfs_addrm_finish(&acxt)) ++ return 0; ++ return -ENOENT; + } +diff -Nurb linux-2.6.22-570/fs/sysfs/mount.c linux-2.6.22-591/fs/sysfs/mount.c +--- linux-2.6.22-570/fs/sysfs/mount.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/sysfs/mount.c 2007-12-21 15:36:14.000000000 -0500 +@@ -17,28 +17,18 @@ + struct super_block * sysfs_sb = NULL; + struct kmem_cache *sysfs_dir_cachep; + +-static void sysfs_clear_inode(struct inode *inode); +- + static const struct super_operations sysfs_ops = { + .statfs = simple_statfs, +- .drop_inode = sysfs_delete_inode, +- .clear_inode = sysfs_clear_inode, ++ .drop_inode = generic_delete_inode, + }; + +-static struct sysfs_dirent sysfs_root = { +- .s_sibling = LIST_HEAD_INIT(sysfs_root.s_sibling), +- .s_children = LIST_HEAD_INIT(sysfs_root.s_children), +- .s_element = NULL, +- .s_type = SYSFS_ROOT, +- .s_iattr = NULL, ++struct sysfs_dirent sysfs_root = { ++ .s_count = ATOMIC_INIT(1), ++ .s_flags = SYSFS_ROOT, ++ .s_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + .s_ino = 1, + }; + +-static void sysfs_clear_inode(struct inode *inode) +-{ +- kfree(inode->i_private); +-} +- + static int sysfs_fill_super(struct super_block *sb, void *data, int silent) + { + struct inode *inode; +@@ -51,17 +41,18 @@ + sb->s_time_gran = 1; + sysfs_sb = sb; + +- inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, +- &sysfs_root); +- if (inode) { ++ inode = new_inode(sysfs_sb); ++ if (!inode) { ++ pr_debug("sysfs: could not get root inode\n"); ++ return -ENOMEM; ++ } ++ ++ sysfs_init_inode(&sysfs_root, inode); ++ + inode->i_op = &sysfs_dir_inode_operations; + inode->i_fop = &sysfs_dir_operations; + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inc_nlink(inode); +- } else { +- pr_debug("sysfs: could not get root inode\n"); +- return -ENOMEM; +- } + + root = d_alloc_root(inode); + if (!root) { +@@ -69,6 +60,7 @@ + iput(inode); + return -ENOMEM; + } ++ sysfs_root.s_dentry = root; + root->d_fsdata = &sysfs_root; + sb->s_root = root; + return 0; +diff -Nurb linux-2.6.22-570/fs/sysfs/symlink.c linux-2.6.22-591/fs/sysfs/symlink.c +--- linux-2.6.22-570/fs/sysfs/symlink.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/symlink.c 2007-12-21 15:36:14.000000000 -0500 +@@ -11,71 +11,49 @@ + + #include "sysfs.h" + +-static int object_depth(struct kobject * kobj) ++static int object_depth(struct sysfs_dirent *sd) + { +- struct kobject * p = kobj; + int depth = 0; +- do { depth++; } while ((p = p->parent)); ++ ++ for (; sd->s_parent; sd = sd->s_parent) { ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ continue; ++ depth++; ++ } ++ + return depth; + } + +-static int object_path_length(struct kobject * kobj) ++static int object_path_length(struct sysfs_dirent * sd) + { +- struct kobject * p = kobj; + int length = 1; +- do { +- length += strlen(kobject_name(p)) + 1; +- p = p->parent; +- } while (p); ++ ++ for (; sd->s_parent; sd = sd->s_parent) { ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ continue; ++ length += strlen(sd->s_name) + 1; ++ } ++ + return length; + } + +-static void fill_object_path(struct kobject * kobj, char * buffer, int length) ++static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length) + { +- struct kobject * p; +- ++ int cur; + --length; +- for (p = kobj; p; p = p->parent) { +- int cur = strlen(kobject_name(p)); ++ for (; sd->s_parent; sd = sd->s_parent) { ++ if (sysfs_type(sd) == SYSFS_SHADOW_DIR) ++ continue; ++ ++ cur = strlen(sd->s_name); + + /* back up 
enough to print this bus id with '/' */ + length -= cur; +- strncpy(buffer + length,kobject_name(p),cur); ++ strncpy(buffer + length, sd->s_name, cur); + *(buffer + --length) = '/'; + } + } + +-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target) +-{ +- struct sysfs_dirent * parent_sd = parent->d_fsdata; +- struct sysfs_symlink * sl; +- int error = 0; +- +- error = -ENOMEM; +- sl = kmalloc(sizeof(*sl), GFP_KERNEL); +- if (!sl) +- goto exit1; +- +- sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL); +- if (!sl->link_name) +- goto exit2; +- +- strcpy(sl->link_name, name); +- sl->target_kobj = kobject_get(target); +- +- error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO, +- SYSFS_KOBJ_LINK); +- if (!error) +- return 0; +- +- kobject_put(target); +- kfree(sl->link_name); +-exit2: +- kfree(sl); +-exit1: +- return error; +-} +- + /** + * sysfs_create_link - create symlink between two objects. + * @kobj: object whose directory we're creating the link in. +@@ -84,29 +62,80 @@ + */ + int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name) + { +- struct dentry *dentry = NULL; +- int error = -EEXIST; ++ struct sysfs_dirent *parent_sd = NULL; ++ struct sysfs_dirent *target_sd = NULL; ++ struct sysfs_dirent *sd = NULL; ++ struct sysfs_addrm_cxt acxt; ++ int error; + + BUG_ON(!name); + + if (!kobj) { + if (sysfs_mount && sysfs_mount->mnt_sb) +- dentry = sysfs_mount->mnt_sb->s_root; ++ parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata; + } else +- dentry = kobj->dentry; ++ parent_sd = kobj->sd; ++ ++ error = -EFAULT; ++ if (!parent_sd) ++ goto out_put; ++ ++ /* target->sd can go away beneath us but is protected with ++ * sysfs_assoc_lock. Fetch target_sd from it. ++ */ ++ spin_lock(&sysfs_assoc_lock); ++ if (target->sd) ++ target_sd = sysfs_get(target->sd); ++ spin_unlock(&sysfs_assoc_lock); ++ ++ error = -ENOENT; ++ if (!target_sd) ++ goto out_put; ++ ++ error = -ENOMEM; ++ sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK); ++ if (!sd) ++ goto out_put; ++ sd->s_elem.symlink.target_sd = target_sd; ++ ++ sysfs_addrm_start(&acxt, parent_sd); ++ if (!sysfs_resolve_for_create(target, &acxt.parent_sd)) ++ goto addrm_finish; ++ ++ if (!sysfs_find_dirent(acxt.parent_sd, name)) { ++ sysfs_add_one(&acxt, sd); ++ sysfs_link_sibling(sd); ++ } + +- if (!dentry) +- return -EFAULT; ++addrm_finish: ++ if (sysfs_addrm_finish(&acxt)) ++ return 0; + +- mutex_lock(&dentry->d_inode->i_mutex); +- if (!sysfs_dirent_exist(dentry->d_fsdata, name)) +- error = sysfs_add_link(dentry, name, target); +- mutex_unlock(&dentry->d_inode->i_mutex); ++ error = -EEXIST; ++ /* fall through */ ++ out_put: ++ sysfs_put(target_sd); ++ sysfs_put(sd); + return error; + } + + + /** ++ * sysfs_delete_link - remove symlink in object's directory. ++ * @kobj: object we're acting for. ++ * @targ: object we're pointing to. ++ * @name: name of the symlink to remove. ++ * ++ * Unlike sysfs_remove_link sysfs_delete_link has enough information ++ * to successfully delete symlinks in shadow directories. ++ */ ++void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, ++ const char *name) ++{ ++ sysfs_hash_and_remove(targ, kobj->sd, name); ++} ++ ++/** + * sysfs_remove_link - remove symlink in object's directory. + * @kobj: object we're acting for. + * @name: name of the symlink to remove. 
+@@ -114,17 +143,33 @@ + + void sysfs_remove_link(struct kobject * kobj, const char * name) + { +- sysfs_hash_and_remove(kobj->dentry,name); ++ sysfs_hash_and_remove(kobj, kobj->sd, name); + } + +-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target, +- char *path) ++/** ++ * sysfs_rename_link - rename symlink in object's directory. ++ * @kobj: object we're acting for. ++ * @targ: object we're pointing to. ++ * @old: previous name of the symlink. ++ * @new: new name of the symlink. ++ * ++ * A helper function for the common rename symlink idiom. ++ */ ++int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, ++ const char *old, const char *new) ++{ ++ sysfs_delete_link(kobj, targ, old); ++ return sysfs_create_link(kobj, targ, new); ++} ++ ++static int sysfs_get_target_path(struct sysfs_dirent * parent_sd, ++ struct sysfs_dirent * target_sd, char *path) + { + char * s; + int depth, size; + +- depth = object_depth(kobj); +- size = object_path_length(target) + depth * 3 - 1; ++ depth = object_depth(parent_sd); ++ size = object_path_length(target_sd) + depth * 3 - 1; + if (size > PATH_MAX) + return -ENAMETOOLONG; + +@@ -133,7 +178,7 @@ + for (s = path; depth--; s += 3) + strcpy(s,"../"); + +- fill_object_path(target, path, size); ++ fill_object_path(target_sd, path, size); + pr_debug("%s: path = '%s'\n", __FUNCTION__, path); + + return 0; +@@ -141,27 +186,16 @@ + + static int sysfs_getlink(struct dentry *dentry, char * path) + { +- struct kobject *kobj, *target_kobj; +- int error = 0; +- +- kobj = sysfs_get_kobject(dentry->d_parent); +- if (!kobj) +- return -EINVAL; +- +- target_kobj = sysfs_get_kobject(dentry); +- if (!target_kobj) { +- kobject_put(kobj); +- return -EINVAL; +- } ++ struct sysfs_dirent *sd = dentry->d_fsdata; ++ struct sysfs_dirent *parent_sd = sd->s_parent; ++ struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd; ++ int error; ++ ++ mutex_lock(&sysfs_mutex); ++ error = sysfs_get_target_path(parent_sd, target_sd, path); ++ mutex_unlock(&sysfs_mutex); + +- down_read(&sysfs_rename_sem); +- error = sysfs_get_target_path(kobj, target_kobj, path); +- up_read(&sysfs_rename_sem); +- +- kobject_put(kobj); +- kobject_put(target_kobj); + return error; +- + } + + static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) +diff -Nurb linux-2.6.22-570/fs/sysfs/sysfs.h linux-2.6.22-591/fs/sysfs/sysfs.h +--- linux-2.6.22-570/fs/sysfs/sysfs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/sysfs/sysfs.h 2007-12-21 15:36:14.000000000 -0500 +@@ -1,9 +1,40 @@ ++struct sysfs_elem_dir { ++ struct kobject * kobj; ++}; ++ ++struct sysfs_elem_symlink { ++ struct sysfs_dirent * target_sd; ++}; ++ ++struct sysfs_elem_attr { ++ struct attribute * attr; ++}; ++ ++struct sysfs_elem_bin_attr { ++ struct bin_attribute * bin_attr; ++}; ++ ++/* ++ * As long as s_count reference is held, the sysfs_dirent itself is ++ * accessible. Dereferencing s_elem or any other outer entity ++ * requires s_active reference. 
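++ * In short: s_count (sysfs_get/sysfs_put) keeps the sysfs_dirent ++ * allocated, while s_active (sysfs_get_active/sysfs_put_active) marks ++ * it in active use; removal biases s_active by SD_DEACTIVATED_BIAS so ++ * that no new active references can be taken.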
++ */ + struct sysfs_dirent { + atomic_t s_count; +- struct list_head s_sibling; +- struct list_head s_children; +- void * s_element; +- int s_type; ++ atomic_t s_active; ++ struct sysfs_dirent * s_parent; ++ struct sysfs_dirent * s_sibling; ++ struct sysfs_dirent * s_children; ++ const char * s_name; ++ ++ union { ++ struct sysfs_elem_dir dir; ++ struct sysfs_elem_symlink symlink; ++ struct sysfs_elem_attr attr; ++ struct sysfs_elem_bin_attr bin_attr; ++ } s_elem; ++ ++ unsigned int s_flags; + umode_t s_mode; + ino_t s_ino; + struct dentry * s_dentry; +@@ -11,30 +42,77 @@ + atomic_t s_event; + }; + ++#define SD_DEACTIVATED_BIAS INT_MIN ++ ++struct sysfs_addrm_cxt { ++ struct sysfs_dirent *parent_sd; ++ struct inode *parent_inode; ++ struct sysfs_dirent *removed; ++ int cnt; ++}; ++ ++/* ++ * A sysfs file which deletes another file when written to needs to ++ * write lock the s_active of the victim while its s_active is read ++ * locked for the write operation. Tell lockdep that this is okay. ++ */ ++enum sysfs_s_active_class ++{ ++ SYSFS_S_ACTIVE_NORMAL, /* file r/w access, etc - default */ ++ SYSFS_S_ACTIVE_DEACTIVATE, /* file deactivation */ ++}; ++ + extern struct vfsmount * sysfs_mount; ++extern struct sysfs_dirent sysfs_root; + extern struct kmem_cache *sysfs_dir_cachep; + +-extern void sysfs_delete_inode(struct inode *inode); +-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *); +-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *)); +- +-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *); +-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, +- umode_t, int); +- +-extern int sysfs_add_file(struct dentry *, const struct attribute *, int); +-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name); ++extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); ++extern void sysfs_link_sibling(struct sysfs_dirent *sd); ++extern void sysfs_unlink_sibling(struct sysfs_dirent *sd); ++ ++extern int sysfs_resolve_for_create(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd); ++extern int sysfs_resolve_for_remove(struct kobject *kobj, ++ struct sysfs_dirent **parent_sd); ++ ++extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); ++extern void sysfs_put_active(struct sysfs_dirent *sd); ++extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd); ++extern void sysfs_put_active_two(struct sysfs_dirent *sd); ++extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *parent_sd); ++extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *sd); ++extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, ++ struct sysfs_dirent *sd); ++extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); ++ ++extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode); ++extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd); ++extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode); ++ ++extern void release_sysfs_dirent(struct sysfs_dirent * sd); ++extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name); ++extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, ++ const unsigned char *name); ++extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, ++ int type); ++ ++extern int sysfs_add_file(struct sysfs_dirent *dir_sd, ++ const struct attribute *attr, int type);
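++ ++/* ++ * Typical use of the addrm helpers above, mirroring sysfs_add_file() ++ * (illustrative sketch only; error handling elided): ++ * ++ * sysfs_addrm_start(&acxt, dir_sd); ++ * if (!sysfs_find_dirent(dir_sd, sd->s_name)) { ++ * sysfs_add_one(&acxt, sd); ++ * sysfs_link_sibling(sd); ++ * } ++ * sysfs_addrm_finish(&acxt); ++ */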
++extern int sysfs_hash_and_remove(struct kobject *kobj, ++ struct sysfs_dirent *dir_sd, const char *name); + extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name); + +-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **); +-extern void sysfs_remove_subdir(struct dentry *); ++extern int sysfs_create_subdir(struct kobject *kobj, const char *name, ++ struct sysfs_dirent **p_sd); ++extern void sysfs_remove_subdir(struct sysfs_dirent *sd); + +-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd); +-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent); + extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); + +-extern spinlock_t sysfs_lock; +-extern struct rw_semaphore sysfs_rename_sem; ++extern spinlock_t sysfs_assoc_lock; ++extern struct mutex sysfs_mutex; + extern struct super_block * sysfs_sb; + extern const struct file_operations sysfs_dir_operations; + extern const struct file_operations sysfs_file_operations; +@@ -42,73 +120,9 @@ + extern const struct inode_operations sysfs_dir_inode_operations; + extern const struct inode_operations sysfs_symlink_inode_operations; + +-struct sysfs_symlink { +- char * link_name; +- struct kobject * target_kobj; +-}; +- +-struct sysfs_buffer { +- struct list_head associates; +- size_t count; +- loff_t pos; +- char * page; +- struct sysfs_ops * ops; +- struct semaphore sem; +- int orphaned; +- int needs_read_fill; +- int event; +-}; +- +-struct sysfs_buffer_collection { +- struct list_head associates; +-}; +- +-static inline struct kobject * to_kobj(struct dentry * dentry) +-{ +- struct sysfs_dirent * sd = dentry->d_fsdata; +- return ((struct kobject *) sd->s_element); +-} +- +-static inline struct attribute * to_attr(struct dentry * dentry) +-{ +- struct sysfs_dirent * sd = dentry->d_fsdata; +- return ((struct attribute *) sd->s_element); +-} +- +-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry) +-{ +- struct sysfs_dirent * sd = dentry->d_fsdata; +- return ((struct bin_attribute *) sd->s_element); +-} +- +-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry) ++static inline unsigned int sysfs_type(struct sysfs_dirent *sd) + { +- struct kobject * kobj = NULL; +- +- spin_lock(&dcache_lock); +- if (!d_unhashed(dentry)) { +- struct sysfs_dirent * sd = dentry->d_fsdata; +- if (sd->s_type & SYSFS_KOBJ_LINK) { +- struct sysfs_symlink * sl = sd->s_element; +- kobj = kobject_get(sl->target_kobj); +- } else +- kobj = kobject_get(sd->s_element); +- } +- spin_unlock(&dcache_lock); +- +- return kobj; +-} +- +-static inline void release_sysfs_dirent(struct sysfs_dirent * sd) +-{ +- if (sd->s_type & SYSFS_KOBJ_LINK) { +- struct sysfs_symlink * sl = sd->s_element; +- kfree(sl->link_name); +- kobject_put(sl->target_kobj); +- kfree(sl); +- } +- kfree(sd->s_iattr); +- kmem_cache_free(sysfs_dir_cachep, sd); ++ return sd->s_flags & SYSFS_TYPE_MASK; + } + + static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd) +@@ -122,11 +136,6 @@ + + static inline void sysfs_put(struct sysfs_dirent * sd) + { +- if (atomic_dec_and_test(&sd->s_count)) ++ if (sd && atomic_dec_and_test(&sd->s_count)) + release_sysfs_dirent(sd); + } +- +-static inline int sysfs_is_shadowed_inode(struct inode *inode) +-{ +- return S_ISDIR(inode->i_mode) && inode->i_op->follow_link; +-} +diff -Nurb linux-2.6.22-570/fs/unionfs/Makefile linux-2.6.22-591/fs/unionfs/Makefile +--- linux-2.6.22-570/fs/unionfs/Makefile 1969-12-31 19:00:00.000000000 
-0500 ++++ linux-2.6.22-591/fs/unionfs/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,7 @@ ++obj-$(CONFIG_UNION_FS) += unionfs.o ++ ++unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \ ++ rdstate.o copyup.o dirhelper.o rename.o unlink.o \ ++ lookup.o commonfops.o dirfops.o sioq.o mmap.o ++ ++unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o +diff -Nurb linux-2.6.22-570/fs/unionfs/commonfops.c linux-2.6.22-591/fs/unionfs/commonfops.c +--- linux-2.6.22-570/fs/unionfs/commonfops.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/commonfops.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,748 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * 1) Copyup the file ++ * 2) Rename the file to '.unionfs' - obviously ++ * stolen from NFS's silly rename ++ */ ++static int copyup_deleted_file(struct file *file, struct dentry *dentry, ++ int bstart, int bindex) ++{ ++ static unsigned int counter; ++ const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2; ++ const int countersize = sizeof(counter) * 2; ++ const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1; ++ char name[nlen + 1]; ++ ++ int err; ++ struct dentry *tmp_dentry = NULL; ++ struct dentry *hidden_dentry; ++ struct dentry *hidden_dir_dentry = NULL; ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bstart); ++ ++ sprintf(name, ".unionfs%*.*lx", ++ i_inosize, i_inosize, hidden_dentry->d_inode->i_ino); ++ ++retry: ++ /* ++ * Loop, looking for an unused temp name to copyup to. ++ * ++ * It's somewhat silly that we look for a free temp name in the ++ * source branch (bstart) instead of the dest branch (bindex), where ++ * the final name will be created. We _will_ catch it if somehow ++ * the name exists in the dest branch, but it'd be nice to catch it ++ * sooner than later.
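++ * ++ * The candidate names are ".unionfs" followed by the victim's inode ++ * number and a static counter, both printed as fixed-width hex.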
++ */ ++ tmp_dentry = NULL; ++ do { ++ char *suffix = name + nlen - countersize; ++ ++ dput(tmp_dentry); ++ counter++; ++ sprintf(suffix, "%*.*x", countersize, countersize, counter); ++ ++ printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n", ++ dentry->d_name.name, name); ++ ++ tmp_dentry = lookup_one_len(name, hidden_dentry->d_parent, ++ nlen); ++ if (IS_ERR(tmp_dentry)) { ++ err = PTR_ERR(tmp_dentry); ++ goto out; ++ } ++ } while (tmp_dentry->d_inode != NULL); /* need negative dentry */ ++ dput(tmp_dentry); ++ ++ err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart, ++ bindex, file->f_dentry->d_inode->i_size); ++ if (err == -EEXIST) ++ goto retry; ++ else if (err) ++ goto out; ++ ++ /* bring it to the same state as an unlinked file */ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry)); ++ hidden_dir_dentry = lock_parent(hidden_dentry); ++ err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry); ++ unlock_dir(hidden_dir_dentry); ++ ++out: ++ return err; ++} ++ ++/* ++ * put all references held by upper struct file and free lower file pointer ++ * array ++ */ ++static void cleanup_file(struct file *file) ++{ ++ int bindex, bstart, bend; ++ struct file **lf; ++ struct super_block *sb = file->f_dentry->d_sb; ++ ++ lf = UNIONFS_F(file)->lower_files; ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (unionfs_lower_file_idx(file, bindex)) { ++ /* ++ * Find new index of matching branch with an open ++ * file, since branches could have been added or ++ * deleted causing the one with open files to shift. ++ */ ++ int i; /* holds (possibly) updated branch index */ ++ int old_bid; ++ ++ old_bid = UNIONFS_F(file)->saved_branch_ids[bindex]; ++ i = branch_id_to_idx(sb, old_bid); ++ if (i < 0) ++ printk(KERN_ERR "unionfs: no superblock for " ++ "file %p\n", file); ++ else { ++ /* decrement count of open files */ ++ branchput(sb, i); ++ /* ++ * fput will perform an mntput for us on the ++ * correct branch. Although we're using the ++ * file's old branch configuration, bindex, ++ * which is the old index, correctly points ++ * to the right branch in the file's branch ++ * list. In other words, we're going to ++ * mntput the correct branch even if ++ * branches have been added/removed. 
++ */ ++ fput(unionfs_lower_file_idx(file, bindex)); ++ } ++ } ++ } ++ ++ UNIONFS_F(file)->lower_files = NULL; ++ kfree(lf); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ /* set to NULL because caller needs to know if to kfree on error */ ++ UNIONFS_F(file)->saved_branch_ids = NULL; ++} ++ ++/* open all lower files for a given file */ ++static int open_all_files(struct file *file) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *hidden_file; ++ struct dentry *hidden_dentry; ++ struct dentry *dentry = file->f_dentry; ++ struct super_block *sb = dentry->d_sb; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ ++ dget(hidden_dentry); ++ unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ ++ hidden_file = ++ dentry_open(hidden_dentry, ++ unionfs_lower_mnt_idx(dentry, bindex), ++ file->f_flags); ++ if (IS_ERR(hidden_file)) { ++ err = PTR_ERR(hidden_file); ++ goto out; ++ } else ++ unionfs_set_lower_file_idx(file, bindex, hidden_file); ++ } ++out: ++ return err; ++} ++ ++/* open the highest priority file for a given upper file */ ++static int open_highest_file(struct file *file, int willwrite) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct file *hidden_file; ++ struct dentry *hidden_dentry; ++ ++ struct dentry *dentry = file->f_dentry; ++ struct inode *parent_inode = dentry->d_parent->d_inode; ++ struct super_block *sb = dentry->d_sb; ++ size_t inode_size = dentry->d_inode->i_size; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) { ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file(parent_inode, file, bstart, bindex, ++ inode_size); ++ if (!err) ++ break; ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)-> ++ generation)); ++ goto out; ++ } ++ ++ dget(hidden_dentry); ++ unionfs_mntget(dentry, bstart); ++ branchget(sb, bstart); ++ hidden_file = dentry_open(hidden_dentry, ++ unionfs_lower_mnt_idx(dentry, bstart), ++ file->f_flags); ++ if (IS_ERR(hidden_file)) { ++ err = PTR_ERR(hidden_file); ++ goto out; ++ } ++ unionfs_set_lower_file(file, hidden_file); ++ /* Fix up the position. 
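The freshly opened lower file must continue at the upper file's current offset; its readahead state is copied below for the same reason.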
*/ ++ hidden_file->f_pos = file->f_pos; ++ ++ memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(struct file_ra_state)); ++out: ++ return err; ++} ++ ++/* perform a delayed copyup of a read-write file on a read-only branch */ ++static int do_delayed_copyup(struct file *file, struct dentry *dentry) ++{ ++ int bindex, bstart, bend, err = 0; ++ struct inode *parent_inode = dentry->d_parent->d_inode; ++ loff_t inode_size = file->f_dentry->d_inode->i_size; ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ BUG_ON(!S_ISREG(file->f_dentry->d_inode->i_mode)); ++ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ if (!d_deleted(file->f_dentry)) ++ err = copyup_file(parent_inode, file, bstart, ++ bindex, inode_size); ++ else ++ err = copyup_deleted_file(file, dentry, bstart, ++ bindex); ++ ++ if (!err) ++ break; ++ } ++ if (!err && (bstart > fbstart(file))) { ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (unionfs_lower_file_idx(file, bindex)) { ++ branchput(dentry->d_sb, bindex); ++ fput(unionfs_lower_file_idx(file, bindex)); ++ unionfs_set_lower_file_idx(file, bindex, NULL); ++ } ++ } ++ fbend(file) = bend; ++ } ++ return err; ++} ++ ++/* ++ * Revalidate the struct file ++ * @file: file to revalidate ++ * @willwrite: 1 if caller may cause changes to the file; 0 otherwise. ++ */ ++int unionfs_file_revalidate(struct file *file, int willwrite) ++{ ++ struct super_block *sb; ++ struct dentry *dentry; ++ int sbgen, fgen, dgen; ++ int bstart, bend; ++ int size; ++ ++ int err = 0; ++ ++ dentry = file->f_dentry; ++ unionfs_lock_dentry(dentry); ++ sb = dentry->d_sb; ++ ++ /* ++ * First revalidate the dentry inside struct file, ++ * but not unhashed dentries. ++ */ ++ if (!d_deleted(dentry) && ++ !__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out_nofree; ++ } ++ ++ sbgen = atomic_read(&UNIONFS_SB(sb)->generation); ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ fgen = atomic_read(&UNIONFS_F(file)->generation); ++ ++ BUG_ON(sbgen > dgen); ++ ++ /* ++ * There are two cases we are interested in. The first is if the ++ * generation is lower than the super-block. The second is if ++ * someone has copied up this file from underneath us, we also need ++ * to refresh things. ++ */ ++ if (!d_deleted(dentry) && ++ (sbgen > fgen || dbstart(dentry) != fbstart(file))) { ++ /* First we throw out the existing files. */ ++ cleanup_file(file); ++ ++ /* Now we reopen the file(s) as in unionfs_open. */ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ size = sizeof(struct file *) * sbmax(sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (!UNIONFS_F(file)->lower_files) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (!UNIONFS_F(file)->saved_branch_ids) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ if (S_ISDIR(dentry->d_inode->i_mode)) { ++ /* We need to open all the files. */ ++ err = open_all_files(file); ++ if (err) ++ goto out; ++ } else { ++ /* We only open the highest priority branch. */ ++ err = open_highest_file(file, willwrite); ++ if (err) ++ goto out; ++ } ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(dentry->d_inode)-> ++ generation)); ++ } ++ ++ /* Copyup on the first write to a file on a readonly branch. 
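The write flags were stripped when this file was first opened on the read-only branch (see __open_file), so the real copyup is deferred until a write is actually attempted.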
*/ ++ if (willwrite && IS_WRITE_FLAG(file->f_flags) && ++ !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) && ++ is_robranch(dentry)) { ++ printk(KERN_DEBUG "unionfs: Doing delayed copyup of a " ++ "read-write file on a read-only branch.\n"); ++ err = do_delayed_copyup(file, dentry); ++ } ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ } ++out_nofree: ++ unionfs_unlock_dentry(dentry); ++ return err; ++} ++ ++/* unionfs_open helper function: open a directory */ ++static int __open_dir(struct inode *inode, struct file *file) ++{ ++ struct dentry *hidden_dentry; ++ struct file *hidden_file; ++ int bindex, bstart, bend; ++ ++ bstart = fbstart(file) = dbstart(file->f_dentry); ++ bend = fbend(file) = dbend(file->f_dentry); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = ++ unionfs_lower_dentry_idx(file->f_dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ ++ dget(hidden_dentry); ++ unionfs_mntget(file->f_dentry, bindex); ++ hidden_file = dentry_open(hidden_dentry, ++ unionfs_lower_mnt_idx(file->f_dentry, ++ bindex), ++ file->f_flags); ++ if (IS_ERR(hidden_file)) ++ return PTR_ERR(hidden_file); ++ ++ unionfs_set_lower_file_idx(file, bindex, hidden_file); ++ ++ /* ++ * The branchget goes after the open, because otherwise ++ * we would miss the reference on release. ++ */ ++ branchget(inode->i_sb, bindex); ++ } ++ ++ return 0; ++} ++ ++/* unionfs_open helper function: open a file */ ++static int __open_file(struct inode *inode, struct file *file) ++{ ++ struct dentry *hidden_dentry; ++ struct file *hidden_file; ++ int hidden_flags; ++ int bindex, bstart, bend; ++ ++ hidden_dentry = unionfs_lower_dentry(file->f_dentry); ++ hidden_flags = file->f_flags; ++ ++ bstart = fbstart(file) = dbstart(file->f_dentry); ++ bend = fbend(file) = dbend(file->f_dentry); ++ ++ /* ++ * check for the permission for hidden file. If the error is ++ * COPYUP_ERR, copyup the file. ++ */ ++ if (hidden_dentry->d_inode && is_robranch(file->f_dentry)) { ++ /* ++ * if the open will change the file, copy it up otherwise ++ * defer it. ++ */ ++ if (hidden_flags & O_TRUNC) { ++ int size = 0; ++ int err = -EROFS; ++ ++ /* copyup the file */ ++ for (bindex = bstart - 1; bindex >= 0; bindex--) { ++ err = copyup_file( ++ file->f_dentry->d_parent->d_inode, ++ file, bstart, bindex, size); ++ if (!err) ++ break; ++ } ++ return err; ++ } else ++ hidden_flags &= ~(OPEN_WRITE_FLAGS); ++ } ++ ++ dget(hidden_dentry); ++ ++ /* ++ * dentry_open will decrement mnt refcnt if err. ++ * otherwise fput() will do an mntput() for us upon file close. 
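++	 * That is why we take an extra reference on the lower mount with
++	 * unionfs_mntget() just before the dentry_open() below.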
++ */ ++ unionfs_mntget(file->f_dentry, bstart); ++ hidden_file = ++ dentry_open(hidden_dentry, ++ unionfs_lower_mnt_idx(file->f_dentry, bstart), ++ hidden_flags); ++ if (IS_ERR(hidden_file)) ++ return PTR_ERR(hidden_file); ++ ++ unionfs_set_lower_file(file, hidden_file); ++ branchget(inode->i_sb, bstart); ++ ++ return 0; ++} ++ ++int unionfs_open(struct inode *inode, struct file *file) ++{ ++ int err = 0; ++ struct file *hidden_file = NULL; ++ struct dentry *dentry = NULL; ++ int bindex = 0, bstart = 0, bend = 0; ++ int size; ++ ++ unionfs_read_lock(inode->i_sb); ++ ++ file->private_data = ++ kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL); ++ if (!UNIONFS_F(file)) { ++ err = -ENOMEM; ++ goto out_nofree; ++ } ++ fbstart(file) = -1; ++ fbend(file) = -1; ++ atomic_set(&UNIONFS_F(file)->generation, ++ atomic_read(&UNIONFS_I(inode)->generation)); ++ ++ size = sizeof(struct file *) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL); ++ if (!UNIONFS_F(file)->lower_files) { ++ err = -ENOMEM; ++ goto out; ++ } ++ size = sizeof(int) * sbmax(inode->i_sb); ++ UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL); ++ if (!UNIONFS_F(file)->saved_branch_ids) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ dentry = file->f_dentry; ++ unionfs_lock_dentry(dentry); ++ ++ bstart = fbstart(file) = dbstart(dentry); ++ bend = fbend(file) = dbend(dentry); ++ ++ /* increment, so that we can flush appropriately */ ++ atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens); ++ ++ /* ++ * open all directories and make the unionfs file struct point to ++ * these hidden file structs ++ */ ++ if (S_ISDIR(inode->i_mode)) ++ err = __open_dir(inode, file); /* open a dir */ ++ else ++ err = __open_file(inode, file); /* open a file */ ++ ++ /* freeing the allocated resources, and fput the opened files */ ++ if (err) { ++ atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_file = unionfs_lower_file_idx(file, bindex); ++ if (!hidden_file) ++ continue; ++ ++ branchput(file->f_dentry->d_sb, bindex); ++ /* fput calls dput for hidden_dentry */ ++ fput(hidden_file); ++ } ++ } ++ ++ unionfs_unlock_dentry(dentry); ++ ++out: ++ if (err) { ++ kfree(UNIONFS_F(file)->lower_files); ++ kfree(UNIONFS_F(file)->saved_branch_ids); ++ kfree(UNIONFS_F(file)); ++ } ++out_nofree: ++ unionfs_read_unlock(inode->i_sb); ++ return err; ++} ++ ++/* ++ * release all lower object references & free the file info structure ++ * ++ * No need to grab sb info's rwsem. ++ */ ++int unionfs_file_release(struct inode *inode, struct file *file) ++{ ++ struct file *hidden_file = NULL; ++ struct unionfs_file_info *fileinfo; ++ struct unionfs_inode_info *inodeinfo; ++ struct super_block *sb = inode->i_sb; ++ int bindex, bstart, bend; ++ int fgen; ++ int err; ++ ++ unionfs_read_lock(sb); ++ /* ++ * Yes, we have to revalidate this file even if it's being released. ++ * This is important for open-but-unlinked files, as well as mmap ++ * support. 
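++	 * The lower files must reflect any branch additions or removals
++	 * before we drop our references to them below.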
++ */ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ return err; ++ fileinfo = UNIONFS_F(file); ++ BUG_ON(file->f_dentry->d_inode != inode); ++ inodeinfo = UNIONFS_I(inode); ++ ++ /* fput all the hidden files */ ++ fgen = atomic_read(&fileinfo->generation); ++ bstart = fbstart(file); ++ bend = fbend(file); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (hidden_file) { ++ fput(hidden_file); ++ branchput(inode->i_sb, bindex); ++ } ++ } ++ kfree(fileinfo->lower_files); ++ kfree(fileinfo->saved_branch_ids); ++ ++ if (fileinfo->rdstate) { ++ fileinfo->rdstate->access = jiffies; ++ printk(KERN_DEBUG "unionfs: saving rdstate with cookie " ++ "%u [%d.%lld]\n", ++ fileinfo->rdstate->cookie, ++ fileinfo->rdstate->bindex, ++ (long long)fileinfo->rdstate->dirpos); ++ spin_lock(&inodeinfo->rdlock); ++ inodeinfo->rdcount++; ++ list_add_tail(&fileinfo->rdstate->cache, ++ &inodeinfo->readdircache); ++ mark_inode_dirty(inode); ++ spin_unlock(&inodeinfo->rdlock); ++ fileinfo->rdstate = NULL; ++ } ++ kfree(fileinfo); ++ return 0; ++} ++ ++/* pass the ioctl to the lower fs */ ++static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct file *hidden_file; ++ int err; ++ ++ hidden_file = unionfs_lower_file(file); ++ ++ err = security_file_ioctl(hidden_file, cmd, arg); ++ if (err) ++ goto out; ++ ++ err = -ENOTTY; ++ if (!hidden_file || !hidden_file->f_op) ++ goto out; ++ if (hidden_file->f_op->unlocked_ioctl) { ++ err = hidden_file->f_op->unlocked_ioctl(hidden_file, cmd, arg); ++ } else if (hidden_file->f_op->ioctl) { ++ lock_kernel(); ++ err = hidden_file->f_op->ioctl(hidden_file->f_dentry->d_inode, ++ hidden_file, cmd, arg); ++ unlock_kernel(); ++ } ++ ++out: ++ return err; ++} ++ ++/* ++ * return to user-space the branch indices containing the file in question ++ * ++ * We use fd_set and therefore we are limited to the number of the branches ++ * to FD_SETSIZE, which is currently 1024 - plenty for most people ++ */ ++static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; ++ fd_set branchlist; ++ ++ int bstart = 0, bend = 0, bindex = 0; ++ struct dentry *dentry, *hidden_dentry; ++ ++ dentry = file->f_dentry; ++ unionfs_lock_dentry(dentry); ++ if ((err = unionfs_partial_lookup(dentry))) ++ goto out; ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ FD_ZERO(&branchlist); ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ if (hidden_dentry->d_inode) ++ FD_SET(bindex, &branchlist); ++ } ++ ++ err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set)); ++ if (err) ++ err = -EFAULT; ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ return err < 0 ? 
err : bend; ++} ++ ++long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ long err; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ goto out; ++ ++ /* check if asked for local commands */ ++ switch (cmd) { ++ case UNIONFS_IOCTL_INCGEN: ++ /* Increment the superblock generation count */ ++ printk("unionfs: incgen ioctl deprecated; " ++ "use \"-o remount,incgen\"\n"); ++ err = -ENOSYS; ++ break; ++ ++ case UNIONFS_IOCTL_QUERYFILE: ++ /* Return list of branches containing the given file */ ++ err = unionfs_ioctl_queryfile(file, cmd, arg); ++ break; ++ ++ default: ++ /* pass the ioctl down */ ++ err = do_ioctl(file, cmd, arg); ++ break; ++ } ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++int unionfs_flush(struct file *file, fl_owner_t id) ++{ ++ int err = 0; ++ struct file *hidden_file = NULL; ++ struct dentry *dentry = file->f_dentry; ++ int bindex, bstart, bend; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ goto out; ++ ++ if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens)) ++ goto out; ++ ++ unionfs_lock_dentry(dentry); ++ ++ bstart = fbstart(file); ++ bend = fbend(file); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_file = unionfs_lower_file_idx(file, bindex); ++ ++ if (hidden_file && hidden_file->f_op && ++ hidden_file->f_op->flush) { ++ err = hidden_file->f_op->flush(hidden_file, id); ++ if (err) ++ goto out_lock; ++ ++ /* if there are no more refs to the dentry, dput it */ ++ if (d_deleted(dentry)) { ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, ++ NULL); ++ } ++ } ++ ++ } ++ ++out_lock: ++ unionfs_unlock_dentry(dentry); ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/copyup.c linux-2.6.22-591/fs/unionfs/copyup.c +--- linux-2.6.22-570/fs/unionfs/copyup.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/copyup.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,806 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */
++
++#include "union.h"
++
++/*
++ * For detailed explanation of copyup see:
++ * Documentation/filesystems/unionfs/concepts.txt
++ */
++
++/* forward definitions */
++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
++			       int bstart, int new_bindex, const char *name,
++			       int namelen, struct file **copyup_file,
++			       loff_t len);
++static struct dentry *create_parents_named(struct inode *dir,
++					   struct dentry *dentry,
++					   const char *name, int bindex);
++
++#ifdef CONFIG_UNION_FS_XATTR
++/* copyup all extended attrs for a given dentry */
++static int copyup_xattrs(struct dentry *old_hidden_dentry,
++			 struct dentry *new_hidden_dentry)
++{
++	int err = 0;
++	ssize_t list_size = -1;
++	char *name_list = NULL;
++	char *attr_value = NULL;
++	char *name_list_orig = NULL;
++
++	list_size = vfs_listxattr(old_hidden_dentry, NULL, 0);
++
++	if (list_size <= 0) {
++		err = list_size;
++		goto out;
++	}
++
++	name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
++	if (!name_list || IS_ERR(name_list)) {
++		err = name_list ? PTR_ERR(name_list) : -ENOMEM;
++		goto out;
++	}
++	list_size = vfs_listxattr(old_hidden_dentry, name_list, list_size);
++	attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
++	if (!attr_value || IS_ERR(attr_value)) {
++		err = attr_value ? PTR_ERR(attr_value) : -ENOMEM;
++		goto out;
++	}
++	name_list_orig = name_list;
++	while (*name_list) {
++		ssize_t size;
++
++		/* Lock here since vfs_getxattr doesn't lock for us */
++		mutex_lock(&old_hidden_dentry->d_inode->i_mutex);
++		size = vfs_getxattr(old_hidden_dentry, name_list,
++				    attr_value, XATTR_SIZE_MAX);
++		mutex_unlock(&old_hidden_dentry->d_inode->i_mutex);
++		if (size < 0) {
++			err = size;
++			goto out;
++		}
++
++		if (size > XATTR_SIZE_MAX) {
++			err = -E2BIG;
++			goto out;
++		}
++		/* Don't lock here since vfs_setxattr does it for us. */
++		err = vfs_setxattr(new_hidden_dentry, name_list, attr_value,
++				   size, 0);
++
++		if (err < 0)
++			goto out;
++		name_list += strlen(name_list) + 1;
++	}
++out:
++	name_list = name_list_orig;
++
++	if (name_list)
++		unionfs_xattr_free(name_list, list_size + 1);
++	if (attr_value)
++		unionfs_xattr_free(attr_value, XATTR_SIZE_MAX);
++	/* It is no big deal if this fails, we just roll with the punches. */
++	if (err == -ENOTSUPP || err == -EOPNOTSUPP)
++		err = 0;
++	return err;
++}
++#endif /* CONFIG_UNION_FS_XATTR */
++
++/* Determine the mode based on the copyup flags and the existing dentry.
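++ * The times, ownership, and mode are carried over from the lower source
++ * inode in a single notify_change() call.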
*/ ++static int copyup_permissions(struct super_block *sb, ++ struct dentry *old_hidden_dentry, ++ struct dentry *new_hidden_dentry) ++{ ++ struct inode *i = old_hidden_dentry->d_inode; ++ struct iattr newattrs; ++ int err; ++ ++ newattrs.ia_atime = i->i_atime; ++ newattrs.ia_mtime = i->i_mtime; ++ newattrs.ia_ctime = i->i_ctime; ++ ++ newattrs.ia_gid = i->i_gid; ++ newattrs.ia_uid = i->i_uid; ++ ++ newattrs.ia_mode = i->i_mode; ++ ++ newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME | ++ ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE | ++ ATTR_GID | ATTR_UID | ATTR_MODE; ++ ++ err = notify_change(new_hidden_dentry, &newattrs); ++ ++ return err; ++} ++ ++int copyup_dentry(struct inode *dir, struct dentry *dentry, ++ int bstart, int new_bindex, ++ struct file **copyup_file, loff_t len) ++{ ++ return copyup_named_dentry(dir, dentry, bstart, new_bindex, ++ dentry->d_name.name, ++ dentry->d_name.len, copyup_file, len); ++} ++ ++/* ++ * create the new device/file/directory - use copyup_permission to copyup ++ * times, and mode ++ * ++ * if the object being copied up is a regular file, the file is only created, ++ * the contents have to be copied up separately ++ */ ++static int __copyup_ndentry(struct dentry *old_hidden_dentry, ++ struct dentry *new_hidden_dentry, ++ struct dentry *new_hidden_parent_dentry, ++ char *symbuf) ++{ ++ int err = 0; ++ umode_t old_mode = old_hidden_dentry->d_inode->i_mode; ++ struct sioq_args args; ++ ++ if (S_ISDIR(old_mode)) { ++ args.mkdir.parent = new_hidden_parent_dentry->d_inode; ++ args.mkdir.dentry = new_hidden_dentry; ++ args.mkdir.mode = old_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ } else if (S_ISLNK(old_mode)) { ++ args.symlink.parent = new_hidden_parent_dentry->d_inode; ++ args.symlink.dentry = new_hidden_dentry; ++ args.symlink.symbuf = symbuf; ++ args.symlink.mode = old_mode; ++ ++ run_sioq(__unionfs_symlink, &args); ++ err = args.err; ++ } else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) || ++ S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) { ++ args.mknod.parent = new_hidden_parent_dentry->d_inode; ++ args.mknod.dentry = new_hidden_dentry; ++ args.mknod.mode = old_mode; ++ args.mknod.dev = old_hidden_dentry->d_inode->i_rdev; ++ ++ run_sioq(__unionfs_mknod, &args); ++ err = args.err; ++ } else if (S_ISREG(old_mode)) { ++ args.create.parent = new_hidden_parent_dentry->d_inode; ++ args.create.dentry = new_hidden_dentry; ++ args.create.mode = old_mode; ++ args.create.nd = NULL; ++ ++ run_sioq(__unionfs_create, &args); ++ err = args.err; ++ } else { ++ printk(KERN_ERR "unionfs: unknown inode type %d\n", ++ old_mode); ++ BUG(); ++ } ++ ++ return err; ++} ++ ++static int __copyup_reg_data(struct dentry *dentry, ++ struct dentry *new_hidden_dentry, int new_bindex, ++ struct dentry *old_hidden_dentry, int old_bindex, ++ struct file **copyup_file, loff_t len) ++{ ++ struct super_block *sb = dentry->d_sb; ++ struct file *input_file; ++ struct file *output_file; ++ mm_segment_t old_fs; ++ char *buf = NULL; ++ ssize_t read_bytes, write_bytes; ++ loff_t size; ++ int err = 0; ++ ++ /* open old file */ ++ unionfs_mntget(dentry, old_bindex); ++ branchget(sb, old_bindex); ++ input_file = dentry_open(old_hidden_dentry, ++ unionfs_lower_mnt_idx(dentry, old_bindex), ++ O_RDONLY | O_LARGEFILE); ++ if (IS_ERR(input_file)) { ++ dput(old_hidden_dentry); ++ err = PTR_ERR(input_file); ++ goto out; ++ } ++ if (!input_file->f_op || !input_file->f_op->read) { ++ err = -EINVAL; ++ goto out_close_in; ++ } ++ ++ /* open new file */ ++ dget(new_hidden_dentry); 
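++	/* take mnt and branch references for the branch we open for writing */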
++ unionfs_mntget(dentry, new_bindex); ++ branchget(sb, new_bindex); ++ output_file = dentry_open(new_hidden_dentry, ++ unionfs_lower_mnt_idx(dentry, new_bindex), ++ O_WRONLY | O_LARGEFILE); ++ if (IS_ERR(output_file)) { ++ err = PTR_ERR(output_file); ++ goto out_close_in2; ++ } ++ if (!output_file->f_op || !output_file->f_op->write) { ++ err = -EINVAL; ++ goto out_close_out; ++ } ++ ++ /* allocating a buffer */ ++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!buf) { ++ err = -ENOMEM; ++ goto out_close_out; ++ } ++ ++ input_file->f_pos = 0; ++ output_file->f_pos = 0; ++ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ ++ size = len; ++ err = 0; ++ do { ++ if (len >= PAGE_SIZE) ++ size = PAGE_SIZE; ++ else if ((len < PAGE_SIZE) && (len > 0)) ++ size = len; ++ ++ len -= PAGE_SIZE; ++ ++ read_bytes = ++ input_file->f_op->read(input_file, ++ (char __user *)buf, size, ++ &input_file->f_pos); ++ if (read_bytes <= 0) { ++ err = read_bytes; ++ break; ++ } ++ ++ write_bytes = ++ output_file->f_op->write(output_file, ++ (char __user *)buf, ++ read_bytes, ++ &output_file->f_pos); ++ if ((write_bytes < 0) || (write_bytes < read_bytes)) { ++ err = write_bytes; ++ break; ++ } ++ } while ((read_bytes > 0) && (len > 0)); ++ ++ set_fs(old_fs); ++ ++ kfree(buf); ++ ++ if (!err) ++ err = output_file->f_op->fsync(output_file, ++ new_hidden_dentry, 0); ++ ++ if (err) ++ goto out_close_out; ++ ++ if (copyup_file) { ++ *copyup_file = output_file; ++ goto out_close_in; ++ } ++ ++out_close_out: ++ fput(output_file); ++ ++out_close_in2: ++ branchput(sb, new_bindex); ++ ++out_close_in: ++ fput(input_file); ++ ++out: ++ branchput(sb, old_bindex); ++ ++ return err; ++} ++ ++/* ++ * dput the lower references for old and new dentry & clear a lower dentry ++ * pointer ++ */ ++static void __clear(struct dentry *dentry, struct dentry *old_hidden_dentry, ++ int old_bstart, int old_bend, ++ struct dentry *new_hidden_dentry, int new_bindex) ++{ ++ /* get rid of the hidden dentry and all its traces */ ++ unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL); ++ set_dbstart(dentry, old_bstart); ++ set_dbend(dentry, old_bend); ++ ++ dput(new_hidden_dentry); ++ dput(old_hidden_dentry); ++} ++ ++/* copy up a dentry to a file of specified name */ ++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry, ++ int bstart, int new_bindex, const char *name, ++ int namelen, struct file **copyup_file, ++ loff_t len) ++{ ++ struct dentry *new_hidden_dentry; ++ struct dentry *old_hidden_dentry = NULL; ++ struct super_block *sb; ++ int err = 0; ++ int old_bindex; ++ int old_bstart; ++ int old_bend; ++ struct dentry *new_hidden_parent_dentry = NULL; ++ mm_segment_t oldfs; ++ char *symbuf = NULL; ++ ++ verify_locked(dentry); ++ ++ old_bindex = bstart; ++ old_bstart = dbstart(dentry); ++ old_bend = dbend(dentry); ++ ++ BUG_ON(new_bindex < 0); ++ BUG_ON(new_bindex >= old_bindex); ++ ++ sb = dir->i_sb; ++ ++ if ((err = is_robranch_super(sb, new_bindex))) ++ goto out; ++ ++ /* Create the directory structure above this dentry. */ ++ new_hidden_dentry = ++ create_parents_named(dir, dentry, name, new_bindex); ++ if (IS_ERR(new_hidden_dentry)) { ++ err = PTR_ERR(new_hidden_dentry); ++ goto out; ++ } ++ ++ old_hidden_dentry = unionfs_lower_dentry_idx(dentry, old_bindex); ++ /* we conditionally dput this old_hidden_dentry at end of function */ ++ dget(old_hidden_dentry); ++ ++ /* For symlinks, we must read the link before we lock the directory. 
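++	 * The link body is saved in symbuf and replayed through
++	 * __unionfs_symlink() once the new parent is locked.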
*/ ++ if (S_ISLNK(old_hidden_dentry->d_inode->i_mode)) { ++ ++ symbuf = kmalloc(PATH_MAX, GFP_KERNEL); ++ if (!symbuf) { ++ __clear(dentry, old_hidden_dentry, ++ old_bstart, old_bend, ++ new_hidden_dentry, new_bindex); ++ err = -ENOMEM; ++ goto out_free; ++ } ++ ++ oldfs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = old_hidden_dentry->d_inode->i_op->readlink( ++ old_hidden_dentry, ++ (char __user *)symbuf, ++ PATH_MAX); ++ set_fs(oldfs); ++ if (err) { ++ __clear(dentry, old_hidden_dentry, ++ old_bstart, old_bend, ++ new_hidden_dentry, new_bindex); ++ goto out_free; ++ } ++ symbuf[err] = '\0'; ++ } ++ ++ /* Now we lock the parent, and create the object in the new branch. */ ++ new_hidden_parent_dentry = lock_parent(new_hidden_dentry); ++ ++ /* create the new inode */ ++ err = __copyup_ndentry(old_hidden_dentry, new_hidden_dentry, ++ new_hidden_parent_dentry, symbuf); ++ ++ if (err) { ++ __clear(dentry, old_hidden_dentry, ++ old_bstart, old_bend, ++ new_hidden_dentry, new_bindex); ++ goto out_unlock; ++ } ++ ++ /* We actually copyup the file here. */ ++ if (S_ISREG(old_hidden_dentry->d_inode->i_mode)) ++ err = __copyup_reg_data(dentry, new_hidden_dentry, new_bindex, ++ old_hidden_dentry, old_bindex, ++ copyup_file, len); ++ if (err) ++ goto out_unlink; ++ ++ /* Set permissions. */ ++ if ((err = copyup_permissions(sb, old_hidden_dentry, ++ new_hidden_dentry))) ++ goto out_unlink; ++ ++#ifdef CONFIG_UNION_FS_XATTR ++ /* Selinux uses extended attributes for permissions. */ ++ if ((err = copyup_xattrs(old_hidden_dentry, new_hidden_dentry))) ++ goto out_unlink; ++#endif ++ ++ /* do not allow files getting deleted to be re-interposed */ ++ if (!d_deleted(dentry)) ++ unionfs_reinterpose(dentry); ++ ++ goto out_unlock; ++ ++out_unlink: ++ /* ++ * copyup failed, because we possibly ran out of space or ++ * quota, or something else happened so let's unlink; we don't ++ * really care about the return value of vfs_unlink ++ */ ++ vfs_unlink(new_hidden_parent_dentry->d_inode, new_hidden_dentry); ++ ++ if (copyup_file) { ++ /* need to close the file */ ++ ++ fput(*copyup_file); ++ branchput(sb, new_bindex); ++ } ++ ++ /* ++ * TODO: should we reset the error to something like -EIO? ++ * ++ * If we don't reset, the user may get some nonsensical errors, but ++ * on the other hand, if we reset to EIO, we guarantee that the user ++ * will get a "confusing" error message. ++ */ ++ ++out_unlock: ++ unlock_dir(new_hidden_parent_dentry); ++ ++out_free: ++ /* ++ * If old_hidden_dentry was a directory, we need to dput it. If it ++ * was a file, then it was already dput indirectly by other ++ * functions we call above which operate on regular files. ++ */ ++ if (old_hidden_dentry && old_hidden_dentry->d_inode && ++ S_ISDIR(old_hidden_dentry->d_inode->i_mode)) ++ dput(old_hidden_dentry); ++ kfree(symbuf); ++ ++out: ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex.' The copy ++ * will be named "name". 
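++ * On success, the file's bstart and its lower file pointer are switched
++ * to the new branch.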
++ */ ++int copyup_named_file(struct inode *dir, struct file *file, char *name, ++ int bstart, int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ ++ err = copyup_named_dentry(dir, file->f_dentry, bstart, ++ new_bindex, name, strlen(name), &output_file, ++ len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* ++ * This function creates a copy of a file represented by 'file' which ++ * currently resides in branch 'bstart' to branch 'new_bindex'. ++ */ ++int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int new_bindex, loff_t len) ++{ ++ int err = 0; ++ struct file *output_file = NULL; ++ ++ err = copyup_dentry(dir, file->f_dentry, bstart, new_bindex, ++ &output_file, len); ++ if (!err) { ++ fbstart(file) = new_bindex; ++ unionfs_set_lower_file_idx(file, new_bindex, output_file); ++ } ++ ++ return err; ++} ++ ++/* ++ * This function replicates the directory structure up-to given dentry in the ++ * bindex branch. Can create directory structure recursively to the right ++ * also. ++ */ ++struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ int bindex) ++{ ++ return create_parents_named(dir, dentry, dentry->d_name.name, bindex); ++} ++ ++/* purge a dentry's lower-branch states (dput/mntput, etc.) */ ++static void __cleanup_dentry(struct dentry *dentry, int bindex, ++ int old_bstart, int old_bend) ++{ ++ int loop_start; ++ int loop_end; ++ int new_bstart = -1; ++ int new_bend = -1; ++ int i; ++ ++ loop_start = min(old_bstart, bindex); ++ loop_end = max(old_bend, bindex); ++ ++ /* ++ * This loop sets the bstart and bend for the new dentry by ++ * traversing from left to right. It also dputs all negative ++ * dentries except bindex ++ */ ++ for (i = loop_start; i <= loop_end; i++) { ++ if (!unionfs_lower_dentry_idx(dentry, i)) ++ continue; ++ ++ if (i == bindex) { ++ new_bend = i; ++ if (new_bstart < 0) ++ new_bstart = i; ++ continue; ++ } ++ ++ if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ ++ unionfs_mntput(dentry, i); ++ unionfs_set_lower_mnt_idx(dentry, i, NULL); ++ } else { ++ if (new_bstart < 0) ++ new_bstart = i; ++ new_bend = i; ++ } ++ } ++ ++ if (new_bstart < 0) ++ new_bstart = bindex; ++ if (new_bend < 0) ++ new_bend = bindex; ++ set_dbstart(dentry, new_bstart); ++ set_dbend(dentry, new_bend); ++ ++} ++ ++/* set lower inode ptr and update bstart & bend if necessary */ ++static void __set_inode(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_inode_idx(upper->d_inode, bindex, ++ igrab(lower->d_inode)); ++ if (likely(ibstart(upper->d_inode) > bindex)) ++ ibstart(upper->d_inode) = bindex; ++ if (likely(ibend(upper->d_inode) < bindex)) ++ ibend(upper->d_inode) = bindex; ++ ++} ++ ++/* set lower dentry ptr and update bstart & bend if necessary */ ++static void __set_dentry(struct dentry *upper, struct dentry *lower, ++ int bindex) ++{ ++ unionfs_set_lower_dentry_idx(upper, bindex, lower); ++ if (likely(dbstart(upper) > bindex)) ++ set_dbstart(upper, bindex); ++ if (likely(dbend(upper) < bindex)) ++ set_dbend(upper, bindex); ++} ++ ++/* ++ * This function replicates the directory structure up-to given dentry ++ * in the bindex branch. 
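++ * It walks up the dentry chain to the first ancestor that already exists
++ * in that branch, then creates the missing directories back down; e.g.,
++ * copying up a/b/c/f into a branch holding only "a" first creates "b"
++ * and "c" there.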
*/
++static struct dentry *create_parents_named(struct inode *dir,
++					   struct dentry *dentry,
++					   const char *name, int bindex)
++{
++	int err;
++	struct dentry *child_dentry;
++	struct dentry *parent_dentry;
++	struct dentry *hidden_parent_dentry = NULL;
++	struct dentry *hidden_dentry = NULL;
++	const char *childname;
++	unsigned int childnamelen;
++
++	int nr_dentry;
++	int count = 0;
++
++	int old_bstart;
++	int old_bend;
++	struct dentry **path = NULL;
++	struct super_block *sb;
++
++	verify_locked(dentry);
++
++	if ((err = is_robranch_super(dir->i_sb, bindex))) {
++		hidden_dentry = ERR_PTR(err);
++		goto out;
++	}
++
++	old_bstart = dbstart(dentry);
++	old_bend = dbend(dentry);
++
++	hidden_dentry = ERR_PTR(-ENOMEM);
++
++	/* There is no sense allocating any less than the minimum. */
++	nr_dentry = 1;
++	path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
++	if (!path)
++		goto out;
++
++	/* assume the negative dentry of unionfs as the parent dentry */
++	parent_dentry = dentry;
++
++	/*
++	 * This loop finds the first parent that exists in the given branch.
++	 * We start building the directory structure from there.  At the end
++	 * of the loop, the following should hold:
++	 *  - child_dentry is the first nonexistent child
++	 *  - parent_dentry is the first existent parent
++	 *  - path[0] is the deepest child
++	 *  - path[count] is the first child to create
++	 */
++	do {
++		child_dentry = parent_dentry;
++
++		/* find the parent directory dentry in unionfs */
++		parent_dentry = child_dentry->d_parent;
++		unionfs_lock_dentry(parent_dentry);
++
++		/* find out the hidden_parent_dentry in the given branch */
++		hidden_parent_dentry =
++			unionfs_lower_dentry_idx(parent_dentry, bindex);
++
++		/* grow path table */
++		if (count == nr_dentry) {
++			void *p;
++
++			nr_dentry *= 2;
++			p = krealloc(path, nr_dentry * sizeof(struct dentry *),
++				     GFP_KERNEL);
++			if (!p) {
++				hidden_dentry = ERR_PTR(-ENOMEM);
++				goto out;
++			}
++			path = p;
++		}
++
++		/* store the child dentry */
++		path[count++] = child_dentry;
++	} while (!hidden_parent_dentry);
++	count--;
++
++	sb = dentry->d_sb;
++
++	/*
++	 * This is basically while(child_dentry != dentry).  This loop is
++	 * horrible to follow and should be replaced with cleaner code.
++	 */
++	while (1) {
++		/* get hidden parent dir in the current branch */
++		hidden_parent_dentry =
++			unionfs_lower_dentry_idx(parent_dentry, bindex);
++		unionfs_unlock_dentry(parent_dentry);
++
++		/* init the values to lookup */
++		childname = child_dentry->d_name.name;
++		childnamelen = child_dentry->d_name.len;
++
++		if (child_dentry != dentry) {
++			/* lookup child in the underlying file system */
++			hidden_dentry =
++				lookup_one_len(childname, hidden_parent_dentry,
++					       childnamelen);
++			if (IS_ERR(hidden_dentry))
++				goto out;
++		} else {
++
++			/*
++			 * is the name a whiteout of the child name ?
++			 * lookup the whiteout child in the underlying file
++			 * system
++			 */
++			hidden_dentry =
++				lookup_one_len(name, hidden_parent_dentry,
++					       strlen(name));
++			if (IS_ERR(hidden_dentry))
++				goto out;
++
++			/*
++			 * Replace the current dentry (if any) with the new
++			 * one.
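++			 * From here on, the lower dentry at bindex is the
++			 * one we just looked up in the new branch.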
++ */ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_set_lower_dentry_idx(dentry, bindex, ++ hidden_dentry); ++ ++ __cleanup_dentry(dentry, bindex, old_bstart, old_bend); ++ break; ++ } ++ ++ if (hidden_dentry->d_inode) { ++ /* ++ * since this already exists we dput to avoid ++ * multiple references on the same dentry ++ */ ++ dput(hidden_dentry); ++ } else { ++ struct sioq_args args; ++ ++ /* its a negative dentry, create a new dir */ ++ hidden_parent_dentry = lock_parent(hidden_dentry); ++ ++ args.mkdir.parent = hidden_parent_dentry->d_inode; ++ args.mkdir.dentry = hidden_dentry; ++ args.mkdir.mode = child_dentry->d_inode->i_mode; ++ ++ run_sioq(__unionfs_mkdir, &args); ++ err = args.err; ++ ++ if (!err) ++ err = copyup_permissions(dir->i_sb, ++ child_dentry, ++ hidden_dentry); ++ unlock_dir(hidden_parent_dentry); ++ if (err) { ++ struct inode *inode = hidden_dentry->d_inode; ++ /* ++ * If we get here, it means that we created a new ++ * dentry+inode, but copying permissions failed. ++ * Therefore, we should delete this inode and dput ++ * the dentry so as not to leave cruft behind. ++ * ++ * XXX: call dentry_iput() instead, but then we have ++ * to export that symbol. ++ */ ++ if (hidden_dentry->d_op && hidden_dentry->d_op->d_iput) ++ hidden_dentry->d_op->d_iput(hidden_dentry, ++ inode); ++ else ++ iput(inode); ++ hidden_dentry->d_inode = NULL; ++ ++ dput(hidden_dentry); ++ hidden_dentry = ERR_PTR(err); ++ goto out; ++ } ++ ++ } ++ ++ __set_inode(child_dentry, hidden_dentry, bindex); ++ __set_dentry(child_dentry, hidden_dentry, bindex); ++ ++ parent_dentry = child_dentry; ++ child_dentry = path[--count]; ++ } ++out: ++ /* cleanup any leftover locks from the do/while loop above */ ++ if (IS_ERR(hidden_dentry)) ++ while (count) ++ unionfs_unlock_dentry(path[count--]); ++ kfree(path); ++ return hidden_dentry; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/dentry.c linux-2.6.22-591/fs/unionfs/dentry.c +--- linux-2.6.22-570/fs/unionfs/dentry.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/dentry.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,353 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Revalidate a single dentry. ++ * Assume that dentry's info node is locked. ++ * Assume that parent(s) are all valid already, but ++ * the child may not yet be valid. ++ * Returns 1 if valid, 0 otherwise. ++ */ ++static int __unionfs_d_revalidate_one(struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ int valid = 1; /* default is valid (1); invalid is 0. 
*/ ++ struct dentry *hidden_dentry; ++ int bindex, bstart, bend; ++ int sbgen, dgen; ++ int positive = 0; ++ int locked = 0; ++ int interpose_flag; ++ ++ struct nameidata lowernd; /* TODO: be gentler to the stack */ ++ ++ if (nd) ++ memcpy(&lowernd, nd, sizeof(struct nameidata)); ++ else ++ memset(&lowernd, 0, sizeof(struct nameidata)); ++ ++ verify_locked(dentry); ++ ++ /* if the dentry is unhashed, do NOT revalidate */ ++ if (d_deleted(dentry)) { ++ printk(KERN_DEBUG "unionfs: unhashed dentry being " ++ "revalidated: %*s\n", ++ dentry->d_name.len, dentry->d_name.name); ++ goto out; ++ } ++ ++ BUG_ON(dbstart(dentry) == -1); ++ if (dentry->d_inode) ++ positive = 1; ++ dgen = atomic_read(&UNIONFS_D(dentry)->generation); ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ /* ++ * If we are working on an unconnected dentry, then there is no ++ * revalidation to be done, because this file does not exist within ++ * the namespace, and Unionfs operates on the namespace, not data. ++ */ ++ if (sbgen != dgen) { ++ struct dentry *result; ++ int pdgen; ++ ++ /* The root entry should always be valid */ ++ BUG_ON(IS_ROOT(dentry)); ++ ++ /* We can't work correctly if our parent isn't valid. */ ++ pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation); ++ BUG_ON(pdgen != sbgen); /* should never happen here */ ++ ++ /* Free the pointers for our inodes and this dentry. */ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ if (bstart >= 0) { ++ struct dentry *hidden_dentry; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = ++ unionfs_lower_dentry_idx(dentry, ++ bindex); ++ dput(hidden_dentry); ++ } ++ } ++ set_dbstart(dentry, -1); ++ set_dbend(dentry, -1); ++ ++ interpose_flag = INTERPOSE_REVAL_NEG; ++ if (positive) { ++ interpose_flag = INTERPOSE_REVAL; ++ /* ++ * During BRM, the VFS could already hold a lock on ++ * a file being read, so don't lock it again ++ * (deadlock), but if you lock it in this function, ++ * then release it here too. 
++ */ ++ if (!mutex_is_locked(&dentry->d_inode->i_mutex)) { ++ mutex_lock(&dentry->d_inode->i_mutex); ++ locked = 1; ++ } ++ ++ bstart = ibstart(dentry->d_inode); ++ bend = ibend(dentry->d_inode); ++ if (bstart >= 0) { ++ struct inode *hidden_inode; ++ for (bindex = bstart; bindex <= bend; ++ bindex++) { ++ hidden_inode = ++ unionfs_lower_inode_idx( ++ dentry->d_inode, ++ bindex); ++ iput(hidden_inode); ++ } ++ } ++ kfree(UNIONFS_I(dentry->d_inode)->lower_inodes); ++ UNIONFS_I(dentry->d_inode)->lower_inodes = NULL; ++ ibstart(dentry->d_inode) = -1; ++ ibend(dentry->d_inode) = -1; ++ if (locked) ++ mutex_unlock(&dentry->d_inode->i_mutex); ++ } ++ ++ result = unionfs_lookup_backend(dentry, &lowernd, ++ interpose_flag); ++ if (result) { ++ if (IS_ERR(result)) { ++ valid = 0; ++ goto out; ++ } ++ /* ++ * current unionfs_lookup_backend() doesn't return ++ * a valid dentry ++ */ ++ dput(dentry); ++ dentry = result; ++ } ++ ++ if (positive && UNIONFS_I(dentry->d_inode)->stale) { ++ make_bad_inode(dentry->d_inode); ++ d_drop(dentry); ++ valid = 0; ++ goto out; ++ } ++ goto out; ++ } ++ ++ /* The revalidation must occur across all branches */ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ BUG_ON(bstart == -1); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry || !hidden_dentry->d_op ++ || !hidden_dentry->d_op->d_revalidate) ++ continue; ++ if (!hidden_dentry->d_op->d_revalidate(hidden_dentry, ++ &lowernd)) ++ valid = 0; ++ } ++ ++ if (!dentry->d_inode) ++ valid = 0; ++ ++ if (valid) { ++ fsstack_copy_attr_all(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode), ++ unionfs_get_nlinks); ++ fsstack_copy_inode_size(dentry->d_inode, ++ unionfs_lower_inode(dentry->d_inode)); ++ } ++ ++out: ++ return valid; ++} ++ ++/* ++ * Revalidate a parent chain of dentries, then the actual node. ++ * Assumes that dentry is locked, but will lock all parents if/when needed. ++ */ ++int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd) ++{ ++ int valid = 0; /* default is invalid (0); valid is 1. */ ++ struct dentry **chain = NULL; /* chain of dentries to reval */ ++ int chain_len = 0; ++ struct dentry *dtmp; ++ int sbgen, dgen, i; ++ int saved_bstart, saved_bend, bindex; ++ ++ /* find length of chain needed to revalidate */ ++ /* XXX: should I grab some global (dcache?) lock? */ ++ chain_len = 0; ++ sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation); ++ dtmp = dentry->d_parent; ++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation); ++ while (sbgen != dgen) { ++ /* The root entry should always be valid */ ++ BUG_ON(IS_ROOT(dtmp)); ++ chain_len++; ++ dtmp = dtmp->d_parent; ++ dgen = atomic_read(&UNIONFS_D(dtmp)->generation); ++ } ++ if (chain_len == 0) ++ goto out_this; /* shortcut if parents are OK */ ++ ++ /* ++ * Allocate array of dentries to reval. We could use linked lists, ++ * but the number of entries we need to alloc here is often small, ++ * and short lived, so locality will be better. ++ */ ++ chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL); ++ if (!chain) { ++ printk("unionfs: no more memory in %s\n", __FUNCTION__); ++ goto out; ++ } ++ ++ /* ++ * lock all dentries in chain, in child to parent order. ++ * if failed, then sleep for a little, then retry. 
++ */
++	dtmp = dentry->d_parent;
++	for (i = chain_len - 1; i >= 0; i--) {
++		chain[i] = dget(dtmp);
++		dtmp = dtmp->d_parent;
++	}
++
++	/*
++	 * call __unionfs_d_revalidate() on each dentry, but in parent to
++	 * child order.
++	 */
++	for (i = 0; i < chain_len; i++) {
++		unionfs_lock_dentry(chain[i]);
++		saved_bstart = dbstart(chain[i]);
++		saved_bend = dbend(chain[i]);
++		sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++		dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
++
++		valid = __unionfs_d_revalidate_one(chain[i], nd);
++		/* XXX: is this the correct mntput condition?! */
++		if (valid && chain_len > 0 &&
++		    sbgen != dgen && chain[i]->d_inode &&
++		    S_ISDIR(chain[i]->d_inode->i_mode)) {
++			for (bindex = saved_bstart; bindex <= saved_bend;
++			     bindex++)
++				unionfs_mntput(chain[i], bindex);
++		}
++		unionfs_unlock_dentry(chain[i]);
++
++		if (!valid)
++			goto out_free;
++	}
++
++
++out_this:
++	/* finally, lock this dentry and revalidate it */
++	verify_locked(dentry);
++	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++	valid = __unionfs_d_revalidate_one(dentry, nd);
++
++	/*
++	 * If __unionfs_d_revalidate_one() succeeded above, then it will
++	 * have incremented the refcnt of the mnts, but also the branch
++	 * indices of the dentry will have been updated (to take into
++	 * account any branch insertions/deletions).  So the current
++	 * dbstart/dbend match the current, and new, indices of the mnts
++	 * which __unionfs_d_revalidate_one has incremented.  Note: the "if"
++	 * test below does not depend on whether chain_len was 0 or greater.
++	 */
++	if (valid && sbgen != dgen)
++		for (bindex = dbstart(dentry);
++		     bindex <= dbend(dentry);
++		     bindex++)
++			unionfs_mntput(dentry, bindex);
++
++out_free:
++	/* unlock/dput all dentries in chain and return status */
++	if (chain_len > 0) {
++		for (i = 0; i < chain_len; i++)
++			dput(chain[i]);
++		kfree(chain);
++	}
++
++out:
++	return valid;
++}
++
++static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
++{
++	int err;
++
++	unionfs_read_lock(dentry->d_sb);
++
++	unionfs_lock_dentry(dentry);
++	err = __unionfs_d_revalidate_chain(dentry, nd);
++	unionfs_unlock_dentry(dentry);
++
++	unionfs_read_unlock(dentry->d_sb);
++
++	return err;
++}
++
++/*
++ * At this point no one can reference this dentry, so we don't have to be
++ * careful about concurrent access.
++ */
++static void unionfs_d_release(struct dentry *dentry)
++{
++	int bindex, bstart, bend;
++
++	unionfs_read_lock(dentry->d_sb);
++
++	/* this could be a negative dentry, so check first */
++	if (!UNIONFS_D(dentry)) {
++		printk(KERN_DEBUG "unionfs: dentry without private data: %.*s",
++		       dentry->d_name.len, dentry->d_name.name);
++		goto out;
++	} else if (dbstart(dentry) < 0) {
++		/* this is due to a failed lookup */
++		printk(KERN_DEBUG "unionfs: dentry without hidden "
++		       "dentries: %.*s",
++		       dentry->d_name.len, dentry->d_name.name);
++		goto out_free;
++	}
++
++	/* Release all the hidden dentries */
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		dput(unionfs_lower_dentry_idx(dentry, bindex));
++		unionfs_mntput(dentry, bindex);
++
++		unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++		unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
++	}
++	/* free private data (unionfs_dentry_info) here */
++	kfree(UNIONFS_D(dentry)->lower_paths);
++	UNIONFS_D(dentry)->lower_paths = NULL;
++
++out_free:
++	/* No need to unlock it, because it has disappeared.
++ */
++	free_dentry_private_data(dentry);
++
++out:
++	unionfs_read_unlock(dentry->d_sb);
++	return;
++}
++
++struct dentry_operations unionfs_dops = {
++	.d_revalidate	= unionfs_d_revalidate,
++	.d_release	= unionfs_d_release,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/dirfops.c linux-2.6.22-591/fs/unionfs/dirfops.c
+--- linux-2.6.22-570/fs/unionfs/dirfops.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/fs/unionfs/dirfops.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,276 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005 Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* Make sure our rdstate is playing by the rules. */
++static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
++{
++	BUG_ON(rdstate->offset >= DIREOF);
++	BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
++}
++
++struct unionfs_getdents_callback {
++	struct unionfs_dir_state *rdstate;
++	void *dirent;
++	int entries_written;
++	int filldir_called;
++	int filldir_error;
++	filldir_t filldir;
++	struct super_block *sb;
++};
++
++/* based on generic filldir in fs/readdir.c */
++static int unionfs_filldir(void *dirent, const char *name, int namelen,
++			   loff_t offset, u64 ino, unsigned int d_type)
++{
++	struct unionfs_getdents_callback *buf = dirent;
++	struct filldir_node *found = NULL;
++	int err = 0;
++	int is_wh_entry = 0;
++
++	buf->filldir_called++;
++
++	if ((namelen > UNIONFS_WHLEN) &&
++	    !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
++		name += UNIONFS_WHLEN;
++		namelen -= UNIONFS_WHLEN;
++		is_wh_entry = 1;
++	}
++
++	found = find_filldir_node(buf->rdstate, name, namelen);
++
++	if (found)
++		goto out;
++
++	/* if 'name' isn't a whiteout, filldir it. */
++	if (!is_wh_entry) {
++		off_t pos = rdstate2offset(buf->rdstate);
++		u64 unionfs_ino = ino;
++
++		if (!err) {
++			err = buf->filldir(buf->dirent, name, namelen, pos,
++					   unionfs_ino, d_type);
++			buf->rdstate->offset++;
++			verify_rdstate_offset(buf->rdstate);
++		}
++	}
++	/*
++	 * If we did fill it, stuff it in our hash, otherwise return an
++	 * error.
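++	 * The hash (see add_filldir_node() below) is what suppresses
++	 * duplicate names appearing in more than one branch.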
++ */ ++ if (err) { ++ buf->filldir_error = err; ++ goto out; ++ } ++ buf->entries_written++; ++ if ((err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, is_wh_entry))) ++ buf->filldir_error = err; ++ ++out: ++ return err; ++} ++ ++static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ int err = 0; ++ struct file *hidden_file = NULL; ++ struct inode *inode = NULL; ++ struct unionfs_getdents_callback buf; ++ struct unionfs_dir_state *uds; ++ int bend; ++ loff_t offset; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 0))) ++ goto out; ++ ++ inode = file->f_dentry->d_inode; ++ ++ uds = UNIONFS_F(file)->rdstate; ++ if (!uds) { ++ if (file->f_pos == DIREOF) { ++ goto out; ++ } else if (file->f_pos > 0) { ++ uds = find_rdstate(inode, file->f_pos); ++ if (!uds) { ++ err = -ESTALE; ++ goto out; ++ } ++ UNIONFS_F(file)->rdstate = uds; ++ } else { ++ init_rdstate(file); ++ uds = UNIONFS_F(file)->rdstate; ++ } ++ } ++ bend = fbend(file); ++ ++ while (uds->bindex <= bend) { ++ hidden_file = unionfs_lower_file_idx(file, uds->bindex); ++ if (!hidden_file) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ continue; ++ } ++ ++ /* prepare callback buffer */ ++ buf.filldir_called = 0; ++ buf.filldir_error = 0; ++ buf.entries_written = 0; ++ buf.dirent = dirent; ++ buf.filldir = filldir; ++ buf.rdstate = uds; ++ buf.sb = inode->i_sb; ++ ++ /* Read starting from where we last left off. */ ++ offset = vfs_llseek(hidden_file, uds->dirpos, SEEK_SET); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ err = vfs_readdir(hidden_file, unionfs_filldir, &buf); ++ ++ /* Save the position for when we continue. */ ++ offset = vfs_llseek(hidden_file, 0, SEEK_CUR); ++ if (offset < 0) { ++ err = offset; ++ goto out; ++ } ++ uds->dirpos = offset; ++ ++ /* Copy the atime. */ ++ fsstack_copy_attr_atime(inode, hidden_file->f_dentry->d_inode); ++ ++ if (err < 0) ++ goto out; ++ ++ if (buf.filldir_error) ++ break; ++ ++ if (!buf.entries_written) { ++ uds->bindex++; ++ uds->dirpos = 0; ++ } ++ } ++ ++ if (!buf.filldir_error && uds->bindex >= bend) { ++ /* Save the number of hash entries for next time. */ ++ UNIONFS_I(inode)->hashsize = uds->hashentries; ++ free_rdstate(uds); ++ UNIONFS_F(file)->rdstate = NULL; ++ file->f_pos = DIREOF; ++ } else ++ file->f_pos = rdstate2offset(uds); ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++/* ++ * This is not meant to be a generic repositioning function. If you do ++ * things that aren't supported, then we return EINVAL. ++ * ++ * What is allowed: ++ * (1) seeking to the same position that you are currently at ++ * This really has no effect, but returns where you are. ++ * (2) seeking to the beginning of the file ++ * This throws out all state, and lets you begin again. ++ */ ++static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin) ++{ ++ struct unionfs_dir_state *rdstate; ++ loff_t err; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 0))) ++ goto out; ++ ++ rdstate = UNIONFS_F(file)->rdstate; ++ ++ /* ++ * we let users seek to their current position, but not anywhere ++ * else. 
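++	 * A directory offset here is a cookie derived from our rdstate (see
++	 * rdstate2offset()), so only offsets we previously handed out are
++	 * meaningful.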
++ */ ++ if (!offset) { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ free_rdstate(rdstate); ++ UNIONFS_F(file)->rdstate = NULL; ++ } ++ init_rdstate(file); ++ err = 0; ++ break; ++ case SEEK_CUR: ++ err = file->f_pos; ++ break; ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } else { ++ switch (origin) { ++ case SEEK_SET: ++ if (rdstate) { ++ if (offset == rdstate2offset(rdstate)) ++ err = offset; ++ else if (file->f_pos == DIREOF) ++ err = DIREOF; ++ else ++ err = -EINVAL; ++ } else { ++ rdstate = find_rdstate(file->f_dentry->d_inode, ++ offset); ++ if (rdstate) { ++ UNIONFS_F(file)->rdstate = rdstate; ++ err = rdstate->offset; ++ } else ++ err = -EINVAL; ++ } ++ break; ++ case SEEK_CUR: ++ case SEEK_END: ++ /* Unsupported, because we would break everything. */ ++ err = -EINVAL; ++ break; ++ } ++ } ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++/* ++ * Trimmed directory options, we shouldn't pass everything down since ++ * we don't want to operate on partial directories. ++ */ ++struct file_operations unionfs_dir_fops = { ++ .llseek = unionfs_dir_llseek, ++ .read = generic_read_dir, ++ .readdir = unionfs_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++ .open = unionfs_open, ++ .release = unionfs_file_release, ++ .flush = unionfs_flush, ++}; +diff -Nurb linux-2.6.22-570/fs/unionfs/dirhelper.c linux-2.6.22-591/fs/unionfs/dirhelper.c +--- linux-2.6.22-570/fs/unionfs/dirhelper.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/dirhelper.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,273 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Delete all of the whiteouts in a given directory for rmdir. ++ * ++ * hidden directory inode should be locked ++ */ ++int do_delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err = 0; ++ struct dentry *hidden_dir_dentry = NULL; ++ struct dentry *hidden_dentry; ++ char *name = NULL, *p; ++ struct inode *hidden_dir; ++ ++ int i; ++ struct list_head *pos; ++ struct filldir_node *cursor; ++ ++ /* Find out hidden parent dentry */ ++ hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode)); ++ hidden_dir = hidden_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(hidden_dir->i_mode)); ++ ++ err = -ENOMEM; ++ name = __getname(); ++ if (!name) ++ goto out; ++ strcpy(name, UNIONFS_WHPFX); ++ p = name + UNIONFS_WHLEN; ++ ++ err = 0; ++ for (i = 0; !err && i < namelist->size; i++) { ++ list_for_each(pos, &namelist->list[i]) { ++ cursor = ++ list_entry(pos, struct filldir_node, ++ file_list); ++ /* Only operate on whiteouts in this branch. 
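++			 * The namelist carries entries collected from all
++			 * branches.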
*/ ++ if (cursor->bindex != bindex) ++ continue; ++ if (!cursor->whiteout) ++ continue; ++ ++ strcpy(p, cursor->name); ++ hidden_dentry = ++ lookup_one_len(name, hidden_dir_dentry, ++ cursor->namelen + ++ UNIONFS_WHLEN); ++ if (IS_ERR(hidden_dentry)) { ++ err = PTR_ERR(hidden_dentry); ++ break; ++ } ++ if (hidden_dentry->d_inode) ++ err = vfs_unlink(hidden_dir, hidden_dentry); ++ dput(hidden_dentry); ++ if (err) ++ break; ++ } ++ } ++ ++ __putname(name); ++ ++ /* After all of the removals, we should copy the attributes once. */ ++ fsstack_copy_attr_times(dentry->d_inode, hidden_dir_dentry->d_inode); ++ ++out: ++ return err; ++} ++ ++/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */ ++int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct super_block *sb; ++ struct dentry *hidden_dir_dentry; ++ struct inode *hidden_dir; ++ ++ struct sioq_args args; ++ ++ sb = dentry->d_sb; ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ BUG_ON(bindex < dbstart(dentry)); ++ BUG_ON(bindex > dbend(dentry)); ++ err = is_robranch_super(sb, bindex); ++ if (err) ++ goto out; ++ ++ hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode)); ++ hidden_dir = hidden_dir_dentry->d_inode; ++ BUG_ON(!S_ISDIR(hidden_dir->i_mode)); ++ ++ mutex_lock(&hidden_dir->i_mutex); ++ if (!permission(hidden_dir, MAY_WRITE | MAY_EXEC, NULL)) ++ err = do_delete_whiteouts(dentry, bindex, namelist); ++ else { ++ args.deletewh.namelist = namelist; ++ args.deletewh.dentry = dentry; ++ args.deletewh.bindex = bindex; ++ run_sioq(__delete_whiteouts, &args); ++ err = args.err; ++ } ++ mutex_unlock(&hidden_dir->i_mutex); ++ ++out: ++ return err; ++} ++ ++#define RD_NONE 0 ++#define RD_CHECK_EMPTY 1 ++/* The callback structure for check_empty. */ ++struct unionfs_rdutil_callback { ++ int err; ++ int filldir_called; ++ struct unionfs_dir_state *rdstate; ++ int mode; ++}; ++ ++/* This filldir function makes sure only whiteouts exist within a directory. */ ++static int readdir_util_callback(void *dirent, const char *name, int namelen, ++ loff_t offset, u64 ino, unsigned int d_type) ++{ ++ int err = 0; ++ struct unionfs_rdutil_callback *buf = dirent; ++ int whiteout = 0; ++ struct filldir_node *found; ++ ++ buf->filldir_called = 1; ++ ++ if (name[0] == '.' && (namelen == 1 || ++ (name[1] == '.' && namelen == 2))) ++ goto out; ++ ++ if (namelen > UNIONFS_WHLEN && ++ !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) { ++ namelen -= UNIONFS_WHLEN; ++ name += UNIONFS_WHLEN; ++ whiteout = 1; ++ } ++ ++ found = find_filldir_node(buf->rdstate, name, namelen); ++ /* If it was found in the table there was a previous whiteout. */ ++ if (found) ++ goto out; ++ ++ /* ++ * if it wasn't found and isn't a whiteout, the directory isn't ++ * empty. ++ */ ++ err = -ENOTEMPTY; ++ if ((buf->mode == RD_CHECK_EMPTY) && !whiteout) ++ goto out; ++ ++ err = add_filldir_node(buf->rdstate, name, namelen, ++ buf->rdstate->bindex, whiteout); ++ ++out: ++ buf->err = err; ++ return err; ++} ++ ++/* Is a directory logically empty? 
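++ * That is, does it contain nothing but whiteouts (and "." and "..")?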
*/ ++int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL; ++ struct super_block *sb; ++ struct file *hidden_file; ++ struct unionfs_rdutil_callback *buf = NULL; ++ int bindex, bstart, bend, bopaque; ++ ++ sb = dentry->d_sb; ++ ++ ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode)); ++ ++ if ((err = unionfs_partial_lookup(dentry))) ++ goto out; ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ bopaque = dbopaque(dentry); ++ if (0 <= bopaque && bopaque < bend) ++ bend = bopaque; ++ ++ buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL); ++ if (!buf) { ++ err = -ENOMEM; ++ goto out; ++ } ++ buf->err = 0; ++ buf->mode = RD_CHECK_EMPTY; ++ buf->rdstate = alloc_rdstate(dentry->d_inode, bstart); ++ if (!buf->rdstate) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* Process the hidden directories with rdutil_callback as a filldir. */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ if (!hidden_dentry->d_inode) ++ continue; ++ if (!S_ISDIR(hidden_dentry->d_inode->i_mode)) ++ continue; ++ ++ dget(hidden_dentry); ++ unionfs_mntget(dentry, bindex); ++ branchget(sb, bindex); ++ hidden_file = ++ dentry_open(hidden_dentry, ++ unionfs_lower_mnt_idx(dentry, bindex), ++ O_RDONLY); ++ if (IS_ERR(hidden_file)) { ++ err = PTR_ERR(hidden_file); ++ dput(hidden_dentry); ++ branchput(sb, bindex); ++ goto out; ++ } ++ ++ do { ++ buf->filldir_called = 0; ++ buf->rdstate->bindex = bindex; ++ err = vfs_readdir(hidden_file, ++ readdir_util_callback, buf); ++ if (buf->err) ++ err = buf->err; ++ } while ((err >= 0) && buf->filldir_called); ++ ++ /* fput calls dput for hidden_dentry */ ++ fput(hidden_file); ++ branchput(sb, bindex); ++ ++ if (err < 0) ++ goto out; ++ } ++ ++out: ++ if (buf) { ++ if (namelist && !err) ++ *namelist = buf->rdstate; ++ else if (buf->rdstate) ++ free_rdstate(buf->rdstate); ++ kfree(buf); ++ } ++ ++ ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/fanout.h linux-2.6.22-591/fs/unionfs/fanout.h +--- linux-2.6.22-570/fs/unionfs/fanout.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/fanout.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,308 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _FANOUT_H_ ++#define _FANOUT_H_ ++ ++/* ++ * Inode to private data ++ * ++ * Since we use containers and the struct inode is _inside_ the ++ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL ++ * inode pointer), return a valid non-NULL pointer. 
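++ * As a sketch: with
++ *	struct unionfs_inode_info { ...; struct inode vfs_inode; };
++ * container_of() merely subtracts offsetof(struct unionfs_inode_info,
++ * vfs_inode) from the inode pointer, so a non-NULL inode always yields a
++ * non-NULL result.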
++ */ ++static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode) ++{ ++ return container_of(inode, struct unionfs_inode_info, vfs_inode); ++} ++ ++#define ibstart(ino) (UNIONFS_I(ino)->bstart) ++#define ibend(ino) (UNIONFS_I(ino)->bend) ++ ++/* Superblock to private data */ ++#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info) ++#define sbstart(sb) 0 ++#define sbend(sb) (UNIONFS_SB(sb)->bend) ++#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1) ++#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id) ++ ++/* File to private Data */ ++#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data)) ++#define fbstart(file) (UNIONFS_F(file)->bstart) ++#define fbend(file) (UNIONFS_F(file)->bend) ++ ++/* macros to manipulate branch IDs in stored in our superblock */ ++static inline int branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].branch_id; ++} ++ ++static inline void set_branch_id(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].branch_id = val; ++} ++ ++static inline void new_branch_id(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id); ++} ++ ++/* ++ * Find new index of matching branch with an existing superblock a a known ++ * (possibly old) id. This is needed because branches could have been ++ * added/deleted causing the branchs of any open files to shift. ++ * ++ * @sb: the new superblock which may have new/different branch IDs ++ * @id: the old/existing id we're looking for ++ * Returns index of newly found branch (0 or greater), -1 otherwise. ++ */ ++static inline int branch_id_to_idx(struct super_block *sb, int id) ++{ ++ int i; ++ for (i = 0; i < sbmax(sb); i++) { ++ if (branch_id(sb, i) == id) ++ return i; ++ } ++ /* ++ * XXX: maybe we should BUG_ON if not found new branch index? ++ * (really that should never happen). ++ */ ++ printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id); ++ return -1; ++} ++ ++/* File to lower file. */ ++static inline struct file *unionfs_lower_file(const struct file *f) ++{ ++ BUG_ON(!f); ++ return UNIONFS_F(f)->lower_files[fbstart(f)]; ++} ++ ++static inline struct file *unionfs_lower_file_idx(const struct file *f, ++ int index) ++{ ++ BUG_ON(!f || index < 0); ++ return UNIONFS_F(f)->lower_files[index]; ++} ++ ++static inline void unionfs_set_lower_file_idx(struct file *f, int index, ++ struct file *val) ++{ ++ BUG_ON(!f || index < 0); ++ UNIONFS_F(f)->lower_files[index] = val; ++ /* save branch ID (may be redundant?) */ ++ UNIONFS_F(f)->saved_branch_ids[index] = ++ branch_id((f)->f_dentry->d_sb, index); ++} ++ ++static inline void unionfs_set_lower_file(struct file *f, struct file *val) ++{ ++ BUG_ON(!f); ++ unionfs_set_lower_file_idx((f), fbstart(f), (val)); ++} ++ ++/* Inode to lower inode. 
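++ * The no-index forms below default to the start branch, e.g.
++ *
++ *	struct inode *lower = unionfs_lower_inode(inode);
++ *	/* equivalent to unionfs_lower_inode_idx(inode, ibstart(inode)) */
++ *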
*/ ++static inline struct inode *unionfs_lower_inode(const struct inode *i) ++{ ++ BUG_ON(!i); ++ return UNIONFS_I(i)->lower_inodes[ibstart(i)]; ++} ++ ++static inline struct inode *unionfs_lower_inode_idx(const struct inode *i, ++ int index) ++{ ++ BUG_ON(!i || index < 0); ++ return UNIONFS_I(i)->lower_inodes[index]; ++} ++ ++static inline void unionfs_set_lower_inode_idx(struct inode *i, int index, ++ struct inode *val) ++{ ++ BUG_ON(!i || index < 0); ++ UNIONFS_I(i)->lower_inodes[index] = val; ++} ++ ++static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val) ++{ ++ BUG_ON(!i); ++ UNIONFS_I(i)->lower_inodes[ibstart(i)] = val; ++} ++ ++/* Superblock to lower superblock. */ ++static inline struct super_block *unionfs_lower_super( ++ const struct super_block *sb) ++{ ++ BUG_ON(!sb); ++ return UNIONFS_SB(sb)->data[sbstart(sb)].sb; ++} ++ ++static inline struct super_block *unionfs_lower_super_idx( ++ const struct super_block *sb, ++ int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return UNIONFS_SB(sb)->data[index].sb; ++} ++ ++static inline void unionfs_set_lower_super_idx(struct super_block *sb, ++ int index, ++ struct super_block *val) ++{ ++ BUG_ON(!sb || index < 0); ++ UNIONFS_SB(sb)->data[index].sb = val; ++} ++ ++static inline void unionfs_set_lower_super(struct super_block *sb, ++ struct super_block *val) ++{ ++ BUG_ON(!sb); ++ UNIONFS_SB(sb)->data[sbstart(sb)].sb = val; ++} ++ ++/* Branch count macros. */ ++static inline int branch_count(const struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ return atomic_read(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void set_branch_count(struct super_block *sb, int index, int val) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val); ++} ++ ++static inline void branchget(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_inc(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++static inline void branchput(struct super_block *sb, int index) ++{ ++ BUG_ON(!sb || index < 0); ++ atomic_dec(&UNIONFS_SB(sb)->data[index].open_files); ++} ++ ++/* Dentry macros */ ++static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return dent->d_fsdata; ++} ++ ++static inline int dbstart(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return UNIONFS_D(dent)->bstart; ++} ++ ++static inline void set_dbstart(struct dentry *dent, int val) ++{ ++ BUG_ON(!dent); ++ UNIONFS_D(dent)->bstart = val; ++} ++ ++static inline int dbend(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return UNIONFS_D(dent)->bend; ++} ++ ++static inline void set_dbend(struct dentry *dent, int val) ++{ ++ BUG_ON(!dent); ++ UNIONFS_D(dent)->bend = val; ++} ++ ++static inline int dbopaque(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return UNIONFS_D(dent)->bopaque; ++} ++ ++static inline void set_dbopaque(struct dentry *dent, int val) ++{ ++ BUG_ON(!dent); ++ UNIONFS_D(dent)->bopaque = val; ++} ++ ++static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index, ++ struct dentry *val) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].dentry = val; ++} ++ ++static inline struct dentry *unionfs_lower_dentry_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].dentry; ++} ++ ++static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return 
unionfs_lower_dentry_idx(dent, dbstart(dent)); ++} ++ ++static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index, ++ struct vfsmount *mnt) ++{ ++ BUG_ON(!dent || index < 0); ++ UNIONFS_D(dent)->lower_paths[index].mnt = mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt_idx( ++ const struct dentry *dent, ++ int index) ++{ ++ BUG_ON(!dent || index < 0); ++ return UNIONFS_D(dent)->lower_paths[index].mnt; ++} ++ ++static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent) ++{ ++ BUG_ON(!dent); ++ return unionfs_lower_mnt_idx(dent, dbstart(dent)); ++} ++ ++/* Macros for locking a dentry. */ ++static inline void unionfs_lock_dentry(struct dentry *d) ++{ ++ BUG_ON(!d); ++ mutex_lock(&UNIONFS_D(d)->lock); ++} ++ ++static inline void unionfs_unlock_dentry(struct dentry *d) ++{ ++ BUG_ON(!d); ++ mutex_unlock(&UNIONFS_D(d)->lock); ++} ++ ++static inline void verify_locked(struct dentry *d) ++{ ++ BUG_ON(!d); ++ BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock)); ++} ++ ++#endif /* _FANOUT_H */ +diff -Nurb linux-2.6.22-570/fs/unionfs/file.c linux-2.6.22-591/fs/unionfs/file.c +--- linux-2.6.22-570/fs/unionfs/file.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/file.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include "union.h" ++ ++/******************* ++ * File Operations * ++ *******************/ ++ ++static ssize_t unionfs_read(struct file *file, char __user *buf, ++ size_t count, loff_t *ppos) ++{ ++ int err; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 0))) ++ goto out; ++ ++ err = do_sync_read(file, buf, count, ppos); ++ ++ if (err >= 0) ++ touch_atime(unionfs_lower_mnt(file->f_path.dentry), ++ unionfs_lower_dentry(file->f_path.dentry)); ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov, ++ unsigned long nr_segs, loff_t pos) ++{ ++ int err = 0; ++ struct file *file = iocb->ki_filp; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 0))) ++ goto out; ++ ++ err = generic_file_aio_read(iocb, iov, nr_segs, pos); ++ ++ if (err == -EIOCBQUEUED) ++ err = wait_on_sync_kiocb(iocb); ++ ++ if (err >= 0) ++ touch_atime(unionfs_lower_mnt(file->f_path.dentry), ++ unionfs_lower_dentry(file->f_path.dentry)); ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++static ssize_t unionfs_write(struct file * file, const char __user * buf, ++ size_t count, loff_t *ppos) ++{ ++ int err = 0; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ goto out; ++ ++ err = do_sync_write(file, buf, count, ppos); ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_file_readdir(struct file *file, void *dirent, ++ filldir_t filldir) ++{ ++ return -ENOTDIR; ++} ++ ++static int unionfs_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ int err = 0; ++ int willwrite; ++ struct file *lower_file; ++ ++ unionfs_read_lock(file->f_path.dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ goto out; ++ ++ /* This might be deferred to mmap's writepage */ ++ willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags); ++ if ((err = unionfs_file_revalidate(file, willwrite))) ++ goto out; ++ ++ /* ++ * File systems which do not implement ->writepage may use ++ * generic_file_readonly_mmap as their ->mmap op. If you call ++ * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL. ++ * But we cannot call the lower ->mmap op, so we can't tell that ++ * writeable mappings won't work. Therefore, our only choice is to ++ * check if the lower file system supports the ->writepage, and if ++ * not, return EINVAL (the same error that ++ * generic_file_readonly_mmap returns in that case). 
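++ * Note that willwrite above requires VM_SHARED and VM_WRITE together;
++ * a MAP_PRIVATE mapping copies-on-write and never reaches ->writepage.
++ * The bitwise-OR test used above is a compact equivalent of:
++ *
++ *	willwrite = (vma->vm_flags & (VM_SHARED | VM_WRITE))
++ *			== (VM_SHARED | VM_WRITE);
++ *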
++ */ ++ lower_file = unionfs_lower_file(file); ++ if (willwrite && !lower_file->f_mapping->a_ops->writepage) { ++ err = -EINVAL; ++ printk("unionfs: branch %d file system does not support " ++ "writeable mmap\n", fbstart(file)); ++ } else { ++ err = generic_file_mmap(file, vma); ++ if (err) ++ printk("unionfs: generic_file_mmap failed %d\n", err); ++ } ++ ++out: ++ unionfs_read_unlock(file->f_path.dentry->d_sb); ++ return err; ++} ++ ++struct file_operations unionfs_main_fops = { ++ .llseek = generic_file_llseek, ++ .read = unionfs_read, ++ .aio_read = unionfs_aio_read, ++ .write = unionfs_write, ++ .aio_write = generic_file_aio_write, ++ .readdir = unionfs_file_readdir, ++ .unlocked_ioctl = unionfs_ioctl, ++ .mmap = unionfs_mmap, ++ .open = unionfs_open, ++ .flush = unionfs_flush, ++ .release = unionfs_file_release, ++ .fsync = file_fsync, ++ .sendfile = generic_file_sendfile, ++}; +diff -Nurb linux-2.6.22-570/fs/unionfs/inode.c linux-2.6.22-591/fs/unionfs/inode.c +--- linux-2.6.22-570/fs/unionfs/inode.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/inode.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,1138 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++static int unionfs_create(struct inode *parent, struct dentry *dentry, ++ int mode, struct nameidata *nd) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL; ++ struct dentry *wh_dentry = NULL; ++ struct dentry *new_hidden_dentry; ++ struct dentry *hidden_parent_dentry = NULL; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ int valid = 0; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ unionfs_lock_dentry(dentry->d_parent); ++ valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd); ++ unionfs_unlock_dentry(dentry->d_parent); ++ if (!valid) { ++ err = -ESTALE; /* same as what real_lookup does */ ++ goto out; ++ } ++ valid = __unionfs_d_revalidate_chain(dentry, nd); ++ /* ++ * It's only a bug if this dentry was not negative and couldn't be ++ * revalidated (shouldn't happen). ++ */ ++ BUG_ON(!valid && dentry->d_inode); ++ ++ /* We start out in the leftmost branch. */ ++ bstart = dbstart(dentry); ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ /* ++ * check if whiteout exists in this branch, i.e. lookup .wh.foo ++ * first. ++ */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ wh_dentry = lookup_one_len(name, hidden_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(wh_dentry)) { ++ err = PTR_ERR(wh_dentry); ++ wh_dentry = NULL; ++ goto out; ++ } ++ ++ if (wh_dentry->d_inode) { ++ /* ++ * .wh.foo has been found. ++ * First truncate it and then rename it to foo (hence having ++ * the same overall effect as a normal create. 
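++ * The rename is done within a single lower parent directory, which is
++ * why lock_rename() below is called with the same dentry for both
++ * arguments; on success the unionfs dentry is simply re-pointed at the
++ * renamed lower dentry.)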
++ */ ++ struct dentry *hidden_dir_dentry; ++ struct iattr newattrs; ++ ++ mutex_lock(&wh_dentry->d_inode->i_mutex); ++ newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME ++ | ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE ++ | ATTR_KILL_SUID | ATTR_KILL_SGID; ++ ++ newattrs.ia_mode = mode & ~current->fs->umask; ++ newattrs.ia_uid = current->fsuid; ++ newattrs.ia_gid = current->fsgid; ++ ++ if (wh_dentry->d_inode->i_size != 0) { ++ newattrs.ia_valid |= ATTR_SIZE; ++ newattrs.ia_size = 0; ++ } ++ ++ err = notify_change(wh_dentry, &newattrs); ++ ++ mutex_unlock(&wh_dentry->d_inode->i_mutex); ++ ++ if (err) ++ printk(KERN_WARNING "unionfs: %s:%d: notify_change " ++ "failed: %d, ignoring..\n", ++ __FILE__, __LINE__, err); ++ ++ new_hidden_dentry = unionfs_lower_dentry(dentry); ++ dget(new_hidden_dentry); ++ ++ hidden_dir_dentry = dget_parent(wh_dentry); ++ lock_rename(hidden_dir_dentry, hidden_dir_dentry); ++ ++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) { ++ err = vfs_rename(hidden_dir_dentry->d_inode, ++ wh_dentry, ++ hidden_dir_dentry->d_inode, ++ new_hidden_dentry); ++ } ++ if (!err) { ++ fsstack_copy_attr_times(parent, ++ new_hidden_dentry->d_parent-> ++ d_inode); ++ fsstack_copy_inode_size(parent, ++ new_hidden_dentry->d_parent-> ++ d_inode); ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ ++ unlock_rename(hidden_dir_dentry, hidden_dir_dentry); ++ dput(hidden_dir_dentry); ++ ++ dput(new_hidden_dentry); ++ ++ if (err) { ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ /* ++ * We were not able to create the file in this ++ * branch, so, we try to create it in one branch to ++ * left ++ */ ++ bstart--; ++ } else { ++ /* ++ * reset the unionfs dentry to point to the .wh.foo ++ * entry. ++ */ ++ ++ /* Discard any old reference. */ ++ dput(unionfs_lower_dentry(dentry)); ++ ++ /* Trade one reference to another. */ ++ unionfs_set_lower_dentry_idx(dentry, bstart, ++ wh_dentry); ++ wh_dentry = NULL; ++ ++ err = unionfs_interpose(dentry, parent->i_sb, 0); ++ goto out; ++ } ++ } ++ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) { ++ /* ++ * if hidden_dentry is NULL, create the entire ++ * dentry directory structure in branch 'bindex'. ++ * hidden_dentry will NOT be null when bindex == bstart ++ * because lookup passed as a negative unionfs dentry ++ * pointing to a lone negative underlying dentry. ++ */ ++ hidden_dentry = create_parents(parent, dentry, bindex); ++ if (!hidden_dentry || IS_ERR(hidden_dentry)) { ++ if (IS_ERR(hidden_dentry)) ++ err = PTR_ERR(hidden_dentry); ++ continue; ++ } ++ } ++ ++ hidden_parent_dentry = lock_parent(hidden_dentry); ++ if (IS_ERR(hidden_parent_dentry)) { ++ err = PTR_ERR(hidden_parent_dentry); ++ goto out; ++ } ++ /* We shouldn't create things in a read-only branch. */ ++ if (!(err = is_robranch_super(dentry->d_sb, bindex))) ++ err = vfs_create(hidden_parent_dentry->d_inode, ++ hidden_dentry, mode, nd); ++ ++ if (err || !hidden_dentry->d_inode) { ++ unlock_dir(hidden_parent_dentry); ++ ++ /* break out of for loop if the error wasn't -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ break; ++ } else { ++ err = unionfs_interpose(dentry, parent->i_sb, 0); ++ if (!err) { ++ fsstack_copy_attr_times(parent, ++ hidden_parent_dentry-> ++ d_inode); ++ fsstack_copy_inode_size(parent, ++ hidden_parent_dentry-> ++ d_inode); ++ /* update no. 
of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ unlock_dir(hidden_parent_dentry); ++ break; ++ } ++ } ++ ++out: ++ dput(wh_dentry); ++ kfree(name); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static struct dentry *unionfs_lookup(struct inode *parent, ++ struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct path path_save; ++ struct dentry *ret; ++ ++ unionfs_read_lock(dentry->d_sb); ++ ++ /* save the dentry & vfsmnt from namei */ ++ if (nd) { ++ path_save.dentry = nd->dentry; ++ path_save.mnt = nd->mnt; ++ } ++ ++ /* The locking is done by unionfs_lookup_backend. */ ++ ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP); ++ ++ /* restore the dentry & vfsmnt in namei */ ++ if (nd) { ++ nd->dentry = path_save.dentry; ++ nd->mnt = path_save.mnt; ++ } ++ ++ unionfs_read_unlock(dentry->d_sb); ++ ++ return ret; ++} ++ ++static int unionfs_link(struct dentry *old_dentry, struct inode *dir, ++ struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *hidden_old_dentry = NULL; ++ struct dentry *hidden_new_dentry = NULL; ++ struct dentry *hidden_dir_dentry = NULL; ++ struct dentry *whiteout_dentry; ++ char *name = NULL; ++ ++ unionfs_read_lock(old_dentry->d_sb); ++ unionfs_double_lock_dentry(new_dentry, old_dentry); ++ ++ if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (new_dentry->d_inode && ++ !__unionfs_d_revalidate_chain(new_dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_new_dentry = unionfs_lower_dentry(new_dentry); ++ ++ /* ++ * check if whiteout exists in the branch of new dentry, i.e. lookup ++ * .wh.foo first. If present, delete it ++ */ ++ name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ whiteout_dentry = lookup_one_len(name, hidden_new_dentry->d_parent, ++ new_dentry->d_name.len + ++ UNIONFS_WHLEN); ++ if (IS_ERR(whiteout_dentry)) { ++ err = PTR_ERR(whiteout_dentry); ++ goto out; ++ } ++ ++ if (!whiteout_dentry->d_inode) { ++ dput(whiteout_dentry); ++ whiteout_dentry = NULL; ++ } else { ++ /* found a .wh.foo entry, unlink it and then call vfs_link() */ ++ hidden_dir_dentry = lock_parent(whiteout_dentry); ++ err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry)); ++ if (!err) ++ err = vfs_unlink(hidden_dir_dentry->d_inode, ++ whiteout_dentry); ++ ++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ unlock_dir(hidden_dir_dentry); ++ hidden_dir_dentry = NULL; ++ dput(whiteout_dentry); ++ if (err) ++ goto out; ++ } ++ ++ if (dbstart(old_dentry) != dbstart(new_dentry)) { ++ hidden_new_dentry = ++ create_parents(dir, new_dentry, dbstart(old_dentry)); ++ err = PTR_ERR(hidden_new_dentry); ++ if (IS_COPYUP_ERR(err)) ++ goto docopyup; ++ if (!hidden_new_dentry || IS_ERR(hidden_new_dentry)) ++ goto out; ++ } ++ hidden_new_dentry = unionfs_lower_dentry(new_dentry); ++ hidden_old_dentry = unionfs_lower_dentry(old_dentry); ++ ++ BUG_ON(dbstart(old_dentry) != dbstart(new_dentry)); ++ hidden_dir_dentry = lock_parent(hidden_new_dentry); ++ if (!(err = is_robranch(old_dentry))) ++ err = vfs_link(hidden_old_dentry, hidden_dir_dentry->d_inode, ++ hidden_new_dentry); ++ unlock_dir(hidden_dir_dentry); ++ ++docopyup: ++ if (IS_COPYUP_ERR(err)) { ++ int old_bstart = dbstart(old_dentry); ++ int bindex; ++ ++ for (bindex = old_bstart - 1; bindex >= 0; bindex--) { ++ err = 
copyup_dentry(old_dentry->d_parent->d_inode, ++ old_dentry, old_bstart, ++ bindex, NULL, ++ old_dentry->d_inode->i_size); ++ if (!err) { ++ hidden_new_dentry = ++ create_parents(dir, new_dentry, ++ bindex); ++ hidden_old_dentry = ++ unionfs_lower_dentry(old_dentry); ++ hidden_dir_dentry = ++ lock_parent(hidden_new_dentry); ++ /* do vfs_link */ ++ err = vfs_link(hidden_old_dentry, ++ hidden_dir_dentry->d_inode, ++ hidden_new_dentry); ++ unlock_dir(hidden_dir_dentry); ++ goto check_link; ++ } ++ } ++ goto out; ++ } ++ ++check_link: ++ if (err || !hidden_new_dentry->d_inode) ++ goto out; ++ ++ /* Its a hard link, so use the same inode */ ++ new_dentry->d_inode = igrab(old_dentry->d_inode); ++ d_instantiate(new_dentry, new_dentry->d_inode); ++ fsstack_copy_attr_all(dir, hidden_new_dentry->d_parent->d_inode, ++ unionfs_get_nlinks); ++ fsstack_copy_inode_size(dir, hidden_new_dentry->d_parent->d_inode); ++ ++ /* propagate number of hard-links */ ++ old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode); ++ ++out: ++ if (!new_dentry->d_inode) ++ d_drop(new_dentry); ++ ++ kfree(name); ++ ++ unionfs_unlock_dentry(new_dentry); ++ unionfs_unlock_dentry(old_dentry); ++ ++ unionfs_read_unlock(old_dentry->d_sb); ++ ++ return err; ++} ++ ++static int unionfs_symlink(struct inode *dir, struct dentry *dentry, ++ const char *symname) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL; ++ struct dentry *whiteout_dentry = NULL; ++ struct dentry *hidden_dir_dentry = NULL; ++ umode_t mode; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (dentry->d_inode && ++ !__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* We start out in the leftmost branch. */ ++ bstart = dbstart(dentry); ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ /* ++ * check if whiteout exists in this branch, i.e. lookup .wh.foo ++ * first. If present, delete it ++ */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ whiteout_dentry = ++ lookup_one_len(name, hidden_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(whiteout_dentry)) { ++ err = PTR_ERR(whiteout_dentry); ++ goto out; ++ } ++ ++ if (!whiteout_dentry->d_inode) { ++ dput(whiteout_dentry); ++ whiteout_dentry = NULL; ++ } else { ++ /* ++ * found a .wh.foo entry, unlink it and then call ++ * vfs_symlink(). ++ */ ++ hidden_dir_dentry = lock_parent(whiteout_dentry); ++ ++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) ++ err = vfs_unlink(hidden_dir_dentry->d_inode, ++ whiteout_dentry); ++ dput(whiteout_dentry); ++ ++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ ++ unlock_dir(hidden_dir_dentry); ++ ++ if (err) { ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ /* ++ * should now try to create symlink in the another ++ * branch. ++ */ ++ bstart--; ++ } ++ } ++ ++ /* ++ * deleted whiteout if it was present, now do a normal vfs_symlink() ++ * with possible recursive directory creation ++ */ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) { ++ /* ++ * if hidden_dentry is NULL, create the entire ++ * dentry directory structure in branch 'bindex'. 
++ * hidden_dentry will NOT be null when bindex == ++ * bstart because lookup passed as a negative ++ * unionfs dentry pointing to a lone negative ++ * underlying dentry ++ */ ++ hidden_dentry = create_parents(dir, dentry, bindex); ++ if (!hidden_dentry || IS_ERR(hidden_dentry)) { ++ if (IS_ERR(hidden_dentry)) ++ err = PTR_ERR(hidden_dentry); ++ ++ printk(KERN_DEBUG "unionfs: hidden dentry " ++ "NULL (or error) for bindex = %d\n", ++ bindex); ++ continue; ++ } ++ } ++ ++ hidden_dir_dentry = lock_parent(hidden_dentry); ++ ++ if (!(err = is_robranch_super(dentry->d_sb, bindex))) { ++ mode = S_IALLUGO; ++ err = ++ vfs_symlink(hidden_dir_dentry->d_inode, ++ hidden_dentry, symname, mode); ++ } ++ unlock_dir(hidden_dir_dentry); ++ ++ if (err || !hidden_dentry->d_inode) { ++ /* ++ * break out of for loop if error returned was NOT ++ * -EROFS. ++ */ ++ if (!IS_COPYUP_ERR(err)) ++ break; ++ } else { ++ err = unionfs_interpose(dentry, dir->i_sb, 0); ++ if (!err) { ++ fsstack_copy_attr_times(dir, ++ hidden_dir_dentry-> ++ d_inode); ++ fsstack_copy_inode_size(dir, ++ hidden_dir_dentry-> ++ d_inode); ++ /* ++ * update number of links on parent ++ * directory. ++ */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ break; ++ } ++ } ++ ++out: ++ if (!dentry->d_inode) ++ d_drop(dentry); ++ ++ kfree(name); ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL; ++ struct dentry *hidden_parent_dentry = NULL; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ int whiteout_unlinked = 0; ++ struct sioq_args args; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (dentry->d_inode && ++ !__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ /* ++ * check if whiteout exists in this branch, i.e. lookup .wh.foo ++ * first. 
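++ * alloc_whname() builds that name by prepending UNIONFS_WHPFX, so a
++ * file "foo" has the whiteout ".wh.foo" and the length grows by
++ * UNIONFS_WHLEN -- which is why lookup_one_len() below is passed
++ * d_name.len + UNIONFS_WHLEN.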
++ */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(whiteout_dentry)) { ++ err = PTR_ERR(whiteout_dentry); ++ goto out; ++ } ++ ++ if (!whiteout_dentry->d_inode) { ++ dput(whiteout_dentry); ++ whiteout_dentry = NULL; ++ } else { ++ hidden_parent_dentry = lock_parent(whiteout_dentry); ++ ++ /* found a.wh.foo entry, remove it then do vfs_mkdir */ ++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) { ++ args.unlink.parent = hidden_parent_dentry->d_inode; ++ args.unlink.dentry = whiteout_dentry; ++ run_sioq(__unionfs_unlink, &args); ++ err = args.err; ++ } ++ dput(whiteout_dentry); ++ ++ unlock_dir(hidden_parent_dentry); ++ ++ if (err) { ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ bstart--; ++ } else ++ whiteout_unlinked = 1; ++ } ++ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ int i; ++ int bend = dbend(dentry); ++ ++ if (is_robranch_super(dentry->d_sb, bindex)) ++ continue; ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) { ++ hidden_dentry = create_parents(parent, dentry, bindex); ++ if (!hidden_dentry || IS_ERR(hidden_dentry)) { ++ printk(KERN_DEBUG "unionfs: hidden dentry " ++ " NULL for bindex = %d\n", bindex); ++ continue; ++ } ++ } ++ ++ hidden_parent_dentry = lock_parent(hidden_dentry); ++ ++ if (IS_ERR(hidden_parent_dentry)) { ++ err = PTR_ERR(hidden_parent_dentry); ++ goto out; ++ } ++ ++ err = vfs_mkdir(hidden_parent_dentry->d_inode, hidden_dentry, ++ mode); ++ ++ unlock_dir(hidden_parent_dentry); ++ ++ /* did the mkdir succeed? */ ++ if (err) ++ break; ++ ++ for (i = bindex + 1; i < bend; i++) { ++ if (unionfs_lower_dentry_idx(dentry, i)) { ++ dput(unionfs_lower_dentry_idx(dentry, i)); ++ unionfs_set_lower_dentry_idx(dentry, i, NULL); ++ } ++ } ++ set_dbend(dentry, bindex); ++ ++ err = unionfs_interpose(dentry, parent->i_sb, 0); ++ if (!err) { ++ fsstack_copy_attr_times(parent, ++ hidden_parent_dentry->d_inode); ++ fsstack_copy_inode_size(parent, ++ hidden_parent_dentry->d_inode); ++ ++ /* update number of links on parent directory */ ++ parent->i_nlink = unionfs_get_nlinks(parent); ++ } ++ ++ err = make_dir_opaque(dentry, dbstart(dentry)); ++ if (err) { ++ printk(KERN_ERR "unionfs: mkdir: error creating " ++ ".wh.__dir_opaque: %d\n", err); ++ goto out; ++ } ++ ++ /* we are done! */ ++ break; ++ } ++ ++out: ++ if (!dentry->d_inode) ++ d_drop(dentry); ++ ++ kfree(name); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode, ++ dev_t dev) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL; ++ struct dentry *hidden_parent_dentry = NULL; ++ int bindex = 0, bstart; ++ char *name = NULL; ++ int whiteout_unlinked = 0; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (dentry->d_inode && ++ !__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ /* ++ * check if whiteout exists in this branch, i.e. lookup .wh.foo ++ * first. 
++ */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(whiteout_dentry)) { ++ err = PTR_ERR(whiteout_dentry); ++ goto out; ++ } ++ ++ if (!whiteout_dentry->d_inode) { ++ dput(whiteout_dentry); ++ whiteout_dentry = NULL; ++ } else { ++ /* found .wh.foo, unlink it */ ++ hidden_parent_dentry = lock_parent(whiteout_dentry); ++ ++ /* found a.wh.foo entry, remove it then do vfs_mkdir */ ++ if (!(err = is_robranch_super(dentry->d_sb, bstart))) ++ err = vfs_unlink(hidden_parent_dentry->d_inode, ++ whiteout_dentry); ++ dput(whiteout_dentry); ++ ++ unlock_dir(hidden_parent_dentry); ++ ++ if (err) { ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ ++ bstart--; ++ } else ++ whiteout_unlinked = 1; ++ } ++ ++ for (bindex = bstart; bindex >= 0; bindex--) { ++ if (is_robranch_super(dentry->d_sb, bindex)) ++ continue; ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) { ++ hidden_dentry = create_parents(dir, dentry, bindex); ++ if (IS_ERR(hidden_dentry)) { ++ printk(KERN_DEBUG "unionfs: failed to create " ++ "parents on %d, err = %ld\n", ++ bindex, PTR_ERR(hidden_dentry)); ++ continue; ++ } ++ } ++ ++ hidden_parent_dentry = lock_parent(hidden_dentry); ++ if (IS_ERR(hidden_parent_dentry)) { ++ err = PTR_ERR(hidden_parent_dentry); ++ goto out; ++ } ++ ++ err = vfs_mknod(hidden_parent_dentry->d_inode, ++ hidden_dentry, mode, dev); ++ ++ if (err) { ++ unlock_dir(hidden_parent_dentry); ++ break; ++ } ++ ++ err = unionfs_interpose(dentry, dir->i_sb, 0); ++ if (!err) { ++ fsstack_copy_attr_times(dir, ++ hidden_parent_dentry->d_inode); ++ fsstack_copy_inode_size(dir, ++ hidden_parent_dentry->d_inode); ++ /* update number of links on parent directory */ ++ dir->i_nlink = unionfs_get_nlinks(dir); ++ } ++ unlock_dir(hidden_parent_dentry); ++ ++ break; ++ } ++ ++out: ++ if (!dentry->d_inode) ++ d_drop(dentry); ++ ++ kfree(name); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_readlink(struct dentry *dentry, char __user *buf, ++ int bufsiz) ++{ ++ int err; ++ struct dentry *hidden_dentry; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ if (!hidden_dentry->d_inode->i_op || ++ !hidden_dentry->d_inode->i_op->readlink) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = hidden_dentry->d_inode->i_op->readlink(hidden_dentry, ++ buf, bufsiz); ++ if (err > 0) ++ fsstack_copy_attr_atime(dentry->d_inode, ++ hidden_dentry->d_inode); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * Check if dentry is valid or not, as per our generation numbers. ++ * @dentry: dentry to check. ++ * Returns 1 (valid) or 0 (invalid/stale). ++ */ ++static inline int is_valid_dentry(struct dentry *dentry) ++{ ++ BUG_ON(!UNIONFS_D(dentry)); ++ BUG_ON(!UNIONFS_SB(dentry->d_sb)); ++ return (atomic_read(&UNIONFS_D(dentry)->generation) == ++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); ++} ++ ++/* We don't lock the dentry here, because readlink does the heavy lifting. 
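++ * Note the set_fs(KERNEL_DS) dance below: the link target is read into
++ * a kernel buffer through the __user-typed ->readlink interface, so the
++ * address-space limit has to be widened around the call:
++ *
++ *	old_fs = get_fs();
++ *	set_fs(KERNEL_DS);
++ *	err = dentry->d_inode->i_op->readlink(dentry,
++ *					      (char __user *)buf, len);
++ *	set_fs(old_fs);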
*/ ++static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ char *buf; ++ int len = PAGE_SIZE, err; ++ mm_segment_t old_fs; ++ ++ /* ++ * FIXME: Really nasty...we can get called from two distinct places: ++ * 1) read_link - locks the dentry ++ * 2) VFS lookup code - does NOT lock the dentry ++ * ++ * The proper thing would be to call dentry revalidate. It however ++ * expects a locked dentry, and we can't cleanly guarantee that. ++ */ ++ BUG_ON(!is_valid_dentry(dentry)); ++ ++ unionfs_read_lock(dentry->d_sb); ++ ++ /* This is freed by the put_link method assuming a successful call. */ ++ buf = kmalloc(len, GFP_KERNEL); ++ if (!buf) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* read the symlink, and then we will follow it */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); ++ set_fs(old_fs); ++ if (err < 0) { ++ kfree(buf); ++ buf = NULL; ++ goto out; ++ } ++ buf[err] = 0; ++ nd_set_link(nd, buf); ++ err = 0; ++ ++out: ++ unionfs_read_unlock(dentry->d_sb); ++ return ERR_PTR(err); ++} ++ ++/* FIXME: We may not have to lock here */ ++static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd, ++ void *cookie) ++{ ++ unionfs_read_lock(dentry->d_sb); ++ kfree(nd_get_link(nd)); ++ unionfs_read_unlock(dentry->d_sb); ++} ++ ++/* ++ * Basically copied from the kernel vfs permission(), but we've changed ++ * the following: ++ * (1) the IS_RDONLY check is skipped, and ++ * (2) if you set the mount option `mode=nfsro', we assume that -EACCES ++ * means that the export is read-only and we should check standard Unix ++ * permissions. This means that NFS ACL checks (or other advanced ++ * permission features) are bypassed. Note however, that we do call ++ * security_inode_permission, and therefore security inside SELinux, etc. ++ * are performed. ++ */ ++static int inode_permission(struct inode *inode, int mask, ++ struct nameidata *nd, int bindex) ++{ ++ int retval, submask; ++ ++ if (mask & MAY_WRITE) { ++ /* The first branch is allowed to be really readonly. */ ++ if (bindex == 0) { ++ umode_t mode = inode->i_mode; ++ if (IS_RDONLY(inode) && ++ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) ++ return -EROFS; ++ } ++ /* ++ * Nobody gets write access to an immutable file. ++ */ ++ if (IS_IMMUTABLE(inode)) ++ return -EACCES; ++ } ++ ++ /* Ordinary permission routines do not understand MAY_APPEND. */ ++ submask = mask & ~MAY_APPEND; ++ if (inode->i_op && inode->i_op->permission) { ++ retval = inode->i_op->permission(inode, submask, nd); ++ if ((retval == -EACCES) && (submask & MAY_WRITE) && ++ (!strcmp("nfs", (inode)->i_sb->s_type->name)) && ++ (nd) && (nd->mnt) && (nd->mnt->mnt_sb)) { ++ int perms; ++ perms = branchperms(nd->mnt->mnt_sb, bindex); ++ if (perms & MAY_NFSRO) ++ retval = generic_permission(inode, submask, ++ NULL); ++ } ++ } else ++ retval = generic_permission(inode, submask, NULL); ++ ++ if (retval && retval != -EROFS) /* ignore EROFS */ ++ return retval; ++ ++ retval = security_inode_permission(inode, mask, nd); ++ return ((retval == -EROFS) ? 
0 : retval); /* ignore EROFS */ ++} ++ ++static int unionfs_permission(struct inode *inode, int mask, ++ struct nameidata *nd) ++{ ++ struct inode *hidden_inode = NULL; ++ int err = 0; ++ int bindex, bstart, bend; ++ const int is_file = !S_ISDIR(inode->i_mode); ++ const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ); ++ ++ unionfs_read_lock(inode->i_sb); ++ ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (bstart < 0 || bend < 0) { ++ /* ++ * With branch-management, we can get a stale inode here. ++ * If so, we return ESTALE back to link_path_walk, which ++ * would discard the dcache entry and re-lookup the ++ * dentry+inode. This should be equivalent to issuing ++ * __unionfs_d_revalidate_chain on nd.dentry here. ++ */ ++ err = -ESTALE; /* force revalidate */ ++ goto out; ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!hidden_inode) ++ continue; ++ ++ /* ++ * check the condition for D-F-D underlying files/directories, ++ * we don't have to check for files, if we are checking for ++ * directories. ++ */ ++ if (!is_file && !S_ISDIR(hidden_inode->i_mode)) ++ continue; ++ ++ /* ++ * We use our own special version of permission, such that ++ * only the first branch returns -EROFS. ++ */ ++ err = inode_permission(hidden_inode, mask, nd, bindex); ++ ++ /* ++ * The permissions are an intersection of the overall directory ++ * permissions, so we fail if one fails. ++ */ ++ if (err) ++ goto out; ++ ++ /* only the leftmost file matters. */ ++ if (is_file || write_mask) { ++ if (is_file && write_mask) { ++ err = get_write_access(hidden_inode); ++ if (!err) ++ put_write_access(hidden_inode); ++ } ++ break; ++ } ++ } ++ ++out: ++ unionfs_read_unlock(inode->i_sb); ++ return err; ++} ++ ++static int unionfs_setattr(struct dentry *dentry, struct iattr *ia) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry; ++ struct inode *inode = NULL; ++ struct inode *hidden_inode = NULL; ++ int bstart, bend, bindex; ++ int i; ++ int copyup = 0; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ inode = dentry->d_inode; ++ ++ for (bindex = bstart; (bindex <= bend) || (bindex == bstart); ++ bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ BUG_ON(hidden_dentry->d_inode == NULL); ++ ++ /* If the file is on a read only branch */ ++ if (is_robranch_super(dentry->d_sb, bindex) ++ || IS_RDONLY(hidden_dentry->d_inode)) { ++ if (copyup || (bindex != bstart)) ++ continue; ++ /* Only if its the leftmost file, copyup the file */ ++ for (i = bstart - 1; i >= 0; i--) { ++ loff_t size = dentry->d_inode->i_size; ++ if (ia->ia_valid & ATTR_SIZE) ++ size = ia->ia_size; ++ err = copyup_dentry(dentry->d_parent->d_inode, ++ dentry, bstart, i, NULL, ++ size); ++ ++ if (!err) { ++ copyup = 1; ++ hidden_dentry = ++ unionfs_lower_dentry(dentry); ++ break; ++ } ++ /* ++ * if error is in the leftmost branch, pass ++ * it up. 
++ */ ++ if (i == 0) ++ goto out; ++ } ++ ++ } ++ err = notify_change(hidden_dentry, ia); ++ if (err) ++ goto out; ++ break; ++ } ++ ++ /* for mmap */ ++ if (ia->ia_valid & ATTR_SIZE) { ++ if (ia->ia_size != i_size_read(inode)) { ++ err = vmtruncate(inode, ia->ia_size); ++ if (err) ++ printk("unionfs_setattr: vmtruncate failed\n"); ++ } ++ } ++ ++ /* get the size from the first hidden inode */ ++ hidden_inode = unionfs_lower_inode(dentry->d_inode); ++ fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks); ++ fsstack_copy_inode_size(inode, hidden_inode); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++struct inode_operations unionfs_symlink_iops = { ++ .readlink = unionfs_readlink, ++ .permission = unionfs_permission, ++ .follow_link = unionfs_follow_link, ++ .setattr = unionfs_setattr, ++ .put_link = unionfs_put_link, ++}; ++ ++struct inode_operations unionfs_dir_iops = { ++ .create = unionfs_create, ++ .lookup = unionfs_lookup, ++ .link = unionfs_link, ++ .unlink = unionfs_unlink, ++ .symlink = unionfs_symlink, ++ .mkdir = unionfs_mkdir, ++ .rmdir = unionfs_rmdir, ++ .mknod = unionfs_mknod, ++ .rename = unionfs_rename, ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif ++}; ++ ++struct inode_operations unionfs_main_iops = { ++ .permission = unionfs_permission, ++ .setattr = unionfs_setattr, ++#ifdef CONFIG_UNION_FS_XATTR ++ .setxattr = unionfs_setxattr, ++ .getxattr = unionfs_getxattr, ++ .removexattr = unionfs_removexattr, ++ .listxattr = unionfs_listxattr, ++#endif ++}; +diff -Nurb linux-2.6.22-570/fs/unionfs/lookup.c linux-2.6.22-591/fs/unionfs/lookup.c +--- linux-2.6.22-570/fs/unionfs/lookup.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/lookup.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,549 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* is the filename valid == !(whiteout for a file or opaque dir marker) */ ++static int is_validname(const char *name) ++{ ++ if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) ++ return 0; ++ if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME, ++ sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1)) ++ return 0; ++ return 1; ++} ++ ++/* The rest of these are utility functions for lookup. 
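++ * A quick vocabulary note: a directory is "opaque" when it contains the
++ * .wh.__dir_opaque marker; lookup then stops at that branch and entries
++ * in branches to its right stay hidden (see is_opaque_dir below and the
++ * set_dbopaque() calls in the main lookup loop).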
*/ ++static noinline int is_opaque_dir(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry; ++ struct dentry *wh_hidden_dentry; ++ struct inode *hidden_inode; ++ struct sioq_args args; ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ hidden_inode = hidden_dentry->d_inode; ++ ++ BUG_ON(!S_ISDIR(hidden_inode->i_mode)); ++ ++ mutex_lock(&hidden_inode->i_mutex); ++ ++ if (!permission(hidden_inode, MAY_EXEC, NULL)) ++ wh_hidden_dentry = ++ lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ else { ++ args.is_opaque.dentry = hidden_dentry; ++ run_sioq(__is_opaque_dir, &args); ++ wh_hidden_dentry = args.ret; ++ } ++ ++ mutex_unlock(&hidden_inode->i_mutex); ++ ++ if (IS_ERR(wh_hidden_dentry)) { ++ err = PTR_ERR(wh_hidden_dentry); ++ goto out; ++ } ++ ++ /* This is an opaque dir iff wh_hidden_dentry is positive */ ++ err = !!wh_hidden_dentry->d_inode; ++ ++ dput(wh_hidden_dentry); ++out: ++ return err; ++} ++ ++/* main (and complex) driver function for Unionfs's lookup */ ++struct dentry *unionfs_lookup_backend(struct dentry *dentry, ++ struct nameidata *nd, int lookupmode) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry = NULL; ++ struct dentry *wh_hidden_dentry = NULL; ++ struct dentry *hidden_dir_dentry = NULL; ++ struct dentry *parent_dentry = NULL; ++ int bindex, bstart, bend, bopaque; ++ int dentry_count = 0; /* Number of positive dentries. */ ++ int first_dentry_offset = -1; /* -1 is uninitialized */ ++ struct dentry *first_dentry = NULL; ++ struct dentry *first_hidden_dentry = NULL; ++ struct vfsmount *first_hidden_mnt = NULL; ++ int locked_parent = 0; ++ int locked_child = 0; ++ int allocated_new_info = 0; ++ ++ int opaque; ++ char *whname = NULL; ++ const char *name; ++ int namelen; ++ ++ /* ++ * We should already have a lock on this dentry in the case of a ++ * partial lookup, or a revalidation. Otherwise it is returned from ++ * new_dentry_private_data already locked. ++ */ ++ if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL || ++ lookupmode == INTERPOSE_REVAL_NEG) ++ verify_locked(dentry); ++ else { ++ BUG_ON(UNIONFS_D(dentry) != NULL); ++ locked_child = 1; ++ } ++ ++ switch(lookupmode) { ++ case INTERPOSE_PARTIAL: ++ break; ++ case INTERPOSE_LOOKUP: ++ if ((err = new_dentry_private_data(dentry))) ++ goto out; ++ allocated_new_info = 1; ++ break; ++ default: ++ if ((err = realloc_dentry_private_data(dentry))) ++ goto out; ++ allocated_new_info = 1; ++ break; ++ } ++ ++ /* must initialize dentry operations */ ++ dentry->d_op = &unionfs_dops; ++ ++ parent_dentry = dget_parent(dentry); ++ /* We never partial lookup the root directory. */ ++ if (parent_dentry != dentry) { ++ unionfs_lock_dentry(parent_dentry); ++ locked_parent = 1; ++ } else { ++ dput(parent_dentry); ++ parent_dentry = NULL; ++ goto out; ++ } ++ ++ name = dentry->d_name.name; ++ namelen = dentry->d_name.len; ++ ++ /* No dentries should get created for possible whiteout names. */ ++ if (!is_validname(name)) { ++ err = -EPERM; ++ goto out_free; ++ } ++ ++ /* Now start the actual lookup procedure. */ ++ bstart = dbstart(parent_dentry); ++ bend = dbend(parent_dentry); ++ bopaque = dbopaque(parent_dentry); ++ BUG_ON(bstart < 0); ++ ++ /* ++ * It would be ideal if we could convert partial lookups to only have ++ * to do this work when they really need to. It could probably improve ++ * performance quite a bit, and maybe simplify the rest of the code. 
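++ * For reference, the modes this function handles are:
++ *
++ *	INTERPOSE_LOOKUP	fresh lookup; allocates private data
++ *	INTERPOSE_PARTIAL	fill in branches not yet looked up
++ *	INTERPOSE_REVAL		revalidate an existing positive dentry
++ *	INTERPOSE_REVAL_NEG	revalidate an existing negative dentry
++ *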
++ */ ++ if (lookupmode == INTERPOSE_PARTIAL) { ++ bstart++; ++ if ((bopaque != -1) && (bopaque < bend)) ++ bend = bopaque; ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (lookupmode == INTERPOSE_PARTIAL && hidden_dentry) ++ continue; ++ BUG_ON(hidden_dentry != NULL); ++ ++ hidden_dir_dentry = ++ unionfs_lower_dentry_idx(parent_dentry, bindex); ++ ++ /* if the parent hidden dentry does not exist skip this */ ++ if (!(hidden_dir_dentry && hidden_dir_dentry->d_inode)) ++ continue; ++ ++ /* also skip it if the parent isn't a directory. */ ++ if (!S_ISDIR(hidden_dir_dentry->d_inode->i_mode)) ++ continue; ++ ++ /* Reuse the whiteout name because its value doesn't change. */ ++ if (!whname) { ++ whname = alloc_whname(name, namelen); ++ if (IS_ERR(whname)) { ++ err = PTR_ERR(whname); ++ goto out_free; ++ } ++ } ++ ++ /* check if whiteout exists in this branch: lookup .wh.foo */ ++ wh_hidden_dentry = lookup_one_len(whname, hidden_dir_dentry, ++ namelen + UNIONFS_WHLEN); ++ if (IS_ERR(wh_hidden_dentry)) { ++ dput(first_hidden_dentry); ++ unionfs_mntput(first_dentry, first_dentry_offset); ++ err = PTR_ERR(wh_hidden_dentry); ++ goto out_free; ++ } ++ ++ if (wh_hidden_dentry->d_inode) { ++ /* We found a whiteout so lets give up. */ ++ if (S_ISREG(wh_hidden_dentry->d_inode->i_mode)) { ++ set_dbend(dentry, bindex); ++ set_dbopaque(dentry, bindex); ++ dput(wh_hidden_dentry); ++ break; ++ } ++ err = -EIO; ++ printk(KERN_NOTICE "unionfs: EIO: invalid whiteout " ++ "entry type %d.\n", ++ wh_hidden_dentry->d_inode->i_mode); ++ dput(wh_hidden_dentry); ++ dput(first_hidden_dentry); ++ unionfs_mntput(first_dentry, first_dentry_offset); ++ goto out_free; ++ } ++ ++ dput(wh_hidden_dentry); ++ wh_hidden_dentry = NULL; ++ ++ /* Now do regular lookup; lookup foo */ ++ nd->dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ /* FIXME: fix following line for mount point crossing */ ++ nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex); ++ ++ hidden_dentry = lookup_one_len_nd(name, hidden_dir_dentry, ++ namelen, nd); ++ if (IS_ERR(hidden_dentry)) { ++ dput(first_hidden_dentry); ++ unionfs_mntput(first_dentry, first_dentry_offset); ++ err = PTR_ERR(hidden_dentry); ++ goto out_free; ++ } ++ ++ /* ++ * Store the first negative dentry specially, because if they ++ * are all negative we need this for future creates. ++ */ ++ if (!hidden_dentry->d_inode) { ++ if (!first_hidden_dentry && (dbstart(dentry) == -1)) { ++ first_hidden_dentry = hidden_dentry; ++ /* ++ * FIXME: following line needs to be changed ++ * to allow mount-point crossing ++ */ ++ first_dentry = parent_dentry; ++ first_hidden_mnt = ++ unionfs_mntget(parent_dentry, bindex); ++ first_dentry_offset = bindex; ++ } else ++ dput(hidden_dentry); ++ ++ continue; ++ } ++ ++ /* number of positive dentries */ ++ dentry_count++; ++ ++ /* store underlying dentry */ ++ if (dbstart(dentry) == -1) ++ set_dbstart(dentry, bindex); ++ unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry); ++ /* ++ * FIXME: the following line needs to get fixed to allow ++ * mount-point crossing ++ */ ++ unionfs_set_lower_mnt_idx(dentry, bindex, ++ unionfs_mntget(parent_dentry, ++ bindex)); ++ set_dbend(dentry, bindex); ++ ++ /* update parent directory's atime with the bindex */ ++ fsstack_copy_attr_atime(parent_dentry->d_inode, ++ hidden_dir_dentry->d_inode); ++ ++ /* We terminate file lookups here. 
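++ * Only directories keep fanning out across branches; the first regular
++ * file found ends the scan, so a file in a left branch shadows any file
++ * of the same name in branches further right.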
*/ ++ if (!S_ISDIR(hidden_dentry->d_inode->i_mode)) { ++ if (lookupmode == INTERPOSE_PARTIAL) ++ continue; ++ if (dentry_count == 1) ++ goto out_positive; ++ /* This can only happen with mixed D-*-F-* */ ++ BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)-> ++ d_inode->i_mode)); ++ continue; ++ } ++ ++ opaque = is_opaque_dir(dentry, bindex); ++ if (opaque < 0) { ++ dput(first_hidden_dentry); ++ unionfs_mntput(first_dentry, first_dentry_offset); ++ err = opaque; ++ goto out_free; ++ } else if (opaque) { ++ set_dbend(dentry, bindex); ++ set_dbopaque(dentry, bindex); ++ break; ++ } ++ } ++ ++ if (dentry_count) ++ goto out_positive; ++ else ++ goto out_negative; ++ ++out_negative: ++ if (lookupmode == INTERPOSE_PARTIAL) ++ goto out; ++ ++ /* If we've only got negative dentries, then use the leftmost one. */ ++ if (lookupmode == INTERPOSE_REVAL) { ++ if (dentry->d_inode) ++ UNIONFS_I(dentry->d_inode)->stale = 1; ++ ++ goto out; ++ } ++ /* This should only happen if we found a whiteout. */ ++ if (first_dentry_offset == -1) { ++ nd->dentry = dentry; ++ /* FIXME: fix following line for mount point crossing */ ++ nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex); ++ ++ first_hidden_dentry = ++ lookup_one_len_nd(name, hidden_dir_dentry, ++ namelen, nd); ++ first_dentry_offset = bindex; ++ if (IS_ERR(first_hidden_dentry)) { ++ err = PTR_ERR(first_hidden_dentry); ++ goto out; ++ } ++ ++ /* ++ * FIXME: the following line needs to be changed to allow ++ * mount-point crossing ++ */ ++ first_dentry = dentry; ++ first_hidden_mnt = unionfs_mntget(dentry, bindex); ++ } ++ unionfs_set_lower_dentry_idx(dentry, first_dentry_offset, ++ first_hidden_dentry); ++ unionfs_set_lower_mnt_idx(dentry, first_dentry_offset, ++ first_hidden_mnt); ++ set_dbstart(dentry, first_dentry_offset); ++ set_dbend(dentry, first_dentry_offset); ++ ++ if (lookupmode == INTERPOSE_REVAL_NEG) ++ BUG_ON(dentry->d_inode != NULL); ++ else ++ d_add(dentry, NULL); ++ goto out; ++ ++/* This part of the code is for positive dentries. */ ++out_positive: ++ BUG_ON(dentry_count <= 0); ++ ++ /* ++ * If we're holding onto the first negative dentry & corresponding ++ * vfsmount - throw it out. ++ */ ++ dput(first_hidden_dentry); ++ unionfs_mntput(first_dentry, first_dentry_offset); ++ ++ /* Partial lookups need to re-interpose, or throw away older negs. */ ++ if (lookupmode == INTERPOSE_PARTIAL) { ++ if (dentry->d_inode) { ++ unionfs_reinterpose(dentry); ++ goto out; ++ } ++ ++ /* ++ * This somehow turned positive, so it is as if we had a ++ * negative revalidation. 
++ */ ++ lookupmode = INTERPOSE_REVAL_NEG; ++ ++ update_bstart(dentry); ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ } ++ ++ err = unionfs_interpose(dentry, dentry->d_sb, lookupmode); ++ if (err) ++ goto out_drop; ++ ++ goto out; ++ ++out_drop: ++ d_drop(dentry); ++ ++out_free: ++ /* should dput all the underlying dentries on error condition */ ++ bstart = dbstart(dentry); ++ if (bstart >= 0) { ++ bend = dbend(dentry); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ unionfs_mntput(dentry, bindex); ++ } ++ } ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ UNIONFS_D(dentry)->lower_paths = NULL; ++ set_dbstart(dentry, -1); ++ set_dbend(dentry, -1); ++ ++out: ++ if (!err && UNIONFS_D(dentry)) { ++ BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount); ++ BUG_ON(dbend(dentry) > sbmax(dentry->d_sb)); ++ BUG_ON(dbstart(dentry) < 0); ++ } ++ kfree(whname); ++ if (locked_parent) ++ unionfs_unlock_dentry(parent_dentry); ++ dput(parent_dentry); ++ if (locked_child || (err && allocated_new_info)) ++ unionfs_unlock_dentry(dentry); ++ return ERR_PTR(err); ++} ++ ++/* This is a utility function that fills in a unionfs dentry */ ++int unionfs_partial_lookup(struct dentry *dentry) ++{ ++ struct dentry *tmp; ++ struct nameidata nd = { .flags = 0 }; ++ ++ tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL); ++ if (!tmp) ++ return 0; ++ if (IS_ERR(tmp)) ++ return PTR_ERR(tmp); ++ /* need to change the interface */ ++ BUG_ON(tmp != dentry); ++ return -ENOSYS; ++} ++ ++/* The dentry cache is just so we have properly sized dentries. */ ++static struct kmem_cache *unionfs_dentry_cachep; ++int unionfs_init_dentry_cache(void) ++{ ++ unionfs_dentry_cachep = ++ kmem_cache_create("unionfs_dentry", ++ sizeof(struct unionfs_dentry_info), ++ 0, SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ ++ return (unionfs_dentry_cachep ? 
0 : -ENOMEM); ++} ++ ++void unionfs_destroy_dentry_cache(void) ++{ ++ if (unionfs_dentry_cachep) ++ kmem_cache_destroy(unionfs_dentry_cachep); ++} ++ ++void free_dentry_private_data(struct dentry *dentry) ++{ ++ if (!dentry || !dentry->d_fsdata) ++ return; ++ kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata); ++ dentry->d_fsdata = NULL; ++} ++ ++static inline int __realloc_dentry_private_data(struct dentry *dentry) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ void *p; ++ int size; ++ ++ BUG_ON(!info); ++ ++ size = sizeof(struct path) * sbmax(dentry->d_sb); ++ p = krealloc(info->lower_paths, size, GFP_ATOMIC); ++ if (!p) ++ return -ENOMEM; ++ ++ info->lower_paths = p; ++ ++ info->bstart = -1; ++ info->bend = -1; ++ info->bopaque = -1; ++ info->bcount = sbmax(dentry->d_sb); ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(dentry->d_sb)->generation)); ++ ++ memset(info->lower_paths, 0, size); ++ ++ return 0; ++} ++ ++/* UNIONFS_D(dentry)->lock must be locked */ ++int realloc_dentry_private_data(struct dentry *dentry) ++{ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ kfree(UNIONFS_D(dentry)->lower_paths); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* allocate new dentry private data */ ++int new_dentry_private_data(struct dentry *dentry) ++{ ++ struct unionfs_dentry_info *info = UNIONFS_D(dentry); ++ ++ BUG_ON(info); ++ ++ info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC); ++ if (!info) ++ return -ENOMEM; ++ ++ mutex_init(&info->lock); ++ mutex_lock(&info->lock); ++ ++ info->lower_paths = NULL; ++ ++ dentry->d_fsdata = info; ++ ++ if (!__realloc_dentry_private_data(dentry)) ++ return 0; ++ ++ mutex_unlock(&info->lock); ++ free_dentry_private_data(dentry); ++ return -ENOMEM; ++} ++ ++/* ++ * scan through the lower dentry objects, and set bstart to reflect the ++ * starting branch ++ */ ++void update_bstart(struct dentry *dentry) ++{ ++ int bindex; ++ int bstart = dbstart(dentry); ++ int bend = dbend(dentry); ++ struct dentry *hidden_dentry; ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ continue; ++ if (hidden_dentry->d_inode) { ++ set_dbstart(dentry, bindex); ++ break; ++ } ++ dput(hidden_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/main.c linux-2.6.22-591/fs/unionfs/main.c +--- linux-2.6.22-570/fs/unionfs/main.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/main.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,729 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++#include ++#include ++ ++/* ++ * Connect a unionfs inode dentry/inode with several lower ones. This is ++ * the classic stackable file system "vnode interposition" action. 
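++ * The @flag argument decides how the resulting inode is attached:
++ * INTERPOSE_DEFAULT and INTERPOSE_REVAL_NEG d_instantiate() the dentry,
++ * INTERPOSE_LOOKUP goes through d_splice_alias(), and INTERPOSE_REVAL
++ * reuses the inode the dentry already has.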
++ * ++ * @sb: unionfs's super_block ++ */ ++int unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag) ++{ ++ struct inode *hidden_inode; ++ struct dentry *hidden_dentry; ++ int err = 0; ++ struct inode *inode; ++ int is_negative_dentry = 1; ++ int bindex, bstart, bend; ++ ++ verify_locked(dentry); ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ /* Make sure that we didn't get a negative dentry. */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ if (unionfs_lower_dentry_idx(dentry, bindex) && ++ unionfs_lower_dentry_idx(dentry, bindex)->d_inode) { ++ is_negative_dentry = 0; ++ break; ++ } ++ } ++ BUG_ON(is_negative_dentry); ++ ++ /* ++ * We allocate our new inode below, by calling iget. ++ * iget will call our read_inode which will initialize some ++ * of the new inode's fields ++ */ ++ ++ /* ++ * On revalidate we've already got our own inode and just need ++ * to fix it up. ++ */ ++ if (flag == INTERPOSE_REVAL) { ++ inode = dentry->d_inode; ++ UNIONFS_I(inode)->bstart = -1; ++ UNIONFS_I(inode)->bend = -1; ++ atomic_set(&UNIONFS_I(inode)->generation, ++ atomic_read(&UNIONFS_SB(sb)->generation)); ++ ++ UNIONFS_I(inode)->lower_inodes = ++ kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL); ++ if (!UNIONFS_I(inode)->lower_inodes) { ++ err = -ENOMEM; ++ goto out; ++ } ++ } else { ++ /* get unique inode number for unionfs */ ++ inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO)); ++ if (!inode) { ++ err = -EACCES; ++ goto out; ++ } ++ ++ if (atomic_read(&inode->i_count) > 1) ++ goto skip; ++ } ++ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) { ++ unionfs_set_lower_inode_idx(inode, bindex, NULL); ++ continue; ++ } ++ ++ /* Initialize the hidden inode to the new hidden inode. */ ++ if (!hidden_dentry->d_inode) ++ continue; ++ ++ unionfs_set_lower_inode_idx(inode, bindex, ++ igrab(hidden_dentry->d_inode)); ++ } ++ ++ ibstart(inode) = dbstart(dentry); ++ ibend(inode) = dbend(dentry); ++ ++ /* Use attributes from the first branch. */ ++ hidden_inode = unionfs_lower_inode(inode); ++ ++ /* Use different set of inode ops for symlinks & directories */ ++ if (S_ISLNK(hidden_inode->i_mode)) ++ inode->i_op = &unionfs_symlink_iops; ++ else if (S_ISDIR(hidden_inode->i_mode)) ++ inode->i_op = &unionfs_dir_iops; ++ ++ /* Use different set of file ops for directories */ ++ if (S_ISDIR(hidden_inode->i_mode)) ++ inode->i_fop = &unionfs_dir_fops; ++ ++ /* properly initialize special inodes */ ++ if (S_ISBLK(hidden_inode->i_mode) || S_ISCHR(hidden_inode->i_mode) || ++ S_ISFIFO(hidden_inode->i_mode) || S_ISSOCK(hidden_inode->i_mode)) ++ init_special_inode(inode, hidden_inode->i_mode, ++ hidden_inode->i_rdev); ++ ++ /* all well, copy inode attributes */ ++ fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks); ++ fsstack_copy_inode_size(inode, hidden_inode); ++ ++skip: ++ /* only (our) lookup wants to do a d_add */ ++ switch (flag) { ++ case INTERPOSE_DEFAULT: ++ case INTERPOSE_REVAL_NEG: ++ d_instantiate(dentry, inode); ++ break; ++ case INTERPOSE_LOOKUP: ++ err = PTR_ERR(d_splice_alias(inode, dentry)); ++ break; ++ case INTERPOSE_REVAL: ++ /* Do nothing. 
*/
++		break;
++	default:
++		printk(KERN_ERR "unionfs: invalid interpose flag passed!\n");
++		BUG();
++	}
++
++out:
++	return err;
++}
++
++/* like interpose above, but for an already existing dentry */
++void unionfs_reinterpose(struct dentry *dentry)
++{
++	struct dentry *hidden_dentry;
++	struct inode *inode;
++	int bindex, bstart, bend;
++
++	verify_locked(dentry);
++
++	/* This is a pre-allocated inode */
++	inode = dentry->d_inode;
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++
++		if (!hidden_dentry->d_inode)
++			continue;
++		if (unionfs_lower_inode_idx(inode, bindex))
++			continue;
++		unionfs_set_lower_inode_idx(inode, bindex,
++					    igrab(hidden_dentry->d_inode));
++	}
++	ibstart(inode) = dbstart(dentry);
++	ibend(inode) = dbend(dentry);
++}
++
++/*
++ * make sure the branch we just looked up (nd) makes sense:
++ *
++ * 1) we're not trying to stack unionfs on top of unionfs
++ * 2) it exists
++ * 3) it is a directory
++ */
++int check_branch(struct nameidata *nd)
++{
++	if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs"))
++		return -EINVAL;
++	if (!nd->dentry->d_inode)
++		return -ENOENT;
++	if (!S_ISDIR(nd->dentry->d_inode->i_mode))
++		return -ENOTDIR;
++	return 0;
++}
++
++/* checks whether two hidden dentries have overlapping branches */
++static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
++{
++	struct dentry *dent = NULL;
++
++	dent = dent1;
++	while ((dent != dent2) && (dent->d_parent != dent))
++		dent = dent->d_parent;
++
++	if (dent == dent2)
++		return 1;
++
++	dent = dent2;
++	while ((dent != dent1) && (dent->d_parent != dent))
++		dent = dent->d_parent;
++
++	return (dent == dent1);
++}
++
++/*
++ * Parse branch mode helper function
++ */
++int __parse_branch_mode(const char *name)
++{
++	if (!name)
++		return 0;
++	if (!strcmp(name, "ro"))
++		return MAY_READ;
++	if (!strcmp(name, "rw"))
++		return (MAY_READ | MAY_WRITE);
++	return 0;
++}
++
++/*
++ * Parse "ro" or "rw" options, but default to "rw" if no mode option
++ * was specified.
++ */
++int parse_branch_mode(const char *name)
++{
++	int perms = __parse_branch_mode(name);
++
++	if (perms == 0)
++		perms = MAY_READ | MAY_WRITE;
++	return perms;
++}
++
++/*
++ * parse the dirs= mount argument
++ *
++ * We don't need to lock the superblock private data's rwsem, as we get
++ * called only by unionfs_read_super - it is still a long time before anyone
++ * can even get a reference to us.
++ */
++static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
++			     *hidden_root_info, char *options)
++{
++	struct nameidata nd;
++	char *name;
++	int err = 0;
++	int branches = 1;
++	int bindex = 0;
++	int i = 0;
++	int j = 0;
++
++	struct dentry *dent1;
++	struct dentry *dent2;
++
++	if (options[0] == '\0') {
++		printk(KERN_WARNING "unionfs: no branches specified\n");
++		err = -EINVAL;
++		goto out;
++	}
++
++	/*
++	 * Each colon means we have a separator; this is really just a rough
++	 * guess, since strsep will handle empty fields for us.
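++	 * For instance, "b1:b2=rw:b3" has two colons, so we reserve room
++	 * for three branches; "b1::b3" also reserves three slots, but
++	 * strsep() hands us the empty middle field and we simply skip it
++	 * below. (Worked example added for clarity.)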
++ */ ++ for (i = 0; options[i]; i++) ++ if (options[i] == ':') ++ branches++; ++ ++ /* allocate space for underlying pointers to hidden dentry */ ++ UNIONFS_SB(sb)->data = ++ kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL); ++ if (!UNIONFS_SB(sb)->data) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ hidden_root_info->lower_paths = ++ kcalloc(branches, sizeof(struct path), GFP_KERNEL); ++ if (!hidden_root_info->lower_paths) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ /* now parsing a string such as "b1:b2=rw:b3=ro:b4" */ ++ branches = 0; ++ while ((name = strsep(&options, ":")) != NULL) { ++ int perms; ++ char *mode = strchr(name, '='); ++ ++ if (!name || !*name) ++ continue; ++ ++ branches++; ++ ++ /* strip off '=' if any */ ++ if (mode) ++ *mode++ = '\0'; ++ ++ perms = parse_branch_mode(mode); ++ if (!bindex && !(perms & MAY_WRITE)) { ++ err = -EINVAL; ++ goto out; ++ } ++ ++ err = path_lookup(name, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_WARNING "unionfs: error accessing " ++ "hidden directory '%s' (error %d)\n", ++ name, err); ++ goto out; ++ } ++ ++ if ((err = check_branch(&nd))) { ++ printk(KERN_WARNING "unionfs: hidden directory " ++ "'%s' is not a valid branch\n", name); ++ path_release(&nd); ++ goto out; ++ } ++ ++ hidden_root_info->lower_paths[bindex].dentry = nd.dentry; ++ hidden_root_info->lower_paths[bindex].mnt = nd.mnt; ++ ++ set_branchperms(sb, bindex, perms); ++ set_branch_count(sb, bindex, 0); ++ new_branch_id(sb, bindex); ++ ++ if (hidden_root_info->bstart < 0) ++ hidden_root_info->bstart = bindex; ++ hidden_root_info->bend = bindex; ++ bindex++; ++ } ++ ++ if (branches == 0) { ++ printk(KERN_WARNING "unionfs: no branches specified\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ BUG_ON(branches != (hidden_root_info->bend + 1)); ++ ++ /* ++ * Ensure that no overlaps exist in the branches. ++ * ++ * This test is required because the Linux kernel has no support ++ * currently for ensuring coherency between stackable layers and ++ * branches. If we were to allow overlapping branches, it would be ++ * possible, for example, to delete a file via one branch, which ++ * would not be reflected in another branch. Such incoherency could ++ * lead to inconsistencies and even kernel oopses. Rather than ++ * implement hacks to work around some of these cache-coherency ++ * problems, we prevent branch overlapping, for now. A complete ++ * solution will involve proper kernel/VFS support for cache ++ * coherency, at which time we could safely remove this ++ * branch-overlapping test. ++ */ ++ for (i = 0; i < branches; i++) { ++ for (j = i + 1; j < branches; j++) { ++ dent1 = hidden_root_info->lower_paths[i].dentry; ++ dent2 = hidden_root_info->lower_paths[j].dentry; ++ ++ if (is_branch_overlap(dent1, dent2)) { ++ printk(KERN_WARNING "unionfs: branches %d and " ++ "%d overlap\n", i, j); ++ err = -EINVAL; ++ goto out; ++ } ++ } ++ } ++ ++out: ++ if (err) { ++ for (i = 0; i < branches; i++) ++ if (hidden_root_info->lower_paths[i].dentry) { ++ dput(hidden_root_info->lower_paths[i].dentry); ++ /* initialize: can't use unionfs_mntput here */ ++ mntput(hidden_root_info->lower_paths[i].mnt); ++ } ++ ++ kfree(hidden_root_info->lower_paths); ++ kfree(UNIONFS_SB(sb)->data); ++ ++ /* ++ * MUST clear the pointers to prevent potential double free if ++ * the caller dies later on ++ */ ++ hidden_root_info->lower_paths = NULL; ++ UNIONFS_SB(sb)->data = NULL; ++ } ++ return err; ++} ++ ++/* ++ * Parse mount options. See the manual page for usage instructions. 
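++ * For example (an illustrative invocation; the paths are hypothetical):
++ *
++ *	mount -t unionfs -o dirs=/writable=rw:/ro1=ro:/ro2=ro none /mnt/union
++ *
++ * Branches in dirs= are colon-separated, each may be suffixed with =rw or
++ * =ro (rw is the default), and the leftmost branch must be writable.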
++ * ++ * Returns the dentry object of the lower-level (hidden) directory; ++ * We want to mount our stackable file system on top of that hidden directory. ++ */ ++static struct unionfs_dentry_info *unionfs_parse_options( ++ struct super_block *sb, ++ char *options) ++{ ++ struct unionfs_dentry_info *hidden_root_info; ++ char *optname; ++ int err = 0; ++ int bindex; ++ int dirsfound = 0; ++ ++ /* allocate private data area */ ++ err = -ENOMEM; ++ hidden_root_info = ++ kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL); ++ if (!hidden_root_info) ++ goto out_error; ++ hidden_root_info->bstart = -1; ++ hidden_root_info->bend = -1; ++ hidden_root_info->bopaque = -1; ++ ++ while ((optname = strsep(&options, ",")) != NULL) { ++ char *optarg; ++ char *endptr; ++ int intval; ++ ++ if (!optname || !*optname) ++ continue; ++ ++ optarg = strchr(optname, '='); ++ if (optarg) ++ *optarg++ = '\0'; ++ ++ /* ++ * All of our options take an argument now. Insert ones that ++ * don't, above this check. ++ */ ++ if (!optarg) { ++ printk("unionfs: %s requires an argument.\n", optname); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ if (!strcmp("dirs", optname)) { ++ if (++dirsfound > 1) { ++ printk(KERN_WARNING ++ "unionfs: multiple dirs specified\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ err = parse_dirs_option(sb, hidden_root_info, optarg); ++ if (err) ++ goto out_error; ++ continue; ++ } ++ ++ /* All of these options require an integer argument. */ ++ intval = simple_strtoul(optarg, &endptr, 0); ++ if (*endptr) { ++ printk(KERN_WARNING ++ "unionfs: invalid %s option '%s'\n", ++ optname, optarg); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ err = -EINVAL; ++ printk(KERN_WARNING ++ "unionfs: unrecognized option '%s'\n", optname); ++ goto out_error; ++ } ++ if (dirsfound != 1) { ++ printk(KERN_WARNING "unionfs: dirs option required\n"); ++ err = -EINVAL; ++ goto out_error; ++ } ++ goto out; ++ ++out_error: ++ if (hidden_root_info && hidden_root_info->lower_paths) { ++ for (bindex = hidden_root_info->bstart; ++ bindex >= 0 && bindex <= hidden_root_info->bend; ++ bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = hidden_root_info->lower_paths[bindex].dentry; ++ m = hidden_root_info->lower_paths[bindex].mnt; ++ ++ dput(d); ++ /* initializing: can't use unionfs_mntput here */ ++ mntput(m); ++ } ++ } ++ ++ kfree(hidden_root_info->lower_paths); ++ kfree(hidden_root_info); ++ ++ kfree(UNIONFS_SB(sb)->data); ++ UNIONFS_SB(sb)->data = NULL; ++ ++ hidden_root_info = ERR_PTR(err); ++out: ++ return hidden_root_info; ++} ++ ++/* ++ * our custom d_alloc_root work-alike ++ * ++ * we can't use d_alloc_root if we want to use our own interpose function ++ * unchanged, so we simply call our own "fake" d_alloc_root ++ */ ++static struct dentry *unionfs_d_alloc_root(struct super_block *sb) ++{ ++ struct dentry *ret = NULL; ++ ++ if (sb) { ++ static const struct qstr name = {.name = "/",.len = 1 }; ++ ++ ret = d_alloc(NULL, &name); ++ if (ret) { ++ ret->d_op = &unionfs_dops; ++ ret->d_sb = sb; ++ ret->d_parent = ret; ++ } ++ } ++ return ret; ++} ++ ++/* ++ * There is no need to lock the unionfs_super_info's rwsem as there is no ++ * way anyone can have a reference to the superblock at this point in time. 
++ */ ++static int unionfs_read_super(struct super_block *sb, void *raw_data, ++ int silent) ++{ ++ int err = 0; ++ ++ struct unionfs_dentry_info *hidden_root_info = NULL; ++ int bindex, bstart, bend; ++ ++ if (!raw_data) { ++ printk(KERN_WARNING ++ "unionfs: read_super: missing data argument\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* Allocate superblock private data */ ++ sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL); ++ if (!UNIONFS_SB(sb)) { ++ printk(KERN_WARNING "unionfs: read_super: out of memory\n"); ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ UNIONFS_SB(sb)->bend = -1; ++ atomic_set(&UNIONFS_SB(sb)->generation, 1); ++ init_rwsem(&UNIONFS_SB(sb)->rwsem); ++ UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */ ++ ++ hidden_root_info = unionfs_parse_options(sb, raw_data); ++ if (IS_ERR(hidden_root_info)) { ++ printk(KERN_WARNING ++ "unionfs: read_super: error while parsing options " ++ "(err = %ld)\n", PTR_ERR(hidden_root_info)); ++ err = PTR_ERR(hidden_root_info); ++ hidden_root_info = NULL; ++ goto out_free; ++ } ++ if (hidden_root_info->bstart == -1) { ++ err = -ENOENT; ++ goto out_free; ++ } ++ ++ /* set the hidden superblock field of upper superblock */ ++ bstart = hidden_root_info->bstart; ++ BUG_ON(bstart != 0); ++ sbend(sb) = bend = hidden_root_info->bend; ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d = hidden_root_info->lower_paths[bindex].dentry; ++ unionfs_set_lower_super_idx(sb, bindex, d->d_sb); ++ } ++ ++ /* max Bytes is the maximum bytes from highest priority branch */ ++ sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes; ++ ++ sb->s_op = &unionfs_sops; ++ ++ /* See comment next to the definition of unionfs_d_alloc_root */ ++ sb->s_root = unionfs_d_alloc_root(sb); ++ if (!sb->s_root) { ++ err = -ENOMEM; ++ goto out_dput; ++ } ++ ++ /* link the upper and lower dentries */ ++ sb->s_root->d_fsdata = NULL; ++ if ((err = new_dentry_private_data(sb->s_root))) ++ goto out_freedpd; ++ ++ /* Set the hidden dentries for s_root */ ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = hidden_root_info->lower_paths[bindex].dentry; ++ m = hidden_root_info->lower_paths[bindex].mnt; ++ ++ unionfs_set_lower_dentry_idx(sb->s_root, bindex, d); ++ unionfs_set_lower_mnt_idx(sb->s_root, bindex, m); ++ } ++ set_dbstart(sb->s_root, bstart); ++ set_dbend(sb->s_root, bend); ++ ++ /* Set the generation number to one, since this is for the mount. 
*/ ++ atomic_set(&UNIONFS_D(sb->s_root)->generation, 1); ++ ++ /* call interpose to create the upper level inode */ ++ err = unionfs_interpose(sb->s_root, sb, 0); ++ unionfs_unlock_dentry(sb->s_root); ++ if (!err) ++ goto out; ++ /* else fall through */ ++ ++out_freedpd: ++ if (UNIONFS_D(sb->s_root)) { ++ kfree(UNIONFS_D(sb->s_root)->lower_paths); ++ free_dentry_private_data(sb->s_root); ++ } ++ dput(sb->s_root); ++ ++out_dput: ++ if (hidden_root_info && !IS_ERR(hidden_root_info)) { ++ for (bindex = hidden_root_info->bstart; ++ bindex <= hidden_root_info->bend; bindex++) { ++ struct dentry *d; ++ struct vfsmount *m; ++ ++ d = hidden_root_info->lower_paths[bindex].dentry; ++ m = hidden_root_info->lower_paths[bindex].mnt; ++ ++ dput(d); ++ /* initializing: can't use unionfs_mntput here */ ++ mntput(m); ++ } ++ kfree(hidden_root_info->lower_paths); ++ kfree(hidden_root_info); ++ hidden_root_info = NULL; ++ } ++ ++out_free: ++ kfree(UNIONFS_SB(sb)->data); ++ kfree(UNIONFS_SB(sb)); ++ sb->s_fs_info = NULL; ++ ++out: ++ if (hidden_root_info && !IS_ERR(hidden_root_info)) { ++ kfree(hidden_root_info->lower_paths); ++ kfree(hidden_root_info); ++ } ++ return err; ++} ++ ++static int unionfs_get_sb(struct file_system_type *fs_type, ++ int flags, const char *dev_name, ++ void *raw_data, struct vfsmount *mnt) ++{ ++ return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt); ++} ++ ++static struct file_system_type unionfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "unionfs", ++ .get_sb = unionfs_get_sb, ++ .kill_sb = generic_shutdown_super, ++ .fs_flags = FS_REVAL_DOT, ++}; ++ ++static int __init init_unionfs_fs(void) ++{ ++ int err; ++ ++ printk("Registering unionfs " UNIONFS_VERSION "\n"); ++ ++ if ((err = unionfs_init_filldir_cache())) ++ goto out; ++ if ((err = unionfs_init_inode_cache())) ++ goto out; ++ if ((err = unionfs_init_dentry_cache())) ++ goto out; ++ if ((err = init_sioq())) ++ goto out; ++ err = register_filesystem(&unionfs_fs_type); ++out: ++ if (err) { ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ } ++ return err; ++} ++ ++static void __exit exit_unionfs_fs(void) ++{ ++ stop_sioq(); ++ unionfs_destroy_filldir_cache(); ++ unionfs_destroy_inode_cache(); ++ unionfs_destroy_dentry_cache(); ++ unregister_filesystem(&unionfs_fs_type); ++ printk("Completed unionfs module unload.\n"); ++} ++ ++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University" ++ " (http://www.fsl.cs.sunysb.edu)"); ++MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION ++ " (http://unionfs.filesystems.org)"); ++MODULE_LICENSE("GPL"); ++ ++module_init(init_unionfs_fs); ++module_exit(exit_unionfs_fs); +diff -Nurb linux-2.6.22-570/fs/unionfs/mmap.c linux-2.6.22-591/fs/unionfs/mmap.c +--- linux-2.6.22-570/fs/unionfs/mmap.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/mmap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,348 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2006 Shaya Potter ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. 
Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003 Puja Gupta
++ * Copyright (c) 2003 Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of State University of New York
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Unionfs doesn't implement ->writepages, which is OK with the VFS and
++ * keeps our code simpler and smaller. Nevertheless, somehow, our own
++ * ->writepage must be called so we can sync the upper pages with the lower
++ * pages: otherwise data changed at the upper layer won't get written to the
++ * lower layer.
++ *
++ * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
++ * only, which in turn will call generic_writepages and invoke each of the
++ * lower file system's ->writepage. NFS in particular uses the
++ * wbc->fs_private field in its nfs_writepage, which is set in its
++ * nfs_writepages. So if we don't call the lower nfs_writepages first, then
++ * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
++ * OOPS. If, however, we implement a unionfs_writepages and then we do call
++ * the lower nfs_writepages, then we "lose control" over the pages we're
++ * trying to write to the lower file system: we won't be writing our own
++ * new/modified data from the upper pages to the lower pages, and any
++ * mmap-based changes are lost.
++ *
++ * This is a fundamental cache-coherency problem in Linux. The kernel isn't
++ * able to support such stacking abstractions cleanly. One possible clean
++ * way would be for a lower file system's ->writepage method to have some
++ * sort of callback to validate if any upper pages for the same file+offset
++ * exist and have newer content in them.
++ *
++ * This whole NULL ptr dereference is triggered at the lower file system
++ * (NFS) because the wbc->for_writepages is set to 1. Therefore, to avoid
++ * this NULL pointer dereference, we set this flag to 0 and restore it upon
++ * exit. This probably means that we're slightly less efficient in writing
++ * pages out, doing them one at a time, but at least we avoid the oops until
++ * such day as Linux can better support address_space_ops in a stackable
++ * fashion.
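++ *
++ * In miniature, the workaround in unionfs_writepage() below amounts to
++ * (a restatement of the code for clarity; lower_aops abbreviates
++ * lower_inode->i_mapping->a_ops):
++ *
++ *	int saved = wbc->for_writepages;
++ *	if (wbc->for_writepages && !wbc->fs_private)
++ *		wbc->for_writepages = 0;	/* hide writepages from NFS */
++ *	err = lower_aops->writepage(lower_page, wbc);
++ *	wbc->for_writepages = saved;	/* restore the caller's value */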
++ */ ++int unionfs_writepage(struct page *page, struct writeback_control *wbc) ++{ ++ int err = -EIO; ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct page *lower_page; ++ char *kaddr, *lower_kaddr; ++ int saved_for_writepages = wbc->for_writepages; ++ ++ inode = page->mapping->host; ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* find lower page (returns a locked page) */ ++ lower_page = grab_cache_page(lower_inode->i_mapping, page->index); ++ if (!lower_page) ++ goto out; ++ ++ /* get page address, and encode it */ ++ kaddr = kmap(page); ++ lower_kaddr = kmap(lower_page); ++ ++ memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE); ++ ++ kunmap(page); ++ kunmap(lower_page); ++ ++ BUG_ON(!lower_inode->i_mapping->a_ops->writepage); ++ ++ /* workaround for some lower file systems: see big comment on top */ ++ if (wbc->for_writepages && !wbc->fs_private) ++ wbc->for_writepages = 0; ++ ++ /* call lower writepage (expects locked page) */ ++ err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc); ++ wbc->for_writepages = saved_for_writepages; /* restore value */ ++ ++ /* ++ * update mtime and ctime of lower level file system ++ * unionfs' mtime and ctime are updated by generic_file_write ++ */ ++ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; ++ ++ page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */ ++ ++ if (err) ++ ClearPageUptodate(page); ++ else ++ SetPageUptodate(page); ++ ++out: ++ unlock_page(page); ++ return err; ++} ++ ++/* ++ * readpage is called from generic_page_read and the fault handler. ++ * If your file system uses generic_page_read for the read op, it ++ * must implement readpage. ++ * ++ * Readpage expects a locked page, and must unlock it. ++ */ ++static int unionfs_do_readpage(struct file *file, struct page *page) ++{ ++ int err = -EIO; ++ struct dentry *dentry; ++ struct file *lower_file = NULL; ++ struct inode *inode, *lower_inode; ++ char *page_data; ++ struct page *lower_page; ++ char *lower_page_data; ++ ++ dentry = file->f_dentry; ++ if (UNIONFS_F(file) == NULL) { ++ err = -ENOENT; ++ goto out_err; ++ } ++ ++ lower_file = unionfs_lower_file(file); ++ inode = dentry->d_inode; ++ lower_inode = unionfs_lower_inode(inode); ++ ++ lower_page = NULL; ++ ++ /* find lower page (returns a locked page) */ ++ lower_page = read_cache_page(lower_inode->i_mapping, ++ page->index, ++ (filler_t *) lower_inode->i_mapping-> ++ a_ops->readpage, (void *)lower_file); ++ ++ if (IS_ERR(lower_page)) { ++ err = PTR_ERR(lower_page); ++ lower_page = NULL; ++ goto out_release; ++ } ++ ++ /* ++ * wait for the page data to show up ++ * (signaled by readpage as unlocking the page) ++ */ ++ wait_on_page_locked(lower_page); ++ if (!PageUptodate(lower_page)) { ++ /* ++ * call readpage() again if we returned from wait_on_page ++ * with a page that's not up-to-date; that can happen when a ++ * partial page has a few buffers which are ok, but not the ++ * whole page. 
++ */ ++ lock_page(lower_page); ++ err = lower_inode->i_mapping->a_ops->readpage(lower_file, ++ lower_page); ++ if (err) { ++ lower_page = NULL; ++ goto out_release; ++ } ++ ++ wait_on_page_locked(lower_page); ++ if (!PageUptodate(lower_page)) { ++ err = -EIO; ++ goto out_release; ++ } ++ } ++ ++ /* map pages, get their addresses */ ++ page_data = (char *)kmap(page); ++ lower_page_data = (char *)kmap(lower_page); ++ ++ memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE); ++ ++ err = 0; ++ ++ kunmap(lower_page); ++ kunmap(page); ++ ++out_release: ++ if (lower_page) ++ page_cache_release(lower_page); /* undo read_cache_page */ ++ ++ if (err == 0) ++ SetPageUptodate(page); ++ else ++ ClearPageUptodate(page); ++ ++out_err: ++ return err; ++} ++ ++int unionfs_readpage(struct file *file, struct page *page) ++{ ++ int err; ++ ++ unionfs_read_lock(file->f_dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 0))) ++ goto out; ++ ++ err = unionfs_do_readpage(file, page); ++ ++ if (!err) ++ touch_atime(unionfs_lower_mnt(file->f_path.dentry), ++ unionfs_lower_dentry(file->f_path.dentry)); ++ ++ /* ++ * we have to unlock our page, b/c we _might_ have gotten a locked ++ * page. but we no longer have to wakeup on our page here, b/c ++ * UnlockPage does it ++ */ ++out: ++ unlock_page(page); ++ unionfs_read_unlock(file->f_dentry->d_sb); ++ ++ return err; ++} ++ ++int unionfs_prepare_write(struct file *file, struct page *page, unsigned from, ++ unsigned to) ++{ ++ int err; ++ ++ unionfs_read_lock(file->f_dentry->d_sb); ++ ++ err = unionfs_file_revalidate(file, 1); ++ ++ unionfs_read_unlock(file->f_dentry->d_sb); ++ ++ return err; ++} ++ ++int unionfs_commit_write(struct file *file, struct page *page, unsigned from, ++ unsigned to) ++{ ++ int err = -ENOMEM; ++ struct inode *inode, *lower_inode; ++ struct file *lower_file = NULL; ++ loff_t pos; ++ unsigned bytes = to - from; ++ char *page_data = NULL; ++ mm_segment_t old_fs; ++ ++ BUG_ON(file == NULL); ++ ++ unionfs_read_lock(file->f_dentry->d_sb); ++ ++ if ((err = unionfs_file_revalidate(file, 1))) ++ goto out; ++ ++ inode = page->mapping->host; ++ lower_inode = unionfs_lower_inode(inode); ++ ++ if (UNIONFS_F(file) != NULL) ++ lower_file = unionfs_lower_file(file); ++ ++ /* FIXME: is this assertion right here? 
*/ ++ BUG_ON(lower_file == NULL); ++ ++ page_data = (char *)kmap(page); ++ lower_file->f_pos = (page->index << PAGE_CACHE_SHIFT) + from; ++ ++ /* SP: I use vfs_write instead of copying page data and the ++ * prepare_write/commit_write combo because file system's like ++ * GFS/OCFS2 don't like things touching those directly, ++ * calling the underlying write op, while a little bit slower, will ++ * call all the FS specific code as well ++ */ ++ old_fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = vfs_write(lower_file, page_data + from, bytes, ++ &lower_file->f_pos); ++ set_fs(old_fs); ++ ++ kunmap(page); ++ ++ if (err < 0) ++ goto out; ++ ++ inode->i_blocks = lower_inode->i_blocks; ++ /* we may have to update i_size */ ++ pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to; ++ if (pos > i_size_read(inode)) ++ i_size_write(inode, pos); ++ ++ /* ++ * update mtime and ctime of lower level file system ++ * unionfs' mtime and ctime are updated by generic_file_write ++ */ ++ lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME; ++ ++ mark_inode_dirty_sync(inode); ++ ++out: ++ if (err < 0) ++ ClearPageUptodate(page); ++ ++ unionfs_read_unlock(file->f_dentry->d_sb); ++ return err; /* assume all is ok */ ++} ++ ++void unionfs_sync_page(struct page *page) ++{ ++ struct inode *inode; ++ struct inode *lower_inode; ++ struct page *lower_page; ++ struct address_space *mapping; ++ ++ inode = page->mapping->host; ++ lower_inode = unionfs_lower_inode(inode); ++ ++ /* find lower page (returns a locked page) */ ++ lower_page = grab_cache_page(lower_inode->i_mapping, page->index); ++ if (!lower_page) ++ goto out; ++ ++ /* do the actual sync */ ++ mapping = lower_page->mapping; ++ /* ++ * XXX: can we optimize ala RAIF and set the lower page to be ++ * discarded after a successful sync_page? ++ */ ++ if (mapping && mapping->a_ops && mapping->a_ops->sync_page) ++ mapping->a_ops->sync_page(lower_page); ++ ++ unlock_page(lower_page); /* b/c grab_cache_page locked it */ ++ page_cache_release(lower_page); /* b/c grab_cache_page increased refcnt */ ++ ++out: ++ return; ++} ++ ++struct address_space_operations unionfs_aops = { ++ .writepage = unionfs_writepage, ++ .readpage = unionfs_readpage, ++ .prepare_write = unionfs_prepare_write, ++ .commit_write = unionfs_commit_write, ++ .sync_page = unionfs_sync_page, ++}; +diff -Nurb linux-2.6.22-570/fs/unionfs/rdstate.c linux-2.6.22-591/fs/unionfs/rdstate.c +--- linux-2.6.22-570/fs/unionfs/rdstate.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/rdstate.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,282 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This file contains the routines for maintaining readdir state. */ ++ ++/* ++ * There are two structures here, rdstate which is a hash table ++ * of the second structure which is a filldir_node. 
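++ *
++ * Schematically (an added sketch):
++ *
++ *	struct unionfs_dir_state
++ *	    .list[0] --> filldir_node --> filldir_node --> ...
++ *	    .list[1] --> filldir_node --> ...
++ *	    ...
++ *	    .list[size-1]
++ *
++ * A name is hashed with full_name_hash() and chained into the bucket at
++ * list[hash % size]; see find_filldir_node() and add_filldir_node() below.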
++ */ ++ ++/* ++ * This is a struct kmem_cache for filldir nodes, because we allocate a lot ++ * of them and they shouldn't waste memory. If the node has a small name ++ * (as defined by the dentry structure), then we use an inline name to ++ * preserve kmalloc space. ++ */ ++static struct kmem_cache *unionfs_filldir_cachep; ++ ++int unionfs_init_filldir_cache(void) ++{ ++ unionfs_filldir_cachep = ++ kmem_cache_create("unionfs_filldir", ++ sizeof(struct filldir_node), 0, ++ SLAB_RECLAIM_ACCOUNT, NULL, NULL); ++ ++ return (unionfs_filldir_cachep ? 0 : -ENOMEM); ++} ++ ++void unionfs_destroy_filldir_cache(void) ++{ ++ if (unionfs_filldir_cachep) ++ kmem_cache_destroy(unionfs_filldir_cachep); ++} ++ ++/* ++ * This is a tuning parameter that tells us roughly how big to make the ++ * hash table in directory entries per page. This isn't perfect, but ++ * at least we get a hash table size that shouldn't be too overloaded. ++ * The following averages are based on my home directory. ++ * 14.44693 Overall ++ * 12.29 Single Page Directories ++ * 117.93 Multi-page directories ++ */ ++#define DENTPAGE 4096 ++#define DENTPERONEPAGE 12 ++#define DENTPERPAGE 118 ++#define MINHASHSIZE 1 ++static int guesstimate_hash_size(struct inode *inode) ++{ ++ struct inode *hidden_inode; ++ int bindex; ++ int hashsize = MINHASHSIZE; ++ ++ if (UNIONFS_I(inode)->hashsize > 0) ++ return UNIONFS_I(inode)->hashsize; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ if (!(hidden_inode = unionfs_lower_inode_idx(inode, bindex))) ++ continue; ++ ++ if (hidden_inode->i_size == DENTPAGE) ++ hashsize += DENTPERONEPAGE; ++ else ++ hashsize += (hidden_inode->i_size / DENTPAGE) * ++ DENTPERPAGE; ++ } ++ ++ return hashsize; ++} ++ ++int init_rdstate(struct file *file) ++{ ++ BUG_ON(sizeof(loff_t) != ++ (sizeof(unsigned int) + sizeof(unsigned int))); ++ BUG_ON(UNIONFS_F(file)->rdstate != NULL); ++ ++ UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_dentry->d_inode, ++ fbstart(file)); ++ ++ return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM); ++} ++ ++struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos) ++{ ++ struct unionfs_dir_state *rdstate = NULL; ++ struct list_head *pos; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each(pos, &UNIONFS_I(inode)->readdircache) { ++ struct unionfs_dir_state *r = ++ list_entry(pos, struct unionfs_dir_state, cache); ++ if (fpos == rdstate2offset(r)) { ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&r->cache); ++ rdstate = r; ++ break; ++ } ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ return rdstate; ++} ++ ++struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex) ++{ ++ int i = 0; ++ int hashsize; ++ unsigned long mallocsize = sizeof(struct unionfs_dir_state); ++ struct unionfs_dir_state *rdstate; ++ ++ hashsize = guesstimate_hash_size(inode); ++ mallocsize += hashsize * sizeof(struct list_head); ++ mallocsize = __roundup_pow_of_two(mallocsize); ++ ++ /* This should give us about 500 entries anyway. 
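++	 * (Worked example, added for clarity: with a 4096-byte PAGE_SIZE
++	 * and 8-byte list_heads on a 32-bit machine, that is roughly
++	 * (4096 - sizeof(struct unionfs_dir_state)) / 8, i.e. on the order
++	 * of 500 hash buckets.)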
*/ ++ if (mallocsize > PAGE_SIZE) ++ mallocsize = PAGE_SIZE; ++ ++ hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) / ++ sizeof(struct list_head); ++ ++ rdstate = kmalloc(mallocsize, GFP_KERNEL); ++ if (!rdstate) ++ return NULL; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1)) ++ UNIONFS_I(inode)->cookie = 1; ++ else ++ UNIONFS_I(inode)->cookie++; ++ ++ rdstate->cookie = UNIONFS_I(inode)->cookie; ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ rdstate->offset = 1; ++ rdstate->access = jiffies; ++ rdstate->bindex = bindex; ++ rdstate->dirpos = 0; ++ rdstate->hashentries = 0; ++ rdstate->size = hashsize; ++ for (i = 0; i < rdstate->size; i++) ++ INIT_LIST_HEAD(&rdstate->list[i]); ++ ++ return rdstate; ++} ++ ++static void free_filldir_node(struct filldir_node *node) ++{ ++ if (node->namelen >= DNAME_INLINE_LEN_MIN) ++ kfree(node->name); ++ kmem_cache_free(unionfs_filldir_cachep, node); ++} ++ ++void free_rdstate(struct unionfs_dir_state *state) ++{ ++ struct filldir_node *tmp; ++ int i; ++ ++ for (i = 0; i < state->size; i++) { ++ struct list_head *head = &(state->list[i]); ++ struct list_head *pos, *n; ++ ++ /* traverse the list and deallocate space */ ++ list_for_each_safe(pos, n, head) { ++ tmp = list_entry(pos, struct filldir_node, file_list); ++ list_del(&tmp->file_list); ++ free_filldir_node(tmp); ++ } ++ } ++ ++ kfree(state); ++} ++ ++struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen) ++{ ++ int index; ++ unsigned int hash; ++ struct list_head *head; ++ struct list_head *pos; ++ struct filldir_node *cursor = NULL; ++ int found = 0; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ ++ head = &(rdstate->list[index]); ++ list_for_each(pos, head) { ++ cursor = list_entry(pos, struct filldir_node, file_list); ++ ++ if (cursor->namelen == namelen && cursor->hash == hash && ++ !strncmp(cursor->name, name, namelen)) { ++ /* ++ * a duplicate exists, and hence no need to create ++ * entry to the list ++ */ ++ found = 1; ++ ++ /* ++ * if the duplicate is in this branch, then the file ++ * system is corrupted. 
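++	 * (Note added for clarity: the same name appearing in *different*
++	 * branches is the normal unioning case and is silently treated as
++	 * a duplicate; only a repeat within a single branch is flagged.)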
++ */ ++ if (cursor->bindex == rdstate->bindex) { ++ printk(KERN_DEBUG "unionfs: filldir: possible " ++ "I/O error: a file is duplicated " ++ "in the same branch %d: %s\n", ++ rdstate->bindex, cursor->name); ++ } ++ break; ++ } ++ } ++ ++ if (!found) ++ cursor = NULL; ++ ++ return cursor; ++} ++ ++int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, ++ int namelen, int bindex, int whiteout) ++{ ++ struct filldir_node *new; ++ unsigned int hash; ++ int index; ++ int err = 0; ++ struct list_head *head; ++ ++ BUG_ON(namelen <= 0); ++ ++ hash = full_name_hash(name, namelen); ++ index = hash % rdstate->size; ++ head = &(rdstate->list[index]); ++ ++ new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL); ++ if (!new) { ++ err = -ENOMEM; ++ goto out; ++ } ++ ++ INIT_LIST_HEAD(&new->file_list); ++ new->namelen = namelen; ++ new->hash = hash; ++ new->bindex = bindex; ++ new->whiteout = whiteout; ++ ++ if (namelen < DNAME_INLINE_LEN_MIN) ++ new->name = new->iname; ++ else { ++ new->name = kmalloc(namelen + 1, GFP_KERNEL); ++ if (!new->name) { ++ kmem_cache_free(unionfs_filldir_cachep, new); ++ new = NULL; ++ goto out; ++ } ++ } ++ ++ memcpy(new->name, name, namelen); ++ new->name[namelen] = '\0'; ++ ++ rdstate->hashentries++; ++ ++ list_add(&(new->file_list), head); ++out: ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/rename.c linux-2.6.22-591/fs/unionfs/rename.c +--- linux-2.6.22-570/fs/unionfs/rename.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/rename.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,477 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */
++
++#include "union.h"
++
++static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++			    struct inode *new_dir, struct dentry *new_dentry,
++			    int bindex, struct dentry **wh_old)
++{
++	int err = 0;
++	struct dentry *hidden_old_dentry;
++	struct dentry *hidden_new_dentry;
++	struct dentry *hidden_old_dir_dentry;
++	struct dentry *hidden_new_dir_dentry;
++	struct dentry *hidden_wh_dentry;
++	struct dentry *hidden_wh_dir_dentry;
++	char *wh_name = NULL;
++
++	hidden_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
++	hidden_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
++
++	if (!hidden_new_dentry) {
++		hidden_new_dentry =
++			create_parents(new_dentry->d_parent->d_inode,
++				       new_dentry, bindex);
++		if (IS_ERR(hidden_new_dentry)) {
++			printk(KERN_DEBUG "unionfs: error creating directory "
++			       "tree for rename, bindex = %d, err = %ld\n",
++			       bindex, PTR_ERR(hidden_new_dentry));
++			err = PTR_ERR(hidden_new_dentry);
++			goto out;
++		}
++	}
++
++	wh_name = alloc_whname(new_dentry->d_name.name,
++			       new_dentry->d_name.len);
++	if (IS_ERR(wh_name)) {
++		err = PTR_ERR(wh_name);
++		wh_name = NULL;	/* don't kfree an ERR_PTR at "out" */
++		goto out;
++	}
++
++	hidden_wh_dentry = lookup_one_len(wh_name, hidden_new_dentry->d_parent,
++					  new_dentry->d_name.len +
++					  UNIONFS_WHLEN);
++	if (IS_ERR(hidden_wh_dentry)) {
++		err = PTR_ERR(hidden_wh_dentry);
++		goto out;
++	}
++
++	if (hidden_wh_dentry->d_inode) {
++		/* get rid of the existing whiteout */
++		if (hidden_new_dentry->d_inode) {
++			printk(KERN_WARNING "unionfs: both a whiteout and a "
++			       "dentry exist when doing a rename!\n");
++			err = -EIO;
++
++			dput(hidden_wh_dentry);
++			goto out;
++		}
++
++		hidden_wh_dir_dentry = lock_parent(hidden_wh_dentry);
++		if (!(err = is_robranch_super(old_dentry->d_sb, bindex)))
++			err = vfs_unlink(hidden_wh_dir_dentry->d_inode,
++					 hidden_wh_dentry);
++
++		dput(hidden_wh_dentry);
++		unlock_dir(hidden_wh_dir_dentry);
++		if (err)
++			goto out;
++	} else
++		dput(hidden_wh_dentry);
++
++	dget(hidden_old_dentry);
++	hidden_old_dir_dentry = dget_parent(hidden_old_dentry);
++	hidden_new_dir_dentry = dget_parent(hidden_new_dentry);
++
++	lock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++	err = is_robranch_super(old_dentry->d_sb, bindex);
++	if (err)
++		goto out_unlock;
++
++	/*
++	 * prepare the whiteout name for old_dentry; the caller will create
++	 * the actual whiteout, and must dput(*wh_old)
++	 */
++	if (wh_old) {
++		char *whname;
++		whname = alloc_whname(old_dentry->d_name.name,
++				      old_dentry->d_name.len);
++		err = PTR_ERR(whname);
++		if (IS_ERR(whname))
++			goto out_unlock;
++		*wh_old = lookup_one_len(whname, hidden_old_dir_dentry,
++					 old_dentry->d_name.len +
++					 UNIONFS_WHLEN);
++		kfree(whname);
++		err = PTR_ERR(*wh_old);
++		if (IS_ERR(*wh_old)) {
++			*wh_old = NULL;
++			goto out_unlock;
++		}
++	}
++
++	err = vfs_rename(hidden_old_dir_dentry->d_inode, hidden_old_dentry,
++			 hidden_new_dir_dentry->d_inode, hidden_new_dentry);
++
++out_unlock:
++	unlock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++	dput(hidden_old_dir_dentry);
++	dput(hidden_new_dir_dentry);
++	dput(hidden_old_dentry);
++
++out:
++	if (!err) {
++		/* Fixup the new_dentry. */
++		if (bindex < dbstart(new_dentry))
++			set_dbstart(new_dentry, bindex);
++		else if (bindex > dbend(new_dentry))
++			set_dbend(new_dentry, bindex);
++	}
++
++	kfree(wh_name);
++
++	return err;
++}
++
++/*
++ * Main rename code. This is sufficiently complex that it is documented in
++ * Documentation/filesystems/unionfs/rename.txt.
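++ * In outline (a summary of the code below, added for clarity):
++ *
++ *	1. rename the source in its leftmost (old_bstart) branch;
++ *	2. unlink every instance of the destination in branches to the
++ *	   left of that;
++ *	3. if a step fails with a copyup error, copy the source up to a
++ *	   more-leftward writable branch and retry the rename there;
++ *	4. if the source had multiple instances, or a copyup was done,
++ *	   create a whiteout for the source name;
++ *	5. on an unrecoverable error, try to revert the lower dentries
++ *	   to their previous state.
++ *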
This routine calls ++ * __unionfs_rename() above to perform some of the work. ++ */ ++static int do_unionfs_rename(struct inode *old_dir, ++ struct dentry *old_dentry, ++ struct inode *new_dir, ++ struct dentry *new_dentry) ++{ ++ int err = 0; ++ int bindex, bwh_old; ++ int old_bstart, old_bend; ++ int new_bstart, new_bend; ++ int do_copyup = -1; ++ struct dentry *parent_dentry; ++ int local_err = 0; ++ int eio = 0; ++ int revert = 0; ++ struct dentry *wh_old = NULL; ++ ++ old_bstart = dbstart(old_dentry); ++ bwh_old = old_bstart; ++ old_bend = dbend(old_dentry); ++ parent_dentry = old_dentry->d_parent; ++ ++ new_bstart = dbstart(new_dentry); ++ new_bend = dbend(new_dentry); ++ ++ /* Rename source to destination. */ ++ err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry, ++ old_bstart, &wh_old); ++ if (err) { ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ do_copyup = old_bstart - 1; ++ } else ++ revert = 1; ++ ++ /* ++ * Unlink all instances of destination that exist to the left of ++ * bstart of source. On error, revert back, goto out. ++ */ ++ for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) { ++ struct dentry *unlink_dentry; ++ struct dentry *unlink_dir_dentry; ++ ++ unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex); ++ if (!unlink_dentry) ++ continue; ++ ++ unlink_dir_dentry = lock_parent(unlink_dentry); ++ if (!(err = is_robranch_super(old_dir->i_sb, bindex))) ++ err = vfs_unlink(unlink_dir_dentry->d_inode, ++ unlink_dentry); ++ ++ fsstack_copy_attr_times(new_dentry->d_parent->d_inode, ++ unlink_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ new_dentry->d_parent->d_inode->i_nlink = ++ unionfs_get_nlinks(new_dentry->d_parent->d_inode); ++ ++ unlock_dir(unlink_dir_dentry); ++ if (!err) { ++ if (bindex != new_bstart) { ++ dput(unlink_dentry); ++ unionfs_set_lower_dentry_idx(new_dentry, ++ bindex, NULL); ++ } ++ } else if (IS_COPYUP_ERR(err)) { ++ do_copyup = bindex - 1; ++ } else if (revert) { ++ dput(wh_old); ++ goto revert; ++ } ++ } ++ ++ if (do_copyup != -1) { ++ for (bindex = do_copyup; bindex >= 0; bindex--) { ++ /* ++ * copyup the file into some left directory, so that ++ * you can rename it ++ */ ++ err = copyup_dentry(old_dentry->d_parent->d_inode, ++ old_dentry, old_bstart, bindex, ++ NULL, old_dentry->d_inode->i_size); ++ if (!err) { ++ dput(wh_old); ++ bwh_old = bindex; ++ err = __unionfs_rename(old_dir, old_dentry, ++ new_dir, new_dentry, ++ bindex, &wh_old); ++ break; ++ } ++ } ++ } ++ ++ /* make it opaque */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) { ++ err = make_dir_opaque(old_dentry, dbstart(old_dentry)); ++ if (err) ++ goto revert; ++ } ++ ++ /* ++ * Create whiteout for source, only if: ++ * (1) There is more than one underlying instance of source. ++ * (2) We did a copy_up ++ */ ++ if ((old_bstart != old_bend) || (do_copyup != -1)) { ++ struct dentry *hidden_parent; ++ BUG_ON(!wh_old || wh_old->d_inode || bwh_old < 0); ++ hidden_parent = lock_parent(wh_old); ++ local_err = vfs_create(hidden_parent->d_inode, wh_old, S_IRUGO, ++ NULL); ++ unlock_dir(hidden_parent); ++ if (!local_err) ++ set_dbopaque(old_dentry, bwh_old); ++ else { ++ /* ++ * we can't fix anything now, so we cop-out and use ++ * -EIO. ++ */ ++ printk(KERN_ERR "unionfs: can't create a whiteout for " ++ "the source in rename!\n"); ++ err = -EIO; ++ } ++ } ++ ++out: ++ dput(wh_old); ++ return err; ++ ++revert: ++ /* Do revert here. 
*/ ++ local_err = unionfs_refresh_hidden_dentry(new_dentry, old_bstart); ++ if (local_err) { ++ printk(KERN_WARNING "unionfs: revert failed in rename: " ++ "the new refresh failed.\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_hidden_dentry(old_dentry, old_bstart); ++ if (local_err) { ++ printk(KERN_WARNING "unionfs: revert failed in rename: " ++ "the old refresh failed.\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (!unionfs_lower_dentry_idx(new_dentry, bindex) || ++ !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) { ++ printk(KERN_WARNING "unionfs: revert failed in rename: " ++ "the object disappeared from under us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ if (unionfs_lower_dentry_idx(old_dentry, bindex) && ++ unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) { ++ printk(KERN_WARNING "unionfs: revert failed in rename: " ++ "the object was created underneath us!\n"); ++ eio = -EIO; ++ goto revert_out; ++ } ++ ++ local_err = __unionfs_rename(new_dir, new_dentry, ++ old_dir, old_dentry, old_bstart, NULL); ++ ++ /* If we can't fix it, then we cop-out with -EIO. */ ++ if (local_err) { ++ printk(KERN_WARNING "unionfs: revert failed in rename!\n"); ++ eio = -EIO; ++ } ++ ++ local_err = unionfs_refresh_hidden_dentry(new_dentry, bindex); ++ if (local_err) ++ eio = -EIO; ++ local_err = unionfs_refresh_hidden_dentry(old_dentry, bindex); ++ if (local_err) ++ eio = -EIO; ++ ++revert_out: ++ if (eio) ++ err = eio; ++ return err; ++} ++ ++static struct dentry *lookup_whiteout(struct dentry *dentry) ++{ ++ char *whname; ++ int bindex = -1, bstart = -1, bend = -1; ++ struct dentry *parent, *hidden_parent, *wh_dentry; ++ ++ whname = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(whname)) ++ return (void *)whname; ++ ++ parent = dget_parent(dentry); ++ unionfs_lock_dentry(parent); ++ bstart = dbstart(parent); ++ bend = dbend(parent); ++ wh_dentry = ERR_PTR(-ENOENT); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_parent = unionfs_lower_dentry_idx(parent, bindex); ++ if (!hidden_parent) ++ continue; ++ wh_dentry = lookup_one_len(whname, hidden_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(wh_dentry)) ++ continue; ++ if (wh_dentry->d_inode) ++ break; ++ dput(wh_dentry); ++ wh_dentry = ERR_PTR(-ENOENT); ++ } ++ unionfs_unlock_dentry(parent); ++ dput(parent); ++ kfree(whname); ++ return wh_dentry; ++} ++ ++/* ++ * We can't copyup a directory, because it may involve huge numbers of ++ * children, etc. Doing that in the kernel would be bad, so instead we ++ * return EXDEV to the user-space utility that caused this, and let the ++ * user-space recurse and ask us to copy up each file separately. 
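++ * (For example, mv(1) responds to an EXDEV return from rename(2) by
++ * falling back to a copy-and-unlink of the tree, which re-enters unionfs
++ * one file at a time; note added for clarity.)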
++ */ ++static int may_rename_dir(struct dentry *dentry) ++{ ++ int err, bstart; ++ ++ err = check_empty(dentry, NULL); ++ if (err == -ENOTEMPTY) { ++ if (is_robranch(dentry)) ++ return -EXDEV; ++ } else if (err) ++ return err; ++ ++ bstart = dbstart(dentry); ++ if (dbend(dentry) == bstart || dbopaque(dentry) == bstart) ++ return 0; ++ ++ set_dbstart(dentry, bstart + 1); ++ err = check_empty(dentry, NULL); ++ set_dbstart(dentry, bstart); ++ if (err == -ENOTEMPTY) ++ err = -EXDEV; ++ return err; ++} ++ ++int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ int err = 0; ++ struct dentry *wh_dentry; ++ ++ unionfs_read_lock(old_dentry->d_sb); ++ unionfs_double_lock_dentry(old_dentry, new_dentry); ++ ++ if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ if (!d_deleted(new_dentry) && new_dentry->d_inode && ++ !__unionfs_d_revalidate_chain(new_dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) ++ err = unionfs_partial_lookup(old_dentry); ++ else ++ err = may_rename_dir(old_dentry); ++ ++ if (err) ++ goto out; ++ ++ err = unionfs_partial_lookup(new_dentry); ++ if (err) ++ goto out; ++ ++ /* ++ * if new_dentry is already hidden because of whiteout, ++ * simply override it even if the whited-out dir is not empty. ++ */ ++ wh_dentry = lookup_whiteout(new_dentry); ++ if (!IS_ERR(wh_dentry)) ++ dput(wh_dentry); ++ else if (new_dentry->d_inode) { ++ if (S_ISDIR(old_dentry->d_inode->i_mode) != ++ S_ISDIR(new_dentry->d_inode->i_mode)) { ++ err = S_ISDIR(old_dentry->d_inode->i_mode) ? ++ -ENOTDIR : -EISDIR; ++ goto out; ++ } ++ ++ if (S_ISDIR(new_dentry->d_inode->i_mode)) { ++ struct unionfs_dir_state *namelist; ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(new_dentry, &namelist); ++ if (err) ++ goto out; ++ ++ if (!is_robranch(new_dentry)) ++ err = delete_whiteouts(new_dentry, ++ dbstart(new_dentry), ++ namelist); ++ ++ free_rdstate(namelist); ++ ++ if (err) ++ goto out; ++ } ++ } ++ err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry); ++ ++out: ++ if (err) ++ /* clear the new_dentry stuff created */ ++ d_drop(new_dentry); ++ else ++ /* ++ * force re-lookup since the dir on ro branch is not renamed, ++ * and hidden dentries still indicate the un-renamed ones. ++ */ ++ if (S_ISDIR(old_dentry->d_inode->i_mode)) ++ atomic_dec(&UNIONFS_D(old_dentry)->generation); ++ ++ unionfs_unlock_dentry(new_dentry); ++ unionfs_unlock_dentry(old_dentry); ++ unionfs_read_unlock(old_dentry->d_sb); ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.c linux-2.6.22-591/fs/unionfs/sioq.c +--- linux-2.6.22-570/fs/unionfs/sioq.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/sioq.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,118 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#include "union.h" ++ ++/* ++ * Super-user IO work Queue - sometimes we need to perform actions which ++ * would fail due to the unix permissions on the parent directory (e.g., ++ * rmdir a directory which appears empty, but in reality contains ++ * whiteouts). ++ */ ++ ++static struct workqueue_struct *superio_workqueue; ++ ++int __init init_sioq(void) ++{ ++ int err; ++ ++ superio_workqueue = create_workqueue("unionfs_siod"); ++ if (!IS_ERR(superio_workqueue)) ++ return 0; ++ ++ err = PTR_ERR(superio_workqueue); ++ printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err); ++ superio_workqueue = NULL; ++ return err; ++} ++ ++void stop_sioq(void) ++{ ++ if (superio_workqueue) ++ destroy_workqueue(superio_workqueue); ++} ++ ++void run_sioq(work_func_t func, struct sioq_args *args) ++{ ++ INIT_WORK(&args->work, func); ++ ++ init_completion(&args->comp); ++ while (!queue_work(superio_workqueue, &args->work)) { ++ /* TODO: do accounting if needed */ ++ schedule(); ++ } ++ wait_for_completion(&args->comp); ++} ++ ++void __unionfs_create(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct create_args *c = &args->create; ++ ++ args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd); ++ complete(&args->comp); ++} ++ ++void __unionfs_mkdir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mkdir_args *m = &args->mkdir; ++ ++ args->err = vfs_mkdir(m->parent, m->dentry, m->mode); ++ complete(&args->comp); ++} ++ ++void __unionfs_mknod(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct mknod_args *m = &args->mknod; ++ ++ args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev); ++ complete(&args->comp); ++} ++ ++void __unionfs_symlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct symlink_args *s = &args->symlink; ++ ++ args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode); ++ complete(&args->comp); ++} ++ ++void __unionfs_unlink(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct unlink_args *u = &args->unlink; ++ ++ args->err = vfs_unlink(u->parent, u->dentry); ++ complete(&args->comp); ++} ++ ++void __delete_whiteouts(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ struct deletewh_args *d = &args->deletewh; ++ ++ args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist); ++ complete(&args->comp); ++} ++ ++void __is_opaque_dir(struct work_struct *work) ++{ ++ struct sioq_args *args = container_of(work, struct sioq_args, work); ++ ++ args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ complete(&args->comp); ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.h linux-2.6.22-591/fs/unionfs/sioq.h +--- linux-2.6.22-570/fs/unionfs/sioq.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/sioq.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2004-2006 David P. 
Quigley ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _SIOQ_H ++#define _SIOQ_H ++ ++struct deletewh_args { ++ struct unionfs_dir_state *namelist; ++ struct dentry *dentry; ++ int bindex; ++}; ++ ++struct is_opaque_args { ++ struct dentry *dentry; ++}; ++ ++struct create_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ struct nameidata *nd; ++}; ++ ++struct mkdir_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++}; ++ ++struct mknod_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ umode_t mode; ++ dev_t dev; ++}; ++ ++struct symlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++ char *symbuf; ++ umode_t mode; ++}; ++ ++struct unlink_args { ++ struct inode *parent; ++ struct dentry *dentry; ++}; ++ ++ ++struct sioq_args { ++ struct completion comp; ++ struct work_struct work; ++ int err; ++ void *ret; ++ ++ union { ++ struct deletewh_args deletewh; ++ struct is_opaque_args is_opaque; ++ struct create_args create; ++ struct mkdir_args mkdir; ++ struct mknod_args mknod; ++ struct symlink_args symlink; ++ struct unlink_args unlink; ++ }; ++}; ++ ++/* Extern definitions for SIOQ functions */ ++extern int __init init_sioq(void); ++extern void stop_sioq(void); ++extern void run_sioq(work_func_t func, struct sioq_args *args); ++ ++/* Extern definitions for our privilege escalation helpers */ ++extern void __unionfs_create(struct work_struct *work); ++extern void __unionfs_mkdir(struct work_struct *work); ++extern void __unionfs_mknod(struct work_struct *work); ++extern void __unionfs_symlink(struct work_struct *work); ++extern void __unionfs_unlink(struct work_struct *work); ++extern void __delete_whiteouts(struct work_struct *work); ++extern void __is_opaque_dir(struct work_struct *work); ++ ++#endif /* _SIOQ_H */ +diff -Nurb linux-2.6.22-570/fs/unionfs/subr.c linux-2.6.22-591/fs/unionfs/subr.c +--- linux-2.6.22-570/fs/unionfs/subr.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/subr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * Pass an unionfs dentry and an index. It will try to create a whiteout ++ * for the filename in dentry, and will try in branch 'index'. On error, ++ * it will proceed to a branch to the left. 
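++ * (Note added for clarity: a whiteout is an empty marker file; for a
++ * name such as "foo", alloc_whname() conventionally produces ".wh.foo".)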
++ */ ++int create_whiteout(struct dentry *dentry, int start) ++{ ++ int bstart, bend, bindex; ++ struct dentry *hidden_dir_dentry; ++ struct dentry *hidden_dentry; ++ struct dentry *hidden_wh_dentry; ++ char *name = NULL; ++ int err = -EINVAL; ++ ++ verify_locked(dentry); ++ ++ bstart = dbstart(dentry); ++ bend = dbend(dentry); ++ ++ /* create dentry's whiteout equivalent */ ++ name = alloc_whname(dentry->d_name.name, dentry->d_name.len); ++ if (IS_ERR(name)) { ++ err = PTR_ERR(name); ++ goto out; ++ } ++ ++ for (bindex = start; bindex >= 0; bindex--) { ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ ++ if (!hidden_dentry) { ++ /* ++ * if hidden dentry is not present, create the ++ * entire hidden dentry directory structure and go ++ * ahead. Since we want to just create whiteout, we ++ * only want the parent dentry, and hence get rid of ++ * this dentry. ++ */ ++ hidden_dentry = create_parents(dentry->d_inode, ++ dentry, bindex); ++ if (!hidden_dentry || IS_ERR(hidden_dentry)) { ++ printk(KERN_DEBUG "unionfs: create_parents " ++ "failed for bindex = %d\n", bindex); ++ continue; ++ } ++ } ++ ++ hidden_wh_dentry = ++ lookup_one_len(name, hidden_dentry->d_parent, ++ dentry->d_name.len + UNIONFS_WHLEN); ++ if (IS_ERR(hidden_wh_dentry)) ++ continue; ++ ++ /* ++ * The whiteout already exists. This used to be impossible, ++ * but now is possible because of opaqueness. ++ */ ++ if (hidden_wh_dentry->d_inode) { ++ dput(hidden_wh_dentry); ++ err = 0; ++ goto out; ++ } ++ ++ hidden_dir_dentry = lock_parent(hidden_wh_dentry); ++ if (!(err = is_robranch_super(dentry->d_sb, bindex))) ++ err = vfs_create(hidden_dir_dentry->d_inode, ++ hidden_wh_dentry, ++ ~current->fs->umask & S_IRWXUGO, ++ NULL); ++ unlock_dir(hidden_dir_dentry); ++ dput(hidden_wh_dentry); ++ ++ if (!err || !IS_COPYUP_ERR(err)) ++ break; ++ } ++ ++ /* set dbopaque so that lookup will not proceed after this branch */ ++ if (!err) ++ set_dbopaque(dentry, bindex); ++ ++out: ++ kfree(name); ++ return err; ++} ++ ++/* ++ * This is a helper function for rename, which ends up with hosed over ++ * dentries when it needs to revert. 
++ */ ++int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex) ++{ ++ struct dentry *hidden_dentry; ++ struct dentry *hidden_parent; ++ int err = 0; ++ ++ verify_locked(dentry); ++ ++ unionfs_lock_dentry(dentry->d_parent); ++ hidden_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex); ++ unionfs_unlock_dentry(dentry->d_parent); ++ ++ BUG_ON(!S_ISDIR(hidden_parent->d_inode->i_mode)); ++ ++ hidden_dentry = lookup_one_len(dentry->d_name.name, hidden_parent, ++ dentry->d_name.len); ++ if (IS_ERR(hidden_dentry)) { ++ err = PTR_ERR(hidden_dentry); ++ goto out; ++ } ++ ++ dput(unionfs_lower_dentry_idx(dentry, bindex)); ++ iput(unionfs_lower_inode_idx(dentry->d_inode, bindex)); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL); ++ ++ if (!hidden_dentry->d_inode) { ++ dput(hidden_dentry); ++ unionfs_set_lower_dentry_idx(dentry, bindex, NULL); ++ } else { ++ unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry); ++ unionfs_set_lower_inode_idx(dentry->d_inode, bindex, ++ igrab(hidden_dentry->d_inode)); ++ } ++ ++out: ++ return err; ++} ++ ++int make_dir_opaque(struct dentry *dentry, int bindex) ++{ ++ int err = 0; ++ struct dentry *hidden_dentry, *diropq; ++ struct inode *hidden_dir; ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ hidden_dir = hidden_dentry->d_inode; ++ BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) || ++ !S_ISDIR(hidden_dir->i_mode)); ++ ++ mutex_lock(&hidden_dir->i_mutex); ++ diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry, ++ sizeof(UNIONFS_DIR_OPAQUE) - 1); ++ if (IS_ERR(diropq)) { ++ err = PTR_ERR(diropq); ++ goto out; ++ } ++ ++ if (!diropq->d_inode) ++ err = vfs_create(hidden_dir, diropq, S_IRUGO, NULL); ++ if (!err) ++ set_dbopaque(dentry, bindex); ++ ++ dput(diropq); ++ ++out: ++ mutex_unlock(&hidden_dir->i_mutex); ++ return err; ++} ++ ++/* ++ * returns the sum of the n_link values of all the underlying inodes of the ++ * passed inode ++ */ ++int unionfs_get_nlinks(struct inode *inode) ++{ ++ int sum_nlinks = 0; ++ int dirs = 0; ++ int bindex; ++ struct inode *hidden_inode; ++ ++ /* don't bother to do all the work since we're unlinked */ ++ if (inode->i_nlink == 0) ++ return 0; ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return unionfs_lower_inode(inode)->i_nlink; ++ ++ for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) { ++ hidden_inode = unionfs_lower_inode_idx(inode, bindex); ++ ++ /* ignore files */ ++ if (!hidden_inode || !S_ISDIR(hidden_inode->i_mode)) ++ continue; ++ ++ BUG_ON(hidden_inode->i_nlink < 0); ++ ++ /* A deleted directory. */ ++ if (hidden_inode->i_nlink == 0) ++ continue; ++ dirs++; ++ ++ /* ++ * A broken directory... ++ * ++ * Some filesystems don't properly set the number of links ++ * on empty directories ++ */ ++ if (hidden_inode->i_nlink == 1) ++ sum_nlinks += 2; ++ else ++ sum_nlinks += (hidden_inode->i_nlink - 2); ++ } ++ ++ return (!dirs ? 
0 : sum_nlinks + 2); ++} ++ ++/* construct whiteout filename */ ++char *alloc_whname(const char *name, int len) ++{ ++ char *buf; ++ ++ buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL); ++ if (!buf) ++ return ERR_PTR(-ENOMEM); ++ ++ strcpy(buf, UNIONFS_WHPFX); ++ strlcat(buf, name, len + UNIONFS_WHLEN + 1); ++ ++ return buf; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/super.c linux-2.6.22-591/fs/unionfs/super.c +--- linux-2.6.22-570/fs/unionfs/super.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/super.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,1002 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* ++ * The inode cache is used with alloc_inode for both our inode info and the ++ * vfs inode. ++ */ ++static struct kmem_cache *unionfs_inode_cachep; ++ ++static void unionfs_read_inode(struct inode *inode) ++{ ++ extern struct address_space_operations unionfs_aops; ++ int size; ++ struct unionfs_inode_info *info = UNIONFS_I(inode); ++ ++ unionfs_read_lock(inode->i_sb); ++ ++ memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ info->bstart = -1; ++ info->bend = -1; ++ atomic_set(&info->generation, ++ atomic_read(&UNIONFS_SB(inode->i_sb)->generation)); ++ spin_lock_init(&info->rdlock); ++ info->rdcount = 1; ++ info->hashsize = -1; ++ INIT_LIST_HEAD(&info->readdircache); ++ ++ size = sbmax(inode->i_sb) * sizeof(struct inode *); ++ info->lower_inodes = kzalloc(size, GFP_KERNEL); ++ if (!info->lower_inodes) { ++ printk(KERN_ERR "unionfs: no kernel memory when allocating " ++ "lower-pointer array!\n"); ++ BUG(); ++ } ++ ++ inode->i_version++; ++ inode->i_op = &unionfs_main_iops; ++ inode->i_fop = &unionfs_main_fops; ++ ++ inode->i_mapping->a_ops = &unionfs_aops; ++ ++ unionfs_read_unlock(inode->i_sb); ++} ++ ++/* ++ * we now define delete_inode, because there are two VFS paths that may ++ * destroy an inode: one of them calls clear inode before doing everything ++ * else that's needed, and the other is fine. This way we truncate the inode ++ * size (and its pages) and then clear our own inode, which will do an iput ++ * on our and the lower inode. ++ * ++ * No need to lock sb info's rwsem. ++ */ ++static void unionfs_delete_inode(struct inode *inode) ++{ ++ inode->i_size = 0; /* every f/s seems to do that */ ++ ++ if (inode->i_data.nrpages) ++ truncate_inode_pages(&inode->i_data, 0); ++ ++ clear_inode(inode); ++} ++ ++/* ++ * final actions when unmounting a file system ++ * ++ * No need to lock rwsem. ++ */ ++static void unionfs_put_super(struct super_block *sb) ++{ ++ int bindex, bstart, bend; ++ struct unionfs_sb_info *spd; ++ int leaks = 0; ++ ++ spd = UNIONFS_SB(sb); ++ if (!spd) ++ return; ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ /* Make sure we have no leaks of branchget/branchput. 
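++ * Every branchget() taken when a file was opened must have been paired
++ * with a branchput() on release, so each per-branch count must be back
++ * to zero by the time the union is unmounted.
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: the pairing the leak
++ * check below assumes.  branchget()/branchput()/branch_count() are taken
++ * here to be the unionfs accessors for the atomic open_files counter in
++ * struct unionfs_data (see union.h later in this patch).
++ */
++#if 0	/* example */
++static int example_branch_ref_pairing(struct super_block *sb, int bindex)
++{
++	branchget(sb, bindex);	/* e.g. in unionfs_open: count++ */
++	/* ... the lower file on branch bindex is in use ... */
++	branchput(sb, bindex);	/* e.g. in unionfs_file_release: count-- */
++	return branch_count(sb, bindex);	/* must reach 0 by unmount */
++}
++#endif
++
++/*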
*/
++	for (bindex = bstart; bindex <= bend; bindex++)
++		if (branch_count(sb, bindex) != 0) {
++			printk("unionfs: branch %d has %d references left!\n",
++			       bindex, branch_count(sb, bindex));
++			leaks = 1;
++		}
++	BUG_ON(leaks != 0);
++
++	kfree(spd->data);
++	kfree(spd);
++	sb->s_fs_info = NULL;
++}
++
++/*
++ * Since people use this to answer the "How big of a file can I write?"
++ * question, we report the size of the highest priority branch as the size of
++ * the union.
++ */
++static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
++{
++	int err = 0;
++	struct super_block *sb;
++	struct dentry *lower_dentry;
++
++	sb = dentry->d_sb;
++
++	unionfs_read_lock(sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	lower_dentry = unionfs_lower_dentry(sb->s_root);
++	err = vfs_statfs(lower_dentry, buf);
++
++	/* set return buf to our f/s to avoid confusing user-level utils */
++	buf->f_type = UNIONFS_SUPER_MAGIC;
++
++	/*
++	 * Our maximum file name can be shorter by a few bytes because every
++	 * file name could potentially be whited-out.
++	 */
++	buf->f_namelen -= UNIONFS_WHLEN;
++
++	memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
++	memset(&buf->f_spare, 0, sizeof(buf->f_spare));
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(sb);
++	return err;
++}
++
++/* handle mode changing during remount */
++static noinline int do_remount_mode_option(char *optarg, int cur_branches,
++					   struct unionfs_data *new_data,
++					   struct path *new_lower_paths)
++{
++	int err = -EINVAL;
++	int perms, idx;
++	char *modename = strchr(optarg, '=');
++	struct nameidata nd;
++
++	/* by now, optarg contains the branch name */
++	if (!*optarg) {
++		printk("unionfs: no branch specified for mode change.\n");
++		goto out;
++	}
++	if (!modename) {
++		printk("unionfs: branch \"%s\" requires a mode.\n", optarg);
++		goto out;
++	}
++	*modename++ = '\0';
++	perms = __parse_branch_mode(modename);
++	if (perms == 0) {
++		printk("unionfs: invalid mode \"%s\" for \"%s\".\n",
++		       modename, optarg);
++		goto out;
++	}
++
++	/*
++	 * Find matching branch index.  For now, this assumes that nothing
++	 * has been mounted on top of this Unionfs stack.  Once we have /odf
++	 * and cache-coherency resolved, we'll address the branch-path
++	 * uniqueness.
++	 */
++	err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       optarg, err);
++		goto out;
++	}
++	for (idx = 0; idx < cur_branches; idx++)
++		if (nd.mnt == new_lower_paths[idx].mnt &&
++		    nd.dentry == new_lower_paths[idx].dentry)
++			break;
++	path_release(&nd);	/* no longer needed */
++	if (idx == cur_branches) {
++		printk(KERN_WARNING "unionfs: branch \"%s\" "
++		       "not found\n", optarg);
++		err = -ENOENT;
++		goto out;
++	}
++	/* change the mode of the matching branch */
++	new_data[idx].branchperms = perms;
++	err = 0;
++
++out:
++	return err;
++}
++
++/* handle branch deletion during remount */
++static noinline int do_remount_del_option(char *optarg, int cur_branches,
++					  struct unionfs_data *new_data,
++					  struct path *new_lower_paths)
++{
++	int err = -EINVAL;
++	int idx;
++	struct nameidata nd;
++
++	/* optarg contains the branch name to delete */
++
++	/*
++	 * Find matching branch index.  For now, this assumes that nothing
++	 * has been mounted on top of this Unionfs stack.  Once we have /odf
++	 * and cache-coherency resolved, we'll address the branch-path
++	 * uniqueness.
++	 */
++	err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       optarg, err);
++		goto out;
++	}
++	for (idx = 0; idx < cur_branches; idx++)
++		if (nd.mnt == new_lower_paths[idx].mnt &&
++		    nd.dentry == new_lower_paths[idx].dentry)
++			break;
++	path_release(&nd);	/* no longer needed */
++	if (idx == cur_branches) {
++		printk(KERN_WARNING "unionfs: branch \"%s\" "
++		       "not found\n", optarg);
++		err = -ENOENT;
++		goto out;
++	}
++	/* check if there are any open files on the branch to be deleted */
++	if (atomic_read(&new_data[idx].open_files) > 0) {
++		err = -EBUSY;
++		goto out;
++	}
++
++	/*
++	 * Now we have to delete the branch.  First, release any handles it
++	 * has.  Then, move the remaining array indexes past "idx" in
++	 * new_data and new_lower_paths one to the left.  Finally, adjust
++	 * cur_branches.
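++ * (The shift below is the standard array-element removal idiom; a
++ * standalone sketch follows.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: deleting element idx
++ * from a packed array of n elements by shifting the tail left.  memmove()
++ * is required because the source and destination ranges overlap, which
++ * memcpy() does not allow.
++ */
++#if 0	/* example */
++static void example_delete_idx(struct unionfs_data *arr, int n, int idx)
++{
++	if (idx < n - 1)
++		memmove(&arr[idx], &arr[idx + 1],
++			(n - 1 - idx) * sizeof(*arr));
++	/* caller then treats the array as having n - 1 elements */
++}
++#endif
++
++/*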
++ */ ++ pathput(&new_lower_paths[idx]); ++ ++ if (idx < cur_branches - 1) { ++ /* if idx==cur_branches-1, we delete last branch: easy */ ++ memmove(&new_data[idx], &new_data[idx+1], ++ (cur_branches - 1 - idx) * ++ sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx], &new_lower_paths[idx+1], ++ (cur_branches - 1 - idx) * sizeof(struct path)); ++ } ++ ++ err = 0; ++out: ++ return err; ++} ++ ++/* handle branch insertion during remount */ ++static noinline int do_remount_add_option(char *optarg, int cur_branches, ++ struct unionfs_data *new_data, ++ struct path *new_lower_paths, ++ int *high_branch_id) ++{ ++ int err = -EINVAL; ++ int perms; ++ int idx = 0; /* default: insert at beginning */ ++ char *new_branch , *modename = NULL; ++ struct nameidata nd; ++ ++ /* ++ * optarg can be of several forms: ++ * ++ * /bar:/foo insert /foo before /bar ++ * /bar:/foo=ro insert /foo in ro mode before /bar ++ * /foo insert /foo in the beginning (prepend) ++ * :/foo insert /foo at the end (append) ++ */ ++ if (*optarg == ':') { /* append? */ ++ new_branch = optarg + 1; /* skip ':' */ ++ idx = cur_branches; ++ goto found_insertion_point; ++ } ++ new_branch = strchr(optarg, ':'); ++ if (!new_branch) { /* prepend? */ ++ new_branch = optarg; ++ goto found_insertion_point; ++ } ++ *new_branch++ = '\0'; /* holds path+mode of new branch */ ++ ++ /* ++ * Find matching branch index. For now, this assumes that nothing ++ * has been mounted on top of this Unionfs stack. Once we have /odf ++ * and cache-coherency resolved, we'll address the branch-path ++ * uniqueness. ++ */ ++ err = path_lookup(optarg, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_WARNING "unionfs: error accessing " ++ "hidden directory \"%s\" (error %d)\n", ++ optarg, err); ++ goto out; ++ } ++ for (idx=0; idx < cur_branches; idx++) ++ if (nd.mnt == new_lower_paths[idx].mnt && ++ nd.dentry == new_lower_paths[idx].dentry) ++ break; ++ path_release(&nd); /* no longer needed */ ++ if (idx == cur_branches) { ++ printk(KERN_WARNING "unionfs: branch \"%s\" " ++ "not found\n", optarg); ++ err = -ENOENT; ++ goto out; ++ } ++ ++ /* ++ * At this point idx will hold the index where the new branch should ++ * be inserted before. ++ */ ++found_insertion_point: ++ /* find the mode for the new branch */ ++ if (new_branch) ++ modename = strchr(new_branch, '='); ++ if (modename) ++ *modename++ = '\0'; ++ perms = parse_branch_mode(modename); ++ ++ if (!new_branch || !*new_branch) { ++ printk(KERN_WARNING "unionfs: null new branch\n"); ++ err = -EINVAL; ++ goto out; ++ } ++ err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd); ++ if (err) { ++ printk(KERN_WARNING "unionfs: error accessing " ++ "hidden directory \"%s\" (error %d)\n", ++ new_branch, err); ++ goto out; ++ } ++ /* ++ * It's probably safe to check_mode the new branch to insert. Note: ++ * we don't allow inserting branches which are unionfs's by ++ * themselves (check_branch returns EINVAL in that case). This is ++ * because this code base doesn't support stacking unionfs: the ODF ++ * code base supports that correctly. ++ */ ++ if ((err = check_branch(&nd))) { ++ printk(KERN_WARNING "unionfs: hidden directory " ++ "\"%s\" is not a valid branch\n", optarg); ++ path_release(&nd); ++ goto out; ++ } ++ ++ /* ++ * Now we have to insert the new branch. But first, move the bits ++ * to make space for the new branch, if needed. Finally, adjust ++ * cur_branches. ++ * We don't release nd here; it's kept until umount/remount. 
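++ * (A standalone sketch of how the argument forms listed above decompose
++ * follows.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: splitting an "add"
++ * argument of the form "[/before:]/new[=mode]" the way the code above
++ * does, using only strchr() and in-place '\0' termination.
++ */
++#if 0	/* example */
++static void example_split_add_arg(char *optarg)
++{
++	char *new_branch, *modename;
++
++	if (*optarg == ':') {		/* ":/foo" -> append /foo */
++		new_branch = optarg + 1;
++	} else {
++		new_branch = strchr(optarg, ':');
++		if (!new_branch)	/* "/foo" -> prepend /foo */
++			new_branch = optarg;
++		else			/* "/bar:/foo" -> before /bar */
++			*new_branch++ = '\0';
++	}
++	modename = strchr(new_branch, '=');
++	if (modename)			/* "/foo=ro" -> path + mode */
++		*modename++ = '\0';
++}
++#endif
++
++/*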
++ */ ++ if (idx < cur_branches) { ++ /* if idx==cur_branches, we append: easy */ ++ memmove(&new_data[idx+1], &new_data[idx], ++ (cur_branches - idx) * sizeof(struct unionfs_data)); ++ memmove(&new_lower_paths[idx+1], &new_lower_paths[idx], ++ (cur_branches - idx) * sizeof(struct path)); ++ } ++ new_lower_paths[idx].dentry = nd.dentry; ++ new_lower_paths[idx].mnt = nd.mnt; ++ ++ new_data[idx].sb = nd.dentry->d_sb; ++ atomic_set(&new_data[idx].open_files, 0); ++ new_data[idx].branchperms = perms; ++ new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */ ++ ++ err = 0; ++out: ++ return err; ++} ++ ++ ++/* ++ * Support branch management options on remount. ++ * ++ * See Documentation/filesystems/unionfs/ for details. ++ * ++ * @flags: numeric mount options ++ * @options: mount options string ++ * ++ * This function can rearrange a mounted union dynamically, adding and ++ * removing branches, including changing branch modes. Clearly this has to ++ * be done safely and atomically. Luckily, the VFS already calls this ++ * function with lock_super(sb) and lock_kernel() held, preventing ++ * concurrent mixing of new mounts, remounts, and unmounts. Moreover, ++ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb) ++ * to purge dentries/inodes from our superblock, and also called ++ * fsync_super(sb) to purge any dirty pages. So we're good. ++ * ++ * XXX: however, our remount code may also need to invalidate mapped pages ++ * so as to force them to be re-gotten from the (newly reconfigured) lower ++ * branches. This has to wait for proper mmap and cache coherency support ++ * in the VFS. ++ * ++ */ ++static int unionfs_remount_fs(struct super_block *sb, int *flags, ++ char *options) ++{ ++ int err = 0; ++ int i; ++ char *optionstmp, *tmp_to_free; /* kstrdup'ed of "options" */ ++ char *optname; ++ int cur_branches = 0; /* no. of current branches */ ++ int new_branches = 0; /* no. of branches actually left in the end */ ++ int add_branches; /* est. no. of branches to add */ ++ int del_branches; /* est. no. of branches to del */ ++ int max_branches; /* max possible no. of branches */ ++ struct unionfs_data *new_data = NULL, *tmp_data = NULL; ++ struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL; ++ struct inode **new_lower_inodes = NULL; ++ int new_high_branch_id; /* new high branch ID */ ++ int size; /* memory allocation size, temp var */ ++ int old_ibstart, old_ibend; ++ ++ unionfs_write_lock(sb); ++ ++ /* ++ * The VFS will take care of "ro" and "rw" flags, and we can safely ++ * ignore MS_SILENT, but anything else left over is an error. So we ++ * need to check if any other flags may have been passed (none are ++ * allowed/supported as of now). ++ */ ++ if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) { ++ printk(KERN_WARNING ++ "unionfs: remount flags 0x%x unsupported\n", *flags); ++ err = -EINVAL; ++ goto out_error; ++ } ++ ++ /* ++ * If 'options' is NULL, it's probably because the user just changed ++ * the union to a "ro" or "rw" and the VFS took care of it. So ++ * nothing to do and we're done. ++ */ ++ if (!options || options[0] == '\0') ++ goto out_error; ++ ++ /* ++ * Find out how many branches we will have in the end, counting ++ * "add" and "del" commands. Copy the "options" string because ++ * strsep modifies the string and we need it later. ++ */ ++ optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL); ++ if (!optionstmp) { ++ err = -ENOMEM; ++ goto out_free; ++ } ++ new_branches = cur_branches = sbmax(sb); /* current no. 
branches */
++	add_branches = del_branches = 0;
++	new_high_branch_id = sbhbid(sb);	/* save current high_branch_id */
++	while ((optname = strsep(&optionstmp, ",")) != NULL) {
++		char *optarg;
++
++		if (!optname || !*optname)
++			continue;
++
++		optarg = strchr(optname, '=');
++		if (optarg)
++			*optarg++ = '\0';
++
++		if (!strcmp("add", optname))
++			add_branches++;
++		else if (!strcmp("del", optname))
++			del_branches++;
++	}
++	kfree(tmp_to_free);
++	/* after all changes, will we have at least one branch left? */
++	if ((new_branches + add_branches - del_branches) < 1) {
++		printk(KERN_WARNING
++		       "unionfs: no branches left after remount\n");
++		err = -EINVAL;
++		goto out_free;
++	}
++
++	/*
++	 * Since we haven't actually parsed all the add/del options, nor
++	 * have we checked them for errors, we don't know for sure how many
++	 * branches we will have after all changes have taken place.  In
++	 * fact, the total number of branches left could be less than what
++	 * we have now.  So we need to allocate space for a temporary
++	 * placeholder that is at least as large as the maximum number of
++	 * branches we *could* have, which is the current number plus all
++	 * the additions.  Once we're done with these temp placeholders, we
++	 * may have to re-allocate the final size, copy over from the temp,
++	 * and then free the temps (done near the end of this function).
++	 */
++	max_branches = cur_branches + add_branches;
++	/* allocate space for new pointers to hidden dentry */
++	tmp_data = kcalloc(max_branches,
++			   sizeof(struct unionfs_data), GFP_KERNEL);
++	if (!tmp_data) {
++		err = -ENOMEM;
++		goto out_free;
++	}
++	/* allocate space for new pointers to lower paths */
++	tmp_lower_paths = kcalloc(max_branches,
++				  sizeof(struct path), GFP_KERNEL);
++	if (!tmp_lower_paths) {
++		err = -ENOMEM;
++		goto out_free;
++	}
++	/* copy current info into new placeholders, incrementing refcnts */
++	memcpy(tmp_data, UNIONFS_SB(sb)->data,
++	       cur_branches * sizeof(struct unionfs_data));
++	memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
++	       cur_branches * sizeof(struct path));
++	for (i = 0; i < cur_branches; i++)
++		pathget(&tmp_lower_paths[i]);	/* drop refs at end of fxn */
++
++	/*
++	 * For each branch command, apply the change to the temp branch
++	 * list.  Because we already dup'ed the old arrays above and took
++	 * extra refcnts, any error here can simply free the temps and no
++	 * harm is done.
++	 */
++	while ((optname = strsep(&options, ",")) != NULL) {
++		char *optarg;
++
++		if (!optname || !*optname)
++			continue;
++
++		/* break the option on the '=' symbol: "optname=optarg" */
++		optarg = strchr(optname, '=');
++		if (optarg)
++			*optarg++ = '\0';
++
++		/* incgen remount option: only bump the generation */
++		if (!strcmp("incgen", optname)) {
++			err = 0;
++			goto out_no_change;
++		}
++
++		if (!strcmp("add", optname)) {
++			err = do_remount_add_option(optarg, new_branches,
++						    tmp_data,
++						    tmp_lower_paths,
++						    &new_high_branch_id);
++			if (err)
++				goto out_release;
++			new_branches++;
++			if (new_branches > UNIONFS_MAX_BRANCHES) {
++				printk("unionfs: command exceeds "
++				       "%d branches\n", UNIONFS_MAX_BRANCHES);
++				err = -E2BIG;
++				goto out_release;
++			}
++			continue;
++		}
++		if (!strcmp("del", optname)) {
++			err = do_remount_del_option(optarg, new_branches,
++						    tmp_data,
++						    tmp_lower_paths);
++			if (err)
++				goto out_release;
++			new_branches--;
++			continue;
++		}
++		if (!strcmp("mode", optname)) {
++			err = do_remount_mode_option(optarg, new_branches,
++						     tmp_data,
++						     tmp_lower_paths);
++			if (err)
++				goto out_release;
++			continue;
++		}
++
++		/*
++		 * When you use "mount -o remount,ro", mount(8) will
++		 * reportedly pass the original dirs= string from
++		 * /proc/mounts.  So for now, we have to ignore dirs= and
++		 * not consider it an error, unless we want to allow users
++		 * to pass dirs= in remount.  Note that to allow the VFS to
++		 * actually process the ro/rw remount options, we have to
++		 * return 0 from this function.
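++ * (The two-pass shape of this parser is sketched below.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: the remount parser
++ * above walks the option string twice -- a first strsep() pass over a
++ * kstrdup()'ed copy merely counts "add"/"del" commands so the temporary
++ * arrays can be sized, and the second pass applies the commands.  The
++ * caller must pass a disposable copy, since strsep() consumes it.
++ */
++#if 0	/* example */
++static void example_count_commands(char *opts, int *adds, int *dels)
++{
++	char *opt;
++
++	*adds = *dels = 0;
++	while ((opt = strsep(&opts, ",")) != NULL) {
++		char *arg = strchr(opt, '=');
++
++		if (arg)
++			*arg = '\0';	/* keep only the option name */
++		if (!strcmp(opt, "add"))
++			(*adds)++;
++		else if (!strcmp(opt, "del"))
++			(*dels)++;
++	}
++}
++#endif
++
++/*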
++		 */
++		if (!strcmp("dirs", optname)) {
++			printk(KERN_WARNING
++			       "unionfs: remount ignoring option \"%s\".\n",
++			       optname);
++			continue;
++		}
++
++		err = -EINVAL;
++		printk(KERN_WARNING
++		       "unionfs: unrecognized option \"%s\"\n", optname);
++		goto out_release;
++	}
++
++out_no_change:
++
++	/******************************************************************
++	 * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
++	 * see if we need to allocate a small-sized new vector, copy the
++	 * vectors to their correct place, release the refcnt of the older
++	 * ones, and return.  Also handle invalidating any pages that will
++	 * have to be re-read.
++	 *******************************************************************/
++
++	if (!(tmp_data[0].branchperms & MAY_WRITE)) {
++		printk("unionfs: leftmost branch cannot be read-only "
++		       "(use \"remount,ro\" to create a read-only union)\n");
++		err = -EINVAL;
++		goto out_release;
++	}
++
++	/* (re)allocate space for new pointers to hidden dentry */
++	size = new_branches * sizeof(struct unionfs_data);
++	new_data = krealloc(tmp_data, size, GFP_KERNEL);
++	if (!new_data) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/* allocate space for new pointers to lower paths */
++	size = new_branches * sizeof(struct path);
++	new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
++	if (!new_lower_paths) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/* allocate space for new pointers to lower inodes */
++	new_lower_inodes = kcalloc(new_branches,
++				   sizeof(struct inode *), GFP_KERNEL);
++	if (!new_lower_inodes) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/*
++	 * OK, just before we actually put the new set of branches in place,
++	 * we need to ensure that our own f/s has no dirty objects left.
++	 * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
++	 * fsync_super(sb), taking care of dentries, inodes, and dirty
++	 * pages.  So all that's left is for us to invalidate any leftover
++	 * (non-dirty) pages to ensure that they will be re-read from the
++	 * new lower branches (and to support mmap).
++	 */
++
++	/*
++	 * Now we call drop_pagecache_sb() to invalidate all pages in this
++	 * super.  This function calls invalidate_inode_pages(mapping),
++	 * which calls invalidate_mapping_pages(): the latter, however, will
++	 * not invalidate pages which are dirty, locked, under writeback, or
++	 * mapped into page tables.  We shouldn't have to worry about dirty
++	 * or under-writeback pages, because do_remount_sb() called
++	 * fsync_super() which would not have returned until all dirty pages
++	 * were flushed.
++	 *
++	 * But do we have to worry about locked pages?  Is there any chance
++	 * that in here we'll get locked pages?
++	 *
++	 * XXX: what about pages mapped into pagetables?  Are these pages
++	 * which user processes may have mmap(2)'ed?  If so, then we need to
++	 * invalidate those too, no?  Maybe we'll have to write our own
++	 * version of invalidate_mapping_pages() which also handles mapped
++	 * pages.
++	 *
++	 * XXX: Alternatively, maybe we should call truncate_inode_pages(),
++	 * which uses two passes over the pages list, and will truncate all
++	 * pages.
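++ * (A minimal sketch of the per-mapping call discussed above follows.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: invalidating the
++ * clean page cache of a single mapping with invalidate_mapping_pages(),
++ * which skips dirty, locked, writeback and mapped pages -- exactly the
++ * limitation the comment above worries about.
++ */
++#if 0	/* example */
++static void example_invalidate_clean_pages(struct inode *inode)
++{
++	/* drop every clean, unlocked, unmapped cached page of one inode */
++	invalidate_mapping_pages(inode->i_mapping, 0, -1);
++}
++#endif
++
++/*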
++	 */
++	drop_pagecache_sb(sb);
++
++	/* copy new vectors into their correct place */
++	tmp_data = UNIONFS_SB(sb)->data;
++	UNIONFS_SB(sb)->data = new_data;
++	new_data = NULL;	/* so don't free good pointers below */
++	tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
++	UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
++	new_lower_paths = NULL;	/* so don't free good pointers below */
++
++	/* update our unionfs_sb_info and root dentry index of last branch */
++	i = sbmax(sb);	/* save no. of branches to release at end */
++	sbend(sb) = new_branches - 1;
++	set_dbend(sb->s_root, new_branches - 1);
++	old_ibstart = ibstart(sb->s_root->d_inode);
++	old_ibend = ibend(sb->s_root->d_inode);
++	ibend(sb->s_root->d_inode) = new_branches - 1;
++	UNIONFS_D(sb->s_root)->bcount = new_branches;
++	new_branches = i;	/* no. of branches to release below */
++
++	/*
++	 * Update lower inodes: 3 steps
++	 * 1. grab ref on all new lower inodes
++	 */
++	for (i = dbstart(sb->s_root); i <= dbend(sb->s_root); i++) {
++		struct dentry *lower_dentry =
++			unionfs_lower_dentry_idx(sb->s_root, i);
++		atomic_inc(&lower_dentry->d_inode->i_count);
++		new_lower_inodes[i] = lower_dentry->d_inode;
++	}
++	/* 2. release reference on all older lower inodes */
++	for (i = old_ibstart; i <= old_ibend; i++) {
++		iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
++		unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
++	}
++	kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
++	/* 3. update root dentry's inode to new lower_inodes array */
++	UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
++	new_lower_inodes = NULL;
++
++	/* maxbytes may have changed */
++	sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
++	/* update high branch ID */
++	sbhbid(sb) = new_high_branch_id;
++
++	/* update our sb->generation for revalidating objects */
++	i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
++	atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
++	atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
++
++	err = 0;	/* reset to success */
++
++	if (!(*flags & MS_SILENT))
++		printk("unionfs: new generation number %d\n", i);
++
++	/*
++	 * The code above falls through to the next label, and releases the
++	 * refcnts of the older ones (stored in tmp_*): if we fell through
++	 * here, it means success.  However, if we jump directly to this
++	 * label from any error above, then an error occurred after we
++	 * grabbed various refcnts, and so we have to release the
++	 * temporarily constructed structures.
++	 */
++out_release:
++	/* no need to cleanup/release anything in tmp_data */
++	if (tmp_lower_paths)
++		for (i = 0; i < new_branches; i++)
++			pathput(&tmp_lower_paths[i]);
++out_free:
++	kfree(tmp_lower_paths);
++	kfree(tmp_data);
++	kfree(new_lower_paths);
++	kfree(new_data);
++	kfree(new_lower_inodes);
++out_error:
++	unionfs_write_unlock(sb);
++	return err;
++}
++
++/*
++ * Called by iput() when the inode reference count reaches zero and the
++ * inode is not hashed anywhere.  Used to clear anything that needs to
++ * be, before the inode is completely destroyed and put on the free list.
++ *
++ * No need to lock sb info's rwsem.
++ */
++static void unionfs_clear_inode(struct inode *inode)
++{
++	int bindex, bstart, bend;
++	struct inode *hidden_inode;
++	struct list_head *pos, *n;
++	struct unionfs_dir_state *rdstate;
++
++	list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
++		rdstate = list_entry(pos, struct unionfs_dir_state, cache);
++		list_del(&rdstate->cache);
++		free_rdstate(rdstate);
++	}
++
++	/*
++	 * Decrement a reference to a hidden_inode, which was incremented
++	 * by our read_inode when it was created initially.
++ */ ++ bstart = ibstart(inode); ++ bend = ibend(inode); ++ if (bstart >= 0) { ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_inode = unionfs_lower_inode_idx(inode, bindex); ++ if (!hidden_inode) ++ continue; ++ iput(hidden_inode); ++ } ++ } ++ ++ kfree(UNIONFS_I(inode)->lower_inodes); ++ UNIONFS_I(inode)->lower_inodes = NULL; ++} ++ ++static struct inode *unionfs_alloc_inode(struct super_block *sb) ++{ ++ struct unionfs_inode_info *i; ++ ++ i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL); ++ if (!i) ++ return NULL; ++ ++ /* memset everything up to the inode to 0 */ ++ memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode)); ++ ++ i->vfs_inode.i_version = 1; ++ return &i->vfs_inode; ++} ++ ++static void unionfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode)); ++} ++ ++/* unionfs inode cache constructor */ ++static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags) ++{ ++ struct unionfs_inode_info *i = v; ++ ++ inode_init_once(&i->vfs_inode); ++} ++ ++int unionfs_init_inode_cache(void) ++{ ++ int err = 0; ++ ++ unionfs_inode_cachep = ++ kmem_cache_create("unionfs_inode_cache", ++ sizeof(struct unionfs_inode_info), 0, ++ SLAB_RECLAIM_ACCOUNT, init_once, NULL); ++ if (!unionfs_inode_cachep) ++ err = -ENOMEM; ++ return err; ++} ++ ++/* unionfs inode cache destructor */ ++void unionfs_destroy_inode_cache(void) ++{ ++ if (unionfs_inode_cachep) ++ kmem_cache_destroy(unionfs_inode_cachep); ++} ++ ++/* ++ * Called when we have a dirty inode, right here we only throw out ++ * parts of our readdir list that are too old. ++ * ++ * No need to grab sb info's rwsem. ++ */ ++static int unionfs_write_inode(struct inode *inode, int sync) ++{ ++ struct list_head *pos, *n; ++ struct unionfs_dir_state *rdstate; ++ ++ spin_lock(&UNIONFS_I(inode)->rdlock); ++ list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) { ++ rdstate = list_entry(pos, struct unionfs_dir_state, cache); ++ /* We keep this list in LRU order. */ ++ if ((rdstate->access + RDCACHE_JIFFIES) > jiffies) ++ break; ++ UNIONFS_I(inode)->rdcount--; ++ list_del(&rdstate->cache); ++ free_rdstate(rdstate); ++ } ++ spin_unlock(&UNIONFS_I(inode)->rdlock); ++ ++ return 0; ++} ++ ++/* ++ * Used only in nfs, to kill any pending RPC tasks, so that subsequent ++ * code can actually succeed and won't leave tasks that need handling. 
++ */ ++static void unionfs_umount_begin(struct vfsmount *mnt, int flags) ++{ ++ struct super_block *sb, *hidden_sb; ++ struct vfsmount *hidden_mnt; ++ int bindex, bstart, bend; ++ ++ if (!(flags & MNT_FORCE)) ++ /* ++ * we are not being MNT_FORCE'd, therefore we should emulate ++ * old behavior ++ */ ++ return; ++ ++ sb = mnt->mnt_sb; ++ ++ unionfs_read_lock(sb); ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ hidden_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex); ++ hidden_sb = unionfs_lower_super_idx(sb, bindex); ++ ++ if (hidden_mnt && hidden_sb && hidden_sb->s_op && ++ hidden_sb->s_op->umount_begin) ++ hidden_sb->s_op->umount_begin(hidden_mnt, flags); ++ } ++ ++ unionfs_read_unlock(sb); ++} ++ ++static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt) ++{ ++ struct super_block *sb = mnt->mnt_sb; ++ int ret = 0; ++ char *tmp_page; ++ char *path; ++ int bindex, bstart, bend; ++ int perms; ++ ++ unionfs_read_lock(sb); ++ ++ unionfs_lock_dentry(sb->s_root); ++ ++ tmp_page = (char*) __get_free_page(GFP_KERNEL); ++ if (!tmp_page) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ bstart = sbstart(sb); ++ bend = sbend(sb); ++ ++ seq_printf(m, ",dirs="); ++ for (bindex = bstart; bindex <= bend; bindex++) { ++ path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex), ++ unionfs_lower_mnt_idx(sb->s_root, bindex), ++ tmp_page, PAGE_SIZE); ++ if (IS_ERR(path)) { ++ ret = PTR_ERR(path); ++ goto out; ++ } ++ ++ perms = branchperms(sb, bindex); ++ ++ seq_printf(m, "%s=%s", path, ++ perms & MAY_WRITE ? "rw" : "ro"); ++ if (bindex != bend) ++ seq_printf(m, ":"); ++ } ++ ++out: ++ free_page((unsigned long) tmp_page); ++ ++ unionfs_unlock_dentry(sb->s_root); ++ ++ unionfs_read_unlock(sb); ++ ++ return ret; ++} ++ ++struct super_operations unionfs_sops = { ++ .read_inode = unionfs_read_inode, ++ .delete_inode = unionfs_delete_inode, ++ .put_super = unionfs_put_super, ++ .statfs = unionfs_statfs, ++ .remount_fs = unionfs_remount_fs, ++ .clear_inode = unionfs_clear_inode, ++ .umount_begin = unionfs_umount_begin, ++ .show_options = unionfs_show_options, ++ .write_inode = unionfs_write_inode, ++ .alloc_inode = unionfs_alloc_inode, ++ .destroy_inode = unionfs_destroy_inode, ++}; +diff -Nurb linux-2.6.22-570/fs/unionfs/union.h linux-2.6.22-591/fs/unionfs/union.h +--- linux-2.6.22-570/fs/unionfs/union.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/union.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,467 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. 
++ */ ++ ++#ifndef _UNION_H_ ++#define _UNION_H_ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include ++ ++/* the file system name */ ++#define UNIONFS_NAME "unionfs" ++ ++/* unionfs root inode number */ ++#define UNIONFS_ROOT_INO 1 ++ ++/* number of times we try to get a unique temporary file name */ ++#define GET_TMPNAM_MAX_RETRY 5 ++ ++/* maximum number of branches we support, to avoid memory blowup */ ++#define UNIONFS_MAX_BRANCHES 128 ++ ++/* Operations vectors defined in specific files. */ ++extern struct file_operations unionfs_main_fops; ++extern struct file_operations unionfs_dir_fops; ++extern struct inode_operations unionfs_main_iops; ++extern struct inode_operations unionfs_dir_iops; ++extern struct inode_operations unionfs_symlink_iops; ++extern struct super_operations unionfs_sops; ++extern struct dentry_operations unionfs_dops; ++ ++/* How long should an entry be allowed to persist */ ++#define RDCACHE_JIFFIES (5*HZ) ++ ++/* file private data. */ ++struct unionfs_file_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ ++ struct unionfs_dir_state *rdstate; ++ struct file **lower_files; ++ int *saved_branch_ids; /* IDs of branches when file was opened */ ++}; ++ ++/* unionfs inode data in memory */ ++struct unionfs_inode_info { ++ int bstart; ++ int bend; ++ atomic_t generation; ++ int stale; ++ /* Stuff for readdir over NFS. */ ++ spinlock_t rdlock; ++ struct list_head readdircache; ++ int rdcount; ++ int hashsize; ++ int cookie; ++ ++ /* The hidden inodes */ ++ struct inode **lower_inodes; ++ /* to keep track of reads/writes for unlinks before closes */ ++ atomic_t totalopens; ++ ++ struct inode vfs_inode; ++}; ++ ++/* unionfs dentry data in memory */ ++struct unionfs_dentry_info { ++ /* ++ * The semaphore is used to lock the dentry as soon as we get into a ++ * unionfs function from the VFS. Our lock ordering is that children ++ * go before their parents. ++ */ ++ struct mutex lock; ++ int bstart; ++ int bend; ++ int bopaque; ++ int bcount; ++ atomic_t generation; ++ struct path *lower_paths; ++}; ++ ++/* These are the pointers to our various objects. */ ++struct unionfs_data { ++ struct super_block *sb; ++ atomic_t open_files; /* number of open files on branch */ ++ int branchperms; ++ int branch_id; /* unique branch ID at re/mount time */ ++}; ++ ++/* unionfs super-block data in memory */ ++struct unionfs_sb_info { ++ int bend; ++ ++ atomic_t generation; ++ ++ /* ++ * This rwsem is used to make sure that a branch management ++ * operation... 
++ * 1) will not begin before all currently in-flight operations ++ * complete ++ * 2) any new operations do not execute until the currently ++ * running branch management operation completes ++ */ ++ struct rw_semaphore rwsem; ++ int high_branch_id; /* last unique branch ID given */ ++ struct unionfs_data *data; ++}; ++ ++/* ++ * structure for making the linked list of entries by readdir on left branch ++ * to compare with entries on right branch ++ */ ++struct filldir_node { ++ struct list_head file_list; /* list for directory entries */ ++ char *name; /* name entry */ ++ int hash; /* name hash */ ++ int namelen; /* name len since name is not 0 terminated */ ++ ++ /* ++ * we can check for duplicate whiteouts and files in the same branch ++ * in order to return -EIO. ++ */ ++ int bindex; ++ ++ /* is this a whiteout entry? */ ++ int whiteout; ++ ++ /* Inline name, so we don't need to separately kmalloc small ones */ ++ char iname[DNAME_INLINE_LEN_MIN]; ++}; ++ ++/* Directory hash table. */ ++struct unionfs_dir_state { ++ unsigned int cookie; /* the cookie, based off of rdversion */ ++ unsigned int offset; /* The entry we have returned. */ ++ int bindex; ++ loff_t dirpos; /* offset within the lower level directory */ ++ int size; /* How big is the hash table? */ ++ int hashentries; /* How many entries have been inserted? */ ++ unsigned long access; ++ ++ /* This cache list is used when the inode keeps us around. */ ++ struct list_head cache; ++ struct list_head list[0]; ++}; ++ ++/* include miscellaneous macros */ ++#include "fanout.h" ++#include "sioq.h" ++ ++/* externs for cache creation/deletion routines */ ++extern void unionfs_destroy_filldir_cache(void); ++extern int unionfs_init_filldir_cache(void); ++extern int unionfs_init_inode_cache(void); ++extern void unionfs_destroy_inode_cache(void); ++extern int unionfs_init_dentry_cache(void); ++extern void unionfs_destroy_dentry_cache(void); ++ ++/* Initialize and free readdir-specific state. */ ++extern int init_rdstate(struct file *file); ++extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex); ++extern struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos); ++extern void free_rdstate(struct unionfs_dir_state *state); ++extern int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name, ++ int namelen, int bindex, int whiteout); ++extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate, ++ const char *name, int namelen); ++ ++extern struct dentry **alloc_new_dentries(int objs); ++extern struct unionfs_data *alloc_new_data(int objs); ++ ++/* We can only use 32-bits of offset for rdstate --- blech! */ ++#define DIREOF (0xfffff) ++#define RDOFFBITS 20 /* This is the number of bits in DIREOF. */ ++#define MAXRDCOOKIE (0xfff) ++/* Turn an rdstate into an offset. 
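++ * The cookie occupies the top 12 bits and the in-directory offset the
++ * low 20 bits, so the pair round-trips through a single 32-bit off_t.
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: the inverse of
++ * rdstate2offset() below, recovering both fields from an offset with the
++ * same DIREOF/RDOFFBITS/MAXRDCOOKIE constants defined above.
++ */
++#if 0	/* example */
++static inline void example_offset2rdstate(off_t off,
++					  unsigned int *cookie,
++					  unsigned int *offset)
++{
++	*cookie = (off >> RDOFFBITS) & MAXRDCOOKIE;
++	*offset = off & DIREOF;
++}
++#endif
++
++/*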
*/ ++static inline off_t rdstate2offset(struct unionfs_dir_state *buf) ++{ ++ off_t tmp; ++ ++ tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS) ++ | (buf->offset & DIREOF); ++ return tmp; ++} ++ ++#define unionfs_read_lock(sb) down_read(&UNIONFS_SB(sb)->rwsem) ++#define unionfs_read_unlock(sb) up_read(&UNIONFS_SB(sb)->rwsem) ++#define unionfs_write_lock(sb) down_write(&UNIONFS_SB(sb)->rwsem) ++#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem) ++ ++static inline void unionfs_double_lock_dentry(struct dentry *d1, ++ struct dentry *d2) ++{ ++ if (d2 < d1) { ++ struct dentry *tmp = d1; ++ d1 = d2; ++ d2 = tmp; ++ } ++ unionfs_lock_dentry(d1); ++ unionfs_lock_dentry(d2); ++} ++ ++extern int realloc_dentry_private_data(struct dentry *dentry); ++extern int new_dentry_private_data(struct dentry *dentry); ++extern void free_dentry_private_data(struct dentry *dentry); ++extern void update_bstart(struct dentry *dentry); ++ ++/* ++ * EXTERNALS: ++ */ ++ ++/* replicates the directory structure up to given dentry in given branch */ ++extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry, ++ int bindex); ++extern int make_dir_opaque(struct dentry *dir, int bindex); ++ ++/* partial lookup */ ++extern int unionfs_partial_lookup(struct dentry *dentry); ++ ++/* ++ * Pass an unionfs dentry and an index and it will try to create a whiteout ++ * in branch 'index'. ++ * ++ * On error, it will proceed to a branch to the left ++ */ ++extern int create_whiteout(struct dentry *dentry, int start); ++/* copies a file from dbstart to newbindex branch */ ++extern int copyup_file(struct inode *dir, struct file *file, int bstart, ++ int newbindex, loff_t size); ++extern int copyup_named_file(struct inode *dir, struct file *file, ++ char *name, int bstart, int new_bindex, ++ loff_t len); ++/* copies a dentry from dbstart to newbindex branch */ ++extern int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart, ++ int new_bindex, struct file **copyup_file, ++ loff_t len); ++ ++extern int remove_whiteouts(struct dentry *dentry, ++ struct dentry *hidden_dentry, int bindex); ++ ++extern int do_delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist); ++ ++extern int unionfs_get_nlinks(struct inode *inode); ++ ++/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */ ++extern int check_empty(struct dentry *dentry, ++ struct unionfs_dir_state **namelist); ++/* Delete whiteouts from this directory in branch bindex. */ ++extern int delete_whiteouts(struct dentry *dentry, int bindex, ++ struct unionfs_dir_state *namelist); ++ ++/* Re-lookup a hidden dentry. */ ++extern int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex); ++ ++extern void unionfs_reinterpose(struct dentry *this_dentry); ++extern struct super_block *unionfs_duplicate_super(struct super_block *sb); ++ ++/* Locking functions. */ ++extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl); ++extern int unionfs_getlk(struct file *file, struct file_lock *fl); ++ ++/* Common file operations. 
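++ * (One aside first on unionfs_double_lock_dentry() above, which sorts
++ * its two dentries by address before locking; a sketch of why follows.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: ordering two locks of
++ * the same class by pointer value means that callers passing (a, b) and
++ * (b, a) still acquire the locks in the same order, so neither task can
++ * hold one lock while waiting for the other (the classic AB-BA deadlock).
++ */
++#if 0	/* example */
++static void example_ordered_lock(struct dentry *a, struct dentry *b)
++{
++	/* both (a, b) and (b, a) callers lock the lower address first */
++	unionfs_double_lock_dentry(a, b);
++	/* ... operate on both dentries ... */
++	unionfs_unlock_dentry(a);
++	unionfs_unlock_dentry(b);
++}
++#endif
++
++/*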
*/ ++extern int unionfs_file_revalidate(struct file *file, int willwrite); ++extern int unionfs_open(struct inode *inode, struct file *file); ++extern int unionfs_file_release(struct inode *inode, struct file *file); ++extern int unionfs_flush(struct file *file, fl_owner_t id); ++extern long unionfs_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg); ++ ++/* Inode operations */ ++extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry); ++extern int unionfs_unlink(struct inode *dir, struct dentry *dentry); ++extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry); ++ ++extern int __unionfs_d_revalidate_chain(struct dentry *dentry, ++ struct nameidata *nd); ++ ++/* The values for unionfs_interpose's flag. */ ++#define INTERPOSE_DEFAULT 0 ++#define INTERPOSE_LOOKUP 1 ++#define INTERPOSE_REVAL 2 ++#define INTERPOSE_REVAL_NEG 3 ++#define INTERPOSE_PARTIAL 4 ++ ++extern int unionfs_interpose(struct dentry *this_dentry, ++ struct super_block *sb, int flag); ++ ++#ifdef CONFIG_UNION_FS_XATTR ++/* Extended attribute functions. */ ++extern void *unionfs_xattr_alloc(size_t size, size_t limit); ++extern void unionfs_xattr_free(void *ptr, size_t size); ++ ++extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, ++ void *value, size_t size); ++extern int unionfs_removexattr(struct dentry *dentry, const char *name); ++extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list, ++ size_t size); ++extern int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags); ++#endif /* CONFIG_UNION_FS_XATTR */ ++ ++/* The root directory is unhashed, but isn't deleted. */ ++static inline int d_deleted(struct dentry *d) ++{ ++ return d_unhashed(d) && (d != d->d_sb->s_root); ++} ++ ++struct dentry *unionfs_lookup_backend(struct dentry *dentry, ++ struct nameidata *nd, int lookupmode); ++ ++/* unionfs_permission, check if we should bypass error to facilitate copyup */ ++#define IS_COPYUP_ERR(err) ((err) == -EROFS) ++ ++/* unionfs_open, check if we need to copyup the file */ ++#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND) ++#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS) ++ ++static inline int branchperms(const struct super_block *sb, int index) ++{ ++ BUG_ON(index < 0); ++ ++ return UNIONFS_SB(sb)->data[index].branchperms; ++} ++ ++static inline int set_branchperms(struct super_block *sb, int index, int perms) ++{ ++ BUG_ON(index < 0); ++ ++ UNIONFS_SB(sb)->data[index].branchperms = perms; ++ ++ return perms; ++} ++ ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_super(const struct super_block *sb, int index) ++{ ++ int ret; ++ ++ ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0; ++ return ret; ++} ++ ++/* Is this file on a read-only branch? */ ++static inline int is_robranch_idx(const struct dentry *dentry, int index) ++{ ++ int err = 0; ++ ++ BUG_ON(index < 0); ++ ++ if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) || ++ IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode)) ++ err = -EROFS; ++ return err; ++} ++ ++static inline int is_robranch(const struct dentry *dentry) ++{ ++ int index; ++ ++ index = UNIONFS_D(dentry)->bstart; ++ BUG_ON(index < 0); ++ ++ return is_robranch_idx(dentry, index); ++} ++ ++/* What do we use for whiteouts. */ ++#define UNIONFS_WHPFX ".wh." ++#define UNIONFS_WHLEN 4 ++/* ++ * If a directory contains this file, then it is opaque. 
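++ * (Whiteout naming itself is mechanical; see the sketch below.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: what alloc_whname()
++ * in subr.c produces for a given name -- the UNIONFS_WHPFX prefix is
++ * simply prepended, so an unlinked file "foo" is masked by ".wh.foo".
++ */
++#if 0	/* example */
++static int example_whiteout_name(void)
++{
++	char *wh = alloc_whname("foo", 3);	/* -> ".wh.foo" */
++
++	if (IS_ERR(wh))
++		return PTR_ERR(wh);
++	/* buffer is len + UNIONFS_WHLEN + 1 bytes, kmalloc'ed */
++	kfree(wh);
++	return 0;
++}
++#endif
++
++/*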
We start with the ++ * .wh. flag so that it is blocked by lookup. ++ */ ++#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque" ++#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME ++ ++#ifndef DEFAULT_POLLMASK ++#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) ++#endif ++ ++/* ++ * EXTERNALS: ++ */ ++extern char *alloc_whname(const char *name, int len); ++extern int check_branch(struct nameidata *nd); ++extern int __parse_branch_mode(const char *name); ++extern int parse_branch_mode(const char *name); ++ ++/* ++ * These two functions are here because it is kind of daft to copy and paste ++ * the contents of the two functions to 32+ places in unionfs ++ */ ++static inline struct dentry *lock_parent(struct dentry *dentry) ++{ ++ struct dentry *dir = dget(dentry->d_parent); ++ ++ mutex_lock(&dir->d_inode->i_mutex); ++ return dir; ++} ++ ++static inline void unlock_dir(struct dentry *dir) ++{ ++ mutex_unlock(&dir->d_inode->i_mutex); ++ dput(dir); ++} ++ ++static inline struct vfsmount *unionfs_mntget(struct dentry *dentry, ++ int bindex) ++{ ++ BUG_ON(!dentry || bindex < 0); ++ ++ return mntget(unionfs_lower_mnt_idx(dentry, bindex)); ++} ++ ++static inline void unionfs_mntput(struct dentry *dentry, int bindex) ++{ ++ if (!dentry) ++ return; ++ ++ BUG_ON(bindex < 0); ++ ++ mntput(unionfs_lower_mnt_idx(dentry, bindex)); ++} ++#endif /* not _UNION_H_ */ +diff -Nurb linux-2.6.22-570/fs/unionfs/unlink.c linux-2.6.22-591/fs/unionfs/unlink.c +--- linux-2.6.22-570/fs/unionfs/unlink.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/unlink.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,176 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* unlink a file by creating a whiteout */ ++static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry) ++{ ++ struct dentry *hidden_dentry; ++ struct dentry *hidden_dir_dentry; ++ int bindex; ++ int err = 0; ++ ++ if ((err = unionfs_partial_lookup(dentry))) ++ goto out; ++ ++ bindex = dbstart(dentry); ++ ++ hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex); ++ if (!hidden_dentry) ++ goto out; ++ ++ hidden_dir_dentry = lock_parent(hidden_dentry); ++ ++ /* avoid destroying the hidden inode if the file is in use */ ++ dget(hidden_dentry); ++ if (!(err = is_robranch_super(dentry->d_sb, bindex))) ++ err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry); ++ dput(hidden_dentry); ++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); ++ unlock_dir(hidden_dir_dentry); ++ ++ if (err && !IS_COPYUP_ERR(err)) ++ goto out; ++ ++ if (err) { ++ if (dbstart(dentry) == 0) ++ goto out; ++ ++ err = create_whiteout(dentry, dbstart(dentry) - 1); ++ } else if (dbopaque(dentry) != -1) ++ /* There is a hidden lower-priority file with the same name. 
*/ ++ err = create_whiteout(dentry, dbopaque(dentry)); ++ else ++ err = create_whiteout(dentry, dbstart(dentry)); ++ ++out: ++ if (!err) ++ dentry->d_inode->i_nlink--; ++ ++ /* We don't want to leave negative leftover dentries for revalidate. */ ++ if (!err && (dbopaque(dentry) != -1)) ++ update_bstart(dentry); ++ ++ return err; ++} ++ ++int unionfs_unlink(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ err = unionfs_unlink_whiteout(dir, dentry); ++ /* call d_drop so the system "forgets" about us */ ++ if (!err) ++ d_drop(dentry); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry, ++ struct unionfs_dir_state *namelist) ++{ ++ int err; ++ struct dentry *hidden_dentry; ++ struct dentry *hidden_dir_dentry = NULL; ++ ++ /* Here we need to remove whiteout entries. */ ++ err = delete_whiteouts(dentry, dbstart(dentry), namelist); ++ if (err) ++ goto out; ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ hidden_dir_dentry = lock_parent(hidden_dentry); ++ ++ /* avoid destroying the hidden inode if the file is in use */ ++ dget(hidden_dentry); ++ if (!(err = is_robranch(dentry))) ++ err = vfs_rmdir(hidden_dir_dentry->d_inode, hidden_dentry); ++ dput(hidden_dentry); ++ ++ fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode); ++ /* propagate number of hard-links */ ++ dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode); ++ ++out: ++ if (hidden_dir_dentry) ++ unlock_dir(hidden_dir_dentry); ++ return err; ++} ++ ++int unionfs_rmdir(struct inode *dir, struct dentry *dentry) ++{ ++ int err = 0; ++ struct unionfs_dir_state *namelist = NULL; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ /* check if this unionfs directory is empty or not */ ++ err = check_empty(dentry, &namelist); ++ if (err) ++ goto out; ++ ++ err = unionfs_rmdir_first(dir, dentry, namelist); ++ /* create whiteout */ ++ if (!err) ++ err = create_whiteout(dentry, dbstart(dentry)); ++ else { ++ int new_err; ++ ++ if (dbstart(dentry) == 0) ++ goto out; ++ ++ /* exit if the error returned was NOT -EROFS */ ++ if (!IS_COPYUP_ERR(err)) ++ goto out; ++ ++ new_err = create_whiteout(dentry, dbstart(dentry) - 1); ++ if (new_err != -EEXIST) ++ err = new_err; ++ } ++ ++out: ++ /* call d_drop so the system "forgets" about us */ ++ if (!err) ++ d_drop(dentry); ++ ++ if (namelist) ++ free_rdstate(namelist); ++ ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/unionfs/xattr.c linux-2.6.22-591/fs/unionfs/xattr.c +--- linux-2.6.22-570/fs/unionfs/xattr.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/fs/unionfs/xattr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,161 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2003-2006 Charles P. Wright ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2005-2006 Junjiro Okajima ++ * Copyright (c) 2005 Arun M. Krishnakumar ++ * Copyright (c) 2004-2006 David P. 
Quigley ++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair ++ * Copyright (c) 2003 Puja Gupta ++ * Copyright (c) 2003 Harikesavan Krishnan ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#include "union.h" ++ ++/* This is lifted from fs/xattr.c */ ++void *unionfs_xattr_alloc(size_t size, size_t limit) ++{ ++ void *ptr; ++ ++ if (size > limit) ++ return ERR_PTR(-E2BIG); ++ ++ if (!size) /* size request, no buffer is needed */ ++ return NULL; ++ else if (size <= PAGE_SIZE) ++ ptr = kmalloc(size, GFP_KERNEL); ++ else ++ ptr = vmalloc(size); ++ if (!ptr) ++ return ERR_PTR(-ENOMEM); ++ return ptr; ++} ++ ++void unionfs_xattr_free(void *ptr, size_t size) ++{ ++ if (!size) /* size request, no buffer was needed */ ++ return; ++ else if (size <= PAGE_SIZE) ++ kfree(ptr); ++ else ++ vfree(ptr); ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value, ++ size_t size) ++{ ++ struct dentry *hidden_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_getxattr(hidden_dentry, (char*) name, value, size); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_setxattr(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags) ++{ ++ struct dentry *hidden_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_setxattr(hidden_dentry, (char*) name, (void*) value, ++ size, flags); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. ++ * dentry->d_inode->i_mutex locked ++ */ ++int unionfs_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct dentry *hidden_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ err = vfs_removexattr(hidden_dentry, (char*) name); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} ++ ++/* ++ * BKL held by caller. 
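++ * (A usage sketch of the alloc/free pair from the top of this file
++ * follows.)
++ */
++
++/*
++ * Illustrative sketch only, not part of the patch: the intended use of
++ * unionfs_xattr_alloc()/unionfs_xattr_free() above -- kmalloc() for
++ * buffers up to a page, vmalloc() beyond, and a size-aware free to
++ * match.  XATTR_SIZE_MAX (from linux/xattr.h) is assumed as the limit.
++ */
++#if 0	/* example */
++static ssize_t example_sized_buffer(size_t size)
++{
++	void *buf = unionfs_xattr_alloc(size, XATTR_SIZE_MAX);
++
++	if (IS_ERR(buf))
++		return PTR_ERR(buf);	/* -E2BIG over limit, -ENOMEM */
++	if (!buf)
++		return 0;		/* size 0: probe, no buffer needed */
++	/* ... fill buf, e.g. from vfs_getxattr() ... */
++	unionfs_xattr_free(buf, size);
++	return size;
++}
++#endif
++
++/*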
++ * dentry->d_inode->i_mutex locked ++ */ ++ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size) ++{ ++ struct dentry *hidden_dentry = NULL; ++ int err = -EOPNOTSUPP; ++ char *encoded_list = NULL; ++ ++ unionfs_read_lock(dentry->d_sb); ++ unionfs_lock_dentry(dentry); ++ ++ if (!__unionfs_d_revalidate_chain(dentry, NULL)) { ++ err = -ESTALE; ++ goto out; ++ } ++ ++ hidden_dentry = unionfs_lower_dentry(dentry); ++ ++ encoded_list = list; ++ err = vfs_listxattr(hidden_dentry, encoded_list, size); ++ ++out: ++ unionfs_unlock_dentry(dentry); ++ unionfs_read_unlock(dentry->d_sb); ++ return err; ++} +diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c +--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_file.c 2007-12-21 15:36:12.000000000 -0500 +@@ -246,18 +246,19 @@ + + #ifdef CONFIG_XFS_DMAPI + STATIC struct page * +-xfs_vm_nopage( +- struct vm_area_struct *area, +- unsigned long address, +- int *type) ++xfs_vm_fault( ++ struct vm_area_struct *vma, ++ struct fault_data *fdata) + { +- struct inode *inode = area->vm_file->f_path.dentry->d_inode; ++ struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + bhv_vnode_t *vp = vn_from_inode(inode); + + ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI); +- if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0)) ++ if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) { ++ fdata->type = VM_FAULT_SIGBUS; + return NULL; +- return filemap_nopage(area, address, type); ++ } ++ return filemap_fault(vma, fdata); + } + #endif /* CONFIG_XFS_DMAPI */ + +@@ -343,6 +344,7 @@ + struct vm_area_struct *vma) + { + vma->vm_ops = &xfs_file_vm_ops; ++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; + + #ifdef CONFIG_XFS_DMAPI + if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI) +@@ -501,14 +503,12 @@ + }; + + static struct vm_operations_struct xfs_file_vm_ops = { +- .nopage = filemap_nopage, +- .populate = filemap_populate, ++ .fault = filemap_fault, + }; + + #ifdef CONFIG_XFS_DMAPI + static struct vm_operations_struct xfs_dmapi_file_vm_ops = { +- .nopage = xfs_vm_nopage, +- .populate = filemap_populate, ++ .fault = xfs_vm_fault, + #ifdef HAVE_VMOP_MPROTECT + .mprotect = xfs_vm_mprotect, + #endif +diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c +--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/fs/xfs/linux-2.6/xfs_super.c 2007-12-21 15:36:12.000000000 -0500 +@@ -570,6 +570,7 @@ + bhv_vfs_sync_work_t *work, *n; + LIST_HEAD (tmp); + ++ set_freezable(); + timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); + for (;;) { + timeleft = schedule_timeout_interruptible(timeleft); +diff -Nurb linux-2.6.22-570/include/acpi/acmacros.h linux-2.6.22-591/include/acpi/acmacros.h +--- linux-2.6.22-570/include/acpi/acmacros.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/acpi/acmacros.h 2007-12-21 15:36:12.000000000 -0500 +@@ -486,6 +486,8 @@ + #define ACPI_FUNCTION_NAME(name) + #endif + ++#ifdef DEBUG_FUNC_TRACE ++ + #define ACPI_FUNCTION_TRACE(a) ACPI_FUNCTION_NAME(a) \ + acpi_ut_trace(ACPI_DEBUG_PARAMETERS) + #define ACPI_FUNCTION_TRACE_PTR(a,b) ACPI_FUNCTION_NAME(a) \ +@@ -563,6 +565,27 @@ + + #endif /* ACPI_SIMPLE_RETURN_MACROS */ + ++#else /* !DEBUG_FUNC_TRACE */ ++ ++#define ACPI_FUNCTION_TRACE(a) ++#define ACPI_FUNCTION_TRACE_PTR(a,b) ++#define ACPI_FUNCTION_TRACE_U32(a,b) 
++#define ACPI_FUNCTION_TRACE_STR(a,b) ++#define ACPI_FUNCTION_EXIT ++#define ACPI_FUNCTION_STATUS_EXIT(s) ++#define ACPI_FUNCTION_VALUE_EXIT(s) ++#define ACPI_FUNCTION_TRACE(a) ++#define ACPI_FUNCTION_ENTRY() ++ ++#define return_VOID return ++#define return_ACPI_STATUS(s) return(s) ++#define return_VALUE(s) return(s) ++#define return_UINT8(s) return(s) ++#define return_UINT32(s) return(s) ++#define return_PTR(s) return(s) ++ ++#endif /* DEBUG_FUNC_TRACE */ ++ + /* Conditional execution */ + + #define ACPI_DEBUG_EXEC(a) a +diff -Nurb linux-2.6.22-570/include/acpi/acoutput.h linux-2.6.22-591/include/acpi/acoutput.h +--- linux-2.6.22-570/include/acpi/acoutput.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/acpi/acoutput.h 2007-12-21 15:36:12.000000000 -0500 +@@ -178,8 +178,8 @@ + + /* Defaults for debug_level, debug and normal */ + +-#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT) +-#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT) ++#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR) ++#define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR) + #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) + + #endif /* __ACOUTPUT_H__ */ +diff -Nurb linux-2.6.22-570/include/acpi/platform/acenv.h linux-2.6.22-591/include/acpi/platform/acenv.h +--- linux-2.6.22-570/include/acpi/platform/acenv.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/acpi/platform/acenv.h 2007-12-21 15:36:12.000000000 -0500 +@@ -136,7 +136,7 @@ + + /*! [Begin] no source code translation */ + +-#if defined(__linux__) ++#if defined(_LINUX) || defined(__linux__) + #include "aclinux.h" + + #elif defined(_AED_EFI) +diff -Nurb linux-2.6.22-570/include/acpi/platform/aclinux.h linux-2.6.22-591/include/acpi/platform/aclinux.h +--- linux-2.6.22-570/include/acpi/platform/aclinux.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/acpi/platform/aclinux.h 2007-12-21 15:36:12.000000000 -0500 +@@ -91,7 +91,10 @@ + #define ACPI_USE_NATIVE_DIVIDE + #endif + ++#ifndef __cdecl + #define __cdecl ++#endif ++ + #define ACPI_FLUSH_CPU_CACHE() + #endif /* __KERNEL__ */ + +diff -Nurb linux-2.6.22-570/include/acpi/processor.h linux-2.6.22-591/include/acpi/processor.h +--- linux-2.6.22-570/include/acpi/processor.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/acpi/processor.h 2007-12-21 15:36:12.000000000 -0500 +@@ -21,6 +21,8 @@ + #define ACPI_PSD_REV0_REVISION 0 /* Support for _PSD as in ACPI 3.0 */ + #define ACPI_PSD_REV0_ENTRIES 5 + ++#define ACPI_TSD_REV0_REVISION 0 /* Support for _TSD as in ACPI 3.0 */ ++#define ACPI_TSD_REV0_ENTRIES 5 + /* + * Types of coordination defined in ACPI 3.0.
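+ * (encoded as 0xFC = SW_ALL, 0xFD = SW_ANY, 0xFE = HW_ALL)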
Same macros can be used across + * P, C and T states +@@ -125,17 +127,53 @@ + + /* Throttling Control */ + ++struct acpi_tsd_package { ++ acpi_integer num_entries; ++ acpi_integer revision; ++ acpi_integer domain; ++ acpi_integer coord_type; ++ acpi_integer num_processors; ++} __attribute__ ((packed)); ++ ++struct acpi_ptc_register { ++ u8 descriptor; ++ u16 length; ++ u8 space_id; ++ u8 bit_width; ++ u8 bit_offset; ++ u8 reserved; ++ u64 address; ++} __attribute__ ((packed)); ++ ++struct acpi_processor_tx_tss { ++ acpi_integer freqpercentage; /* */ ++ acpi_integer power; /* milliWatts */ ++ acpi_integer transition_latency; /* microseconds */ ++ acpi_integer control; /* control value */ ++ acpi_integer status; /* success indicator */ ++}; + struct acpi_processor_tx { + u16 power; + u16 performance; + }; + ++struct acpi_processor; + struct acpi_processor_throttling { +- int state; ++ unsigned int state; ++ unsigned int platform_limit; ++ struct acpi_pct_register control_register; ++ struct acpi_pct_register status_register; ++ unsigned int state_count; ++ struct acpi_processor_tx_tss *states_tss; ++ struct acpi_tsd_package domain_info; ++ cpumask_t shared_cpu_map; ++ int (*acpi_processor_get_throttling) (struct acpi_processor * pr); ++ int (*acpi_processor_set_throttling) (struct acpi_processor * pr, ++ int state); ++ + u32 address; + u8 duty_offset; + u8 duty_width; +- int state_count; + struct acpi_processor_tx states[ACPI_PROCESSOR_MAX_THROTTLING]; + }; + +@@ -161,6 +199,7 @@ + u8 bm_check:1; + u8 has_cst:1; + u8 power_setup_done:1; ++ u8 bm_rld_set:1; + }; + + struct acpi_processor { +@@ -169,6 +208,9 @@ + u32 id; + u32 pblk; + int performance_platform_limit; ++ int throttling_platform_limit; ++ /* 0 - states 0..n-th state available */ ++ + struct acpi_processor_flags flags; + struct acpi_processor_power power; + struct acpi_processor_performance *performance; +@@ -270,7 +312,7 @@ + + /* in processor_throttling.c */ + int acpi_processor_get_throttling_info(struct acpi_processor *pr); +-int acpi_processor_set_throttling(struct acpi_processor *pr, int state); ++extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state); + extern struct file_operations acpi_processor_throttling_fops; + + /* in processor_idle.c */ +@@ -279,6 +321,9 @@ + int acpi_processor_cst_has_changed(struct acpi_processor *pr); + int acpi_processor_power_exit(struct acpi_processor *pr, + struct acpi_device *device); ++ ++extern struct cpuidle_driver acpi_idle_driver; ++void acpi_max_cstate_changed(void); + int acpi_processor_suspend(struct acpi_device * device, pm_message_t state); + int acpi_processor_resume(struct acpi_device * device); + +diff -Nurb linux-2.6.22-570/include/asm-alpha/page.h linux-2.6.22-591/include/asm-alpha/page.h +--- linux-2.6.22-570/include/asm-alpha/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-alpha/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -17,7 +17,8 @@ + extern void clear_page(void *page); + #define clear_user_page(page, vaddr, pg) clear_page(page) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + extern void copy_page(void * _to, void * _from); +diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h +---
linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/adma.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,544 @@ ++/* ++ * Copyright(c) 2006, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#ifndef _ADMA_H ++#define _ADMA_H ++#include ++#include ++#include ++#include ++ ++#define ADMA_ACCR(chan) (chan->mmr_base + 0x0) ++#define ADMA_ACSR(chan) (chan->mmr_base + 0x4) ++#define ADMA_ADAR(chan) (chan->mmr_base + 0x8) ++#define ADMA_IIPCR(chan) (chan->mmr_base + 0x18) ++#define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c) ++#define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20) ++#define ADMA_ANDAR(chan) (chan->mmr_base + 0x24) ++#define ADMA_ADCR(chan) (chan->mmr_base + 0x28) ++#define ADMA_CARMD(chan) (chan->mmr_base + 0x2c) ++#define ADMA_ABCR(chan) (chan->mmr_base + 0x30) ++#define ADMA_DLADR(chan) (chan->mmr_base + 0x34) ++#define ADMA_DUADR(chan) (chan->mmr_base + 0x38) ++#define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3))) ++#define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3))) ++ ++struct iop13xx_adma_src { ++ u32 src_addr; ++ union { ++ u32 upper_src_addr; ++ struct { ++ unsigned int pq_upper_src_addr:24; ++ unsigned int pq_dmlt:8; ++ }; ++ }; ++}; ++ ++struct iop13xx_adma_desc_ctrl { ++ unsigned int int_en:1; ++ unsigned int xfer_dir:2; ++ unsigned int src_select:4; ++ unsigned int zero_result:1; ++ unsigned int block_fill_en:1; ++ unsigned int crc_gen_en:1; ++ unsigned int crc_xfer_dis:1; ++ unsigned int crc_seed_fetch_dis:1; ++ unsigned int status_write_back_en:1; ++ unsigned int endian_swap_en:1; ++ unsigned int reserved0:2; ++ unsigned int pq_update_xfer_en:1; ++ unsigned int dual_xor_en:1; ++ unsigned int pq_xfer_en:1; ++ unsigned int p_xfer_dis:1; ++ unsigned int reserved1:10; ++ unsigned int relax_order_en:1; ++ unsigned int no_snoop_en:1; ++}; ++ ++struct iop13xx_adma_byte_count { ++ unsigned int byte_count:24; ++ unsigned int host_if:3; ++ unsigned int reserved:2; ++ unsigned int zero_result_err_q:1; ++ unsigned int zero_result_err:1; ++ unsigned int tx_complete:1; ++}; ++ ++struct iop13xx_adma_desc_hw { ++ u32 next_desc; ++ union { ++ u32 desc_ctrl; ++ struct iop13xx_adma_desc_ctrl desc_ctrl_field; ++ }; ++ union { ++ u32 crc_addr; ++ u32 block_fill_data; ++ u32 q_dest_addr; ++ }; ++ union { ++ u32 byte_count; ++ struct iop13xx_adma_byte_count byte_count_field; ++ }; ++ union { ++ u32 dest_addr; ++ u32 p_dest_addr; ++ }; ++ union { ++ u32 upper_dest_addr; ++ u32 pq_upper_dest_addr; ++ }; ++ struct iop13xx_adma_src src[1]; ++}; ++ ++struct iop13xx_adma_desc_dual_xor { ++ u32 next_desc; ++ u32 desc_ctrl; ++ u32 reserved; ++ u32 byte_count; ++ u32 h_dest_addr; ++ u32 h_upper_dest_addr; ++ u32 src0_addr; ++ u32 upper_src0_addr; ++ u32 src1_addr; ++ u32 upper_src1_addr; ++ u32 h_src_addr; ++ u32 
h_upper_src_addr; ++ u32 d_src_addr; ++ u32 d_upper_src_addr; ++ u32 d_dest_addr; ++ u32 d_upper_dest_addr; ++}; ++ ++struct iop13xx_adma_desc_pq_update { ++ u32 next_desc; ++ u32 desc_ctrl; ++ u32 reserved; ++ u32 byte_count; ++ u32 p_dest_addr; ++ u32 p_upper_dest_addr; ++ u32 src0_addr; ++ u32 upper_src0_addr; ++ u32 src1_addr; ++ u32 upper_src1_addr; ++ u32 p_src_addr; ++ u32 p_upper_src_addr; ++ u32 q_src_addr; ++ struct { ++ unsigned int q_upper_src_addr:24; ++ unsigned int q_dmlt:8; ++ }; ++ u32 q_dest_addr; ++ u32 q_upper_dest_addr; ++}; ++ ++static inline int iop_adma_get_max_xor(void) ++{ ++ return 16; ++} ++ ++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) ++{ ++ return __raw_readl(ADMA_ADAR(chan)); ++} ++ ++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, ++ u32 next_desc_addr) ++{ ++ __raw_writel(next_desc_addr, ADMA_ANDAR(chan)); ++} ++ ++#define ADMA_STATUS_BUSY (1 << 13) ++ ++static inline char iop_chan_is_busy(struct iop_adma_chan *chan) ++{ ++ if (__raw_readl(ADMA_ACSR(chan)) & ++ ADMA_STATUS_BUSY) ++ return 1; ++ else ++ return 0; ++} ++ ++static inline int ++iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots) ++{ ++ return 1; ++} ++#define iop_desc_is_aligned(x, y) 1 ++ ++static inline int ++iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) ++{ ++ *slots_per_op = 1; ++ return 1; ++} ++ ++#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s) ++ ++static inline int ++iop_chan_memset_slot_count(size_t len, int *slots_per_op) ++{ ++ *slots_per_op = 1; ++ return 1; ++} ++ ++static inline int ++iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) ++{ ++ int num_slots; ++ /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1 ++ * (1 source => 8 bytes) (1 slot => 32 bytes) ++ */ ++ num_slots = 1 + (((src_cnt - 1) << 3) >> 5); ++ if (((src_cnt - 1) << 3) & 0x1f) ++ num_slots++; ++ ++ *slots_per_op = num_slots; ++ ++ return num_slots; ++} ++ ++#define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) ++#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT ++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT ++#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT ++#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) ++ ++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ return hw_desc->dest_addr; ++} ++ ++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ return hw_desc->byte_count_field.byte_count; ++} ++ ++static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ int src_idx) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ return hw_desc->src[src_idx].src_addr; ++} ++ ++static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ return hw_desc->desc_ctrl_field.src_select + 1; ++} ++ ++static inline void ++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop13xx_adma_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ ++ 
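++ /* int_en requests a completion interrupt once this descriptor's ++ * transfer finishes. */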
u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ hw_desc->crc_addr = 0; ++} ++ ++static inline void ++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop13xx_adma_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ ++ u_desc_ctrl.field.block_fill_en = 1; ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ hw_desc->crc_addr = 0; ++} ++ ++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ ++static inline void ++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop13xx_adma_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.src_select = src_cnt - 1; ++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ hw_desc->crc_addr = 0; ++ ++} ++#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i) ++ ++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ ++static inline int ++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop13xx_adma_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.src_select = src_cnt - 1; ++ u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ ++ u_desc_ctrl.field.zero_result = 1; ++ u_desc_ctrl.field.status_write_back_en = 1; ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ hw_desc->crc_addr = 0; ++ ++ return 1; ++} ++ ++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ u32 byte_count) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ hw_desc->byte_count = byte_count; ++} ++ ++static inline void ++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) ++{ ++ int slots_per_op = desc->slots_per_op; ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; ++ int i = 0; ++ ++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { ++ hw_desc->byte_count = len; ++ } else { ++ do { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ i += slots_per_op; ++ } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); ++ ++ if (len) { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iter->byte_count = len; ++ } ++ } ++} ++ ++ ++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ dma_addr_t addr) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ hw_desc->dest_addr = addr; ++ hw_desc->upper_dest_addr = 0; ++} ++ ++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, ++ dma_addr_t addr) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ hw_desc->src[0].src_addr = addr; ++ hw_desc->src[0].upper_src_addr = 0; ++} ++ ++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, ++ int src_idx, dma_addr_t addr) ++{ ++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; ++ int i = 0; ++ ++ do { ++ 
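++ /* A transfer longer than the hardware per-descriptor limit spans ++ * several hw descriptors; program the same source slot in each one, ++ * stepping the address by IOP_ADMA_XOR_MAX_BYTE_COUNT. */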
iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iter->src[src_idx].src_addr = addr; ++ iter->src[src_idx].upper_src_addr = 0; ++ slot_cnt -= slots_per_op; ++ if (slot_cnt) { ++ i += slots_per_op; ++ addr += IOP_ADMA_XOR_MAX_BYTE_COUNT; ++ } ++ } while (slot_cnt); ++} ++ ++static inline void ++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ iop_desc_init_memcpy(desc, 1); ++ iop_desc_set_byte_count(desc, chan, 0); ++ iop_desc_set_dest_addr(desc, chan, 0); ++ iop_desc_set_memcpy_src_addr(desc, 0); ++} ++ ++#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr ++ ++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, ++ u32 next_desc_addr) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ BUG_ON(hw_desc->next_desc); ++ hw_desc->next_desc = next_desc_addr; ++} ++ ++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ return hw_desc->next_desc; ++} ++ ++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ hw_desc->next_desc = 0; ++} ++ ++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, ++ u32 val) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ hw_desc->block_fill_data = val; ++} ++ ++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) ++{ ++ struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; ++ struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; ++ struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; ++ ++ BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); ++ ++ if (desc_ctrl.pq_xfer_en) ++ return byte_count.zero_result_err_q; ++ else ++ return byte_count.zero_result_err; ++} ++ ++static inline void iop_chan_append(struct iop_adma_chan *chan) ++{ ++ u32 adma_accr; ++ ++ adma_accr = __raw_readl(ADMA_ACCR(chan)); ++ adma_accr |= 0x2; ++ __raw_writel(adma_accr, ADMA_ACCR(chan)); ++} ++ ++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) ++{ ++ do { } while (0); ++} ++ ++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) ++{ ++ return __raw_readl(ADMA_ACSR(chan)); ++} ++ ++static inline void iop_chan_disable(struct iop_adma_chan *chan) ++{ ++ u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); ++ adma_chan_ctrl &= ~0x1; ++ __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); ++} ++ ++static inline void iop_chan_enable(struct iop_adma_chan *chan) ++{ ++ u32 adma_chan_ctrl; ++ ++ adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); ++ adma_chan_ctrl |= 0x1; ++ __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); ++} ++ ++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(ADMA_ACSR(chan)); ++ status &= (1 << 12); ++ __raw_writel(status, ADMA_ACSR(chan)); ++} ++ ++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(ADMA_ACSR(chan)); ++ status &= (1 << 11); ++ __raw_writel(status, ADMA_ACSR(chan)); ++} ++ ++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(ADMA_ACSR(chan)); ++ status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3); ++ __raw_writel(status, ADMA_ACSR(chan)); ++} ++ ++static inline int ++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return test_bit(9, &status); ++} ++ ++static inline int 
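++/* The iop_is_err_* predicates below decode the remaining ACSR error ++ * flags; conditions that cannot occur on this unit simply report 0. */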
++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return test_bit(5, &status); ++} ++ ++static inline int ++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return test_bit(4, &status); ++} ++ ++static inline int ++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return test_bit(3, &status); ++} ++ ++static inline int ++iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++static inline int ++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++static inline int ++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++#endif /* _ADMA_H */ +diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h +--- linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-arm/arch-iop13xx/iop13xx.h 2007-12-21 15:36:12.000000000 -0500 +@@ -166,12 +166,22 @@ + #define IOP13XX_INIT_I2C_1 (1 << 1) + #define IOP13XX_INIT_I2C_2 (1 << 2) + ++/* ADMA selection flags */ ++/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */ ++#define IOP13XX_INIT_ADMA_DEFAULT (0) ++#define IOP13XX_INIT_ADMA_0 (1 << 0) ++#define IOP13XX_INIT_ADMA_1 (1 << 1) ++#define IOP13XX_INIT_ADMA_2 (1 << 2) ++ ++/* Platform devices */ + #define IQ81340_NUM_UART 2 + #define IQ81340_NUM_I2C 3 + #define IQ81340_NUM_PHYS_MAP_FLASH 1 +-#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\ +- IQ81340_NUM_I2C +\ +- IQ81340_NUM_PHYS_MAP_FLASH) ++#define IQ81340_NUM_ADMA 3 ++#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \ ++ IQ81340_NUM_I2C + \ ++ IQ81340_NUM_PHYS_MAP_FLASH + \ ++ IQ81340_NUM_ADMA) + + /*========================== PMMR offsets for key registers ============*/ + #define IOP13XX_ATU0_PMMR_OFFSET 0x00048000 +@@ -444,22 +454,6 @@ + /*==============================ADMA UNITS===============================*/ + #define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9)) + #define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0) +-#define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs)) +- +-#define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0) +-#define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4) +-#define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8) +-#define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18) +-#define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c) +-#define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20) +-#define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24) +-#define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28) +-#define IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c) +-#define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30) +-#define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34) +-#define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38) +-#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3)) +-#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3)) + + /*==============================XSI BRIDGE===============================*/ + #define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c) +diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h +--- 
linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/arch-iop32x/adma.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,5 @@ ++#ifndef IOP32X_ADMA_H ++#define IOP32X_ADMA_H ++#include ++#endif ++ +diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h +--- linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/arch-iop33x/adma.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,5 @@ ++#ifndef IOP33X_ADMA_H ++#define IOP33X_ADMA_H ++#include ++#endif ++ +diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h +--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx-adma.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,891 @@ ++/* ++ * Copyright © 2006, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#ifndef _ADMA_H ++#define _ADMA_H ++#include ++#include ++#include ++#include ++ ++/* Memory copy units */ ++#define DMA_CCR(chan) (chan->mmr_base + 0x0) ++#define DMA_CSR(chan) (chan->mmr_base + 0x4) ++#define DMA_DAR(chan) (chan->mmr_base + 0xc) ++#define DMA_NDAR(chan) (chan->mmr_base + 0x10) ++#define DMA_PADR(chan) (chan->mmr_base + 0x14) ++#define DMA_PUADR(chan) (chan->mmr_base + 0x18) ++#define DMA_LADR(chan) (chan->mmr_base + 0x1c) ++#define DMA_BCR(chan) (chan->mmr_base + 0x20) ++#define DMA_DCR(chan) (chan->mmr_base + 0x24) ++ ++/* Application accelerator unit */ ++#define AAU_ACR(chan) (chan->mmr_base + 0x0) ++#define AAU_ASR(chan) (chan->mmr_base + 0x4) ++#define AAU_ADAR(chan) (chan->mmr_base + 0x8) ++#define AAU_ANDAR(chan) (chan->mmr_base + 0xc) ++#define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2))) ++#define AAU_DAR(chan) (chan->mmr_base + 0x20) ++#define AAU_ABCR(chan) (chan->mmr_base + 0x24) ++#define AAU_ADCR(chan) (chan->mmr_base + 0x28) ++#define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2))) ++#define AAU_EDCR0_IDX 8 ++#define AAU_EDCR1_IDX 17 ++#define AAU_EDCR2_IDX 26 ++ ++#define DMA0_ID 0 ++#define DMA1_ID 1 ++#define AAU_ID 2 ++ ++struct iop3xx_aau_desc_ctrl { ++ unsigned int int_en:1; ++ unsigned int blk1_cmd_ctrl:3; ++ unsigned int blk2_cmd_ctrl:3; ++ unsigned int blk3_cmd_ctrl:3; ++ unsigned int blk4_cmd_ctrl:3; ++ unsigned int blk5_cmd_ctrl:3; ++ unsigned int blk6_cmd_ctrl:3; ++ unsigned int blk7_cmd_ctrl:3; ++ unsigned int blk8_cmd_ctrl:3; ++ unsigned int blk_ctrl:2; ++ unsigned int dual_xor_en:1; ++ unsigned int tx_complete:1; ++ unsigned int zero_result_err:1; ++ unsigned int zero_result_en:1; ++ unsigned int dest_write_en:1; ++}; ++ ++struct iop3xx_aau_e_desc_ctrl { ++ unsigned int reserved:1; ++ unsigned 
int blk1_cmd_ctrl:3; ++ unsigned int blk2_cmd_ctrl:3; ++ unsigned int blk3_cmd_ctrl:3; ++ unsigned int blk4_cmd_ctrl:3; ++ unsigned int blk5_cmd_ctrl:3; ++ unsigned int blk6_cmd_ctrl:3; ++ unsigned int blk7_cmd_ctrl:3; ++ unsigned int blk8_cmd_ctrl:3; ++ unsigned int reserved2:7; ++}; ++ ++struct iop3xx_dma_desc_ctrl { ++ unsigned int pci_transaction:4; ++ unsigned int int_en:1; ++ unsigned int dac_cycle_en:1; ++ unsigned int mem_to_mem_en:1; ++ unsigned int crc_data_tx_en:1; ++ unsigned int crc_gen_en:1; ++ unsigned int crc_seed_dis:1; ++ unsigned int reserved:21; ++ unsigned int crc_tx_complete:1; ++}; ++ ++struct iop3xx_desc_dma { ++ u32 next_desc; ++ union { ++ u32 pci_src_addr; ++ u32 pci_dest_addr; ++ u32 src_addr; ++ }; ++ union { ++ u32 upper_pci_src_addr; ++ u32 upper_pci_dest_addr; ++ }; ++ union { ++ u32 local_pci_src_addr; ++ u32 local_pci_dest_addr; ++ u32 dest_addr; ++ }; ++ u32 byte_count; ++ union { ++ u32 desc_ctrl; ++ struct iop3xx_dma_desc_ctrl desc_ctrl_field; ++ }; ++ u32 crc_addr; ++}; ++ ++struct iop3xx_desc_aau { ++ u32 next_desc; ++ u32 src[4]; ++ u32 dest_addr; ++ u32 byte_count; ++ union { ++ u32 desc_ctrl; ++ struct iop3xx_aau_desc_ctrl desc_ctrl_field; ++ }; ++ union { ++ u32 src_addr; ++ u32 e_desc_ctrl; ++ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; ++ } src_edc[31]; ++}; ++ ++struct iop3xx_aau_gfmr { ++ unsigned int gfmr1:8; ++ unsigned int gfmr2:8; ++ unsigned int gfmr3:8; ++ unsigned int gfmr4:8; ++}; ++ ++struct iop3xx_desc_pq_xor { ++ u32 next_desc; ++ u32 src[3]; ++ union { ++ u32 data_mult1; ++ struct iop3xx_aau_gfmr data_mult1_field; ++ }; ++ u32 dest_addr; ++ u32 byte_count; ++ union { ++ u32 desc_ctrl; ++ struct iop3xx_aau_desc_ctrl desc_ctrl_field; ++ }; ++ union { ++ u32 src_addr; ++ u32 e_desc_ctrl; ++ struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; ++ u32 data_multiplier; ++ struct iop3xx_aau_gfmr data_mult_field; ++ u32 reserved; ++ } src_edc_gfmr[19]; ++}; ++ ++struct iop3xx_desc_dual_xor { ++ u32 next_desc; ++ u32 src0_addr; ++ u32 src1_addr; ++ u32 h_src_addr; ++ u32 d_src_addr; ++ u32 h_dest_addr; ++ u32 byte_count; ++ union { ++ u32 desc_ctrl; ++ struct iop3xx_aau_desc_ctrl desc_ctrl_field; ++ }; ++ u32 d_dest_addr; ++}; ++ ++union iop3xx_desc { ++ struct iop3xx_desc_aau *aau; ++ struct iop3xx_desc_dma *dma; ++ struct iop3xx_desc_pq_xor *pq_xor; ++ struct iop3xx_desc_dual_xor *dual_xor; ++ void *ptr; ++}; ++ ++static inline int iop_adma_get_max_xor(void) ++{ ++ return 32; ++} ++ ++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) ++{ ++ int id = chan->device->id; ++ ++ switch (id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return __raw_readl(DMA_DAR(chan)); ++ case AAU_ID: ++ return __raw_readl(AAU_ADAR(chan)); ++ default: ++ BUG(); ++ } ++ return 0; ++} ++ ++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, ++ u32 next_desc_addr) ++{ ++ int id = chan->device->id; ++ ++ switch (id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ __raw_writel(next_desc_addr, DMA_NDAR(chan)); ++ break; ++ case AAU_ID: ++ __raw_writel(next_desc_addr, AAU_ANDAR(chan)); ++ break; ++ } ++ ++} ++ ++#define IOP_ADMA_STATUS_BUSY (1 << 10) ++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024) ++#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024) ++#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) ++ ++static inline int iop_chan_is_busy(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(DMA_CSR(chan)); ++ return (status & IOP_ADMA_STATUS_BUSY) ? 
1 : 0; ++} ++ ++static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc, ++ int num_slots) ++{ ++ /* num_slots will only ever be 1, 2, 4, or 8 */ ++ return (desc->idx & (num_slots - 1)) ? 0 : 1; ++} ++ ++/* to do: support large (i.e. > hw max) buffer sizes */ ++static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) ++{ ++ *slots_per_op = 1; ++ return 1; ++} ++ ++/* to do: support large (i.e. > hw max) buffer sizes */ ++static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) ++{ ++ *slots_per_op = 1; ++ return 1; ++} ++ ++static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, ++ int *slots_per_op) ++{ ++ const static int slot_count_table[] = { 0, ++ 1, 1, 1, 1, /* 01 - 04 */ ++ 2, 2, 2, 2, /* 05 - 08 */ ++ 4, 4, 4, 4, /* 09 - 12 */ ++ 4, 4, 4, 4, /* 13 - 16 */ ++ 8, 8, 8, 8, /* 17 - 20 */ ++ 8, 8, 8, 8, /* 21 - 24 */ ++ 8, 8, 8, 8, /* 25 - 28 */ ++ 8, 8, 8, 8, /* 29 - 32 */ ++ }; ++ *slots_per_op = slot_count_table[src_cnt]; ++ return *slots_per_op; ++} ++ ++static inline int ++iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan) ++{ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return iop_chan_memcpy_slot_count(0, slots_per_op); ++ case AAU_ID: ++ return iop3xx_aau_xor_slot_count(0, 2, slots_per_op); ++ default: ++ BUG(); ++ } ++ return 0; ++} ++ ++static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, ++ int *slots_per_op) ++{ ++ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); ++ ++ if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT) ++ return slot_cnt; ++ ++ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; ++ while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) { ++ len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; ++ slot_cnt += *slots_per_op; ++ } ++ ++ if (len) ++ slot_cnt += *slots_per_op; ++ ++ return slot_cnt; ++} ++ ++/* zero sum on iop3xx is limited to 1k at a time so it requires multiple ++ * descriptors ++ */ ++static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, ++ int *slots_per_op) ++{ ++ int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); ++ ++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) ++ return slot_cnt; ++ ++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { ++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ slot_cnt += *slots_per_op; ++ } ++ ++ if (len) ++ slot_cnt += *slots_per_op; ++ ++ return slot_cnt; ++} ++ ++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return hw_desc.dma->dest_addr; ++ case AAU_ID: ++ return hw_desc.aau->dest_addr; ++ default: ++ BUG(); ++ } ++ return 0; ++} ++ ++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return hw_desc.dma->byte_count; ++ case AAU_ID: ++ return hw_desc.aau->byte_count; ++ default: ++ BUG(); ++ } ++ return 0; ++} ++ ++/* translate the src_idx to a descriptor word index */ ++static inline int __desc_idx(int src_idx) ++{ ++ const static int desc_idx_table[] = { 0, 0, 0, 0, ++ 0, 1, 2, 3, ++ 5, 6, 7, 8, ++ 9, 10, 11, 12, ++ 14, 15, 16, 17, ++ 18, 19, 20, 21, ++ 23, 24, 25, 26, ++ 27, 28, 29, 30, ++ }; ++ ++ return desc_idx_table[src_idx]; ++} ++ ++static inline u32 
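++/* AAU sources 0-3 occupy the descriptor's src[] words; higher source ++ * indices live in the extended src_edc[] area and are reached through ++ * the __desc_idx() translation above. */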
iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ int src_idx) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return hw_desc.dma->src_addr; ++ case AAU_ID: ++ break; ++ default: ++ BUG(); ++ } ++ ++ if (src_idx < 4) ++ return hw_desc.aau->src[src_idx]; ++ else ++ return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr; ++} ++ ++static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc, ++ int src_idx, dma_addr_t addr) ++{ ++ if (src_idx < 4) ++ hw_desc->src[src_idx] = addr; ++ else ++ hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr; ++} ++ ++static inline void ++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) ++{ ++ struct iop3xx_desc_dma *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop3xx_dma_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.mem_to_mem_en = 1; ++ u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */ ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ hw_desc->upper_pci_src_addr = 0; ++ hw_desc->crc_addr = 0; ++} ++ ++static inline void ++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) ++{ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop3xx_aau_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */ ++ u_desc_ctrl.field.dest_write_en = 1; ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++} ++ ++static inline u32 ++iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en) ++{ ++ int i, shift; ++ u32 edcr; ++ union { ++ u32 value; ++ struct iop3xx_aau_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ switch (src_cnt) { ++ case 25 ... 32: ++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ ++ edcr = 0; ++ shift = 1; ++ for (i = 24; i < src_cnt; i++) { ++ edcr |= (1 << shift); ++ shift += 3; ++ } ++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr; ++ src_cnt = 24; ++ /* fall through */ ++ case 17 ... 24: ++ if (!u_desc_ctrl.field.blk_ctrl) { ++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; ++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ ++ } ++ edcr = 0; ++ shift = 1; ++ for (i = 16; i < src_cnt; i++) { ++ edcr |= (1 << shift); ++ shift += 3; ++ } ++ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr; ++ src_cnt = 16; ++ /* fall through */ ++ case 9 ... 16: ++ if (!u_desc_ctrl.field.blk_ctrl) ++ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ ++ edcr = 0; ++ shift = 1; ++ for (i = 8; i < src_cnt; i++) { ++ edcr |= (1 << shift); ++ shift += 3; ++ } ++ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr; ++ src_cnt = 8; ++ /* fall through */ ++ case 2 ... 
8: ++ shift = 1; ++ for (i = 0; i < src_cnt; i++) { ++ u_desc_ctrl.value |= (1 << shift); ++ shift += 3; ++ } ++ ++ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) ++ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ ++ } ++ ++ u_desc_ctrl.field.dest_write_en = 1; ++ u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */ ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++ ++ return u_desc_ctrl.value; ++} ++ ++static inline void ++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) ++{ ++ iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en); ++} ++ ++/* return the number of operations */ ++static inline int ++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) ++{ ++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; ++ struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter; ++ union { ++ u32 value; ++ struct iop3xx_aau_desc_ctrl field; ++ } u_desc_ctrl; ++ int i, j; ++ ++ hw_desc = desc->hw_desc; ++ ++ for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0; ++ i += slots_per_op, j++) { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en); ++ u_desc_ctrl.field.dest_write_en = 0; ++ u_desc_ctrl.field.zero_result_en = 1; ++ u_desc_ctrl.field.int_en = int_en; ++ iter->desc_ctrl = u_desc_ctrl.value; ++ ++ /* for the subsequent descriptors preserve the store queue ++ * and chain them together ++ */ ++ if (i) { ++ prev_hw_desc = ++ iop_hw_desc_slot_idx(hw_desc, i - slots_per_op); ++ prev_hw_desc->next_desc = (u32) (desc->phys + (i << 5)); ++ } ++ } ++ ++ return j; ++} ++ ++static inline void ++iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) ++{ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc; ++ union { ++ u32 value; ++ struct iop3xx_aau_desc_ctrl field; ++ } u_desc_ctrl; ++ ++ u_desc_ctrl.value = 0; ++ switch (src_cnt) { ++ case 25 ... 32: ++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ ++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; ++ /* fall through */ ++ case 17 ... 24: ++ if (!u_desc_ctrl.field.blk_ctrl) { ++ hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; ++ u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ ++ } ++ hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0; ++ /* fall through */ ++ case 9 ... 16: ++ if (!u_desc_ctrl.field.blk_ctrl) ++ u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ ++ hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0; ++ /* fall through */ ++ case 1 ... 
8: ++ if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) ++ u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ ++ } ++ ++ u_desc_ctrl.field.dest_write_en = 0; ++ u_desc_ctrl.field.int_en = int_en; ++ hw_desc->desc_ctrl = u_desc_ctrl.value; ++} ++ ++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ u32 byte_count) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ hw_desc.dma->byte_count = byte_count; ++ break; ++ case AAU_ID: ++ hw_desc.aau->byte_count = byte_count; ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static inline void ++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ iop_desc_init_memcpy(desc, 1); ++ hw_desc.dma->byte_count = 0; ++ hw_desc.dma->dest_addr = 0; ++ hw_desc.dma->src_addr = 0; ++ break; ++ case AAU_ID: ++ iop_desc_init_null_xor(desc, 2, 1); ++ hw_desc.aau->byte_count = 0; ++ hw_desc.aau->dest_addr = 0; ++ hw_desc.aau->src[0] = 0; ++ hw_desc.aau->src[1] = 0; ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static inline void ++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) ++{ ++ int slots_per_op = desc->slots_per_op; ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; ++ int i = 0; ++ ++ if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { ++ hw_desc->byte_count = len; ++ } else { ++ do { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; ++ i += slots_per_op; ++ } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); ++ ++ if (len) { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iter->byte_count = len; ++ } ++ } ++} ++ ++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, ++ struct iop_adma_chan *chan, ++ dma_addr_t addr) ++{ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ hw_desc.dma->dest_addr = addr; ++ break; ++ case AAU_ID: ++ hw_desc.aau->dest_addr = addr; ++ break; ++ default: ++ BUG(); ++ } ++} ++ ++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, ++ dma_addr_t addr) ++{ ++ struct iop3xx_desc_dma *hw_desc = desc->hw_desc; ++ hw_desc->src_addr = addr; ++} ++ ++static inline void ++iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx, ++ dma_addr_t addr) ++{ ++ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; ++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; ++ int i; ++ ++ for (i = 0; (slot_cnt -= slots_per_op) >= 0; ++ i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); ++ } ++} ++ ++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, ++ int src_idx, dma_addr_t addr) ++{ ++ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; ++ int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; ++ int i; ++ ++ for (i = 0; (slot_cnt -= slots_per_op) >= 0; ++ i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) { ++ iter = iop_hw_desc_slot_idx(hw_desc, i); ++ iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); ++ } ++} ++ ++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, ++ u32 
next_desc_addr) ++{ ++ /* hw_desc->next_desc is the same location for all channels */ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ BUG_ON(hw_desc.dma->next_desc); ++ hw_desc.dma->next_desc = next_desc_addr; ++} ++ ++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) ++{ ++ /* hw_desc->next_desc is the same location for all channels */ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ return hw_desc.dma->next_desc; ++} ++ ++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) ++{ ++ /* hw_desc->next_desc is the same location for all channels */ ++ union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; ++ hw_desc.dma->next_desc = 0; ++} ++ ++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, ++ u32 val) ++{ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc; ++ hw_desc->src[0] = val; ++} ++ ++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) ++{ ++ struct iop3xx_desc_aau *hw_desc = desc->hw_desc; ++ struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; ++ ++ BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); ++ return desc_ctrl.zero_result_err; ++} ++ ++static inline void iop_chan_append(struct iop_adma_chan *chan) ++{ ++ u32 dma_chan_ctrl; ++ /* workaround dropped interrupts on 3xx */ ++ mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3)); ++ ++ dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); ++ dma_chan_ctrl |= 0x2; ++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); ++} ++ ++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) ++{ ++ if (!busy) ++ del_timer(&chan->cleanup_watchdog); ++} ++ ++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) ++{ ++ return __raw_readl(DMA_CSR(chan)); ++} ++ ++static inline void iop_chan_disable(struct iop_adma_chan *chan) ++{ ++ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); ++ dma_chan_ctrl &= ~1; ++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); ++} ++ ++static inline void iop_chan_enable(struct iop_adma_chan *chan) ++{ ++ u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); ++ ++ dma_chan_ctrl |= 1; ++ __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); ++} ++ ++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(DMA_CSR(chan)); ++ status &= (1 << 9); ++ __raw_writel(status, DMA_CSR(chan)); ++} ++ ++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(DMA_CSR(chan)); ++ status &= (1 << 8); ++ __raw_writel(status, DMA_CSR(chan)); ++} ++ ++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) ++{ ++ u32 status = __raw_readl(DMA_CSR(chan)); ++ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1); ++ break; ++ case AAU_ID: ++ status &= (1 << 5); ++ break; ++ default: ++ BUG(); ++ } ++ ++ __raw_writel(status, DMA_CSR(chan)); ++} ++ ++static inline int ++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++static inline int ++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++static inline int ++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return 0; ++} ++ ++static inline int ++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ return test_bit(5, &status); ++} ++ ++static inline int ++iop_is_err_pci_tabort(unsigned long status, 
struct iop_adma_chan *chan) ++{ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return test_bit(2, &status); ++ default: ++ return 0; ++ } ++} ++ ++static inline int ++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) ++{ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return test_bit(3, &status); ++ default: ++ return 0; ++ } ++} ++ ++static inline int ++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) ++{ ++ switch (chan->device->id) { ++ case DMA0_ID: ++ case DMA1_ID: ++ return test_bit(1, &status); ++ default: ++ return 0; ++ } ++} ++#endif /* _ADMA_H */ +diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h +--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-arm/hardware/iop3xx.h 2007-12-21 15:36:12.000000000 -0500 +@@ -144,24 +144,9 @@ + #define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380) + + /* DMA Controller */ +-#define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400) +-#define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404) +-#define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c) +-#define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410) +-#define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414) +-#define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418) +-#define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c) +-#define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420) +-#define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424) +-#define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440) +-#define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444) +-#define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c) +-#define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450) +-#define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454) +-#define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458) +-#define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c) +-#define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460) +-#define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464) ++#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \ ++ (0x400 + (chan << 6))) ++#define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27) + + /* Peripheral bus interface */ + #define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680) +@@ -210,48 +195,8 @@ + #define IOP_TMR_RATIO_1_1 0x00 + + /* Application accelerator unit */ +-#define IOP3XX_AAU_ACR (volatile u32 *)IOP3XX_REG_ADDR(0x0800) +-#define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804) +-#define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808) +-#define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c) +-#define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810) +-#define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814) +-#define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818) +-#define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c) +-#define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820) +-#define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824) +-#define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828) +-#define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c) +-#define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830) +-#define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834) +-#define 
IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838) +-#define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c) +-#define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840) +-#define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844) +-#define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848) +-#define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c) +-#define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850) +-#define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854) +-#define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858) +-#define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c) +-#define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860) +-#define IOP3XX_AAU_SAR17 (volatile u32 *)IOP3XX_REG_ADDR(0x0864) +-#define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868) +-#define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c) +-#define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870) +-#define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874) +-#define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878) +-#define IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c) +-#define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880) +-#define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884) +-#define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888) +-#define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c) +-#define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890) +-#define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894) +-#define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898) +-#define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c) +-#define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0) +-#define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4) ++#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800) ++#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7) + + /* I2C bus interface unit */ + #define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680) +@@ -329,6 +274,9 @@ + asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val)); + } + ++extern struct platform_device iop3xx_dma_0_channel; ++extern struct platform_device iop3xx_dma_1_channel; ++extern struct platform_device iop3xx_aau_channel; + extern struct platform_device iop3xx_i2c0_device; + extern struct platform_device iop3xx_i2c1_device; + +diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h +--- linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/hardware/iop_adma.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,120 @@ ++/* ++ * Copyright © 2006, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ */ ++#ifndef IOP_ADMA_H ++#define IOP_ADMA_H ++#include ++#include ++#include ++ ++#define IOP_ADMA_SLOT_SIZE 32 ++#define IOP_ADMA_THRESHOLD 4 ++ ++/** ++ * struct iop_adma_device - internal representation of an ADMA device ++ * @pdev: Platform device ++ * @id: HW ADMA Device selector ++ * @dma_desc_pool: base of DMA descriptor region (DMA address) ++ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) ++ * @common: embedded struct dma_device ++ */ ++struct iop_adma_device { ++ struct platform_device *pdev; ++ int id; ++ dma_addr_t dma_desc_pool; ++ void *dma_desc_pool_virt; ++ struct dma_device common; ++}; ++ ++/** ++ * struct iop_adma_chan - internal representation of an ADMA device ++ * @pending: allows batching of hardware operations ++ * @completed_cookie: identifier for the most recently completed operation ++ * @lock: serializes enqueue/dequeue operations to the slot pool ++ * @mmr_base: memory mapped register base ++ * @chain: device chain view of the descriptors ++ * @device: parent device ++ * @common: common dmaengine channel object members ++ * @last_used: place holder for allocation to continue from where it left off ++ * @all_slots: complete domain of slots usable by the channel ++ * @cleanup_watchdog: workaround missed interrupts on iop3xx ++ * @slots_allocated: records the actual size of the descriptor slot pool ++ * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs ++ */ ++struct iop_adma_chan { ++ int pending; ++ dma_cookie_t completed_cookie; ++ spinlock_t lock; /* protects the descriptor slot pool */ ++ void __iomem *mmr_base; ++ struct list_head chain; ++ struct iop_adma_device *device; ++ struct dma_chan common; ++ struct iop_adma_desc_slot *last_used; ++ struct list_head all_slots; ++ struct timer_list cleanup_watchdog; ++ int slots_allocated; ++ struct tasklet_struct irq_tasklet; ++}; ++ ++/** ++ * struct iop_adma_desc_slot - IOP-ADMA software descriptor ++ * @slot_node: node on the iop_adma_chan.all_slots list ++ * @chain_node: node on the op_adma_chan.chain list ++ * @hw_desc: virtual address of the hardware descriptor chain ++ * @phys: hardware address of the hardware descriptor chain ++ * @group_head: first operation in a transaction ++ * @slot_cnt: total slots used in an transaction (group of operations) ++ * @slots_per_op: number of slots per operation ++ * @idx: pool index ++ * @unmap_src_cnt: number of xor sources ++ * @unmap_len: transaction bytecount ++ * @async_tx: support for the async_tx api ++ * @group_list: list of slots that make up a multi-descriptor transaction ++ * for example transfer lengths larger than the supported hw max ++ * @xor_check_result: result of zero sum ++ * @crc32_result: result crc calculation ++ */ ++struct iop_adma_desc_slot { ++ struct list_head slot_node; ++ struct list_head chain_node; ++ void *hw_desc; ++ dma_addr_t phys; ++ struct iop_adma_desc_slot *group_head; ++ u16 slot_cnt; ++ u16 slots_per_op; ++ u16 idx; ++ u16 unmap_src_cnt; ++ size_t unmap_len; ++ struct dma_async_tx_descriptor async_tx; ++ struct list_head group_list; ++ union { ++ u32 *xor_check_result; ++ u32 *crc32_result; ++ }; ++}; ++ ++struct iop_adma_platform_data { ++ int hw_id; ++ dma_cap_mask_t cap_mask; ++ size_t pool_size; ++}; ++ ++#define to_iop_sw_desc(addr_hw_desc) \ ++ container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc) ++#define iop_hw_desc_slot_idx(hw_desc, idx) \ ++ ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) ) ++#endif +diff -Nurb linux-2.6.22-570/include/asm-arm/kgdb.h 
+diff -Nurb linux-2.6.22-570/include/asm-arm/kgdb.h linux-2.6.22-591/include/asm-arm/kgdb.h +--- linux-2.6.22-570/include/asm-arm/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-arm/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,103 @@ ++/* ++ * include/asm-arm/kgdb.h ++ * ++ * ARM KGDB support ++ * ++ * Author: Deepak Saxena ++ * ++ * Copyright (C) 2002 MontaVista Software Inc. ++ * ++ */ ++ ++#ifndef __ASM_KGDB_H__ ++#define __ASM_KGDB_H__ ++ ++#include ++#include ++ ++ ++/* ++ * GDB assumes that we're a user process being debugged, so ++ * it will send us an SWI command to write into memory as the ++ * debug trap. When an SWI occurs, the next instruction addr is ++ * placed into R14_svc before jumping to the vector trap. ++ * This doesn't work for kernel debugging: we are already in SVC ++ * mode, so we would lose the kernel's LR, which is a bad thing. ++ * ++ * By doing this as an undefined instruction trap, we force a mode ++ * switch from SVC to UND mode, allowing us to save full kernel state. ++ * ++ * We also define a KGDB_COMPILED_BREAK which can be used to compile ++ * in breakpoints. This is important for things like sysrq-G and for ++ * the initial breakpoint from trap_init(). ++ * ++ * Note to ARM HW designers: Add real trap support like SH && PPC to ++ * make our lives much much simpler. :) ++ */ ++#define BREAK_INSTR_SIZE 4 ++#define GDB_BREAKINST 0xef9f0001 ++#define KGDB_BREAKINST 0xe7ffdefe ++#define KGDB_COMPILED_BREAK 0xe7ffdeff ++#define CACHE_FLUSH_IS_SAFE 1 ++ ++#ifndef __ASSEMBLY__ ++ ++#define BREAKPOINT() asm(".word 0xe7ffdeff") ++ ++ ++extern void kgdb_handle_bus_error(void); ++extern int kgdb_fault_expected; ++#endif /* !__ASSEMBLY__ */ ++ ++/* ++ * From Kevin Hilman: ++ * ++ * gdb is expecting the following register layout. ++ * ++ * r0-r15: 1 long word each ++ * f0-f7: unused, 3 long words each !! ++ * fps: unused, 1 long word ++ * cpsr: 1 long word ++ * ++ * Even though f0-f7 and fps are not used, they need to be ++ * present in the registers sent for correct processing in ++ * the host-side gdb. ++ * ++ * In particular, it is crucial that CPSR is in the right place, ++ * otherwise gdb will not be able to correctly interpret stepping over ++ * conditional branches. ++ */ ++#define _GP_REGS 16 ++#define _FP_REGS 8 ++#define _EXTRA_REGS 2 ++#define GDB_MAX_REGS (_GP_REGS + (_FP_REGS * 3) + _EXTRA_REGS) ++ ++#define KGDB_MAX_NO_CPUS 1 ++#define BUFMAX 400 ++#define NUMREGBYTES (GDB_MAX_REGS << 2) ++#define NUMCRITREGBYTES (32 << 2) ++ ++#define _R0 0 ++#define _R1 1 ++#define _R2 2 ++#define _R3 3 ++#define _R4 4 ++#define _R5 5 ++#define _R6 6 ++#define _R7 7 ++#define _R8 8 ++#define _R9 9 ++#define _R10 10 ++#define _FP 11 ++#define _IP 12 ++#define _SP 13 ++#define _LR 14 ++#define _PC 15 ++#define _CPSR (GDB_MAX_REGS - 1) ++ ++/* So that we can denote the end of a frame for tracing, in the simple ++ * case. */ ++#define CFI_END_FRAME(func) __CFI_END_FRAME(_PC,_SP,func) ++ ++#endif /* __ASM_KGDB_H__ */ +diff -Nurb linux-2.6.22-570/include/asm-arm/system.h linux-2.6.22-591/include/asm-arm/system.h +--- linux-2.6.22-570/include/asm-arm/system.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-arm/system.h 2007-12-21 15:36:12.000000000 -0500 +@@ -360,6 +360,41 @@ + extern void disable_hlt(void); + extern void enable_hlt(void); + ++#ifndef CONFIG_SMP ++/* ++ * Atomic compare and exchange. ++ */ ++#define __HAVE_ARCH_CMPXCHG 1 ++ ++extern unsigned long wrong_size_cmpxchg(volatile void *ptr); ++ ++static inline unsigned long __cmpxchg(volatile void *ptr, ++ unsigned long old, ++ unsigned long new, int size) ++{ ++ unsigned long flags, prev; ++ volatile unsigned long *p = ptr; ++ ++ if (size == 4) { ++ local_irq_save(flags); ++ if ((prev = *p) == old) ++ *p = new; ++ local_irq_restore(flags); ++ return(prev); ++ } else ++ return wrong_size_cmpxchg(ptr); ++} ++ ++#define cmpxchg(ptr,o,n) \ ++({ \ ++ __typeof__(*(ptr)) _o_ = (o); \ ++ __typeof__(*(ptr)) _n_ = (n); \ ++ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ ++ (unsigned long)_n_, sizeof(*(ptr))); \ ++}) ++ ++#endif ++ + #endif /* __ASSEMBLY__ */ + + #define arch_align_stack(x) (x)
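On UP ARM the __cmpxchg() above simply masks interrupts around the compare-and-store, which is sufficient on a single CPU. A short sketch of how such a primitive is typically consumed, an optimistic read-modify-write retry loop; the function name is illustrative, not part of the patch:

/* Sketch only: add 'delta' to '*ctr' atomically with respect to other
 * contexts that also use cmpxchg() on the same word. If another
 * context updated the counter between our load and our cmpxchg(),
 * the compare fails and we retry with the fresh value. */
static inline unsigned long counter_add(volatile unsigned long *ctr,
					unsigned long delta)
{
	unsigned long old, new;

	do {
		old = *ctr;
		new = old + delta;
	} while (cmpxchg(ctr, old, new) != old);

	return new;
}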
+diff -Nurb linux-2.6.22-570/include/asm-cris/page.h linux-2.6.22-591/include/asm-cris/page.h +--- linux-2.6.22-570/include/asm-cris/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-cris/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -20,7 +20,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-generic/kgdb.h linux-2.6.22-591/include/asm-generic/kgdb.h +--- linux-2.6.22-570/include/asm-generic/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-generic/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,100 @@ ++/* ++ * include/asm-generic/kgdb.h ++ * ++ * This provides the assembly level information so that KGDB can provide ++ * a GDB that has been patched with enough information to know to stop ++ * trying to unwind the function. ++ * ++ * Author: Tom Rini ++ * ++ * 2005 (c) MontaVista Software, Inc. ++ * 2006 (c) Embedded Alley Solutions, Inc. ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#ifndef __ASM_GENERIC_KGDB_H__ ++#define __ASM_GENERIC_KGDB_H__ ++ ++#ifdef CONFIG_X86 ++/** ++ * kgdb_skipexception - Bail out of KGDB when we've been triggered. ++ * @exception: Exception vector number ++ * @regs: Current &struct pt_regs. ++ * ++ * On some architectures we need to skip a breakpoint exception when ++ * it occurs after a breakpoint has been removed. ++ */ ++int kgdb_skipexception(int exception, struct pt_regs *regs); ++#else ++#define kgdb_skipexception(exception, regs) 0 ++#endif ++ ++#if defined(CONFIG_X86) ++/** ++ * kgdb_post_master_code - Save error vector/code numbers. ++ * @regs: Original pt_regs. ++ * @e_vector: Original error vector. ++ * @err_code: Original error code. ++ * ++ * This is needed on architectures which support SMP and KGDB. ++ * This function is called after all the slave cpus have been put ++ * to a known spin state and the master CPU has control over KGDB. ++ */ ++extern void kgdb_post_master_code(struct pt_regs *regs, int e_vector, ++ int err_code); ++ ++/** ++ * kgdb_disable_hw_debug - Disable hardware debugging while we are in kgdb. ++ * @regs: Current &struct pt_regs. ++ * ++ * This function will be called if the particular architecture must ++ * disable hardware debugging while it is processing gdb packets or ++ * handling exceptions. ++ */ ++extern void kgdb_disable_hw_debug(struct pt_regs *regs); ++#else ++#define kgdb_disable_hw_debug(regs) do { } while (0) ++#define kgdb_post_master_code(regs, v, c) do { } while (0) ++#endif
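The no-op fallbacks above use the usual do { } while (0) idiom so that a stubbed-out macro still behaves as a single C statement. A small illustration of why that matters; the names are illustrative only:

#define NOOP_STUB()	do { } while (0)	/* same idiom as the stubs above */

/* Without the do/while wrapper an empty expansion would leave the
 * 'else' below dangling after the ';'. With it, the stub is exactly
 * one statement and the if/else parses as intended. */
static int example(int cond)
{
	if (cond)
		NOOP_STUB();
	else
		return -1;
	return 0;
}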
++ ++#ifdef CONFIG_KGDB_ARCH_HAS_SHADOW_INFO ++/** ++ * kgdb_shadowinfo - Get shadowed information on @threadid. ++ * @regs: The &struct pt_regs of the current process. ++ * @buffer: A buffer of %BUFMAX size. ++ * @threadid: The thread id of the shadowed process to get information on. ++ */ ++extern void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, ++ unsigned threadid); ++ ++/** ++ * kgdb_get_shadow_thread - Get the shadowed &task_struct of @threadid. ++ * @regs: The &struct pt_regs of the current thread. ++ * @threadid: The thread id of the shadowed process to get information on. ++ * ++ * RETURN: ++ * This returns a pointer to the &struct task_struct of the shadowed ++ * thread, @threadid. ++ */ ++extern struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, ++ int threadid); ++ ++/** ++ * kgdb_shadow_regs - Return the shadowed registers of @threadid. ++ * @regs: The &struct pt_regs of the current thread. ++ * @threadid: The thread id we want the &struct pt_regs for. ++ * ++ * RETURN: ++ * A pointer to the &struct pt_regs of the shadowed thread @threadid. ++ */ ++extern struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid); ++#else ++#define kgdb_shadowinfo(regs, buf, threadid) do { } while (0) ++#define kgdb_get_shadow_thread(regs, threadid) NULL ++#define kgdb_shadow_regs(regs, threadid) NULL ++#endif ++ ++#endif /* __ASM_GENERIC_KGDB_H__ */ +diff -Nurb linux-2.6.22-570/include/asm-generic/vmlinux.lds.h linux-2.6.22-591/include/asm-generic/vmlinux.lds.h +--- linux-2.6.22-570/include/asm-generic/vmlinux.lds.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-generic/vmlinux.lds.h 2007-12-21 15:36:12.000000000 -0500 +@@ -127,6 +127,8 @@ + *(__ksymtab_strings) \ + } \ + \ ++ EH_FRAME \ ++ \ + /* Built-in module parameters. */ \ + __param : AT(ADDR(__param) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___param) = .; \ +@@ -177,6 +179,26 @@ + *(.kprobes.text) \ + VMLINUX_SYMBOL(__kprobes_text_end) = .; + ++#ifdef CONFIG_STACK_UNWIND ++#define EH_FRAME \ ++ /* Unwind data binary search table */ \ ++ . = ALIGN(8); \ ++ .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ ++ VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ ++ *(.eh_frame_hdr) \ ++ VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ ++ } \ ++ /* Unwind data */ \ ++ . = ALIGN(8); \ ++ .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) { \ ++ VMLINUX_SYMBOL(__start_unwind) = .; \ ++ *(.eh_frame) \ ++ VMLINUX_SYMBOL(__end_unwind) = .; \ ++ } ++#else ++#define EH_FRAME ++#endif ++ + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to + the beginning of the section so we begin them at 0.
*/ +diff -Nurb linux-2.6.22-570/include/asm-h8300/page.h linux-2.6.22-591/include/asm-h8300/page.h +--- linux-2.6.22-570/include/asm-h8300/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-h8300/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -22,7 +22,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-i386/kdebug.h linux-2.6.22-591/include/asm-i386/kdebug.h +--- linux-2.6.22-570/include/asm-i386/kdebug.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-i386/kdebug.h 2007-12-21 15:36:12.000000000 -0500 +@@ -28,6 +28,7 @@ + DIE_CALL, + DIE_NMI_IPI, + DIE_PAGE_FAULT, ++ DIE_PAGE_FAULT_NO_CONTEXT, + }; + + #endif +diff -Nurb linux-2.6.22-570/include/asm-i386/kgdb.h linux-2.6.22-591/include/asm-i386/kgdb.h +--- linux-2.6.22-570/include/asm-i386/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-i386/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,51 @@ ++#ifdef __KERNEL__ ++#ifndef _ASM_KGDB_H_ ++#define _ASM_KGDB_H_ ++ ++#include ++ ++/* ++ * Copyright (C) 2001-2004 Amit S. Kale ++ */ ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 1024 ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES 64 ++/* Number of bytes of registers we need to save for a setjmp/longjmp. */ ++#define NUMCRITREGBYTES 24 ++ ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. 
++ */ ++enum regnames { _EAX, /* 0 */ ++ _ECX, /* 1 */ ++ _EDX, /* 2 */ ++ _EBX, /* 3 */ ++ _ESP, /* 4 */ ++ _EBP, /* 5 */ ++ _ESI, /* 6 */ ++ _EDI, /* 7 */ ++ _PC, /* 8 also known as eip */ ++ _PS, /* 9 also known as eflags */ ++ _CS, /* 10 */ ++ _SS, /* 11 */ ++ _DS, /* 12 */ ++ _ES, /* 13 */ ++ _FS, /* 14 */ ++ _GS /* 15 */ ++}; ++ ++#define BREAKPOINT() asm(" int $3"); ++#define BREAK_INSTR_SIZE 1 ++#define CACHE_FLUSH_IS_SAFE 1 ++#endif /* _ASM_KGDB_H_ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-i386/page.h linux-2.6.22-591/include/asm-i386/page.h +--- linux-2.6.22-570/include/asm-i386/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-i386/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -34,7 +34,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-i386/unistd.h linux-2.6.22-591/include/asm-i386/unistd.h +--- linux-2.6.22-570/include/asm-i386/unistd.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-i386/unistd.h 2007-12-21 15:36:12.000000000 -0500 +@@ -329,10 +329,13 @@ + #define __NR_signalfd 321 + #define __NR_timerfd 322 + #define __NR_eventfd 323 ++#define __NR_revokeat 324 ++#define __NR_frevoke 325 ++#define __NR_fallocate 326 + + #ifdef __KERNEL__ + +-#define NR_syscalls 324 ++#define NR_syscalls 327 + + #define __ARCH_WANT_IPC_PARSE_VERSION + #define __ARCH_WANT_OLD_READDIR +diff -Nurb linux-2.6.22-570/include/asm-i386/unwind.h linux-2.6.22-591/include/asm-i386/unwind.h +--- linux-2.6.22-570/include/asm-i386/unwind.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-i386/unwind.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,6 +1,95 @@ + #ifndef _ASM_I386_UNWIND_H + #define _ASM_I386_UNWIND_H + ++/* ++ * Copyright (C) 2002-2006 Novell, Inc. ++ * Jan Beulich ++ * This code is released under version 2 of the GNU GPL. 
++ */ ++ ++#ifdef CONFIG_STACK_UNWIND ++ ++#include ++#include ++#include ++#include ++ ++struct unwind_frame_info ++{ ++ struct pt_regs regs; ++ struct task_struct *task; ++ unsigned call_frame:1; ++}; ++ ++#define UNW_PC(frame) (frame)->regs.eip ++#define UNW_SP(frame) (frame)->regs.esp ++#ifdef CONFIG_FRAME_POINTER ++#define UNW_FP(frame) (frame)->regs.ebp ++#define FRAME_RETADDR_OFFSET 4 ++#define FRAME_LINK_OFFSET 0 ++#define STACK_BOTTOM(tsk) STACK_LIMIT((tsk)->thread.esp0) ++#define STACK_TOP(tsk) ((tsk)->thread.esp0) ++#else ++#define UNW_FP(frame) ((void)(frame), 0) ++#endif ++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) ++ ++#define UNW_REGISTER_INFO \ ++ PTREGS_INFO(eax), \ ++ PTREGS_INFO(ecx), \ ++ PTREGS_INFO(edx), \ ++ PTREGS_INFO(ebx), \ ++ PTREGS_INFO(esp), \ ++ PTREGS_INFO(ebp), \ ++ PTREGS_INFO(esi), \ ++ PTREGS_INFO(edi), \ ++ PTREGS_INFO(eip) ++ ++#define UNW_DEFAULT_RA(raItem, dataAlign) \ ++ ((raItem).where == Memory && \ ++ !((raItem).value * (dataAlign) + 4)) ++ ++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, ++ /*const*/ struct pt_regs *regs) ++{ ++ if (user_mode_vm(regs)) ++ info->regs = *regs; ++ else { ++ memcpy(&info->regs, regs, offsetof(struct pt_regs, esp)); ++ info->regs.esp = (unsigned long)®s->esp; ++ info->regs.xss = __KERNEL_DS; ++ } ++} ++ ++static inline void arch_unw_init_blocked(struct unwind_frame_info *info) ++{ ++ memset(&info->regs, 0, sizeof(info->regs)); ++ info->regs.eip = info->task->thread.eip; ++ info->regs.xcs = __KERNEL_CS; ++ __get_user(info->regs.ebp, (long *)info->task->thread.esp); ++ info->regs.esp = info->task->thread.esp; ++ info->regs.xss = __KERNEL_DS; ++ info->regs.xds = __USER_DS; ++ info->regs.xes = __USER_DS; ++ info->regs.xfs = __KERNEL_PERCPU; ++} ++ ++extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *, ++ asmlinkage int (*callback)(struct unwind_frame_info *, ++ void *arg), ++ void *arg); ++ ++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info) ++{ ++ return user_mode_vm(&info->regs) ++ || info->regs.eip < PAGE_OFFSET ++ || (info->regs.eip >= __fix_to_virt(FIX_VDSO) ++ && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE) ++ || info->regs.esp < PAGE_OFFSET; ++} ++ ++#else ++ + #define UNW_PC(frame) ((void)(frame), 0) + #define UNW_SP(frame) ((void)(frame), 0) + #define UNW_FP(frame) ((void)(frame), 0) +@@ -10,4 +99,6 @@ + return 0; + } + ++#endif ++ + #endif /* _ASM_I386_UNWIND_H */ +diff -Nurb linux-2.6.22-570/include/asm-ia64/kdebug.h linux-2.6.22-591/include/asm-ia64/kdebug.h +--- linux-2.6.22-570/include/asm-ia64/kdebug.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ia64/kdebug.h 2007-12-21 15:36:12.000000000 -0500 +@@ -69,6 +69,7 @@ + DIE_KDEBUG_LEAVE, + DIE_KDUMP_ENTER, + DIE_KDUMP_LEAVE, ++ DIE_PAGE_FAULT_NO_CONTEXT, + }; + + #endif +diff -Nurb linux-2.6.22-570/include/asm-ia64/kgdb.h linux-2.6.22-591/include/asm-ia64/kgdb.h +--- linux-2.6.22-570/include/asm-ia64/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-ia64/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,37 @@ ++#ifdef __KERNEL__ ++#ifndef _ASM_KGDB_H_ ++#define _ASM_KGDB_H_ ++ ++/* ++ * Copyright (C) 2001-2004 Amit S. 
Kale ++ */ ++ ++#include ++#include ++ ++/************************************************************************/ ++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers*/ ++/* at least NUMREGBYTES*2 are needed for register packets */ ++/* Longer buffer is needed to list all threads */ ++#define BUFMAX 1024 ++ ++/* Number of bytes of registers. We set this to 0 so that certain GDB ++ * packets will fail, forcing the use of others, which are more friendly ++ * on ia64. */ ++#define NUMREGBYTES 0 ++ ++#define NUMCRITREGBYTES (70*8) ++#define JMP_REGS_ALIGNMENT __attribute__ ((aligned (16))) ++ ++#define BREAKNUM 0x00003333300LL ++#define KGDBBREAKNUM 0x6665UL ++#define BREAKPOINT() asm volatile ("break.m 0x6665") ++#define BREAK_INSTR_SIZE 16 ++#define CACHE_FLUSH_IS_SAFE 1 ++ ++struct pt_regs; ++extern volatile int kgdb_hwbreak_sstep[NR_CPUS]; ++extern void smp_send_nmi_allbutself(void); ++extern void kgdb_wait_ipi(struct pt_regs *); ++#endif /* _ASM_KGDB_H_ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-ia64/page.h linux-2.6.22-591/include/asm-ia64/page.h +--- linux-2.6.22-570/include/asm-ia64/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ia64/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -87,9 +87,10 @@ + } while (0) + + +-#define alloc_zeroed_user_highpage(vma, vaddr) \ ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ + ({ \ +- struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \ ++ struct page *page = alloc_page_vma( \ ++ GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr); \ + if (page) \ + flush_dcache_page(page); \ + page; \ +diff -Nurb linux-2.6.22-570/include/asm-ia64/processor.h linux-2.6.22-591/include/asm-ia64/processor.h +--- linux-2.6.22-570/include/asm-ia64/processor.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ia64/processor.h 2007-12-21 15:36:12.000000000 -0500 +@@ -295,9 +295,9 @@ + regs->ar_bspstore = current->thread.rbs_bot; \ + regs->ar_fpsr = FPSR_DEFAULT; \ + regs->loadrs = 0; \ +- regs->r8 = current->mm->dumpable; /* set "don't zap registers" flag */ \ ++ regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ + regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ +- if (unlikely(!current->mm->dumpable)) { \ ++ if (unlikely(!get_dumpable(current->mm))) { \ + /* \ + * Zap scratch regs to avoid leaking bits between processes with different \ + * uid/privileges. 
\ +diff -Nurb linux-2.6.22-570/include/asm-m32r/page.h linux-2.6.22-591/include/asm-m32r/page.h +--- linux-2.6.22-570/include/asm-m32r/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-m32r/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -15,7 +15,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-m68knommu/page.h linux-2.6.22-591/include/asm-m68knommu/page.h +--- linux-2.6.22-570/include/asm-m68knommu/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-m68knommu/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -22,7 +22,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-32.h linux-2.6.22-591/include/asm-mips/asmmacro-32.h +--- linux-2.6.22-570/include/asm-mips/asmmacro-32.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-mips/asmmacro-32.h 2007-12-21 15:36:12.000000000 -0500 +@@ -11,6 +11,28 @@ + #include + #include + #include ++#include ++ ++ .macro fpu_save_double_kgdb stack status tmp1=t0 ++ cfc1 \tmp1, fcr31 ++ sdc1 $f0, GDB_FR_FPR0(\stack) ++ sdc1 $f2, GDB_FR_FPR2(\stack) ++ sdc1 $f4, GDB_FR_FPR4(\stack) ++ sdc1 $f6, GDB_FR_FPR6(\stack) ++ sdc1 $f8, GDB_FR_FPR8(\stack) ++ sdc1 $f10, GDB_FR_FPR10(\stack) ++ sdc1 $f12, GDB_FR_FPR12(\stack) ++ sdc1 $f14, GDB_FR_FPR14(\stack) ++ sdc1 $f16, GDB_FR_FPR16(\stack) ++ sdc1 $f18, GDB_FR_FPR18(\stack) ++ sdc1 $f20, GDB_FR_FPR20(\stack) ++ sdc1 $f22, GDB_FR_FPR22(\stack) ++ sdc1 $f24, GDB_FR_FPR24(\stack) ++ sdc1 $f26, GDB_FR_FPR26(\stack) ++ sdc1 $f28, GDB_FR_FPR28(\stack) ++ sdc1 $f30, GDB_FR_FPR30(\stack) ++ sw \tmp1, GDB_FR_FSR(\stack) ++ .endm + + .macro fpu_save_double thread status tmp1=t0 + cfc1 \tmp1, fcr31 +@@ -91,6 +113,27 @@ + ctc1 \tmp, fcr31 + .endm + ++ .macro fpu_restore_double_kgdb stack status tmp=t0 ++ lw \tmp, GDB_FR_FSR(\stack) ++ ldc1 $f0, GDB_FR_FPR0(\stack) ++ ldc1 $f2, GDB_FR_FPR2(\stack) ++ ldc1 $f4, GDB_FR_FPR4(\stack) ++ ldc1 $f6, GDB_FR_FPR6(\stack) ++ ldc1 $f8, GDB_FR_FPR8(\stack) ++ ldc1 $f10, GDB_FR_FPR10(\stack) ++ ldc1 $f12, GDB_FR_FPR12(\stack) ++ ldc1 $f14, GDB_FR_FPR14(\stack) ++ ldc1 $f16, GDB_FR_FPR16(\stack) ++ ldc1 $f18, GDB_FR_FPR18(\stack) ++ ldc1 $f20, GDB_FR_FPR20(\stack) ++ ldc1 $f22, GDB_FR_FPR22(\stack) ++ ldc1 $f24, GDB_FR_FPR24(\stack) ++ ldc1 $f26, GDB_FR_FPR26(\stack) ++ ldc1 $f28, GDB_FR_FPR28(\stack) ++ ldc1 $f30, GDB_FR_FPR30(\stack) ++ ctc1 \tmp, fcr31 ++ .endm ++ + .macro fpu_restore_single thread tmp=t0 + lw \tmp, THREAD_FCR31(\thread) + lwc1 $f0, THREAD_FPR0(\thread) +diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-64.h linux-2.6.22-591/include/asm-mips/asmmacro-64.h +--- linux-2.6.22-570/include/asm-mips/asmmacro-64.h 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-591/include/asm-mips/asmmacro-64.h 2007-12-21 15:36:12.000000000 -0500 +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + .macro fpu_save_16even thread tmp=t0 + cfc1 \tmp, fcr31 +@@ -53,6 +54,46 @@ + sdc1 $f31, THREAD_FPR31(\thread) + .endm + ++ .macro fpu_save_16odd_kgdb stack ++ sdc1 $f1, GDB_FR_FPR1(\stack) ++ sdc1 $f3, GDB_FR_FPR3(\stack) ++ sdc1 $f5, GDB_FR_FPR5(\stack) ++ sdc1 $f7, GDB_FR_FPR7(\stack) ++ sdc1 $f9, GDB_FR_FPR9(\stack) ++ sdc1 $f11, GDB_FR_FPR11(\stack) ++ sdc1 $f13, GDB_FR_FPR13(\stack) ++ sdc1 $f15, GDB_FR_FPR15(\stack) ++ sdc1 $f17, GDB_FR_FPR17(\stack) ++ sdc1 $f19, GDB_FR_FPR19(\stack) ++ sdc1 $f21, GDB_FR_FPR21(\stack) ++ sdc1 $f23, GDB_FR_FPR23(\stack) ++ sdc1 $f25, GDB_FR_FPR25(\stack) ++ sdc1 $f27, GDB_FR_FPR27(\stack) ++ sdc1 $f29, GDB_FR_FPR29(\stack) ++ sdc1 $f31, GDB_FR_FPR31(\stack) ++ .endm ++ ++ .macro fpu_save_16even_kgdb stack tmp=t0 ++ cfc1 \tmp, fcr31 ++ sdc1 $f0, GDB_FR_FPR0(\stack) ++ sdc1 $f2, GDB_FR_FPR2(\stack) ++ sdc1 $f4, GDB_FR_FPR4(\stack) ++ sdc1 $f6, GDB_FR_FPR6(\stack) ++ sdc1 $f8, GDB_FR_FPR8(\stack) ++ sdc1 $f10, GDB_FR_FPR10(\stack) ++ sdc1 $f12, GDB_FR_FPR12(\stack) ++ sdc1 $f14, GDB_FR_FPR14(\stack) ++ sdc1 $f16, GDB_FR_FPR16(\stack) ++ sdc1 $f18, GDB_FR_FPR18(\stack) ++ sdc1 $f20, GDB_FR_FPR20(\stack) ++ sdc1 $f22, GDB_FR_FPR22(\stack) ++ sdc1 $f24, GDB_FR_FPR24(\stack) ++ sdc1 $f26, GDB_FR_FPR26(\stack) ++ sdc1 $f28, GDB_FR_FPR28(\stack) ++ sdc1 $f30, GDB_FR_FPR30(\stack) ++ sw \tmp, GDB_FR_FSR(\stack) ++ .endm ++ + .macro fpu_save_double thread status tmp + sll \tmp, \status, 5 + bgez \tmp, 2f +@@ -61,6 +102,15 @@ + fpu_save_16even \thread \tmp + .endm + ++ .macro fpu_save_double_kgdb stack status tmp ++ sll \tmp, \status, 5 ++ bgez \tmp, 2f ++ nop ++ fpu_save_16odd_kgdb \stack ++2: ++ fpu_save_16even_kgdb \stack \tmp ++ .endm ++ + .macro fpu_restore_16even thread tmp=t0 + lw \tmp, THREAD_FCR31(\thread) + ldc1 $f0, THREAD_FPR0(\thread) +@@ -101,6 +151,46 @@ + ldc1 $f31, THREAD_FPR31(\thread) + .endm + ++ .macro fpu_restore_16even_kgdb stack tmp=t0 ++ lw \tmp, GDB_FR_FSR(\stack) ++ ldc1 $f0, GDB_FR_FPR0(\stack) ++ ldc1 $f2, GDB_FR_FPR2(\stack) ++ ldc1 $f4, GDB_FR_FPR4(\stack) ++ ldc1 $f6, GDB_FR_FPR6(\stack) ++ ldc1 $f8, GDB_FR_FPR8(\stack) ++ ldc1 $f10, GDB_FR_FPR10(\stack) ++ ldc1 $f12, GDB_FR_FPR12(\stack) ++ ldc1 $f14, GDB_FR_FPR14(\stack) ++ ldc1 $f16, GDB_FR_FPR16(\stack) ++ ldc1 $f18, GDB_FR_FPR18(\stack) ++ ldc1 $f20, GDB_FR_FPR20(\stack) ++ ldc1 $f22, GDB_FR_FPR22(\stack) ++ ldc1 $f24, GDB_FR_FPR24(\stack) ++ ldc1 $f26, GDB_FR_FPR26(\stack) ++ ldc1 $f28, GDB_FR_FPR28(\stack) ++ ldc1 $f30, GDB_FR_FPR30(\stack) ++ ctc1 \tmp, fcr31 ++ .endm ++ ++ .macro fpu_restore_16odd_kgdb stack ++ ldc1 $f1, GDB_FR_FPR1(\stack) ++ ldc1 $f3, GDB_FR_FPR3(\stack) ++ ldc1 $f5, GDB_FR_FPR5(\stack) ++ ldc1 $f7, GDB_FR_FPR7(\stack) ++ ldc1 $f9, GDB_FR_FPR9(\stack) ++ ldc1 $f11, GDB_FR_FPR11(\stack) ++ ldc1 $f13, GDB_FR_FPR13(\stack) ++ ldc1 $f15, GDB_FR_FPR15(\stack) ++ ldc1 $f17, GDB_FR_FPR17(\stack) ++ ldc1 $f19, GDB_FR_FPR19(\stack) ++ ldc1 $f21, GDB_FR_FPR21(\stack) ++ ldc1 $f23, GDB_FR_FPR23(\stack) ++ ldc1 $f25, GDB_FR_FPR25(\stack) ++ ldc1 $f27, GDB_FR_FPR27(\stack) ++ ldc1 $f29, GDB_FR_FPR29(\stack) ++ ldc1 $f31, GDB_FR_FPR31(\stack) ++ .endm ++ + .macro fpu_restore_double thread status tmp + sll \tmp, \status, 5 + bgez \tmp, 1f # 16 register mode? 
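The "# 16 register mode?" test in the macros above works because shifting the CP0 Status word left by 5 moves the FR bit (bit 26) into the sign bit, so bgez (taken when the sign bit is clear) selects the 16-even-register path. The same predicate expressed in C, as a sketch; ST0_FR at bit 26 is the standard MIPS position, stated here as an assumption rather than taken from the patch:

#include <stdint.h>
#include <stdbool.h>

#define ST0_FR (1u << 26)	/* CP0 Status.FR: 32 x 64-bit FPRs when set */

/* Mirrors 'sll \tmp, \status, 5; bgez \tmp, ...': after the shift the
 * FR bit sits in bit 31, so a negative result means the odd-numbered
 * FP registers exist and must be saved/restored as well. */
static bool fpu_has_odd_regs(uint32_t status)
{
	return ((status << 5) & 0x80000000u) != 0;
}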
+@@ -109,6 +199,15 @@ + 1: fpu_restore_16even \thread \tmp + .endm + ++ .macro fpu_restore_double_kgdb stack status tmp ++ sll \tmp, \status, 5 ++ bgez \tmp, 1f # 16 register mode? ++ nop ++ ++ fpu_restore_16odd_kgdb \stack ++1: fpu_restore_16even_kgdb \stack \tmp ++ .endm ++ + .macro cpu_save_nonscratch thread + LONG_S s0, THREAD_REG16(\thread) + LONG_S s1, THREAD_REG17(\thread) +diff -Nurb linux-2.6.22-570/include/asm-mips/kdebug.h linux-2.6.22-591/include/asm-mips/kdebug.h +--- linux-2.6.22-570/include/asm-mips/kdebug.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-mips/kdebug.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1 +1,30 @@ +-#include ++/* ++ * ++ * Copyright (C) 2004 MontaVista Software Inc. ++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 2 of the License, or (at your ++ * option) any later version. ++ * ++ */ ++#ifndef _MIPS_KDEBUG_H ++#define _MIPS_KDEBUG_H ++ ++#include ++ ++struct pt_regs; ++ ++extern struct atomic_notifier_head mips_die_head; ++ ++enum die_val { ++ DIE_OOPS = 1, ++ DIE_PANIC, ++ DIE_DIE, ++ DIE_KERNELDEBUG, ++ DIE_TRAP, ++ DIE_PAGE_FAULT, ++}; ++ ++#endif /* _MIPS_KDEBUG_H */ +diff -Nurb linux-2.6.22-570/include/asm-mips/kgdb.h linux-2.6.22-591/include/asm-mips/kgdb.h +--- linux-2.6.22-570/include/asm-mips/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-mips/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,41 @@ ++#ifdef __KERNEL__ ++#ifndef _ASM_KGDB_H_ ++#define _ASM_KGDB_H_ ++ ++#include ++#include ++ ++#ifndef __ASSEMBLY__ ++#if (_MIPS_ISA == _MIPS_ISA_MIPS1) || (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS32) ++ ++typedef u32 gdb_reg_t; ++ ++#elif (_MIPS_ISA == _MIPS_ISA_MIPS3) || (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS64) ++ ++#ifdef CONFIG_32BIT ++typedef u32 gdb_reg_t; ++#else /* CONFIG_CPU_32BIT */ ++typedef u64 gdb_reg_t; ++#endif ++#else ++#error "Need to set typedef for gdb_reg_t" ++#endif /* _MIPS_ISA */ ++ ++#define BUFMAX 2048 ++#define NUMREGBYTES (90*sizeof(gdb_reg_t)) ++#define NUMCRITREGBYTES (12*sizeof(gdb_reg_t)) ++#define BREAK_INSTR_SIZE 4 ++#define BREAKPOINT() __asm__ __volatile__( \ ++ ".globl breakinst\n\t" \ ++ ".set\tnoreorder\n\t" \ ++ "nop\n" \ ++ "breakinst:\tbreak\n\t" \ ++ "nop\n\t" \ ++ ".set\treorder") ++#define CACHE_FLUSH_IS_SAFE 0 ++ ++extern int kgdb_early_setup; ++ ++#endif /* !__ASSEMBLY__ */ ++#endif /* _ASM_KGDB_H_ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-mips/ptrace.h linux-2.6.22-591/include/asm-mips/ptrace.h +--- linux-2.6.22-570/include/asm-mips/ptrace.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-mips/ptrace.h 2007-12-21 15:36:12.000000000 -0500 +@@ -28,7 +28,7 @@ + * system call/exception. As usual the registers k0/k1 aren't being saved. + */ + struct pt_regs { +-#ifdef CONFIG_32BIT ++#if defined(CONFIG_32BIT) || defined(CONFIG_KGDB) + /* Pad bytes for argument save space on the stack. 
*/ + unsigned long pad0[6]; + #endif +diff -Nurb linux-2.6.22-570/include/asm-powerpc/cputable.h linux-2.6.22-591/include/asm-powerpc/cputable.h +--- linux-2.6.22-570/include/asm-powerpc/cputable.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/cputable.h 2007-12-21 15:36:12.000000000 -0500 +@@ -111,7 +111,7 @@ + /* CPU kernel features */ + + /* Retain the 32b definitions all use bottom half of word */ +-#define CPU_FTR_SPLIT_ID_CACHE ASM_CONST(0x0000000000000001) ++#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x0000000000000001) + #define CPU_FTR_L2CR ASM_CONST(0x0000000000000002) + #define CPU_FTR_SPEC7450 ASM_CONST(0x0000000000000004) + #define CPU_FTR_ALTIVEC ASM_CONST(0x0000000000000008) +@@ -135,6 +135,7 @@ + #define CPU_FTR_PPC_LE ASM_CONST(0x0000000000200000) + #define CPU_FTR_REAL_LE ASM_CONST(0x0000000000400000) + #define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x0000000000800000) ++#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x0000000001000000) + + /* + * Add the 64-bit processor unique features in the top half of the word; +@@ -154,7 +155,6 @@ + #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000004000000000) + #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000008000000000) + #define CPU_FTR_SMT LONG_ASM_CONST(0x0000010000000000) +-#define CPU_FTR_COHERENT_ICACHE LONG_ASM_CONST(0x0000020000000000) + #define CPU_FTR_LOCKLESS_TLBIE LONG_ASM_CONST(0x0000040000000000) + #define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000) + #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000) +@@ -206,164 +206,163 @@ + !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \ + !defined(CONFIG_BOOKE)) + +-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE) +-#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE | \ ++ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) ++#define CPU_FTRS_603 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) +-#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_604 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE | \ + CPU_FTR_PPC_LE) +-#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) +-#define CPU_FTRS_740 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_740 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_PPC_LE) +-#define CPU_FTRS_750 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_PPC_LE) +-#define CPU_FTRS_750CL (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750CL (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) +-#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750FX1 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_DUAL_PLL_750FX | 
CPU_FTR_NO_DPM | CPU_FTR_PPC_LE) +-#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750FX2 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_NO_DPM | CPU_FTR_PPC_LE) +-#define CPU_FTRS_750FX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750FX (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) +-#define CPU_FTRS_750GX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_750GX (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \ + CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7400 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7400 (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \ + CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7455 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7455 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | 
CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7447 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7447 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7447A (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7447A (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE) +-#define CPU_FTRS_7448 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_7448 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | \ + CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \ + CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_PPC_LE) +-#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_82XX (CPU_FTR_COMMON | \ + CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB) +-#define CPU_FTRS_G2_LE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ ++#define CPU_FTRS_G2_LE (CPU_FTR_MAYBE_CAN_DOZE | \ + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS) +-#define CPU_FTRS_E300 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ ++#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \ + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_COMMON) +-#define CPU_FTRS_E300C2 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \ ++#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \ + CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \ + CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) +-#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \ ++#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | \ + CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE) +-#define CPU_FTRS_8XX (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB) +-#define CPU_FTRS_40X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +- CPU_FTR_NODSISRALIGN) +-#define CPU_FTRS_44X (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +- CPU_FTR_NODSISRALIGN) +-#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) +-#define CPU_FTRS_E500 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ +- CPU_FTR_NODSISRALIGN) +-#define CPU_FTRS_E500_2 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_8XX (CPU_FTR_USE_TB) ++#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) ++#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) ++#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \ ++ CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE) ++#define CPU_FTRS_E500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN) ++#define CPU_FTRS_E500_2 (CPU_FTR_USE_TB | \ + CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN) + #define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN) + + /* 64-bit CPUs */ +-#define CPU_FTRS_POWER3 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_POWER3 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | CPU_FTR_PPC_LE) +-#define CPU_FTRS_RS64 
(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_RS64 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \ + CPU_FTR_MMCRA | CPU_FTR_CTRL) +-#define CPU_FTRS_POWER4 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_MMCRA) +-#define CPU_FTRS_PPC970 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA) +-#define CPU_FTRS_POWER5 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_PURR) +-#define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ + CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ + CPU_FTR_DSCR) +-#define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_CELL (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ + CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG) +-#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ + CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ + CPU_FTR_PURR | CPU_FTR_REAL_LE) +-#define CPU_FTRS_COMPATIBLE (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ ++#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | \ + CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2) + + #ifdef __powerpc64__ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/floppy.h linux-2.6.22-591/include/asm-powerpc/floppy.h +--- linux-2.6.22-570/include/asm-powerpc/floppy.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/floppy.h 2007-12-21 15:36:12.000000000 -0500 +@@ -29,7 +29,7 @@ + #define fd_free_irq() free_irq(FLOPPY_IRQ, NULL); + + #include +-#include /* for ppc64_isabridge_dev */ ++#include /* for isa_bridge_pcidev */ + + #define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io) + +@@ -139,12 +139,12 @@ + if (bus_addr + && (addr != prev_addr || size != prev_size || dir != prev_dir)) { + /* different from last time -- unmap prev */ +- pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir); ++ pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir); + bus_addr = 0; + } + + if (!bus_addr) /* need to map it */ +- bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir); ++ bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir); + + /* remember this one as prev */ + prev_addr = addr; +diff -Nurb linux-2.6.22-570/include/asm-powerpc/io.h linux-2.6.22-591/include/asm-powerpc/io.h +--- linux-2.6.22-570/include/asm-powerpc/io.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/io.h 2007-12-21 15:36:12.000000000 -0500 +@@ -607,9 +607,9 @@ + * + * * iounmap undoes such a mapping and can be hooked + * +- * * __ioremap_explicit (and the pending __iounmap_explicit) are low level +- * functions to create hand-made mappings for use only by the PCI code +- * and cannot 
currently be hooked. ++ * * __ioremap_at (and the pending __iounmap_at) are low level functions to ++ * create hand-made mappings for use only by the PCI code and cannot ++ * currently be hooked. Must be page aligned. + * + * * __ioremap is the low level implementation used by ioremap and + * ioremap_flags and cannot be hooked (but can be used by a hook on one +@@ -629,19 +629,9 @@ + unsigned long flags); + extern void __iounmap(volatile void __iomem *addr); + +-extern int __ioremap_explicit(phys_addr_t p_addr, unsigned long v_addr, ++extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, + unsigned long size, unsigned long flags); +-extern int __iounmap_explicit(volatile void __iomem *start, +- unsigned long size); +- +-extern void __iomem * reserve_phb_iospace(unsigned long size); +- +-/* Those are more 32 bits only functions */ +-extern unsigned long iopa(unsigned long addr); +-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__; +-extern void io_block_mapping(unsigned long virt, phys_addr_t phys, +- unsigned int size, int flags); +- ++extern void __iounmap_at(void *ea, unsigned long size); + + /* + * When CONFIG_PPC_INDIRECT_IO is set, we use the generic iomap implementation +@@ -651,8 +641,8 @@ + */ + #define HAVE_ARCH_PIO_SIZE 1 + #define PIO_OFFSET 0x00000000UL +-#define PIO_MASK 0x3fffffffUL +-#define PIO_RESERVED 0x40000000UL ++#define PIO_MASK (FULL_IO_SIZE - 1) ++#define PIO_RESERVED (FULL_IO_SIZE) + + #define mmio_read16be(addr) readw_be(addr) + #define mmio_read32be(addr) readl_be(addr) +diff -Nurb linux-2.6.22-570/include/asm-powerpc/kgdb.h linux-2.6.22-591/include/asm-powerpc/kgdb.h +--- linux-2.6.22-570/include/asm-powerpc/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-powerpc/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,75 @@ ++/* ++ * include/asm-powerpc/kgdb.h ++ * ++ * The PowerPC (32/64) specific defines / externs for KGDB. Based on ++ * the previous 32bit and 64bit specific files, which had the following ++ * copyrights: ++ * ++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com) ++ * PPC Mods (C) 2004 Tom Rini (trini@mvista.com) ++ * PPC Mods (C) 2003 John Whitney (john.whitney@timesys.com) ++ * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu) ++ * ++ * ++ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) ++ * Author: Tom Rini ++ * ++ * 2006 (c) MontaVista Software, Inc. This file is licensed under ++ * the terms of the GNU General Public License version 2. This program ++ * is licensed "as is" without any warranty of any kind, whether express ++ * or implied. ++ */ ++#ifdef __KERNEL__ ++#ifndef __POWERPC_KGDB_H__ ++#define __POWERPC_KGDB_H__ ++ ++#include ++ ++#ifndef __ASSEMBLY__ ++ ++#define BREAK_INSTR_SIZE 4 ++#define BUFMAX ((NUMREGBYTES * 2) + 512) ++#define OUTBUFMAX ((NUMREGBYTES * 2) + 512) ++#define BREAKPOINT() asm(".long 0x7d821008"); /* twge r2, r2 */ ++#define CACHE_FLUSH_IS_SAFE 1 ++ ++/* The number bytes of registers we have to save depends on a few ++ * things. For 64bit we default to not including vector registers and ++ * vector state registers. */ ++#ifdef CONFIG_PPC64 ++/* ++ * 64 bit (8 byte) registers: ++ * 32 gpr, 32 fpr, nip, msr, link, ctr ++ * 32 bit (4 byte) registers: ++ * ccr, xer, fpscr ++ */ ++#define NUMREGBYTES ((68 * 8) + (3 * 4)) ++#if 0 ++/* The following adds in vector registers and vector state registers. 
*/ ++/* 128 bit (16 byte) registers: ++ * 32 vr ++ * 64 bit (8 byte) registers: ++ * 32 gpr, 32 fpr, nip, msr, link, ctr ++ * 32 bit (4 byte) registers: ++ * ccr, xer, fpscr, vscr, vrsave ++ */ ++#define NUMREGBYTES ((128 * 16) + (68 * 8) + (5 * 4)) ++#endif ++#define NUMCRITREGBYTES 184 ++#else /* CONFIG_PPC32 */ ++/* On non-E500 family PPC32 we determine the size by picking the last ++ * register we need, but on E500 we skip sections so we list what we ++ * need to store, and add it up. */ ++#ifndef CONFIG_E500 ++#define MAXREG (PT_FPSCR+1) ++#else ++/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/ ++#define MAXREG ((32*2)+6+2+1) ++#endif ++#define NUMREGBYTES (MAXREG * sizeof(int)) ++/* CR/LR, R1, R2, R13-R31 inclusive. */ ++#define NUMCRITREGBYTES (23 * sizeof(int)) ++#endif /* 32/64 */ ++#endif /* !(__ASSEMBLY__) */ ++#endif /* !__POWERPC_KGDB_H__ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/lppaca.h linux-2.6.22-591/include/asm-powerpc/lppaca.h +--- linux-2.6.22-570/include/asm-powerpc/lppaca.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/lppaca.h 2007-12-21 15:36:12.000000000 -0500 +@@ -98,7 +98,7 @@ + u64 saved_gpr5; // Saved GPR5 x30-x37 + + u8 reserved4; // Reserved x38-x38 +- u8 cpuctls_task_attrs; // Task attributes for cpuctls x39-x39 ++ u8 donate_dedicated_cpu; // Donate dedicated CPU cycles x39-x39 + u8 fpregs_in_use; // FP regs in use x3A-x3A + u8 pmcregs_in_use; // PMC regs in use x3B-x3B + volatile u32 saved_decr; // Saved Decr Value x3C-x3F +diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h +--- linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-powerpc/mmu-hash32.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,91 @@ ++#ifndef _ASM_POWERPC_MMU_HASH32_H_ ++#define _ASM_POWERPC_MMU_HASH32_H_ ++/* ++ * 32-bit hash table MMU support ++ */ ++ ++/* ++ * BATs ++ */ ++ ++/* Block size masks */ ++#define BL_128K 0x000 ++#define BL_256K 0x001 ++#define BL_512K 0x003 ++#define BL_1M 0x007 ++#define BL_2M 0x00F ++#define BL_4M 0x01F ++#define BL_8M 0x03F ++#define BL_16M 0x07F ++#define BL_32M 0x0FF ++#define BL_64M 0x1FF ++#define BL_128M 0x3FF ++#define BL_256M 0x7FF ++ ++/* BAT Access Protection */ ++#define BPP_XX 0x00 /* No access */ ++#define BPP_RX 0x01 /* Read only */ ++#define BPP_RW 0x02 /* Read/write */ ++ ++#ifndef __ASSEMBLY__ ++struct ppc_bat { ++ struct { ++ unsigned long bepi:15; /* Effective page index (virtual address) */ ++ unsigned long :4; /* Unused */ ++ unsigned long bl:11; /* Block size mask */ ++ unsigned long vs:1; /* Supervisor valid */ ++ unsigned long vp:1; /* User valid */ ++ } batu; /* Upper register */ ++ struct { ++ unsigned long brpn:15; /* Real page index (physical address) */ ++ unsigned long :10; /* Unused */ ++ unsigned long w:1; /* Write-thru cache */ ++ unsigned long i:1; /* Cache inhibit */ ++ unsigned long m:1; /* Memory coherence */ ++ unsigned long g:1; /* Guarded (MBZ in IBAT) */ ++ unsigned long :1; /* Unused */ ++ unsigned long pp:2; /* Page access protections */ ++ } batl; /* Lower register */ ++}; ++#endif /* !__ASSEMBLY__ */ ++ ++/* ++ * Hash table ++ */ ++ ++/* Values for PP (assumes Ks=0, Kp=1) */ ++#define PP_RWXX 0 /* Supervisor read/write, User none */ ++#define PP_RWRX 1 /* Supervisor read/write, User read */ ++#define PP_RWRW 2 /* Supervisor read/write, User read/write */ ++#define PP_RXRX 3 /* 
Supervisor read, User read */ ++ ++#ifndef __ASSEMBLY__ ++ ++/* Hardware Page Table Entry */ ++struct hash_pte { ++ unsigned long v:1; /* Entry is valid */ ++ unsigned long vsid:24; /* Virtual segment identifier */ ++ unsigned long h:1; /* Hash algorithm indicator */ ++ unsigned long api:6; /* Abbreviated page index */ ++ unsigned long rpn:20; /* Real (physical) page number */ ++ unsigned long :3; /* Unused */ ++ unsigned long r:1; /* Referenced */ ++ unsigned long c:1; /* Changed */ ++ unsigned long w:1; /* Write-thru cache mode */ ++ unsigned long i:1; /* Cache inhibited */ ++ unsigned long m:1; /* Memory coherence */ ++ unsigned long g:1; /* Guarded */ ++ unsigned long :1; /* Unused */ ++ unsigned long pp:2; /* Page protection */ ++}; ++ ++typedef struct { ++ unsigned long id; ++ unsigned long vdso_base; ++} mm_context_t; ++ ++typedef unsigned long phys_addr_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* _ASM_POWERPC_MMU_HASH32_H_ */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h +--- linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/mmu-hash64.h 2007-12-21 15:36:12.000000000 -0500 +@@ -103,12 +103,12 @@ + + #ifndef __ASSEMBLY__ + +-typedef struct { ++struct hash_pte { + unsigned long v; + unsigned long r; +-} hpte_t; ++}; + +-extern hpte_t *htab_address; ++extern struct hash_pte *htab_address; + extern unsigned long htab_size_bytes; + extern unsigned long htab_hash_mask; + +diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu.h linux-2.6.22-591/include/asm-powerpc/mmu.h +--- linux-2.6.22-570/include/asm-powerpc/mmu.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/mmu.h 2007-12-21 15:36:12.000000000 -0500 +@@ -5,6 +5,9 @@ + #ifdef CONFIG_PPC64 + /* 64-bit classic hash table MMU */ + # include ++#elif defined(CONFIG_PPC_STD_MMU) ++/* 32-bit classic hash table MMU */ ++# include + #elif defined(CONFIG_44x) + /* 44x-style software loaded TLB */ + # include +diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci-bridge.h linux-2.6.22-591/include/asm-powerpc/pci-bridge.h +--- linux-2.6.22-570/include/asm-powerpc/pci-bridge.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/pci-bridge.h 2007-12-21 15:36:12.000000000 -0500 +@@ -31,6 +31,7 @@ + int last_busno; + + void __iomem *io_base_virt; ++ void *io_base_alloc; + resource_size_t io_base_phys; + + /* Some machines have a non 1:1 mapping of +@@ -70,19 +71,22 @@ + int devfn; /* pci device and function number */ + int class_code; /* pci device class */ + +-#ifdef CONFIG_PPC_PSERIES ++ struct pci_controller *phb; /* for pci devices */ ++ struct iommu_table *iommu_table; /* for phb's or bridges */ ++ struct pci_dev *pcidev; /* back-pointer to the pci device */ ++ struct device_node *node; /* back-pointer to the device_node */ ++ ++ int pci_ext_config_space; /* for pci devices */ ++ ++#ifdef CONFIG_EEH + int eeh_mode; /* See eeh.h for possible EEH_MODEs */ + int eeh_config_addr; + int eeh_pe_config_addr; /* new-style partition endpoint address */ + int eeh_check_count; /* # times driver ignored error */ + int eeh_freeze_count; /* # times this device froze up. 
*/ +-#endif +- int pci_ext_config_space; /* for pci devices */ +- struct pci_controller *phb; /* for pci devices */ +- struct iommu_table *iommu_table; /* for phb's or bridges */ +- struct pci_dev *pcidev; /* back-pointer to the pci device */ +- struct device_node *node; /* back-pointer to the device_node */ ++ int eeh_false_positives; /* # times this device reported #ff's */ + u32 config_space[16]; /* saved PCI config space */ ++#endif + }; + + /* Get the pointer to a device_node's pci_dn */ +@@ -164,6 +168,11 @@ + } + #endif + ++extern void isa_bridge_find_early(struct pci_controller *hose); ++ ++extern int pcibios_unmap_io_space(struct pci_bus *bus); ++extern int pcibios_map_io_space(struct pci_bus *bus); ++ + /* Return values for ppc_md.pci_probe_mode function */ + #define PCI_PROBE_NONE -1 /* Don't look at this bus at all */ + #define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci.h linux-2.6.22-591/include/asm-powerpc/pci.h +--- linux-2.6.22-570/include/asm-powerpc/pci.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/pci.h 2007-12-21 15:36:12.000000000 -0500 +@@ -220,10 +220,6 @@ + return root; + } + +-extern int unmap_bus_range(struct pci_bus *bus); +- +-extern int remap_bus_range(struct pci_bus *bus); +- + extern void pcibios_fixup_device_resources(struct pci_dev *dev, + struct pci_bus *bus); + +diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h +--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc32.h 2007-12-21 15:36:12.000000000 -0500 +@@ -6,11 +6,7 @@ + #ifndef __ASSEMBLY__ + #include + #include +-#include /* For TASK_SIZE */ +-#include +-#include + #include /* For sub-arch specific PPC_PIN_SIZE */ +-struct mm_struct; + + extern unsigned long va_to_phys(unsigned long address); + extern pte_t *va_to_pte(unsigned long address); +@@ -488,14 +484,6 @@ + #define pfn_pte(pfn, prot) __pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) |\ + pgprot_val(prot)) + #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) +- +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. +- */ +-extern unsigned long empty_zero_page[1024]; +-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +- + #endif /* __ASSEMBLY__ */ + + #define pte_none(pte) ((pte_val(pte) & ~_PTE_NONE_MASK) == 0) +@@ -734,10 +722,6 @@ + #define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) + #define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) + +-extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +- +-extern void paging_init(void); +- + /* + * Encode and decode a swap entry. + * Note that the bits we use in a PTE for representing a swap entry +@@ -755,40 +739,6 @@ + #define pte_to_pgoff(pte) (pte_val(pte) >> 3) + #define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) + +-/* CONFIG_APUS */ +-/* For virtual address to physical address conversion */ +-extern void cache_clear(__u32 addr, int length); +-extern void cache_push(__u32 addr, int length); +-extern int mm_end_of_chunk (unsigned long addr, int len); +-extern unsigned long iopa(unsigned long addr); +-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__; +- +-/* Values for nocacheflag and cmode */ +-/* These are not used by the APUS kernel_map, but prevents +- compilation errors. 
*/ +-#define KERNELMAP_FULL_CACHING 0 +-#define KERNELMAP_NOCACHE_SER 1 +-#define KERNELMAP_NOCACHE_NONSER 2 +-#define KERNELMAP_NO_COPYBACK 3 +- +-/* +- * Map some physical address range into the kernel address space. +- */ +-extern unsigned long kernel_map(unsigned long paddr, unsigned long size, +- int nocacheflag, unsigned long *memavailp ); +- +-/* +- * Set cache mode of (kernel space) address range. +- */ +-extern void kernel_set_cachemode (unsigned long address, unsigned long size, +- unsigned int cmode); +- +-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */ +-#define kern_addr_valid(addr) (1) +- +-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ +- remap_pfn_range(vma, vaddr, pfn, size, prot) +- + /* + * No page table caches to initialise + */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h +--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/pgtable-ppc64.h 2007-12-21 15:36:12.000000000 -0500 +@@ -7,11 +7,7 @@ + + #ifndef __ASSEMBLY__ + #include +-#include /* For TASK_SIZE */ +-#include +-#include + #include +-struct mm_struct; + #endif /* __ASSEMBLY__ */ + + #ifdef CONFIG_PPC_64K_PAGES +@@ -27,7 +23,7 @@ + */ + #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +-#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) ++#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) + + #if TASK_SIZE_USER64 > PGTABLE_RANGE + #error TASK_SIZE_USER64 exceeds pagetable range +@@ -37,19 +33,28 @@ + #error TASK_SIZE_USER64 exceeds user VSID range + #endif + ++ + /* + * Define the address range of the vmalloc VM area. + */ + #define VMALLOC_START ASM_CONST(0xD000000000000000) +-#define VMALLOC_SIZE ASM_CONST(0x80000000000) ++#define VMALLOC_SIZE (PGTABLE_RANGE >> 1) + #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) + + /* +- * Define the address range of the imalloc VM area. +- */ +-#define PHBS_IO_BASE VMALLOC_END +-#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ +-#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) ++ * Define the address ranges for MMIO and IO space : ++ * ++ * ISA_IO_BASE = VMALLOC_END, 64K reserved area ++ * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces ++ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE ++ */ ++#define FULL_IO_SIZE 0x80000000ul ++#define ISA_IO_BASE (VMALLOC_END) ++#define ISA_IO_END (VMALLOC_END + 0x10000ul) ++#define PHB_IO_BASE (ISA_IO_END) ++#define PHB_IO_END (VMALLOC_END + FULL_IO_SIZE) ++#define IOREMAP_BASE (PHB_IO_END) ++#define IOREMAP_END (VMALLOC_START + PGTABLE_RANGE) + + /* + * Region IDs +@@ -134,16 +139,6 @@ + #define __S110 PAGE_SHARED_X + #define __S111 PAGE_SHARED_X + +-#ifndef __ASSEMBLY__ +- +-/* +- * ZERO_PAGE is a global shared page that is always zero: used +- * for zero-mapped memory areas etc.. 
+- */ +-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) +-#endif /* __ASSEMBLY__ */ +- + #ifdef CONFIG_HUGETLB_PAGE + + #define HAVE_ARCH_UNMAPPED_AREA +@@ -442,10 +437,6 @@ + #define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +-extern pgd_t swapper_pg_dir[]; +- +-extern void paging_init(void); +- + /* Encode and de-code a swap entry */ + #define __swp_type(entry) (((entry).val >> 1) & 0x3f) + #define __swp_offset(entry) ((entry).val >> 8) +@@ -456,17 +447,6 @@ + #define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE}) + #define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT) + +-/* +- * kern_addr_valid is intended to indicate whether an address is a valid +- * kernel address. Most 32-bit archs define it as always true (like this) +- * but most 64-bit archs actually perform a test. What should we do here? +- * The only use is in fs/ncpfs/dir.c +- */ +-#define kern_addr_valid(addr) (1) +- +-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ +- remap_pfn_range(vma, vaddr, pfn, size, prot) +- + void pgtable_cache_init(void); + + /* +diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable.h linux-2.6.22-591/include/asm-powerpc/pgtable.h +--- linux-2.6.22-570/include/asm-powerpc/pgtable.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/pgtable.h 2007-12-21 15:36:12.000000000 -0500 +@@ -2,6 +2,13 @@ + #define _ASM_POWERPC_PGTABLE_H + #ifdef __KERNEL__ + ++#ifndef __ASSEMBLY__ ++#include /* For TASK_SIZE */ ++#include ++#include ++struct mm_struct; ++#endif /* !__ASSEMBLY__ */ ++ + #if defined(CONFIG_PPC64) + # include + #else +@@ -9,6 +16,27 @@ + #endif + + #ifndef __ASSEMBLY__ ++/* ++ * ZERO_PAGE is a global shared page that is always zero: used ++ * for zero-mapped memory areas etc.. ++ */ ++extern unsigned long empty_zero_page[]; ++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) ++ ++extern pgd_t swapper_pg_dir[]; ++ ++extern void paging_init(void); ++ ++/* ++ * kern_addr_valid is intended to indicate whether an address is a valid ++ * kernel address. Most 32-bit archs define it as always true (like this) ++ * but most 64-bit archs actually perform a test. What should we do here? 
++ */ ++#define kern_addr_valid(addr) (1) ++ ++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ ++ remap_pfn_range(vma, vaddr, pfn, size, prot) ++ + #include + #endif /* __ASSEMBLY__ */ + +diff -Nurb linux-2.6.22-570/include/asm-powerpc/ppc-pci.h linux-2.6.22-591/include/asm-powerpc/ppc-pci.h +--- linux-2.6.22-570/include/asm-powerpc/ppc-pci.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/ppc-pci.h 2007-12-21 15:36:12.000000000 -0500 +@@ -26,7 +26,7 @@ + + extern void find_and_init_phbs(void); + +-extern struct pci_dev *ppc64_isabridge_dev; /* may be NULL if no ISA bus */ ++extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ + + /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ + #define BUID_HI(buid) ((buid) >> 32) +@@ -47,8 +47,8 @@ + extern unsigned long get_phb_buid (struct device_node *); + extern int rtas_setup_phb(struct pci_controller *phb); + +-/* From pSeries_pci.h */ +-extern void pSeries_final_fixup(void); ++/* From iSeries PCI */ ++extern void iSeries_pcibios_init(void); + + extern unsigned long pci_probe_only; + +@@ -139,6 +139,9 @@ + */ + struct device_node * find_device_pe(struct device_node *dn); + ++void eeh_sysfs_add_device(struct pci_dev *pdev); ++void eeh_sysfs_remove_device(struct pci_dev *pdev); ++ + #endif /* CONFIG_EEH */ + + #else /* CONFIG_PCI */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/ptrace.h linux-2.6.22-591/include/asm-powerpc/ptrace.h +--- linux-2.6.22-570/include/asm-powerpc/ptrace.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/ptrace.h 2007-12-21 15:36:12.000000000 -0500 +@@ -92,6 +92,11 @@ + set_thread_flag(TIF_NOERROR); \ + } while(0) + ++struct task_struct; ++extern unsigned long ptrace_get_reg(struct task_struct *task, int regno); ++extern int ptrace_put_reg(struct task_struct *task, int regno, ++ unsigned long data); ++ + /* + * We use the least-significant bit of the trap field to indicate + * whether we have saved the full set of registers, or only a +@@ -158,9 +163,7 @@ + + #define PT_NIP 32 + #define PT_MSR 33 +-#ifdef __KERNEL__ + #define PT_ORIG_R3 34 +-#endif + #define PT_CTR 35 + #define PT_LNK 36 + #define PT_XER 37 +@@ -169,11 +172,12 @@ + #define PT_MQ 39 + #else + #define PT_SOFTE 39 ++#endif + #define PT_TRAP 40 + #define PT_DAR 41 + #define PT_DSISR 42 + #define PT_RESULT 43 +-#endif ++#define PT_REGS_COUNT 44 + + #define PT_FPR0 48 /* each FP reg occupies 2 slots in this space */ + +@@ -229,7 +233,17 @@ + #define PTRACE_GET_DEBUGREG 25 + #define PTRACE_SET_DEBUGREG 26 + +-/* Additional PTRACE requests implemented on PowerPC. */ ++/* (new) PTRACE requests using the same numbers as x86 and the same ++ * argument ordering. 
Additionally, they support more registers too ++ */ ++#define PTRACE_GETREGS 12 ++#define PTRACE_SETREGS 13 ++#define PTRACE_GETFPREGS 14 ++#define PTRACE_SETFPREGS 15 ++#define PTRACE_GETREGS64 22 ++#define PTRACE_SETREGS64 23 ++ ++/* (old) PTRACE requests with inverted arguments */ + #define PPC_PTRACE_GETREGS 0x99 /* Get GPRs 0 - 31 */ + #define PPC_PTRACE_SETREGS 0x98 /* Set GPRs 0 - 31 */ + #define PPC_PTRACE_GETFPREGS 0x97 /* Get FPRs 0 - 31 */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/syscalls.h linux-2.6.22-591/include/asm-powerpc/syscalls.h +--- linux-2.6.22-570/include/asm-powerpc/syscalls.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/syscalls.h 2007-12-21 15:36:12.000000000 -0500 +@@ -43,16 +43,9 @@ + + asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, + size_t sigsetsize); +- +-#ifndef __powerpc64__ +-asmlinkage long sys_sigaltstack(const stack_t __user *uss, +- stack_t __user *uoss, int r5, int r6, int r7, int r8, +- struct pt_regs *regs); +-#else /* __powerpc64__ */ + asmlinkage long sys_sigaltstack(const stack_t __user *uss, + stack_t __user *uoss, unsigned long r5, unsigned long r6, + unsigned long r7, unsigned long r8, struct pt_regs *regs); +-#endif /* __powerpc64__ */ + + #endif /* __KERNEL__ */ + #endif /* __ASM_POWERPC_SYSCALLS_H */ +diff -Nurb linux-2.6.22-570/include/asm-powerpc/systbl.h linux-2.6.22-591/include/asm-powerpc/systbl.h +--- linux-2.6.22-570/include/asm-powerpc/systbl.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/asm-powerpc/systbl.h 2007-12-21 15:36:12.000000000 -0500 +@@ -312,3 +312,4 @@ + COMPAT_SYS_SPU(timerfd) + SYSCALL_SPU(eventfd) + COMPAT_SYS_SPU(sync_file_range2) ++COMPAT_SYS(fallocate) +diff -Nurb linux-2.6.22-570/include/asm-powerpc/thread_info.h linux-2.6.22-591/include/asm-powerpc/thread_info.h +--- linux-2.6.22-570/include/asm-powerpc/thread_info.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-powerpc/thread_info.h 2007-12-21 15:36:12.000000000 -0500 +@@ -113,8 +113,8 @@ + #define TIF_POLLING_NRFLAG 4 /* true if poll_idle() is polling + TIF_NEED_RESCHED */ + #define TIF_32BIT 5 /* 32 bit binary */ +-#define TIF_RUNLATCH 6 /* Is the runlatch enabled? */ +-#define TIF_ABI_PENDING 7 /* 32/64 bit switch needed */ ++#define TIF_PERFMON_WORK 6 /* work for pfm_handle_work() */ ++#define TIF_PERFMON_CTXSW 7 /* perfmon needs ctxsw calls */ + #define TIF_SYSCALL_AUDIT 8 /* syscall auditing active */ + #define TIF_SINGLESTEP 9 /* singlestepping active */ + #define TIF_MEMDIE 10 +@@ -123,6 +123,8 @@ + #define TIF_NOERROR 14 /* Force successful syscall return */ + #define TIF_RESTORE_SIGMASK 15 /* Restore signal mask in do_signal */ + #define TIF_FREEZE 16 /* Freezing for suspend */ ++#define TIF_RUNLATCH 17 /* Is the runlatch enabled? */ ++#define TIF_ABI_PENDING 18 /* 32/64 bit switch needed */ + + /* as above, but as bit values */ + #define _TIF_SYSCALL_TRACE (1< + #ifndef __ASSEMBLY__ +- +-/* Things specific to the gen550 backend. */ +-struct uart_port; +- +-extern void gen550_progress(char *, unsigned short); +-extern void gen550_kgdb_map_scc(void); +-extern void gen550_init(int, struct uart_port *); +- +-/* Things specific to the pmac backend. */ +-extern void zs_kgdb_hook(int tty_num); +- +-/* To init the kgdb engine. (called by serial hook)*/ +-extern void set_debug_traps(void); +- +-/* To enter the debugger explicitly. 
*/ +-extern void breakpoint(void); +- +-/* For taking exceptions ++ /* For taking exceptions + * these are defined in traps.c + */ +-extern int (*debugger)(struct pt_regs *regs); ++struct pt_regs; ++extern void (*debugger)(struct pt_regs *regs); + extern int (*debugger_bpt)(struct pt_regs *regs); + extern int (*debugger_sstep)(struct pt_regs *regs); + extern int (*debugger_iabr_match)(struct pt_regs *regs); + extern int (*debugger_dabr_match)(struct pt_regs *regs); + extern void (*debugger_fault_handler)(struct pt_regs *regs); +- +-/* What we bring to the party */ +-int kgdb_bpt(struct pt_regs *regs); +-int kgdb_sstep(struct pt_regs *regs); +-void kgdb(struct pt_regs *regs); +-int kgdb_iabr_match(struct pt_regs *regs); +-int kgdb_dabr_match(struct pt_regs *regs); +- +-/* +- * external low-level support routines (ie macserial.c) +- */ +-extern void kgdb_interruptible(int); /* control interrupts from serial */ +-extern void putDebugChar(char); /* write a single character */ +-extern char getDebugChar(void); /* read and return a single char */ +- +-#endif /* !(__ASSEMBLY__) */ +-#endif /* !(_PPC_KGDB_H) */ ++#endif /* !__ASSEMBLY__ */ ++#endif /* __PPC_KGDB_H__ */ + #endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-ppc/machdep.h linux-2.6.22-591/include/asm-ppc/machdep.h +--- linux-2.6.22-570/include/asm-ppc/machdep.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ppc/machdep.h 2007-12-21 15:36:12.000000000 -0500 +@@ -72,9 +72,7 @@ + unsigned long (*find_end_of_memory)(void); + void (*setup_io_mappings)(void); + +- void (*early_serial_map)(void); + void (*progress)(char *, unsigned short); +- void (*kgdb_map_scc)(void); + + unsigned char (*nvram_read_val)(int addr); + void (*nvram_write_val)(int addr, unsigned char val); +diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60.h linux-2.6.22-591/include/asm-ppc/mv64x60.h +--- linux-2.6.22-570/include/asm-ppc/mv64x60.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ppc/mv64x60.h 2007-12-21 15:36:12.000000000 -0500 +@@ -348,6 +348,8 @@ + + void mv64x60_progress_init(u32 base); + void mv64x60_mpsc_progress(char *s, unsigned short hex); ++struct platform_device * mv64x60_early_get_pdev_data(const char *name, ++ int id, int remove); + + extern struct mv64x60_32bit_window + gt64260_32bit_windows[MV64x60_32BIT_WIN_COUNT]; +diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h +--- linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-ppc/mv64x60_defs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -57,7 +57,8 @@ + #define MV64x60_IRQ_I2C 37 + #define MV64x60_IRQ_BRG 39 + #define MV64x60_IRQ_MPSC_0 40 +-#define MV64x60_IRQ_MPSC_1 42 ++#define MV64360_IRQ_MPSC_1 41 ++#define GT64260_IRQ_MPSC_1 42 + #define MV64x60_IRQ_COMM 43 + #define MV64x60_IRQ_P0_GPP_0_7 56 + #define MV64x60_IRQ_P0_GPP_8_15 57 +diff -Nurb linux-2.6.22-570/include/asm-s390/page.h linux-2.6.22-591/include/asm-s390/page.h +--- linux-2.6.22-570/include/asm-s390/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-s390/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -64,7 +64,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ 
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + + /* +diff -Nurb linux-2.6.22-570/include/asm-sh/kgdb.h linux-2.6.22-591/include/asm-sh/kgdb.h +--- linux-2.6.22-570/include/asm-sh/kgdb.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-sh/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -2,78 +2,41 @@ + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * +- * Based on original code by Glenn Engel, Jim Kingdon, +- * David Grothe , Tigran Aivazian, and +- * Amit S. Kale ++ * Based on a file that was modified or based on files by: Glenn Engel, ++ * Jim Kingdon, David Grothe , Tigran Aivazian , ++ * Amit S. Kale , sh-stub.c from Ben Lee and ++ * Steve Chamberlain, Henry Bell + * +- * Super-H port based on sh-stub.c (Ben Lee and Steve Chamberlain) by +- * Henry Bell +- * +- * Header file for low-level support for remote debug using GDB. ++ * Maintainer: Tom Rini + * + */ + + #ifndef __KGDB_H + #define __KGDB_H + +-#include +-#include ++#include + +-struct console; ++/* Based on sh-gdb.c from gdb-6.1, Glenn ++ Engel at HP Ben Lee and Steve Chamberlain */ ++#define NUMREGBYTES 112 /* 92 */ ++#define NUMCRITREGBYTES (9 << 2) ++#define BUFMAX 400 + +-/* Same as pt_regs but has vbr in place of syscall_nr */ ++#ifndef __ASSEMBLY__ + struct kgdb_regs { + unsigned long regs[16]; + unsigned long pc; + unsigned long pr; +- unsigned long sr; + unsigned long gbr; ++ unsigned long vbr; + unsigned long mach; + unsigned long macl; +- unsigned long vbr; ++ unsigned long sr; + }; + +-/* State info */ +-extern char kgdb_in_gdb_mode; +-extern int kgdb_done_init; +-extern int kgdb_enabled; +-extern int kgdb_nofault; /* Ignore bus errors (in gdb mem access) */ +-extern int kgdb_halt; /* Execute initial breakpoint at startup */ +-extern char in_nmi; /* Debounce flag to prevent NMI reentry*/ +- +-/* SCI */ +-extern int kgdb_portnum; +-extern int kgdb_baud; +-extern char kgdb_parity; +-extern char kgdb_bits; +- +-/* Init and interface stuff */ +-extern int kgdb_init(void); +-extern int (*kgdb_getchar)(void); +-extern void (*kgdb_putchar)(int); +- +-/* Trap functions */ +-typedef void (kgdb_debug_hook_t)(struct pt_regs *regs); +-typedef void (kgdb_bus_error_hook_t)(void); +-extern kgdb_debug_hook_t *kgdb_debug_hook; +-extern kgdb_bus_error_hook_t *kgdb_bus_err_hook; +- +-/* Console */ +-void kgdb_console_write(struct console *co, const char *s, unsigned count); +-extern int kgdb_console_setup(struct console *, char *); +- +-/* Prototypes for jmp fns */ +-#define _JBLEN 9 +-typedef int jmp_buf[_JBLEN]; +-extern void longjmp(jmp_buf __jmpb, int __retval); +-extern int setjmp(jmp_buf __jmpb); +- +-/* Forced breakpoint */ +-#define breakpoint() \ +-do { \ +- if (kgdb_enabled) \ +- __asm__ __volatile__("trapa #0x3c"); \ +-} while (0) ++#define BREAKPOINT() asm("trapa #0xff"); ++#define BREAK_INSTR_SIZE 2 ++#define CACHE_FLUSH_IS_SAFE 1 + + /* KGDB should be able to flush all kernel text space */ + #if defined(CONFIG_CPU_SH4) +@@ -100,4 +63,5 @@ + { + return hexchars[x & 0xf]; + } ++#endif /* !__ASSEMBLY__ */ + #endif +diff -Nurb linux-2.6.22-570/include/asm-sh/system.h linux-2.6.22-591/include/asm-sh/system.h +--- linux-2.6.22-570/include/asm-sh/system.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-sh/system.h 2007-12-21 15:36:12.000000000 -0500 +@@ -264,6 +264,45 @@ + #define instruction_size(insn) (2) + #endif + ++static inline 
unsigned long __cmpxchg_u32(volatile int * m, unsigned long old, ++ unsigned long new) ++{ ++ __u32 retval; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ retval = *m; ++ if (retval == old) ++ *m = new; ++ local_irq_restore(flags); /* implies memory barrier */ ++ return retval; ++} ++ ++/* This function doesn't exist, so you'll get a linker error ++ * if something tries to do an invalid cmpxchg(). */ ++extern void __cmpxchg_called_with_bad_pointer(void); ++ ++#define __HAVE_ARCH_CMPXCHG 1 ++ ++static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, ++ unsigned long new, int size) ++{ ++ switch (size) { ++ case 4: ++ return __cmpxchg_u32(ptr, old, new); ++ } ++ __cmpxchg_called_with_bad_pointer(); ++ return old; ++} ++ ++#define cmpxchg(ptr,o,n) \ ++ ({ \ ++ __typeof__(*(ptr)) _o_ = (o); \ ++ __typeof__(*(ptr)) _n_ = (n); \ ++ (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ ++ (unsigned long)_n_, sizeof(*(ptr))); \ ++ }) ++ + /* XXX + * disable hlt during certain critical i/o operations + */ +diff -Nurb linux-2.6.22-570/include/asm-um/thread_info.h linux-2.6.22-591/include/asm-um/thread_info.h +--- linux-2.6.22-570/include/asm-um/thread_info.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-um/thread_info.h 2007-12-21 15:36:12.000000000 -0500 +@@ -52,10 +52,21 @@ + return ti; + } + ++#ifdef CONFIG_DEBUG_STACK_USAGE ++ ++#define alloc_thread_info(tsk) \ ++ ((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \ ++ CONFIG_KERNEL_STACK_ORDER)) ++#else ++ + /* thread information allocation */ + #define alloc_thread_info(tsk) \ +- ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL)) +-#define free_thread_info(ti) kfree(ti) ++ ((struct thread_info *) __get_free_pages(GFP_KERNEL, \ ++ CONFIG_KERNEL_STACK_ORDER)) ++#endif ++ ++#define free_thread_info(ti) \ ++ free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER) + + #endif + +diff -Nurb linux-2.6.22-570/include/asm-x86_64/kdebug.h linux-2.6.22-591/include/asm-x86_64/kdebug.h +--- linux-2.6.22-570/include/asm-x86_64/kdebug.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-x86_64/kdebug.h 2007-12-21 15:36:12.000000000 -0500 +@@ -23,6 +23,7 @@ + DIE_CALL, + DIE_NMI_IPI, + DIE_PAGE_FAULT, ++ DIE_PAGE_FAULT_NO_CONTEXT, + }; + + extern void printk_address(unsigned long address); +diff -Nurb linux-2.6.22-570/include/asm-x86_64/kgdb.h linux-2.6.22-591/include/asm-x86_64/kgdb.h +--- linux-2.6.22-570/include/asm-x86_64/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/asm-x86_64/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,52 @@ ++#ifdef __KERNEL__ ++#ifndef _ASM_KGDB_H_ ++#define _ASM_KGDB_H_ ++ ++/* ++ * Copyright (C) 2001-2004 Amit S. Kale ++ */ ++ ++#include ++ ++/* ++ * Note that this register image is in a different order than ++ * the register image that Linux produces at interrupt time. ++ * ++ * Linux's register image is defined by struct pt_regs in ptrace.h. ++ * Just why GDB uses a different order is a historical mystery. ++ */ ++#define _RAX 0 ++#define _RDX 1 ++#define _RCX 2 ++#define _RBX 3 ++#define _RSI 4 ++#define _RDI 5 ++#define _RBP 6 ++#define _RSP 7 ++#define _R8 8 ++#define _R9 9 ++#define _R10 10 ++#define _R11 11 ++#define _R12 12 ++#define _R13 13 ++#define _R14 14 ++#define _R15 15 ++#define _PC 16 ++#define _PS 17 ++ ++/* Number of bytes of registers. */ ++#define NUMREGBYTES ((_PS+1)*8) ++#define NUMCRITREGBYTES (8 * 8) /* 8 registers. 
*/ ++ ++#ifndef __ASSEMBLY__ ++/* BUFMAX defines the maximum number of characters in inbound/outbound ++ * buffers at least NUMREGBYTES*2 are needed for register packets, and ++ * a longer buffer is needed to list all threads. */ ++#define BUFMAX 1024 ++#define BREAKPOINT() asm(" int $3"); ++#define CHECK_EXCEPTION_STACK() ((&__get_cpu_var(init_tss))[0].ist[0]) ++#define BREAK_INSTR_SIZE 1 ++#define CACHE_FLUSH_IS_SAFE 1 ++#endif /* !__ASSEMBLY__ */ ++#endif /* _ASM_KGDB_H_ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/asm-x86_64/page.h linux-2.6.22-591/include/asm-x86_64/page.h +--- linux-2.6.22-570/include/asm-x86_64/page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-x86_64/page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -48,7 +48,8 @@ + #define clear_user_page(page, vaddr, pg) clear_page(page) + #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) + +-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) ++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \ ++ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + /* + * These are used to make use of C type-checking.. +diff -Nurb linux-2.6.22-570/include/asm-x86_64/proto.h linux-2.6.22-591/include/asm-x86_64/proto.h +--- linux-2.6.22-570/include/asm-x86_64/proto.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-x86_64/proto.h 2007-12-21 15:36:12.000000000 -0500 +@@ -75,8 +75,6 @@ + extern void early_quirks(void); + extern void check_efer(void); + +-extern int unhandled_signal(struct task_struct *tsk, int sig); +- + extern void select_idle_routine(const struct cpuinfo_x86 *c); + + extern unsigned long table_start, table_end; +diff -Nurb linux-2.6.22-570/include/asm-x86_64/system.h linux-2.6.22-591/include/asm-x86_64/system.h +--- linux-2.6.22-570/include/asm-x86_64/system.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-x86_64/system.h 2007-12-21 15:36:12.000000000 -0500 +@@ -22,7 +22,9 @@ + + /* Save restore flags to clear handle leaking NT */ + #define switch_to(prev,next,last) \ +- asm volatile(SAVE_CONTEXT \ ++ asm volatile(".globl __switch_to_begin\n\t" \ ++ "__switch_to_begin:\n\t" \ ++ SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ +@@ -34,6 +36,8 @@ + "movq %%rax,%%rdi\n\t" \ + "jc ret_from_fork\n\t" \ + RESTORE_CONTEXT \ ++ "\n.globl __switch_to_end\n\t" \ ++ "__switch_to_end:\n\t" \ + : "=a" (last) \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \ +diff -Nurb linux-2.6.22-570/include/asm-x86_64/unistd.h linux-2.6.22-591/include/asm-x86_64/unistd.h +--- linux-2.6.22-570/include/asm-x86_64/unistd.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/asm-x86_64/unistd.h 2007-12-21 15:36:12.000000000 -0500 +@@ -630,6 +630,8 @@ + __SYSCALL(__NR_timerfd, sys_timerfd) + #define __NR_eventfd 284 + __SYSCALL(__NR_eventfd, sys_eventfd) ++#define __NR_fallocate 285 ++__SYSCALL(__NR_fallocate, sys_fallocate) + + #ifndef __NO_STUBS + #define __ARCH_WANT_OLD_READDIR +diff -Nurb linux-2.6.22-570/include/asm-x86_64/unwind.h linux-2.6.22-591/include/asm-x86_64/unwind.h +--- linux-2.6.22-570/include/asm-x86_64/unwind.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/asm-x86_64/unwind.h 2007-12-21 15:36:12.000000000 
-0500 +@@ -1,6 +1,100 @@ + #ifndef _ASM_X86_64_UNWIND_H + #define _ASM_X86_64_UNWIND_H + ++/* ++ * Copyright (C) 2002-2006 Novell, Inc. ++ * Jan Beulich ++ * This code is released under version 2 of the GNU GPL. ++ */ ++ ++#ifdef CONFIG_STACK_UNWIND ++ ++#include ++#include ++#include ++#include ++ ++struct unwind_frame_info ++{ ++ struct pt_regs regs; ++ struct task_struct *task; ++ unsigned call_frame:1; ++}; ++ ++#define UNW_PC(frame) (frame)->regs.rip ++#define UNW_SP(frame) (frame)->regs.rsp ++#ifdef CONFIG_FRAME_POINTER ++#define UNW_FP(frame) (frame)->regs.rbp ++#define FRAME_RETADDR_OFFSET 8 ++#define FRAME_LINK_OFFSET 0 ++#define STACK_BOTTOM(tsk) (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1)) ++#define STACK_TOP(tsk) ((tsk)->thread.rsp0) ++#endif ++/* Might need to account for the special exception and interrupt handling ++ stacks here, since normally ++ EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER, ++ but the construct is needed only for getting across the stack switch to ++ the interrupt stack - thus considering the IRQ stack itself is unnecessary, ++ and the overhead of comparing against all exception handling stacks seems ++ not desirable. */ ++#define STACK_LIMIT(ptr) (((ptr) - 1) & ~(THREAD_SIZE - 1)) ++ ++#define UNW_REGISTER_INFO \ ++ PTREGS_INFO(rax), \ ++ PTREGS_INFO(rdx), \ ++ PTREGS_INFO(rcx), \ ++ PTREGS_INFO(rbx), \ ++ PTREGS_INFO(rsi), \ ++ PTREGS_INFO(rdi), \ ++ PTREGS_INFO(rbp), \ ++ PTREGS_INFO(rsp), \ ++ PTREGS_INFO(r8), \ ++ PTREGS_INFO(r9), \ ++ PTREGS_INFO(r10), \ ++ PTREGS_INFO(r11), \ ++ PTREGS_INFO(r12), \ ++ PTREGS_INFO(r13), \ ++ PTREGS_INFO(r14), \ ++ PTREGS_INFO(r15), \ ++ PTREGS_INFO(rip) ++ ++#define UNW_DEFAULT_RA(raItem, dataAlign) \ ++ ((raItem).where == Memory && \ ++ !((raItem).value * (dataAlign) + 8)) ++ ++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info, ++ /*const*/ struct pt_regs *regs) ++{ ++ info->regs = *regs; ++} ++ ++static inline void arch_unw_init_blocked(struct unwind_frame_info *info) ++{ ++ extern const char thread_return[]; ++ ++ memset(&info->regs, 0, sizeof(info->regs)); ++ info->regs.rip = (unsigned long)thread_return; ++ info->regs.cs = __KERNEL_CS; ++ __get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp); ++ info->regs.rsp = info->task->thread.rsp; ++ info->regs.ss = __KERNEL_DS; ++} ++ ++extern int arch_unwind_init_running(struct unwind_frame_info *, ++ int (*callback)(struct unwind_frame_info *, ++ void *arg), ++ void *arg); ++ ++static inline int arch_unw_user_mode(const struct unwind_frame_info *info) ++{ ++ return user_mode(&info->regs) ++ || (long)info->regs.rip >= 0 ++ || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END) ++ || (long)info->regs.rsp >= 0; ++} ++ ++#else ++ + #define UNW_PC(frame) ((void)(frame), 0UL) + #define UNW_SP(frame) ((void)(frame), 0UL) + +@@ -9,4 +103,6 @@ + return 0; + } + ++#endif ++ + #endif /* _ASM_X86_64_UNWIND_H */ +diff -Nurb linux-2.6.22-570/include/linux/Kbuild linux-2.6.22-591/include/linux/Kbuild +--- linux-2.6.22-570/include/linux/Kbuild 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/Kbuild 2007-12-21 15:36:12.000000000 -0500 +@@ -91,7 +91,6 @@ + header-y += in_route.h + header-y += ioctl.h + header-y += ipmi_msgdefs.h +-header-y += ip_mp_alg.h + header-y += ipsec.h + header-y += ipx.h + header-y += irda.h +diff -Nurb linux-2.6.22-570/include/linux/acpi.h linux-2.6.22-591/include/linux/acpi.h +--- linux-2.6.22-570/include/linux/acpi.h 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-591/include/linux/acpi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -206,11 +206,8 @@ + { + return max_cstate; + } +-static inline void acpi_set_cstate_limit(unsigned int new_limit) +-{ +- max_cstate = new_limit; +- return; +-} ++extern void (*acpi_do_set_cstate_limit)(void); ++extern void acpi_set_cstate_limit(unsigned int new_limit); + #else + static inline unsigned int acpi_get_cstate_limit(void) { return 0; } + static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; } +diff -Nurb linux-2.6.22-570/include/linux/async_tx.h linux-2.6.22-591/include/linux/async_tx.h +--- linux-2.6.22-570/include/linux/async_tx.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/async_tx.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,156 @@ ++/* ++ * Copyright © 2006, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ */ ++#ifndef _ASYNC_TX_H_ ++#define _ASYNC_TX_H_ ++#include ++#include ++#include ++ ++/** ++ * dma_chan_ref - object used to manage dma channels received from the ++ * dmaengine core. ++ * @chan - the channel being tracked ++ * @node - node for the channel to be placed on async_tx_master_list ++ * @rcu - for list_del_rcu ++ * @count - number of times this channel is listed in the pool ++ * (for channels with multiple capabilities) ++ */ ++struct dma_chan_ref { ++ struct dma_chan *chan; ++ struct list_head node; ++ struct rcu_head rcu; ++ atomic_t count; ++}; ++ ++/** ++ * async_tx_flags - modifiers for the async_* calls ++ * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where ++ * the destination address is not a source. The asynchronous case handles this ++ * implicitly, the synchronous case needs to zero the destination block. ++ * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is ++ * also one of the source addresses. In the synchronous case the destination ++ * address is an implied source, whereas in the asynchronous case it must be listed ++ * as a source. The destination address must be the first address in the source ++ * array. ++ * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations ++ * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a ++ * dependency chain ++ * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. 
++ * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously ++ * take an atomic mapping (KM_USER0) on the source page(s) ++ * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously ++ * take an atomic mapping (KM_USER0) on the dest page(s) ++ */ ++enum async_tx_flags { ++ ASYNC_TX_XOR_ZERO_DST = (1 << 0), ++ ASYNC_TX_XOR_DROP_DST = (1 << 1), ++ ASYNC_TX_ASSUME_COHERENT = (1 << 2), ++ ASYNC_TX_ACK = (1 << 3), ++ ASYNC_TX_DEP_ACK = (1 << 4), ++ ASYNC_TX_KMAP_SRC = (1 << 5), ++ ASYNC_TX_KMAP_DST = (1 << 6), ++}; ++ ++#ifdef CONFIG_DMA_ENGINE ++void async_tx_issue_pending_all(void); ++enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); ++void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); ++struct dma_chan * ++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, ++ enum dma_transaction_type tx_type); ++#else ++static inline void async_tx_issue_pending_all(void) ++{ ++ do { } while (0); ++} ++ ++static inline enum dma_status ++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) ++{ ++ return DMA_SUCCESS; ++} ++ ++static inline void ++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, ++ struct dma_chan *host_chan) ++{ ++ do { } while (0); ++} ++ ++static inline struct dma_chan * ++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, ++ enum dma_transaction_type tx_type) ++{ ++ return NULL; ++} ++#endif ++ ++/** ++ * async_tx_sync_epilog - actions to take if an operation is run synchronously ++ * @flags: async_tx flags ++ * @depend_tx: transaction depends on depend_tx ++ * @cb_fn: function to call when the transaction completes ++ * @cb_fn_param: parameter to pass to the callback routine ++ */ ++static inline void ++async_tx_sync_epilog(unsigned long flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param) ++{ ++ if (cb_fn) ++ cb_fn(cb_fn_param); ++ ++ if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) ++ async_tx_ack(depend_tx); ++} ++ ++void ++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, ++ enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++ ++struct dma_async_tx_descriptor * ++async_xor(struct page *dest, struct page **src_list, unsigned int offset, ++ int src_cnt, size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++ ++struct dma_async_tx_descriptor * ++async_xor_zero_sum(struct page *dest, struct page **src_list, ++ unsigned int offset, int src_cnt, size_t len, ++ u32 *result, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++ ++struct dma_async_tx_descriptor * ++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, ++ unsigned int src_offset, size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++ ++struct dma_async_tx_descriptor * ++async_memset(struct page *dest, int val, unsigned int offset, ++ size_t len, enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++ ++struct dma_async_tx_descriptor * ++async_trigger_callback(enum async_tx_flags flags, ++ struct dma_async_tx_descriptor *depend_tx, ++ dma_async_tx_callback cb_fn, void *cb_fn_param); ++#endif /* _ASYNC_TX_H_ */ +diff -Nurb 
linux-2.6.22-570/include/linux/configfs.h linux-2.6.22-591/include/linux/configfs.h +--- linux-2.6.22-570/include/linux/configfs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/configfs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -75,7 +75,6 @@ + extern void config_item_init_type_name(struct config_item *item, + const char *name, + struct config_item_type *type); +-extern void config_item_cleanup(struct config_item *); + + extern struct config_item * config_item_get(struct config_item *); + extern void config_item_put(struct config_item *); +@@ -157,6 +156,7 @@ + struct config_item *(*make_item)(struct config_group *group, const char *name); + struct config_group *(*make_group)(struct config_group *group, const char *name); + int (*commit_item)(struct config_item *item); ++ void (*disconnect_notify)(struct config_group *group, struct config_item *item); + void (*drop_item)(struct config_group *group, struct config_item *item); + }; + +@@ -175,6 +175,11 @@ + int configfs_register_subsystem(struct configfs_subsystem *subsys); + void configfs_unregister_subsystem(struct configfs_subsystem *subsys); + ++/* These functions can sleep and can alloc with GFP_KERNEL */ ++/* WARNING: These cannot be called underneath configfs callbacks!! */ ++int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); ++void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target); ++ + #endif /* __KERNEL__ */ + + #endif /* _CONFIGFS_H_ */ +diff -Nurb linux-2.6.22-570/include/linux/container.h linux-2.6.22-591/include/linux/container.h +--- linux-2.6.22-570/include/linux/container.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/container.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,295 @@ ++#ifndef _LINUX_CONTAINER_H ++#define _LINUX_CONTAINER_H ++/* ++ * container interface ++ * ++ * Copyright (C) 2003 BULL SA ++ * Copyright (C) 2004-2006 Silicon Graphics, Inc. ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_CONTAINERS ++ ++struct containerfs_root; ++struct inode; ++ ++extern int container_init_early(void); ++extern int container_init(void); ++extern void container_init_smp(void); ++extern void container_lock(void); ++extern void container_unlock(void); ++extern void container_fork(struct task_struct *p); ++extern void container_fork_callbacks(struct task_struct *p); ++extern void container_exit(struct task_struct *p, int run_callbacks); ++ ++extern struct file_operations proc_container_operations; ++ ++/* Define the enumeration of all container subsystems */ ++#define SUBSYS(_x) _x ## _subsys_id, ++enum container_subsys_id { ++#include ++ CONTAINER_SUBSYS_COUNT ++}; ++#undef SUBSYS ++ ++/* Per-subsystem/per-container state maintained by the system. */ ++struct container_subsys_state { ++ /* The container that this subsystem is attached to. Useful ++ * for subsystems that want to know about the container ++ * hierarchy structure */ ++ struct container *container; ++ ++ /* State maintained by the container system to allow ++ * subsystems to be "busy". 
Should be accessed via css_get() ++ * and css_put() */ ++ ++ atomic_t refcnt; ++}; ++ ++/* ++ * Call css_get() to hold a reference on the container; ++ * ++ */ ++ ++static inline void css_get(struct container_subsys_state *css) ++{ ++ atomic_inc(&css->refcnt); ++} ++/* ++ * css_put() should be called to release a reference taken by ++ * css_get() ++ */ ++void css_put(struct container_subsys_state *css); ++ ++struct container { ++ unsigned long flags; /* "unsigned long" so bitops work */ ++ ++ /* count users of this container. >0 means busy, but doesn't ++ * necessarily indicate the number of tasks in the ++ * container */ ++ atomic_t count; ++ ++ /* ++ * We link our 'sibling' struct into our parent's 'children'. ++ * Our children link their 'sibling' into our 'children'. ++ */ ++ struct list_head sibling; /* my parent's children */ ++ struct list_head children; /* my children */ ++ ++ struct container *parent; /* my parent */ ++ struct dentry *dentry; /* container fs entry */ ++ ++ /* Private pointers for each registered subsystem */ ++ struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT]; ++ ++ struct containerfs_root *root; ++ struct container *top_container; ++ ++ /* ++ * List of cg_container_links pointing at css_groups with ++ * tasks in this container. Protected by css_group_lock ++ */ ++ struct list_head css_groups; ++ ++ /* ++ * Linked list running through all containers that can ++ * potentially be reaped by the release agent. Protected by ++ * container_mutex ++ */ ++ struct list_head release_list; ++}; ++ ++/* A css_group is a structure holding pointers to a set of ++ * container_subsys_state objects. This saves space in the task struct ++ * object and speeds up fork()/exit(), since a single inc/dec and a ++ * list_add()/del() can bump the reference count on the entire ++ * container set for a task. ++ */ ++ ++struct css_group { ++ ++ /* Reference count */ ++ struct kref ref; ++ ++ /* ++ * List running through all container groups. Protected by ++ * css_group_lock ++ */ ++ struct list_head list; ++ ++ /* ++ * List running through all tasks using this container ++ * group. Protected by css_group_lock ++ */ ++ struct list_head tasks; ++ ++ /* ++ * List of cg_container_link objects on link chains from ++ * containers referenced from this css_group. Protected by ++ * css_group_lock ++ */ ++ struct list_head cg_links; ++ ++ /* Set of subsystem states, one for each subsystem. NULL for ++ * subsystems that aren't part of this hierarchy. These ++ * pointers reduce the number of dereferences required to get ++ * from a task to its state for a given container, but result ++ * in increased space usage if tasks are in wildly different ++ * groupings across different hierarchies. This array is ++ * immutable after creation */ ++ struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT]; ++ ++}; ++ ++/* struct cftype: ++ * ++ * The files in the container filesystem mostly have a very simple read/write ++ * handling, some common function will take care of it. Nevertheless some cases ++ * (read tasks) are special and therefore I define this structure for every ++ * kind of file. 
++ * ++ * ++ * When reading/writing to a file: ++ * - the container to use in file->f_dentry->d_parent->d_fsdata ++ * - the 'cftype' of the file is file->f_dentry->d_fsdata ++ */ ++ ++#define MAX_CFTYPE_NAME 64 ++struct cftype { ++ /* By convention, the name should begin with the name of the ++ * subsystem, followed by a period */ ++ char name[MAX_CFTYPE_NAME]; ++ int private; ++ int (*open) (struct inode *inode, struct file *file); ++ ssize_t (*read) (struct container *cont, struct cftype *cft, ++ struct file *file, ++ char __user *buf, size_t nbytes, loff_t *ppos); ++ /* ++ * read_uint() is a shortcut for the common case of returning a ++ * single integer. Use it in place of read() ++ */ ++ u64 (*read_uint) (struct container *cont, struct cftype *cft); ++ ssize_t (*write) (struct container *cont, struct cftype *cft, ++ struct file *file, ++ const char __user *buf, size_t nbytes, loff_t *ppos); ++ int (*release) (struct inode *inode, struct file *file); ++}; ++ ++/* Add a new file to the given container directory. Should only be ++ * called by subsystems from within a populate() method */ ++int container_add_file(struct container *cont, const struct cftype *cft); ++ ++/* Add a set of new files to the given container directory. Should ++ * only be called by subsystems from within a populate() method */ ++int container_add_files(struct container *cont, const struct cftype cft[], ++ int count); ++ ++int container_is_removed(const struct container *cont); ++ ++int container_path(const struct container *cont, char *buf, int buflen); ++ ++int container_task_count(const struct container *cont); ++ ++/* Return true if the container is a descendant of the current container */ ++int container_is_descendant(const struct container *cont); ++ ++/* Container subsystem type. 
See Documentation/containers.txt for details */ ++ ++struct container_subsys { ++ int (*create)(struct container_subsys *ss, ++ struct container *cont); ++ void (*destroy)(struct container_subsys *ss, struct container *cont); ++ int (*can_attach)(struct container_subsys *ss, ++ struct container *cont, struct task_struct *tsk); ++ void (*attach)(struct container_subsys *ss, struct container *cont, ++ struct container *old_cont, struct task_struct *tsk); ++ void (*fork)(struct container_subsys *ss, struct task_struct *task); ++ void (*exit)(struct container_subsys *ss, struct task_struct *task); ++ int (*populate)(struct container_subsys *ss, ++ struct container *cont); ++ void (*post_clone)(struct container_subsys *ss, struct container *cont); ++ void (*bind)(struct container_subsys *ss, struct container *root); ++ int subsys_id; ++ int active; ++ int early_init; ++#define MAX_CONTAINER_TYPE_NAMELEN 32 ++ const char *name; ++ ++ /* Protected by RCU */ ++ struct containerfs_root *root; ++ ++ struct list_head sibling; ++ ++ void *private; ++}; ++ ++#define SUBSYS(_x) extern struct container_subsys _x ## _subsys; ++#include ++#undef SUBSYS ++ ++static inline struct container_subsys_state *container_subsys_state( ++ struct container *cont, int subsys_id) ++{ ++ return cont->subsys[subsys_id]; ++} ++ ++static inline struct container_subsys_state *task_subsys_state( ++ struct task_struct *task, int subsys_id) ++{ ++ return rcu_dereference(task->containers->subsys[subsys_id]); ++} ++ ++static inline struct container* task_container(struct task_struct *task, ++ int subsys_id) ++{ ++ return task_subsys_state(task, subsys_id)->container; ++} ++ ++int container_path(const struct container *cont, char *buf, int buflen); ++ ++int container_clone(struct task_struct *tsk, struct container_subsys *ss); ++ ++/* A container_iter should be treated as an opaque object */ ++struct container_iter { ++ struct list_head *cg_link; ++ struct list_head *task; ++}; ++ ++/* To iterate across the tasks in a container: ++ * ++ * 1) call container_iter_start to initialize an iterator ++ * ++ * 2) call container_iter_next() to retrieve member tasks until it ++ * returns NULL or until you want to end the iteration ++ * ++ * 3) call container_iter_end() to destroy the iterator. 
++ */ ++void container_iter_start(struct container *cont, struct container_iter *it); ++struct task_struct *container_iter_next(struct container *cont, ++ struct container_iter *it); ++void container_iter_end(struct container *cont, struct container_iter *it); ++ ++void container_set_release_agent_path(struct container_subsys *ss, ++ const char *path); ++ ++#else /* !CONFIG_CONTAINERS */ ++ ++static inline int container_init_early(void) { return 0; } ++static inline int container_init(void) { return 0; } ++static inline void container_init_smp(void) {} ++static inline void container_fork(struct task_struct *p) {} ++static inline void container_fork_callbacks(struct task_struct *p) {} ++static inline void container_exit(struct task_struct *p, int callbacks) {} ++ ++static inline void container_lock(void) {} ++static inline void container_unlock(void) {} ++ ++#endif /* !CONFIG_CONTAINERS */ ++ ++#endif /* _LINUX_CONTAINER_H */ +diff -Nurb linux-2.6.22-570/include/linux/container_subsys.h linux-2.6.22-591/include/linux/container_subsys.h +--- linux-2.6.22-570/include/linux/container_subsys.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/container_subsys.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,32 @@ ++/* Add subsystem definitions of the form SUBSYS() in this ++ * file. Surround each one by a line of comment markers so that ++ * patches don't collide ++ */ ++ ++/* */ ++ ++/* */ ++ ++#ifdef CONFIG_CONTAINER_CPUACCT ++SUBSYS(cpuacct) ++#endif ++ ++/* */ ++ ++#ifdef CONFIG_CPUSETS ++SUBSYS(cpuset) ++#endif ++ ++/* */ ++ ++#ifdef CONFIG_CONTAINER_DEBUG ++SUBSYS(debug) ++#endif ++ ++/* */ ++ ++#ifdef CONFIG_CONTAINER_NS ++SUBSYS(ns) ++#endif ++ ++/* */ +diff -Nurb linux-2.6.22-570/include/linux/cpu_acct.h linux-2.6.22-591/include/linux/cpu_acct.h +--- linux-2.6.22-570/include/linux/cpu_acct.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/cpu_acct.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,14 @@ ++ ++#ifndef _LINUX_CPU_ACCT_H ++#define _LINUX_CPU_ACCT_H ++ ++#include ++#include ++ ++#ifdef CONFIG_CONTAINER_CPUACCT ++extern void cpuacct_charge(struct task_struct *, cputime_t cputime); ++#else ++static inline void cpuacct_charge(struct task_struct *p, cputime_t cputime) {} ++#endif ++ ++#endif +diff -Nurb linux-2.6.22-570/include/linux/cpuidle.h linux-2.6.22-591/include/linux/cpuidle.h +--- linux-2.6.22-570/include/linux/cpuidle.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/cpuidle.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,189 @@ ++/* ++ * cpuidle.h - a generic framework for CPU idle power management ++ * ++ * (C) 2007 Venkatesh Pallipadi ++ * Shaohua Li ++ * Adam Belay ++ * ++ * This code is licensed under the GPL. 
++ */ ++ ++#ifndef _LINUX_CPUIDLE_H ++#define _LINUX_CPUIDLE_H ++ ++#include ++#include ++#include ++#include ++#include ++ ++#define CPUIDLE_STATE_MAX 8 ++#define CPUIDLE_NAME_LEN 16 ++ ++struct cpuidle_device; ++ ++ ++/**************************** ++ * CPUIDLE DEVICE INTERFACE * ++ ****************************/ ++ ++struct cpuidle_state { ++ char name[CPUIDLE_NAME_LEN]; ++ void *driver_data; ++ ++ unsigned int flags; ++ unsigned int exit_latency; /* in US */ ++ unsigned int power_usage; /* in mW */ ++ unsigned int target_residency; /* in US */ ++ ++ unsigned int usage; ++ unsigned int time; /* in US */ ++ ++ int (*enter) (struct cpuidle_device *dev, ++ struct cpuidle_state *state); ++}; ++ ++/* Idle State Flags */ ++#define CPUIDLE_FLAG_TIME_VALID (0x01) /* is residency time measurable? */ ++#define CPUIDLE_FLAG_CHECK_BM (0x02) /* BM activity will exit state */ ++#define CPUIDLE_FLAG_SHALLOW (0x10) /* low latency, minimal savings */ ++#define CPUIDLE_FLAG_BALANCED (0x20) /* medium latency, moderate savings */ ++#define CPUIDLE_FLAG_DEEP (0x40) /* high latency, large savings */ ++ ++#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) ++ ++/** ++ * cpuidle_get_statedata - retrieves private driver state data ++ * @state: the state ++ */ ++static inline void * cpuidle_get_statedata(struct cpuidle_state *state) ++{ ++ return state->driver_data; ++} ++ ++/** ++ * cpuidle_set_statedata - stores private driver state data ++ * @state: the state ++ * @data: the private data ++ */ ++static inline void ++cpuidle_set_statedata(struct cpuidle_state *state, void *data) ++{ ++ state->driver_data = data; ++} ++ ++struct cpuidle_state_kobj { ++ struct cpuidle_state *state; ++ struct completion kobj_unregister; ++ struct kobject kobj; ++}; ++ ++struct cpuidle_device { ++ unsigned int status; ++ int cpu; ++ ++ int last_residency; ++ int state_count; ++ struct cpuidle_state states[CPUIDLE_STATE_MAX]; ++ struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; ++ struct cpuidle_state *last_state; ++ ++ struct list_head device_list; ++ struct kobject kobj; ++ struct completion kobj_unregister; ++ void *governor_data; ++}; ++ ++DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); ++ ++/* Device Status Flags */ ++#define CPUIDLE_STATUS_DETECTED (0x1) ++#define CPUIDLE_STATUS_DRIVER_ATTACHED (0x2) ++#define CPUIDLE_STATUS_GOVERNOR_ATTACHED (0x4) ++#define CPUIDLE_STATUS_DOIDLE (CPUIDLE_STATUS_DETECTED | \ ++ CPUIDLE_STATUS_DRIVER_ATTACHED | \ ++ CPUIDLE_STATUS_GOVERNOR_ATTACHED) ++ ++/** ++ * cpuidle_get_last_residency - retrieves the last state's residency time ++ * @dev: the target CPU ++ * ++ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set ++ */ ++static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) ++{ ++ return dev->last_residency; ++} ++ ++ ++/**************************** ++ * CPUIDLE DRIVER INTERFACE * ++ ****************************/ ++ ++struct cpuidle_driver { ++ char name[CPUIDLE_NAME_LEN]; ++ struct list_head driver_list; ++ ++ int (*init) (struct cpuidle_device *dev); ++ void (*exit) (struct cpuidle_device *dev); ++ int (*redetect) (struct cpuidle_device *dev); ++ ++ int (*bm_check) (void); ++ ++ struct module *owner; ++}; ++ ++#ifdef CONFIG_CPU_IDLE ++ ++extern int cpuidle_register_driver(struct cpuidle_driver *drv); ++extern void cpuidle_unregister_driver(struct cpuidle_driver *drv); ++extern int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv); ++extern int cpuidle_force_redetect_devices(struct cpuidle_driver *drv); ++ ++#else ++ 
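/*
 * Illustrative sketch, not taken from the patch itself: how a platform
 * driver might register against the CONFIG_CPU_IDLE interface declared
 * above (the no-op stubs that follow cover the CONFIG_CPU_IDLE=n case).
 * All my_* names are hypothetical; a real init() hook would also fill
 * in dev->states[] and dev->state_count before returning.
 */
static int my_cpuidle_init(struct cpuidle_device *dev)
{
	/* populate dev->states[] and set dev->state_count here */
	return 0;
}

static struct cpuidle_driver my_cpuidle_driver = {
	.name	= "my_cpuidle",
	.init	= my_cpuidle_init,
	.owner	= THIS_MODULE,
};

static int __init my_cpuidle_setup(void)
{
	/* returns 0 on success, per cpuidle_register_driver() above */
	return cpuidle_register_driver(&my_cpuidle_driver);
}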
++static inline int cpuidle_register_driver(struct cpuidle_driver *drv) ++{return 0;} ++static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { } ++static inline int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv) ++{return 0;} ++static inline int cpuidle_force_redetect_devices(struct cpuidle_driver *drv) ++{return 0;} ++ ++#endif ++ ++/****************************** ++ * CPUIDLE GOVERNOR INTERFACE * ++ ******************************/ ++ ++struct cpuidle_governor { ++ char name[CPUIDLE_NAME_LEN]; ++ struct list_head governor_list; ++ ++ int (*init) (struct cpuidle_device *dev); ++ void (*exit) (struct cpuidle_device *dev); ++ void (*scan) (struct cpuidle_device *dev); ++ ++ int (*select) (struct cpuidle_device *dev); ++ void (*reflect) (struct cpuidle_device *dev); ++ ++ struct module *owner; ++}; ++ ++#ifdef CONFIG_CPU_IDLE ++ ++extern int cpuidle_register_governor(struct cpuidle_governor *gov); ++extern void cpuidle_unregister_governor(struct cpuidle_governor *gov); ++extern int cpuidle_get_bm_activity(void); ++ ++#else ++ ++static inline int cpuidle_register_governor(struct cpuidle_governor *gov) ++{return 0;} ++static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { } ++static inline int cpuidle_get_bm_activity(void) ++{return 0;} ++ ++#endif ++ ++#endif /* _LINUX_CPUIDLE_H */ +diff -Nurb linux-2.6.22-570/include/linux/cpuset.h linux-2.6.22-591/include/linux/cpuset.h +--- linux-2.6.22-570/include/linux/cpuset.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/cpuset.h 2007-12-21 15:36:12.000000000 -0500 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_CPUSETS + +@@ -19,8 +20,6 @@ + extern int cpuset_init_early(void); + extern int cpuset_init(void); + extern void cpuset_init_smp(void); +-extern void cpuset_fork(struct task_struct *p); +-extern void cpuset_exit(struct task_struct *p); + extern cpumask_t cpuset_cpus_allowed(struct task_struct *p); + extern nodemask_t cpuset_mems_allowed(struct task_struct *p); + #define cpuset_current_mems_allowed (current->mems_allowed) +@@ -75,13 +74,13 @@ + + extern void cpuset_track_online_nodes(void); + ++extern int current_cpuset_is_being_rebound(void); ++ + #else /* !CONFIG_CPUSETS */ + + static inline int cpuset_init_early(void) { return 0; } + static inline int cpuset_init(void) { return 0; } + static inline void cpuset_init_smp(void) {} +-static inline void cpuset_fork(struct task_struct *p) {} +-static inline void cpuset_exit(struct task_struct *p) {} + + static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) + { +@@ -146,6 +145,11 @@ + + static inline void cpuset_track_online_nodes(void) {} + ++static inline int current_cpuset_is_being_rebound(void) ++{ ++ return 0; ++} ++ + #endif /* !CONFIG_CPUSETS */ + + #endif /* _LINUX_CPUSET_H */ +diff -Nurb linux-2.6.22-570/include/linux/device.h linux-2.6.22-591/include/linux/device.h +--- linux-2.6.22-570/include/linux/device.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/device.h 2007-12-21 15:36:14.000000000 -0500 +@@ -200,6 +200,8 @@ + + int (*suspend)(struct device *, pm_message_t state); + int (*resume)(struct device *); ++ ++ const struct shadow_dir_operations *shadow_ops; + }; + + extern int __must_check class_register(struct class *); +@@ -238,7 +240,6 @@ + * @devt: for internal use by the driver core only. + * @node: for internal use by the driver core only. + * @kobj: for internal use by the driver core only. 
+- * @devt_attr: for internal use by the driver core only. + * @groups: optional additional groups to be created + * @dev: if set, a symlink to the struct device is created in the sysfs + * directory for this struct class device. +@@ -263,8 +264,6 @@ + struct kobject kobj; + struct class * class; /* required */ + dev_t devt; /* dev_t, creates the sysfs "dev" */ +- struct class_device_attribute *devt_attr; +- struct class_device_attribute uevent_attr; + struct device * dev; /* not necessary, but nice to have */ + void * class_data; /* class-specific data */ + struct class_device *parent; /* parent of this child device, if there is one */ +@@ -419,8 +418,6 @@ + struct device_type *type; + unsigned is_registered:1; + unsigned uevent_suppress:1; +- struct device_attribute uevent_attr; +- struct device_attribute *devt_attr; + + struct semaphore sem; /* semaphore to synchronize calls to + * its driver. +diff -Nurb linux-2.6.22-570/include/linux/dmaengine.h linux-2.6.22-591/include/linux/dmaengine.h +--- linux-2.6.22-570/include/linux/dmaengine.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/dmaengine.h 2007-12-21 15:36:12.000000000 -0500 +@@ -21,29 +21,40 @@ + #ifndef DMAENGINE_H + #define DMAENGINE_H + +-#ifdef CONFIG_DMA_ENGINE +- + #include + #include + #include + #include + #include ++#include + + /** +- * enum dma_event - resource PNP/power managment events ++ * enum dma_state - resource PNP/power management state + * @DMA_RESOURCE_SUSPEND: DMA device going into low power state + * @DMA_RESOURCE_RESUME: DMA device returning to full power +- * @DMA_RESOURCE_ADDED: DMA device added to the system ++ * @DMA_RESOURCE_AVAILABLE: DMA device available to the system + * @DMA_RESOURCE_REMOVED: DMA device removed from the system + */ +-enum dma_event { ++enum dma_state { + DMA_RESOURCE_SUSPEND, + DMA_RESOURCE_RESUME, +- DMA_RESOURCE_ADDED, ++ DMA_RESOURCE_AVAILABLE, + DMA_RESOURCE_REMOVED, + }; + + /** ++ * enum dma_state_client - state of the channel in the client ++ * @DMA_ACK: client would like to use, or was using this channel ++ * @DMA_DUP: client has already seen this channel, or is not using this channel ++ * @DMA_NAK: client does not want to see any more channels ++ */ ++enum dma_state_client { ++ DMA_ACK, ++ DMA_DUP, ++ DMA_NAK, ++}; ++ ++/** + * typedef dma_cookie_t - an opaque DMA cookie + * + * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code +@@ -65,6 +76,31 @@ + }; + + /** ++ * enum dma_transaction_type - DMA transaction types/indexes ++ */ ++enum dma_transaction_type { ++ DMA_MEMCPY, ++ DMA_XOR, ++ DMA_PQ_XOR, ++ DMA_DUAL_XOR, ++ DMA_PQ_UPDATE, ++ DMA_ZERO_SUM, ++ DMA_PQ_ZERO_SUM, ++ DMA_MEMSET, ++ DMA_MEMCPY_CRC32C, ++ DMA_INTERRUPT, ++}; ++ ++/* last transaction type for creation of the capabilities mask */ ++#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) ++ ++/** ++ * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. 
++ * See linux/cpumask.h ++ */ ++typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; ++ ++/** + * struct dma_chan_percpu - the per-CPU part of struct dma_chan + * @refcount: local_t used for open-coded "bigref" counting + * @memcpy_count: transaction counter +@@ -80,7 +116,6 @@ + + /** + * struct dma_chan - devices supply DMA channels, clients use them +- * @client: ptr to the client user of this chan, will be %NULL when unused + * @device: ptr to the dma device who supplies this channel, always !%NULL + * @cookie: last cookie value returned to client + * @chan_id: channel ID for sysfs +@@ -88,12 +123,10 @@ + * @refcount: kref, used in "bigref" slow-mode + * @slow_ref: indicates that the DMA channel is free + * @rcu: the DMA channel's RCU head +- * @client_node: used to add this to the client chan list + * @device_node: used to add this to the device chan list + * @local: per-cpu pointer to a struct dma_chan_percpu + */ + struct dma_chan { +- struct dma_client *client; + struct dma_device *device; + dma_cookie_t cookie; + +@@ -105,11 +138,11 @@ + int slow_ref; + struct rcu_head rcu; + +- struct list_head client_node; + struct list_head device_node; + struct dma_chan_percpu *local; + }; + ++ + void dma_chan_cleanup(struct kref *kref); + + static inline void dma_chan_get(struct dma_chan *chan) +@@ -134,27 +167,68 @@ + + /* + * typedef dma_event_callback - function pointer to a DMA event callback +- */ +-typedef void (*dma_event_callback) (struct dma_client *client, +- struct dma_chan *chan, enum dma_event event); ++ * For each channel added to the system this routine is called for each client. ++ * If the client would like to use the channel it returns '1' to signal (ack) ++ * the dmaengine core to take out a reference on the channel and its ++ * corresponding device. A client must not 'ack' an available channel more ++ * than once. When a channel is removed all clients are notified. If a client ++ * is using the channel it must 'ack' the removal. A client must not 'ack' a ++ * removed channel more than once. ++ * @client - 'this' pointer for the client context ++ * @chan - channel to be acted upon ++ * @state - available or removed ++ */ ++struct dma_client; ++typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, ++ struct dma_chan *chan, enum dma_state state); + + /** + * struct dma_client - info on the entity making use of DMA services + * @event_callback: func ptr to call when something happens +- * @chan_count: number of chans allocated +- * @chans_desired: number of chans requested. Can be +/- chan_count +- * @lock: protects access to the channels list +- * @channels: the list of DMA channels allocated ++ * @cap_mask: only return channels that satisfy the requested capabilities ++ * a value of zero corresponds to any capability + * @global_node: list_head for global dma_client_list + */ + struct dma_client { + dma_event_callback event_callback; +- unsigned int chan_count; +- unsigned int chans_desired; ++ dma_cap_mask_t cap_mask; ++ struct list_head global_node; ++}; + ++typedef void (*dma_async_tx_callback)(void *dma_async_param); ++/** ++ * struct dma_async_tx_descriptor - async transaction descriptor ++ * @cookie: tracking cookie for this transaction, set to -EBUSY if ++ * this tx is sitting on a dependency list ++ * @ack: the descriptor can not be reused until the client acknowledges ++ * receipt, i.e. 
has had a chance to establish any dependency chains
++ * @callback: routine to call after this operation is complete
++ * @callback_param: general parameter to pass to the callback routine
++ * @chan: target channel for this operation
++ * @tx_submit: execute an operation
++ * @tx_set_dest: set a destination address in a hardware descriptor
++ * @tx_set_src: set a source address in a hardware descriptor
++ * @depend_list: at completion this list of transactions is submitted
++ * @depend_node: allow this transaction to be executed after another
++ * transaction has completed
++ * @parent: pointer to the next level up in the dependency chain
++ * @lock: protect the dependency list
++ */
++struct dma_async_tx_descriptor {
++	dma_cookie_t cookie;
++	int ack;
++	dma_async_tx_callback callback;
++	void *callback_param;
++	struct dma_chan *chan;
++	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
++	void (*tx_set_dest)(dma_addr_t addr,
++		struct dma_async_tx_descriptor *tx, int index);
++	void (*tx_set_src)(dma_addr_t addr,
++		struct dma_async_tx_descriptor *tx, int index);
++	struct list_head depend_list;
++	struct list_head depend_node;
++	struct dma_async_tx_descriptor *parent;
+ 	spinlock_t lock;
+-	struct list_head channels;
+-	struct list_head global_node;
+ };
+
+ /**
+@@ -162,141 +236,130 @@
+ * @chancnt: how many DMA channels are supported
+ * @channels: the list of struct dma_chan
+ * @global_node: list_head for global dma_device_list
++ * @cap_mask: one or more dma_capability flags
++ * @max_xor: maximum number of xor sources, 0 if no capability
+ * @refcount: reference count
+ * @done: IO completion struct
+ * @dev_id: unique device ID
++ * @dev: struct device reference for dma mapping api
+ * @device_alloc_chan_resources: allocate resources and return the
+ * number of allocated descriptors
+ * @device_free_chan_resources: release DMA channel's resources
+- * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
+- * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
+- * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
+- * @device_memcpy_complete: poll the status of an IOAT DMA transaction
+- * @device_memcpy_issue_pending: push appended descriptors to hardware
++ * @device_prep_dma_memcpy: prepares a memcpy operation
++ * @device_prep_dma_xor: prepares a xor operation
++ * @device_prep_dma_zero_sum: prepares a zero_sum operation
++ * @device_prep_dma_memset: prepares a memset operation
++ * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
++ * @device_dependency_added: async_tx notifies the channel about new deps
++ * @device_issue_pending: push pending transactions to hardware
+ */
+ struct dma_device {
+
+ 	unsigned int chancnt;
+ 	struct list_head channels;
+ 	struct list_head global_node;
++	dma_cap_mask_t cap_mask;
++	int max_xor;
+
+ 	struct kref refcount;
+ 	struct completion done;
+
+ 	int dev_id;
++	struct device *dev;
+
+ 	int (*device_alloc_chan_resources)(struct dma_chan *chan);
+ 	void (*device_free_chan_resources)(struct dma_chan *chan);
+-	dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan,
+-		void *dest, void *src, size_t len);
+-	dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan,
+-		struct page *page, unsigned int offset, void *kdata,
+-		size_t len);
+-	dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan,
+-		struct page *dest_pg, unsigned int dest_off,
+-		struct page *src_pg, unsigned int src_off, size_t len);
+-	enum dma_status (*device_memcpy_complete)(struct
dma_chan *chan, ++ ++ struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( ++ struct dma_chan *chan, size_t len, int int_en); ++ struct dma_async_tx_descriptor *(*device_prep_dma_xor)( ++ struct dma_chan *chan, unsigned int src_cnt, size_t len, ++ int int_en); ++ struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( ++ struct dma_chan *chan, unsigned int src_cnt, size_t len, ++ u32 *result, int int_en); ++ struct dma_async_tx_descriptor *(*device_prep_dma_memset)( ++ struct dma_chan *chan, int value, size_t len, int int_en); ++ struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( ++ struct dma_chan *chan); ++ ++ void (*device_dependency_added)(struct dma_chan *chan); ++ enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, + dma_cookie_t *used); +- void (*device_memcpy_issue_pending)(struct dma_chan *chan); ++ void (*device_issue_pending)(struct dma_chan *chan); + }; + + /* --- public DMA engine API --- */ + +-struct dma_client *dma_async_client_register(dma_event_callback event_callback); ++void dma_async_client_register(struct dma_client *client); + void dma_async_client_unregister(struct dma_client *client); +-void dma_async_client_chan_request(struct dma_client *client, +- unsigned int number); ++void dma_async_client_chan_request(struct dma_client *client); ++dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, ++ void *dest, void *src, size_t len); ++dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, ++ struct page *page, unsigned int offset, void *kdata, size_t len); ++dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, ++ struct page *dest_pg, unsigned int dest_off, struct page *src_pg, ++ unsigned int src_off, size_t len); ++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, ++ struct dma_chan *chan); + +-/** +- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses +- * @chan: DMA channel to offload copy to +- * @dest: destination address (virtual) +- * @src: source address (virtual) +- * @len: length +- * +- * Both @dest and @src must be mappable to a bus address according to the +- * DMA mapping API rules for streaming mappings. +- * Both @dest and @src must stay memory resident (kernel memory or locked +- * user space pages). +- */ +-static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, +- void *dest, void *src, size_t len) ++static inline void ++async_tx_ack(struct dma_async_tx_descriptor *tx) + { +- int cpu = get_cpu(); +- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; +- per_cpu_ptr(chan->local, cpu)->memcpy_count++; +- put_cpu(); +- +- return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); ++ tx->ack = 1; + } + +-/** +- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page +- * @chan: DMA channel to offload copy to +- * @page: destination page +- * @offset: offset in page to copy to +- * @kdata: source address (virtual) +- * @len: length +- * +- * Both @page/@offset and @kdata must be mappable to a bus address according +- * to the DMA mapping API rules for streaming mappings. 
+- * Both @page/@offset and @kdata must stay memory resident (kernel memory or +- * locked user space pages) +- */ +-static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, +- struct page *page, unsigned int offset, void *kdata, size_t len) ++#define first_dma_cap(mask) __first_dma_cap(&(mask)) ++static inline int __first_dma_cap(const dma_cap_mask_t *srcp) + { +- int cpu = get_cpu(); +- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; +- per_cpu_ptr(chan->local, cpu)->memcpy_count++; +- put_cpu(); ++ return min_t(int, DMA_TX_TYPE_END, ++ find_first_bit(srcp->bits, DMA_TX_TYPE_END)); ++} + +- return chan->device->device_memcpy_buf_to_pg(chan, page, offset, +- kdata, len); ++#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask)) ++static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp) ++{ ++ return min_t(int, DMA_TX_TYPE_END, ++ find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1)); + } + +-/** +- * dma_async_memcpy_pg_to_pg - offloaded copy from page to page +- * @chan: DMA channel to offload copy to +- * @dest_pg: destination page +- * @dest_off: offset in page to copy to +- * @src_pg: source page +- * @src_off: offset in page to copy from +- * @len: length +- * +- * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus +- * address according to the DMA mapping API rules for streaming mappings. +- * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident +- * (kernel memory or locked user space pages). +- */ +-static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, +- struct page *dest_pg, unsigned int dest_off, struct page *src_pg, +- unsigned int src_off, size_t len) ++#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask)) ++static inline void ++__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) + { +- int cpu = get_cpu(); +- per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; +- per_cpu_ptr(chan->local, cpu)->memcpy_count++; +- put_cpu(); ++ set_bit(tx_type, dstp->bits); ++} + +- return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, +- src_pg, src_off, len); ++#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) ++static inline int ++__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) ++{ ++ return test_bit(tx_type, srcp->bits); + } + ++#define for_each_dma_cap_mask(cap, mask) \ ++ for ((cap) = first_dma_cap(mask); \ ++ (cap) < DMA_TX_TYPE_END; \ ++ (cap) = next_dma_cap((cap), (mask))) ++ + /** +- * dma_async_memcpy_issue_pending - flush pending copies to HW ++ * dma_async_issue_pending - flush pending transactions to HW + * @chan: target DMA channel + * + * This allows drivers to push copies to HW in batches, + * reducing MMIO writes where possible. + */ +-static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) ++static inline void dma_async_issue_pending(struct dma_chan *chan) + { +- return chan->device->device_memcpy_issue_pending(chan); ++ return chan->device->device_issue_pending(chan); + } + ++#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) ++ + /** +- * dma_async_memcpy_complete - poll for transaction completion ++ * dma_async_is_tx_complete - poll for transaction completion + * @chan: DMA channel + * @cookie: transaction identifier to check status of + * @last: returns last completed cookie, can be NULL +@@ -306,12 +369,15 @@ + * internal state and can be used with dma_async_is_complete() to check + * the status of multiple cookies without re-checking hardware state. 
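Under the reworked dmaengine client interface above, a client no longer requests a channel count: it publishes a capability mask, acks channels from its event callback, and drives transfers through prepared descriptors instead of the old per-device memcpy entry points. A hedged sketch of that flow, with illustrative names, minimal error handling, and DMA addresses assumed to be already mapped:

#include <linux/dmaengine.h>

static struct dma_chan *demo_chan;

/* Keep the first available channel; release it again on removal. */
static enum dma_state_client
demo_event(struct dma_client *client, struct dma_chan *chan,
	   enum dma_state state)
{
	if (state == DMA_RESOURCE_AVAILABLE && !demo_chan) {
		demo_chan = chan;
		return DMA_ACK;
	}
	if (state == DMA_RESOURCE_REMOVED && chan == demo_chan) {
		demo_chan = NULL;
		return DMA_ACK;
	}
	return DMA_DUP;
}

static struct dma_client demo_client = {
	.event_callback	= demo_event,
};

static void demo_register(void)
{
	dma_cap_set(DMA_MEMCPY, demo_client.cap_mask);
	dma_async_client_register(&demo_client);
	dma_async_client_chan_request(&demo_client);
}

/* One copy through a prepared descriptor. */
static dma_cookie_t demo_copy(dma_addr_t dst, dma_addr_t src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	tx = demo_chan->device->device_prep_dma_memcpy(demo_chan, len, 0);
	if (!tx)
		return -ENOMEM;
	tx->tx_set_src(src, tx, 0);
	tx->tx_set_dest(dst, tx, 0);
	async_tx_ack(tx);	/* no dependency chains will be built on it */
	cookie = tx->tx_submit(tx);
	dma_async_issue_pending(demo_chan);	/* flush to hardware */
	return cookie;
}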
+ */ +-static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, ++static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) + { +- return chan->device->device_memcpy_complete(chan, cookie, last, used); ++ return chan->device->device_is_tx_complete(chan, cookie, last, used); + } + ++#define dma_async_memcpy_complete(chan, cookie, last, used)\ ++ dma_async_is_tx_complete(chan, cookie, last, used) ++ + /** + * dma_async_is_complete - test a cookie against chan state + * @cookie: transaction identifier to test status of +@@ -334,6 +400,7 @@ + return DMA_IN_PROGRESS; + } + ++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); + + /* --- DMA device --- */ + +@@ -362,5 +429,4 @@ + struct dma_pinned_list *pinned_list, struct page *page, + unsigned int offset, size_t len); + +-#endif /* CONFIG_DMA_ENGINE */ + #endif /* DMAENGINE_H */ +diff -Nurb linux-2.6.22-570/include/linux/etherdevice.h linux-2.6.22-591/include/linux/etherdevice.h +--- linux-2.6.22-570/include/linux/etherdevice.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/etherdevice.h 2007-12-21 15:36:12.000000000 -0500 +@@ -40,12 +40,6 @@ + struct hh_cache *hh); + + extern struct net_device *alloc_etherdev(int sizeof_priv); +-static inline void eth_copy_and_sum (struct sk_buff *dest, +- const unsigned char *src, +- int len, int base) +-{ +- memcpy (dest->data, src, len); +-} + + /** + * is_zero_ether_addr - Determine if give Ethernet address is all zeros. +diff -Nurb linux-2.6.22-570/include/linux/freezer.h linux-2.6.22-591/include/linux/freezer.h +--- linux-2.6.22-570/include/linux/freezer.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/freezer.h 2007-12-21 15:36:12.000000000 -0500 +@@ -115,6 +115,14 @@ + return !!(p->flags & PF_FREEZER_SKIP); + } + ++/* ++ * Tell the freezer that the current task should be frozen by it ++ */ ++static inline void set_freezable(void) ++{ ++ current->flags &= ~PF_NOFREEZE; ++} ++ + #else + static inline int frozen(struct task_struct *p) { return 0; } + static inline int freezing(struct task_struct *p) { return 0; } +@@ -130,4 +138,5 @@ + static inline void freezer_do_not_count(void) {} + static inline void freezer_count(void) {} + static inline int freezer_should_skip(struct task_struct *p) { return 0; } ++static inline void set_freezable(void) {} + #endif +diff -Nurb linux-2.6.22-570/include/linux/fs.h linux-2.6.22-591/include/linux/fs.h +--- linux-2.6.22-570/include/linux/fs.h 2007-12-21 15:36:06.000000000 -0500 ++++ linux-2.6.22-591/include/linux/fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -283,6 +283,17 @@ + #define SYNC_FILE_RANGE_WRITE 2 + #define SYNC_FILE_RANGE_WAIT_AFTER 4 + ++/* ++ * sys_fallocate modes ++ * Currently sys_fallocate supports two modes: ++ * FA_ALLOCATE : This is the preallocate mode, using which an application/user ++ * may request (pre)allocation of blocks. ++ * FA_DEALLOCATE: This is the deallocate mode, which can be used to free ++ * the preallocated blocks. 
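The set_freezable() helper added above clears PF_NOFREEZE on the calling task, so a kernel thread that should participate in suspend now opts in explicitly rather than being frozen by default. A minimal sketch of the expected pattern, assuming the usual kthread plus try_to_freeze() loop (the thread itself is hypothetical):

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int demo_kthread(void *unused)
{
	set_freezable();	/* clear PF_NOFREEZE: the freezer may stop us */
	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here across suspend/resume */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}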
++ */ ++#define FA_ALLOCATE 0x1 ++#define FA_DEALLOCATE 0x2 ++ + #ifdef __KERNEL__ + + #include +@@ -300,6 +311,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -1139,6 +1151,7 @@ + int (*flock) (struct file *, int, struct file_lock *); + ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); + ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); ++ int (*revoke)(struct file *, struct address_space *); + }; + + struct inode_operations { +@@ -1164,6 +1177,8 @@ + ssize_t (*listxattr) (struct dentry *, char *, size_t); + int (*removexattr) (struct dentry *, const char *); + void (*truncate_range)(struct inode *, loff_t, loff_t); ++ long (*fallocate)(struct inode *inode, int mode, loff_t offset, ++ loff_t len); + int (*sync_flags) (struct inode *); + }; + +@@ -1809,6 +1824,13 @@ + extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + size_t len, unsigned int flags); + ++/* fs/revoke.c */ ++#ifdef CONFIG_MMU ++extern int generic_file_revoke(struct file *, struct address_space *); ++#else ++#define generic_file_revoke NULL ++#endif ++ + extern void + file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); + extern loff_t no_llseek(struct file *file, loff_t offset, int origin); +@@ -2077,5 +2099,9 @@ + { } + #endif /* CONFIG_SECURITY */ + ++int proc_nr_files(ctl_table *table, int write, struct file *filp, ++ void __user *buffer, size_t *lenp, loff_t *ppos); ++ ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_FS_H */ +diff -Nurb linux-2.6.22-570/include/linux/fs_stack.h linux-2.6.22-591/include/linux/fs_stack.h +--- linux-2.6.22-570/include/linux/fs_stack.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/fs_stack.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,7 +1,19 @@ ++/* ++ * Copyright (c) 2006-2007 Erez Zadok ++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2006-2007 Stony Brook University ++ * Copyright (c) 2006-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ + #ifndef _LINUX_FS_STACK_H + #define _LINUX_FS_STACK_H + +-/* This file defines generic functions used primarily by stackable ++/* ++ * This file defines generic functions used primarily by stackable + * filesystems; none of these functions require i_mutex to be held. + */ + +@@ -11,7 +23,8 @@ + extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src, + int (*get_nlinks)(struct inode *)); + +-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src); ++extern void fsstack_copy_inode_size(struct inode *dst, ++ const struct inode *src); + + /* inlines */ + static inline void fsstack_copy_attr_atime(struct inode *dest, +diff -Nurb linux-2.6.22-570/include/linux/gfp.h linux-2.6.22-591/include/linux/gfp.h +--- linux-2.6.22-570/include/linux/gfp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/gfp.h 2007-12-21 15:36:12.000000000 -0500 +@@ -30,6 +30,9 @@ + * cannot handle allocation failures. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely. ++ * ++ * __GFP_MOVABLE: Flag that this page will be movable by the page migration ++ * mechanism or reclaimed + */ + #define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? 
*/ + #define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */ +@@ -45,15 +48,21 @@ + #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ + #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ + #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ ++#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ ++#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ + +-#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ ++#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ + #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + + /* if you forget to add the bitmask here kernel will crash, period */ + #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ + __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ + __GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \ +- __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) ++ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \ ++ __GFP_RECLAIMABLE|__GFP_MOVABLE) ++ ++/* This mask makes up all the page movable related flags */ ++#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) + + /* This equals 0, but use constants in case they ever change */ + #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) +@@ -62,9 +71,20 @@ + #define GFP_NOIO (__GFP_WAIT) + #define GFP_NOFS (__GFP_WAIT | __GFP_IO) + #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) ++#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ ++ __GFP_RECLAIMABLE) + #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) + #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ + __GFP_HIGHMEM) ++#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ ++ __GFP_HARDWALL | __GFP_HIGHMEM | \ ++ __GFP_MOVABLE) ++#define GFP_NOFS_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_MOVABLE) ++#define GFP_USER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ ++ __GFP_HARDWALL | __GFP_MOVABLE) ++#define GFP_HIGHUSER_PAGECACHE (__GFP_WAIT | __GFP_IO | __GFP_FS | \ ++ __GFP_HARDWALL | __GFP_HIGHMEM | \ ++ __GFP_MOVABLE) + + #ifdef CONFIG_NUMA + #define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) +@@ -99,6 +119,12 @@ + return ZONE_NORMAL; + } + ++static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags) ++{ ++ BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); ++ return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags; ++} ++ + /* + * There is only one page-allocator function, and two main namespaces to + * it. 
The alloc_page*() variants return 'struct page *' and as such +diff -Nurb linux-2.6.22-570/include/linux/highmem.h linux-2.6.22-591/include/linux/highmem.h +--- linux-2.6.22-570/include/linux/highmem.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/highmem.h 2007-12-21 15:36:12.000000000 -0500 +@@ -73,10 +73,27 @@ + } + + #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE ++/** ++ * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags ++ * @movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE ++ * @vma: The VMA the page is to be allocated for ++ * @vaddr: The virtual address the page will be inserted into ++ * ++ * This function will allocate a page for a VMA but the caller is expected ++ * to specify via movableflags whether the page will be movable in the ++ * future or not ++ * ++ * An architecture may override this function by defining ++ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own ++ * implementation. ++ */ + static inline struct page * +-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) ++__alloc_zeroed_user_highpage(gfp_t movableflags, ++ struct vm_area_struct *vma, ++ unsigned long vaddr) + { +- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr); ++ struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags, ++ vma, vaddr); + + if (page) + clear_user_highpage(page, vaddr); +@@ -85,6 +102,36 @@ + } + #endif + ++/** ++ * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA ++ * @vma: The VMA the page is to be allocated for ++ * @vaddr: The virtual address the page will be inserted into ++ * ++ * This function will allocate a page for a VMA that the caller knows will ++ * not be able to move in the future using move_pages() or reclaim. 
If it ++ * is known that the page can move, use alloc_zeroed_user_highpage_movable ++ */ ++static inline struct page * ++alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr) ++{ ++ return __alloc_zeroed_user_highpage(0, vma, vaddr); ++} ++ ++/** ++ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move ++ * @vma: The VMA the page is to be allocated for ++ * @vaddr: The virtual address the page will be inserted into ++ * ++ * This function will allocate a page for a VMA that the caller knows will ++ * be able to migrate in the future using move_pages() or reclaimed ++ */ ++static inline struct page * ++alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, ++ unsigned long vaddr) ++{ ++ return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr); ++} ++ + static inline void clear_highpage(struct page *page) + { + void *kaddr = kmap_atomic(page, KM_USER0); +diff -Nurb linux-2.6.22-570/include/linux/hugetlb.h linux-2.6.22-591/include/linux/hugetlb.h +--- linux-2.6.22-570/include/linux/hugetlb.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/hugetlb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -15,6 +15,7 @@ + } + + int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); ++int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); + int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); + int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); + void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +@@ -29,6 +30,8 @@ + void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); + + extern unsigned long max_huge_pages; ++extern unsigned long hugepages_treat_as_movable; ++extern gfp_t htlb_alloc_mask; + extern const unsigned long hugetlb_zero, hugetlb_infinity; + extern int sysctl_hugetlb_shm_group; + +diff -Nurb linux-2.6.22-570/include/linux/idr.h linux-2.6.22-591/include/linux/idr.h +--- linux-2.6.22-570/include/linux/idr.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/idr.h 2007-12-21 15:36:14.000000000 -0500 +@@ -83,4 +83,33 @@ + void idr_destroy(struct idr *idp); + void idr_init(struct idr *idp); + ++ ++/* ++ * IDA - IDR based id allocator, use when translation from id to ++ * pointer isn't necessary. 
++ */ ++#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ ++#define IDA_BITMAP_LONGS (128 / sizeof(long) - 1) ++#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) ++ ++struct ida_bitmap { ++ long nr_busy; ++ unsigned long bitmap[IDA_BITMAP_LONGS]; ++}; ++ ++struct ida { ++ struct idr idr; ++ struct ida_bitmap *free_bitmap; ++}; ++ ++#define IDA_INIT(name) { .idr = IDR_INIT(name), .free_bitmap = NULL, } ++#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) ++ ++int ida_pre_get(struct ida *ida, gfp_t gfp_mask); ++int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); ++int ida_get_new(struct ida *ida, int *p_id); ++void ida_remove(struct ida *ida, int id); ++void ida_destroy(struct ida *ida); ++void ida_init(struct ida *ida); ++ + #endif /* __IDR_H__ */ +diff -Nurb linux-2.6.22-570/include/linux/if_bridge.h linux-2.6.22-591/include/linux/if_bridge.h +--- linux-2.6.22-570/include/linux/if_bridge.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/if_bridge.h 2007-12-21 15:36:14.000000000 -0500 +@@ -104,7 +104,7 @@ + + #include + +-extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *)); ++extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); + extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, + struct sk_buff *skb); + extern int (*br_should_route_hook)(struct sk_buff **pskb); +diff -Nurb linux-2.6.22-570/include/linux/if_link.h linux-2.6.22-591/include/linux/if_link.h +--- linux-2.6.22-570/include/linux/if_link.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/if_link.h 2007-12-21 15:36:12.000000000 -0500 +@@ -76,6 +76,8 @@ + #define IFLA_WEIGHT IFLA_WEIGHT + IFLA_OPERSTATE, + IFLA_LINKMODE, ++ IFLA_LINKINFO, ++#define IFLA_LINKINFO IFLA_LINKINFO + __IFLA_MAX + }; + +@@ -140,4 +142,49 @@ + __u32 retrans_time; + }; + ++enum ++{ ++ IFLA_INFO_UNSPEC, ++ IFLA_INFO_KIND, ++ IFLA_INFO_DATA, ++ IFLA_INFO_XSTATS, ++ __IFLA_INFO_MAX, ++}; ++ ++#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1) ++ ++/* VLAN section */ ++ ++enum ++{ ++ IFLA_VLAN_UNSPEC, ++ IFLA_VLAN_ID, ++ IFLA_VLAN_FLAGS, ++ IFLA_VLAN_EGRESS_QOS, ++ IFLA_VLAN_INGRESS_QOS, ++ __IFLA_VLAN_MAX, ++}; ++ ++#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1) ++ ++struct ifla_vlan_flags { ++ __u32 flags; ++ __u32 mask; ++}; ++ ++enum ++{ ++ IFLA_VLAN_QOS_UNSPEC, ++ IFLA_VLAN_QOS_MAPPING, ++ __IFLA_VLAN_QOS_MAX ++}; ++ ++#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1) ++ ++struct ifla_vlan_qos_mapping ++{ ++ __u32 from; ++ __u32 to; ++}; ++ + #endif /* _LINUX_IF_LINK_H */ +diff -Nurb linux-2.6.22-570/include/linux/if_pppox.h linux-2.6.22-591/include/linux/if_pppox.h +--- linux-2.6.22-570/include/linux/if_pppox.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/if_pppox.h 2007-12-21 15:36:14.000000000 -0500 +@@ -160,7 +160,7 @@ + struct module; + + struct pppox_proto { +- int (*create)(struct socket *sock); ++ int (*create)(struct net *net, struct socket *sock); + int (*ioctl)(struct socket *sock, unsigned int cmd, + unsigned long arg); + struct module *owner; +diff -Nurb linux-2.6.22-570/include/linux/if_tun.h linux-2.6.22-591/include/linux/if_tun.h +--- linux-2.6.22-570/include/linux/if_tun.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/if_tun.h 2007-12-21 15:36:12.000000000 -0500 +@@ -36,6 +36,7 @@ + unsigned long flags; + int attached; + uid_t owner; ++ gid_t group; + + wait_queue_head_t read_wait; + struct sk_buff_head readq; +@@ -78,6 +79,7 @@ + 
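The IDA allocator added above keeps the idr radix tree but tracks free ids in bitmap chunks, for callers that only need id allocation and never the id-to-pointer lookup. A small usage sketch following the usual idr-style preload-and-retry convention; the names are illustrative.

#include <linux/idr.h>

static DEFINE_IDA(demo_ida);

static int demo_alloc_id(void)
{
	int id, err;

	do {
		if (!ida_pre_get(&demo_ida, GFP_KERNEL))
			return -ENOMEM;	/* could not preload a bitmap */
		err = ida_get_new(&demo_ida, &id);
	} while (err == -EAGAIN);

	return err ? err : id;
}

static void demo_free_id(int id)
{
	ida_remove(&demo_ida, id);
}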
#define TUNSETPERSIST _IOW('T', 203, int) + #define TUNSETOWNER _IOW('T', 204, int) + #define TUNSETLINK _IOW('T', 205, int) ++#define TUNSETGROUP _IOW('T', 206, int) + + /* TUNSETIFF ifr flags */ + #define IFF_TUN 0x0001 +diff -Nurb linux-2.6.22-570/include/linux/if_vlan.h linux-2.6.22-591/include/linux/if_vlan.h +--- linux-2.6.22-570/include/linux/if_vlan.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/if_vlan.h 2007-12-21 15:36:14.000000000 -0500 +@@ -62,7 +62,7 @@ + #define VLAN_VID_MASK 0xfff + + /* found in socket.c */ +-extern void vlan_ioctl_set(int (*hook)(void __user *)); ++extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *)); + + #define VLAN_NAME "vlan" + +@@ -99,7 +99,7 @@ + } + + struct vlan_priority_tci_mapping { +- unsigned long priority; ++ u32 priority; + unsigned short vlan_qos; /* This should be shifted when first set, so we only do it + * at provisioning time. + * ((skb->priority << 13) & 0xE000) +@@ -112,7 +112,10 @@ + /** This will be the mapping that correlates skb->priority to + * 3 bits of VLAN QOS tags... + */ +- unsigned long ingress_priority_map[8]; ++ unsigned int nr_ingress_mappings; ++ u32 ingress_priority_map[8]; ++ ++ unsigned int nr_egress_mappings; + struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */ + + unsigned short vlan_id; /* The VLAN Identifier for this interface. */ +@@ -395,6 +398,10 @@ + GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */ + }; + ++enum vlan_flags { ++ VLAN_FLAG_REORDER_HDR = 0x1, ++}; ++ + enum vlan_name_types { + VLAN_NAME_TYPE_PLUS_VID, /* Name will look like: vlan0005 */ + VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like: eth1.0005 */ +diff -Nurb linux-2.6.22-570/include/linux/inetdevice.h linux-2.6.22-591/include/linux/inetdevice.h +--- linux-2.6.22-570/include/linux/inetdevice.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/inetdevice.h 2007-12-21 15:36:14.000000000 -0500 +@@ -17,8 +17,6 @@ + DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1); + }; + +-extern struct ipv4_devconf ipv4_devconf; +- + struct in_device + { + struct net_device *dev; +@@ -44,7 +42,7 @@ + }; + + #define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1]) +-#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr) ++#define IPV4_DEVCONF_ALL(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf), attr) + + static inline int ipv4_devconf_get(struct in_device *in_dev, int index) + { +@@ -71,14 +69,14 @@ + ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val)) + + #define IN_DEV_ANDCONF(in_dev, attr) \ +- (IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr)) ++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) && IN_DEV_CONF_GET((in_dev), attr)) + #define IN_DEV_ORCONF(in_dev, attr) \ +- (IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr)) ++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) || IN_DEV_CONF_GET((in_dev), attr)) + #define IN_DEV_MAXCONF(in_dev, attr) \ +- (max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr))) ++ (max(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr), IN_DEV_CONF_GET((in_dev), attr))) + + #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) +-#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL(MC_FORWARDING) && \ ++#define IN_DEV_MFORWARD(in_dev) (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, MC_FORWARDING) && \ + IPV4_DEVCONF((in_dev)->cnf, \ + MC_FORWARDING)) + #define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) +@@ -127,15 +125,15 @@ + extern int 
register_inetaddr_notifier(struct notifier_block *nb); + extern int unregister_inetaddr_notifier(struct notifier_block *nb); + +-extern struct net_device *ip_dev_find(__be32 addr); ++extern struct net_device *ip_dev_find(struct net *net, __be32 addr); + extern int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b); +-extern int devinet_ioctl(unsigned int cmd, void __user *); ++extern int devinet_ioctl(struct net *net, unsigned int cmd, void __user *); + extern void devinet_init(void); +-extern struct in_device *inetdev_by_index(int); ++extern struct in_device *inetdev_by_index(struct net *, int); + extern __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope); +-extern __be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope); ++extern __be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope); + extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); +-extern void inet_forward_change(void); ++extern void inet_forward_change(struct net *net); + + static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) + { +diff -Nurb linux-2.6.22-570/include/linux/init_task.h linux-2.6.22-591/include/linux/init_task.h +--- linux-2.6.22-570/include/linux/init_task.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/init_task.h 2007-12-21 15:36:14.000000000 -0500 +@@ -8,6 +8,8 @@ + #include + #include + #include ++#include ++#include + + #define INIT_FDTABLE \ + { \ +@@ -77,7 +79,9 @@ + .nslock = __SPIN_LOCK_UNLOCKED(nsproxy.nslock), \ + .uts_ns = &init_uts_ns, \ + .mnt_ns = NULL, \ ++ .net_ns = &init_net, \ + INIT_IPC_NS(ipc_ns) \ ++ .user_ns = &init_user_ns, \ + } + + #define INIT_SIGHAND(sighand) { \ +diff -Nurb linux-2.6.22-570/include/linux/io.h linux-2.6.22-591/include/linux/io.h +--- linux-2.6.22-570/include/linux/io.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/io.h 2007-12-21 15:36:12.000000000 -0500 +@@ -63,32 +63,7 @@ + void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset, + unsigned long size); + void devm_iounmap(struct device *dev, void __iomem *addr); +- +-/** +- * check_signature - find BIOS signatures +- * @io_addr: mmio address to check +- * @signature: signature block +- * @length: length of signature +- * +- * Perform a signature comparison with the mmio address io_addr. This +- * address should have been obtained by ioremap. +- * Returns 1 on a match. +- */ +- +-static inline int check_signature(const volatile void __iomem *io_addr, +- const unsigned char *signature, int length) +-{ +- int retval = 0; +- do { +- if (readb(io_addr) != *signature) +- goto out; +- io_addr++; +- signature++; +- length--; +- } while (length); +- retval = 1; +-out: +- return retval; +-} ++int check_signature(const volatile void __iomem *io_addr, ++ const unsigned char *signature, int length); + + #endif /* _LINUX_IO_H */ +diff -Nurb linux-2.6.22-570/include/linux/ip_mp_alg.h linux-2.6.22-591/include/linux/ip_mp_alg.h +--- linux-2.6.22-570/include/linux/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,22 +0,0 @@ +-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. +- * +- * Copyright (C) 2004, 2005 Einar Lueck +- * Copyright (C) 2005 David S. 
Miller +- */ +- +-#ifndef _LINUX_IP_MP_ALG_H +-#define _LINUX_IP_MP_ALG_H +- +-enum ip_mp_alg { +- IP_MP_ALG_NONE, +- IP_MP_ALG_RR, +- IP_MP_ALG_DRR, +- IP_MP_ALG_RANDOM, +- IP_MP_ALG_WRANDOM, +- __IP_MP_ALG_MAX +-}; +- +-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) +- +-#endif /* _LINUX_IP_MP_ALG_H */ +- +diff -Nurb linux-2.6.22-570/include/linux/ipc.h linux-2.6.22-591/include/linux/ipc.h +--- linux-2.6.22-570/include/linux/ipc.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/ipc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -93,6 +93,7 @@ + + #ifdef CONFIG_SYSVIPC + #define INIT_IPC_NS(ns) .ns = &init_ipc_ns, ++extern void free_ipc_ns(struct kref *kref); + extern struct ipc_namespace *copy_ipcs(unsigned long flags, + struct ipc_namespace *ns); + #else +@@ -104,13 +105,9 @@ + } + #endif + +-#ifdef CONFIG_IPC_NS +-extern void free_ipc_ns(struct kref *kref); +-#endif +- + static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) + { +-#ifdef CONFIG_IPC_NS ++#ifdef CONFIG_SYSVIPC + if (ns) + kref_get(&ns->kref); + #endif +@@ -119,7 +116,7 @@ + + static inline void put_ipc_ns(struct ipc_namespace *ns) + { +-#ifdef CONFIG_IPC_NS ++#ifdef CONFIG_SYSVIPC + kref_put(&ns->kref, free_ipc_ns); + #endif + } +@@ -127,5 +124,3 @@ + #endif /* __KERNEL__ */ + + #endif /* _LINUX_IPC_H */ +- +- +diff -Nurb linux-2.6.22-570/include/linux/ipv6.h linux-2.6.22-591/include/linux/ipv6.h +--- linux-2.6.22-570/include/linux/ipv6.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/ipv6.h 2007-12-21 15:36:12.000000000 -0500 +@@ -247,7 +247,7 @@ + __u16 lastopt; + __u32 nhoff; + __u16 flags; +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + __u16 dsthao; + #endif + +diff -Nurb linux-2.6.22-570/include/linux/kgdb.h linux-2.6.22-591/include/linux/kgdb.h +--- linux-2.6.22-570/include/linux/kgdb.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/kgdb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,291 @@ ++/* ++ * include/linux/kgdb.h ++ * ++ * This provides the hooks and functions that KGDB needs to share between ++ * the core, I/O and arch-specific portions. ++ * ++ * Author: Amit Kale and ++ * Tom Rini ++ * ++ * 2001-2004 (c) Amit S. Kale and 2003-2005 (c) MontaVista Software, Inc. ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++#ifdef __KERNEL__ ++#ifndef _KGDB_H_ ++#define _KGDB_H_ ++ ++#include ++ ++#ifdef CONFIG_KGDB ++#include ++#include ++#include ++#include ++ ++#ifndef CHECK_EXCEPTION_STACK ++#define CHECK_EXCEPTION_STACK() 1 ++#endif ++ ++struct tasklet_struct; ++struct pt_regs; ++struct task_struct; ++struct uart_port; ++ ++#ifdef CONFIG_KGDB_CONSOLE ++extern struct console kgdbcons; ++#endif ++ ++/* To enter the debugger explicitly. 
*/ ++extern void breakpoint(void); ++extern int kgdb_connected; ++extern int kgdb_may_fault; ++extern struct tasklet_struct kgdb_tasklet_breakpoint; ++ ++extern atomic_t kgdb_setting_breakpoint; ++extern atomic_t cpu_doing_single_step; ++extern atomic_t kgdb_sync_softlockup[NR_CPUS]; ++ ++extern struct task_struct *kgdb_usethread, *kgdb_contthread; ++ ++enum kgdb_bptype { ++ bp_breakpoint = '0', ++ bp_hardware_breakpoint, ++ bp_write_watchpoint, ++ bp_read_watchpoint, ++ bp_access_watchpoint ++}; ++ ++enum kgdb_bpstate { ++ bp_none = 0, ++ bp_removed, ++ bp_set, ++ bp_active ++}; ++ ++struct kgdb_bkpt { ++ unsigned long bpt_addr; ++ unsigned char saved_instr[BREAK_INSTR_SIZE]; ++ enum kgdb_bptype type; ++ enum kgdb_bpstate state; ++}; ++ ++/* The maximum number of KGDB I/O modules that can be loaded */ ++#define MAX_KGDB_IO_HANDLERS 3 ++ ++#ifndef MAX_BREAKPOINTS ++#define MAX_BREAKPOINTS 1000 ++#endif ++ ++#define KGDB_HW_BREAKPOINT 1 ++ ++/* Required functions. */ ++/** ++ * kgdb_arch_init - Perform any architecture specific initalization. ++ * ++ * This function will handle the initalization of any architecture ++ * specific hooks. ++ */ ++extern int kgdb_arch_init(void); ++ ++/** ++ * regs_to_gdb_regs - Convert ptrace regs to GDB regs ++ * @gdb_regs: A pointer to hold the registers in the order GDB wants. ++ * @regs: The &struct pt_regs of the current process. ++ * ++ * Convert the pt_regs in @regs into the format for registers that ++ * GDB expects, stored in @gdb_regs. ++ */ ++extern void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs); ++ ++/** ++ * sleeping_regs_to_gdb_regs - Convert ptrace regs to GDB regs ++ * @gdb_regs: A pointer to hold the registers in the order GDB wants. ++ * @p: The &struct task_struct of the desired process. ++ * ++ * Convert the register values of the sleeping process in @p to ++ * the format that GDB expects. ++ * This function is called when kgdb does not have access to the ++ * &struct pt_regs and therefore it should fill the gdb registers ++ * @gdb_regs with what has been saved in &struct thread_struct ++ * thread field during switch_to. ++ */ ++extern void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, ++ struct task_struct *p); ++ ++/** ++ * gdb_regs_to_regs - Convert GDB regs to ptrace regs. ++ * @gdb_regs: A pointer to hold the registers we've recieved from GDB. ++ * @regs: A pointer to a &struct pt_regs to hold these values in. ++ * ++ * Convert the GDB regs in @gdb_regs into the pt_regs, and store them ++ * in @regs. ++ */ ++extern void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs); ++ ++/** ++ * kgdb_arch_handle_exception - Handle architecture specific GDB packets. ++ * @vector: The error vector of the exception that happened. ++ * @signo: The signal number of the exception that happened. ++ * @err_code: The error code of the exception that happened. ++ * @remcom_in_buffer: The buffer of the packet we have read. ++ * @remcom_out_buffer: The buffer, of %BUFMAX to write a packet into. ++ * @regs: The &struct pt_regs of the current process. ++ * ++ * This function MUST handle the 'c' and 's' command packets, ++ * as well packets to set / remove a hardware breakpoint, if used. ++ * If there are additional packets which the hardware needs to handle, ++ * they are handled here. The code should return -1 if it wants to ++ * process more packets, and a %0 or %1 if it wants to exit from the ++ * kgdb hook. 
++ */
++extern int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++				       char *remcom_in_buffer,
++				       char *remcom_out_buffer,
++				       struct pt_regs *regs);
++
++/**
++ * kgdb_roundup_cpus - Get other CPUs into a holding pattern
++ * @flags: Current IRQ state
++ *
++ * On SMP systems, we need to get the attention of the other CPUs
++ * and get them into a known state. This should do what is needed
++ * to get the other CPUs to call kgdb_wait(). Note that on some arches,
++ * the NMI approach is not used for rounding up all the CPUs. For example,
++ * in case of MIPS, smp_call_function() is used to round up CPUs. In
++ * this case, we have to make sure that interrupts are enabled before
++ * calling smp_call_function(). The argument to this function is
++ * the flags that will be used when restoring the interrupts. There is
++ * a local_irq_save() call before kgdb_roundup_cpus().
++ *
++ * On non-SMP systems, this is not called.
++ */
++extern void kgdb_roundup_cpus(unsigned long flags);
++
++#ifndef JMP_REGS_ALIGNMENT
++#define JMP_REGS_ALIGNMENT
++#endif
++
++extern unsigned long kgdb_fault_jmp_regs[];
++
++/**
++ * kgdb_fault_setjmp - Store state in case we fault.
++ * @curr_context: An array to store state into.
++ *
++ * Certain functions may try and access memory, and in doing so may
++ * cause a fault. When this happens, we trap it, restore state to
++ * this call, and let ourselves know that something bad has happened.
++ */
++extern asmlinkage int kgdb_fault_setjmp(unsigned long *curr_context);
++
++/**
++ * kgdb_fault_longjmp - Restore state when we have faulted.
++ * @curr_context: The previously stored state.
++ *
++ * When something bad does happen, this function is called to
++ * restore the known good state, and set the return value to 1, so
++ * we know something bad happened.
++ */
++extern asmlinkage void kgdb_fault_longjmp(unsigned long *curr_context);
++
++/* Optional functions. */
++extern int kgdb_validate_break_address(unsigned long addr);
++extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr);
++extern int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle);
++
++/**
++ * struct kgdb_arch - Describe architecture specific values.
++ * @gdb_bpt_instr: The instruction to trigger a breakpoint.
++ * @flags: Flags for the breakpoint, currently just %KGDB_HW_BREAKPOINT.
++ * @shadowth: A value of %1 indicates we shadow information on processes.
++ * @set_breakpoint: Allow an architecture to specify how to set a software
++ * breakpoint.
++ * @remove_breakpoint: Allow an architecture to specify how to remove a
++ * software breakpoint.
++ * @set_hw_breakpoint: Allow an architecture to specify how to set a hardware
++ * breakpoint.
++ * @remove_hw_breakpoint: Allow an architecture to specify how to remove a
++ * hardware breakpoint.
++ *
++ * The @shadowth flag is an option to shadow information not retrievable by
++ * gdb otherwise. This is deprecated in favor of a binutils which supports
++ * CFI macros.
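These arch hooks pair with a pluggable I/O side: a low-level driver supplies polled character routines through the struct kgdb_io declared later in this header and hands them to kgdb_register_io_module(). A rough sketch, with placeholder accessors standing in for real UART polling:

#include <linux/kgdb.h>

static int demo_read_char(void)
{
	/* poll the receive register here; placeholder returns no data */
	return -1;
}

static void demo_write_char(u8 c)
{
	/* spin until the transmitter is ready, then emit c (placeholder) */
}

static struct kgdb_io demo_kgdb_io = {
	.read_char	= demo_read_char,
	.write_char	= demo_write_char,
};

static int __init demo_kgdb_io_init(void)
{
	return kgdb_register_io_module(&demo_kgdb_io);
}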
++ */ ++struct kgdb_arch { ++ unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE]; ++ unsigned long flags; ++ unsigned shadowth; ++ int (*set_breakpoint) (unsigned long, char *); ++ int (*remove_breakpoint)(unsigned long, char *); ++ int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); ++ int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype); ++ void (*remove_all_hw_break)(void); ++ void (*correct_hw_break)(void); ++}; ++ ++/* Thread reference */ ++typedef unsigned char threadref[8]; ++ ++/** ++ * struct kgdb_io - Desribe the interface for an I/O driver to talk with KGDB. ++ * @read_char: Pointer to a function that will return one char. ++ * @write_char: Pointer to a function that will write one char. ++ * @flush: Pointer to a function that will flush any pending writes. ++ * @init: Pointer to a function that will initialize the device. ++ * @late_init: Pointer to a function that will do any setup that has ++ * other dependencies. ++ * @pre_exception: Pointer to a function that will do any prep work for ++ * the I/O driver. ++ * @post_exception: Pointer to a function that will do any cleanup work ++ * for the I/O driver. ++ * ++ * The @init and @late_init function pointers allow for an I/O driver ++ * such as a serial driver to fully initialize the port with @init and ++ * be called very early, yet safely call request_irq() later in the boot ++ * sequence. ++ * ++ * @init is allowed to return a non-0 return value to indicate failure. ++ * If this is called early on, then KGDB will try again when it would call ++ * @late_init. If it has failed later in boot as well, the user will be ++ * notified. ++ */ ++struct kgdb_io { ++ int (*read_char) (void); ++ void (*write_char) (u8); ++ void (*flush) (void); ++ int (*init) (void); ++ void (*late_init) (void); ++ void (*pre_exception) (void); ++ void (*post_exception) (void); ++}; ++ ++extern struct kgdb_io kgdb_io_ops; ++extern struct kgdb_arch arch_kgdb_ops; ++extern int kgdb_initialized; ++ ++extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops); ++extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops); ++ ++extern void __init kgdb8250_add_port(int i, struct uart_port *serial_req); ++extern void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *serial_req); ++ ++extern int kgdb_hex2long(char **ptr, long *long_val); ++extern char *kgdb_mem2hex(char *mem, char *buf, int count); ++extern char *kgdb_hex2mem(char *buf, char *mem, int count); ++extern int kgdb_get_mem(char *addr, unsigned char *buf, int count); ++extern int kgdb_set_mem(char *addr, unsigned char *buf, int count); ++ ++int kgdb_isremovedbreak(unsigned long addr); ++ ++extern int kgdb_handle_exception(int ex_vector, int signo, int err_code, ++ struct pt_regs *regs); ++extern void kgdb_nmihook(int cpu, void *regs); ++extern int debugger_step; ++extern atomic_t debugger_active; ++#else ++/* Stubs for when KGDB is not set. */ ++static const atomic_t debugger_active = ATOMIC_INIT(0); ++#endif /* CONFIG_KGDB */ ++#endif /* _KGDB_H_ */ ++#endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/linux/kmod.h linux-2.6.22-591/include/linux/kmod.h +--- linux-2.6.22-570/include/linux/kmod.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/kmod.h 2007-12-21 15:36:12.000000000 -0500 +@@ -36,13 +36,57 @@ + #define try_then_request_module(x, mod...) 
((x) ?: (request_module(mod), (x))) + + struct key; +-extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[], +- struct key *session_keyring, int wait); ++struct file; ++struct subprocess_info; ++ ++/* Allocate a subprocess_info structure */ ++struct subprocess_info *call_usermodehelper_setup(char *path, ++ char **argv, char **envp); ++ ++/* Set various pieces of state into the subprocess_info structure */ ++void call_usermodehelper_setkeys(struct subprocess_info *info, ++ struct key *session_keyring); ++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, ++ struct file **filp); ++void call_usermodehelper_setcleanup(struct subprocess_info *info, ++ void (*cleanup)(char **argv, char **envp)); ++ ++enum umh_wait { ++ UMH_NO_WAIT = -1, /* don't wait at all */ ++ UMH_WAIT_EXEC = 0, /* wait for the exec, but not the process */ ++ UMH_WAIT_PROC = 1, /* wait for the process to complete */ ++}; ++ ++/* Actually execute the sub-process */ ++int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait); ++ ++/* Free the subprocess_info. This is only needed if you're not going ++ to call call_usermodehelper_exec */ ++void call_usermodehelper_freeinfo(struct subprocess_info *info); + + static inline int +-call_usermodehelper(char *path, char **argv, char **envp, int wait) ++call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) + { +- return call_usermodehelper_keys(path, argv, envp, NULL, wait); ++ struct subprocess_info *info; ++ ++ info = call_usermodehelper_setup(path, argv, envp); ++ if (info == NULL) ++ return -ENOMEM; ++ return call_usermodehelper_exec(info, wait); ++} ++ ++static inline int ++call_usermodehelper_keys(char *path, char **argv, char **envp, ++ struct key *session_keyring, enum umh_wait wait) ++{ ++ struct subprocess_info *info; ++ ++ info = call_usermodehelper_setup(path, argv, envp); ++ if (info == NULL) ++ return -ENOMEM; ++ ++ call_usermodehelper_setkeys(info, session_keyring); ++ return call_usermodehelper_exec(info, wait); + } + + extern void usermodehelper_init(void); +diff -Nurb linux-2.6.22-570/include/linux/kobject.h linux-2.6.22-591/include/linux/kobject.h +--- linux-2.6.22-570/include/linux/kobject.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/kobject.h 2007-12-21 15:36:14.000000000 -0500 +@@ -55,7 +55,7 @@ + struct kobject * parent; + struct kset * kset; + struct kobj_type * ktype; +- struct dentry * dentry; ++ struct sysfs_dirent * sd; + wait_queue_head_t poll; + }; + +@@ -71,13 +71,9 @@ + extern void kobject_cleanup(struct kobject *); + + extern int __must_check kobject_add(struct kobject *); +-extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *); + extern void kobject_del(struct kobject *); + + extern int __must_check kobject_rename(struct kobject *, const char *new_name); +-extern int __must_check kobject_shadow_rename(struct kobject *kobj, +- struct dentry *new_parent, +- const char *new_name); + extern int __must_check kobject_move(struct kobject *, struct kobject *); + + extern int __must_check kobject_register(struct kobject *); +diff -Nurb linux-2.6.22-570/include/linux/ktime.h linux-2.6.22-591/include/linux/ktime.h +--- linux-2.6.22-570/include/linux/ktime.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/ktime.h 2007-12-21 15:36:12.000000000 -0500 +@@ -279,6 +279,16 @@ + return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec; + } + ++static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier) 
++{ ++ return ktime_to_us(ktime_sub(later, earlier)); ++} ++ ++static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec) ++{ ++ return ktime_add_ns(kt, usec * 1000); ++} ++ + /* + * The resolution of the clocks. The resolution value is returned in + * the clock_getres() system call to give application programmers an +diff -Nurb linux-2.6.22-570/include/linux/magic.h linux-2.6.22-591/include/linux/magic.h +--- linux-2.6.22-570/include/linux/magic.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/magic.h 2007-12-21 15:36:12.000000000 -0500 +@@ -36,8 +36,12 @@ + #define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" + #define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" + #define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" ++#define REVOKEFS_MAGIC 0x5245564B /* REVK */ ++ ++#define UNIONFS_SUPER_MAGIC 0xf15f083d + + #define SMB_SUPER_MAGIC 0x517B + #define USBDEVICE_SUPER_MAGIC 0x9fa2 ++#define CONTAINER_SUPER_MAGIC 0x27e0eb + + #endif /* __LINUX_MAGIC_H__ */ +diff -Nurb linux-2.6.22-570/include/linux/mempolicy.h linux-2.6.22-591/include/linux/mempolicy.h +--- linux-2.6.22-570/include/linux/mempolicy.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mempolicy.h 2007-12-21 15:36:12.000000000 -0500 +@@ -148,18 +148,10 @@ + const nodemask_t *new); + extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); + extern void mpol_fix_fork_child_flag(struct task_struct *p); +-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) +- +-#ifdef CONFIG_CPUSETS +-#define current_cpuset_is_being_rebound() \ +- (cpuset_being_rebound == current->cpuset) +-#else +-#define current_cpuset_is_being_rebound() 0 +-#endif + + extern struct mempolicy default_policy; + extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, +- unsigned long addr); ++ unsigned long addr, gfp_t gfp_flags); + extern unsigned slab_node(struct mempolicy *policy); + + extern enum zone_type policy_zone; +@@ -173,8 +165,6 @@ + int do_migrate_pages(struct mm_struct *mm, + const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags); + +-extern void *cpuset_being_rebound; /* Trigger mpol_copy vma rebind */ +- + #else + + struct mempolicy {}; +@@ -253,12 +243,10 @@ + { + } + +-#define set_cpuset_being_rebound(x) do {} while (0) +- + static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, +- unsigned long addr) ++ unsigned long addr, gfp_t gfp_flags) + { +- return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); ++ return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags); + } + + static inline int do_migrate_pages(struct mm_struct *mm, +diff -Nurb linux-2.6.22-570/include/linux/mm.h linux-2.6.22-591/include/linux/mm.h +--- linux-2.6.22-570/include/linux/mm.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mm.h 2007-12-21 15:36:14.000000000 -0500 +@@ -42,6 +42,8 @@ + + #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) + ++#define VM_REVOKED 0x20000000 /* Mapping has been revoked */ ++ + /* + * Linux kernel virtual memory manager primitives. + * The idea being to have a "virtual" mm in the same way +@@ -170,6 +172,13 @@ + #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ + #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ + ++#define VM_CAN_INVALIDATE 0x08000000 /* The mapping may be invalidated, ++ * eg. truncate or invalidate_inode_*. ++ * In this case, do_no_page must ++ * return with the page locked. 
++ */ ++#define VM_CAN_NONLINEAR 0x10000000 /* Has ->fault & does nonlinear pages */ ++ + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ + #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS + #endif +@@ -192,6 +201,25 @@ + */ + extern pgprot_t protection_map[16]; + ++#define FAULT_FLAG_WRITE 0x01 ++#define FAULT_FLAG_NONLINEAR 0x02 ++ ++/* ++ * fault_data is filled in by the pagefault handler and passed to the ++ * vma's ->fault function. That function is responsible for filling in ++ * 'type', which is the type of fault if a page is returned, or the type ++ * of error if NULL is returned. ++ * ++ * pgoff should be used in favour of address, if possible. If pgoff is ++ * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get ++ * nonlinear mapping support. ++ */ ++struct fault_data { ++ unsigned long address; ++ pgoff_t pgoff; ++ unsigned int flags; ++ int type; ++}; + + /* + * These are the virtual MM functions - opening of an area, closing and +@@ -201,9 +229,15 @@ + struct vm_operations_struct { + void (*open)(struct vm_area_struct * area); + void (*close)(struct vm_area_struct * area); +- struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type); +- unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address); +- int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock); ++ struct page *(*fault)(struct vm_area_struct *vma, ++ struct fault_data *fdata); ++ struct page *(*nopage)(struct vm_area_struct *area, ++ unsigned long address, int *type); ++ unsigned long (*nopfn)(struct vm_area_struct *area, ++ unsigned long address); ++ int (*populate)(struct vm_area_struct *area, unsigned long address, ++ unsigned long len, pgprot_t prot, unsigned long pgoff, ++ int nonblock); + + /* notification that a previously read-only page is about to become + * writable, if an error is returned it will cause a SIGBUS */ +@@ -656,7 +690,6 @@ + */ + #define NOPAGE_SIGBUS (NULL) + #define NOPAGE_OOM ((struct page *) (-1)) +-#define NOPAGE_REFAULT ((struct page *) (-2)) /* Return to userspace, rerun */ + + /* + * Error return values for the *_nopfn functions +@@ -744,6 +777,16 @@ + struct vm_area_struct *start_vma, unsigned long start_addr, + unsigned long end_addr, unsigned long *nr_accounted, + struct zap_details *); ++ ++struct mm_walk { ++ int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *); ++ int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *); ++ int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *); ++ int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *); ++}; ++ ++int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end, ++ struct mm_walk *walk, void *private); + void free_pgd_range(struct mmu_gather **tlb, unsigned long addr, + unsigned long end, unsigned long floor, unsigned long ceiling); + void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma, +@@ -1058,6 +1101,7 @@ + extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); + extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, + struct rb_node **, struct rb_node *); ++extern void __unlink_file_vma(struct vm_area_struct *); + extern void unlink_file_vma(struct vm_area_struct *); + extern struct vm_area_struct *copy_vma(struct vm_area_struct **, + unsigned long addr, unsigned long len, pgoff_t pgoff); +@@ -1097,9 +1141,11 @@ + loff_t lstart, loff_t lend); + + /* generic
vm_area_ops exported for stackable file systems */ +-extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *); +-extern int filemap_populate(struct vm_area_struct *, unsigned long, +- unsigned long, pgprot_t, unsigned long, int); ++extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *); ++extern struct page * __deprecated_for_modules ++filemap_nopage(struct vm_area_struct *, unsigned long, int *); ++extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *, ++ unsigned long, unsigned long, pgprot_t, unsigned long, int); + + /* mm/page-writeback.c */ + int write_one_page(struct page *page, int wait); +@@ -1199,6 +1245,7 @@ + void __user *, size_t *, loff_t *); + unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, + unsigned long lru_pages); ++extern void drop_pagecache_sb(struct super_block *); + void drop_pagecache(void); + void drop_slab(void); + +diff -Nurb linux-2.6.22-570/include/linux/mmc/card.h linux-2.6.22-591/include/linux/mmc/card.h +--- linux-2.6.22-570/include/linux/mmc/card.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mmc/card.h 2007-12-21 15:36:12.000000000 -0500 +@@ -72,6 +72,7 @@ + #define MMC_STATE_READONLY (1<<1) /* card is read-only */ + #define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */ + #define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */ ++#define MMC_STATE_LOCKED (1<<4) /* card is currently locked */ + u32 raw_cid[4]; /* raw card CID */ + u32 raw_csd[4]; /* raw card CSD */ + u32 raw_scr[2]; /* raw card SCR */ +@@ -89,11 +90,16 @@ + #define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) + #define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED) + #define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR) ++#define mmc_card_locked(c) ((c)->state & MMC_STATE_LOCKED) ++ ++#define mmc_card_lockable(c) (((c)->csd.cmdclass & CCC_LOCK_CARD) && \ ++ ((c)->host->caps & MMC_CAP_BYTEBLOCK)) + + #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) + #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) + #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED) + #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR) ++#define mmc_card_set_locked(c) ((c)->state |= MMC_STATE_LOCKED) + + #define mmc_card_name(c) ((c)->cid.prod_name) + #define mmc_card_id(c) ((c)->dev.bus_id) +diff -Nurb linux-2.6.22-570/include/linux/mmc/mmc.h linux-2.6.22-591/include/linux/mmc/mmc.h +--- linux-2.6.22-570/include/linux/mmc/mmc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mmc/mmc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -253,5 +253,13 @@ + #define MMC_SWITCH_MODE_CLEAR_BITS 0x02 /* Clear bits which are 1 in value */ + #define MMC_SWITCH_MODE_WRITE_BYTE 0x03 /* Set target to value */ + ++/* ++ * MMC_LOCK_UNLOCK modes ++ */ ++#define MMC_LOCK_MODE_ERASE (1<<3) ++#define MMC_LOCK_MODE_UNLOCK (1<<2) ++#define MMC_LOCK_MODE_CLR_PWD (1<<1) ++#define MMC_LOCK_MODE_SET_PWD (1<<0) ++ + #endif /* MMC_MMC_PROTOCOL_H */ + +diff -Nurb linux-2.6.22-570/include/linux/mmzone.h linux-2.6.22-591/include/linux/mmzone.h +--- linux-2.6.22-570/include/linux/mmzone.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mmzone.h 2007-12-21 15:36:12.000000000 -0500 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -24,8 +25,24 @@ + #endif + #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) + ++#ifdef 
CONFIG_PAGE_GROUP_BY_MOBILITY ++#define MIGRATE_UNMOVABLE 0 ++#define MIGRATE_RECLAIMABLE 1 ++#define MIGRATE_MOVABLE 2 ++#define MIGRATE_TYPES 3 ++#else ++#define MIGRATE_UNMOVABLE 0 ++#define MIGRATE_UNRECLAIMABLE 0 ++#define MIGRATE_MOVABLE 0 ++#define MIGRATE_TYPES 1 ++#endif ++ ++#define for_each_migratetype_order(order, type) \ ++ for (order = 0; order < MAX_ORDER; order++) \ ++ for (type = 0; type < MIGRATE_TYPES; type++) ++ + struct free_area { +- struct list_head free_list; ++ struct list_head free_list[MIGRATE_TYPES]; + unsigned long nr_free; + }; + +@@ -213,6 +230,14 @@ + #endif + struct free_area free_area[MAX_ORDER]; + ++#ifndef CONFIG_SPARSEMEM ++ /* ++ * Flags for a MAX_ORDER_NR_PAGES block. See pageblock-flags.h. ++ * In SPARSEMEM, this map is stored in struct mem_section ++ */ ++ unsigned long *pageblock_flags; ++#endif /* CONFIG_SPARSEMEM */ ++ + + ZONE_PADDING(_pad1_) + +@@ -468,6 +493,7 @@ + void get_zone_counts(unsigned long *active, unsigned long *inactive, + unsigned long *free); + void build_all_zonelists(void); ++void raise_kswapd_order(unsigned int order); + void wakeup_kswapd(struct zone *zone, int order); + int zone_watermark_ok(struct zone *z, int order, unsigned long mark, + int classzone_idx, int alloc_flags); +@@ -662,6 +688,9 @@ + #define PAGES_PER_SECTION (1UL << PFN_SECTION_SHIFT) + #define PAGE_SECTION_MASK (~(PAGES_PER_SECTION-1)) + ++#define SECTION_BLOCKFLAGS_BITS \ ++ ((SECTION_SIZE_BITS - (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS) ++ + #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS + #error Allocator MAX_ORDER exceeds SECTION_SIZE + #endif +@@ -681,6 +710,7 @@ + * before using it wrong. + */ + unsigned long section_mem_map; ++ DECLARE_BITMAP(pageblock_flags, SECTION_BLOCKFLAGS_BITS); + }; + + #ifdef CONFIG_SPARSEMEM_EXTREME +diff -Nurb linux-2.6.22-570/include/linux/mnt_namespace.h linux-2.6.22-591/include/linux/mnt_namespace.h +--- linux-2.6.22-570/include/linux/mnt_namespace.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/mnt_namespace.h 2007-12-21 15:36:12.000000000 -0500 +@@ -14,7 +14,7 @@ + int event; + }; + +-extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *, ++extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, + struct fs_struct *); + extern void __put_mnt_ns(struct mnt_namespace *ns); + +diff -Nurb linux-2.6.22-570/include/linux/module.h linux-2.6.22-591/include/linux/module.h +--- linux-2.6.22-570/include/linux/module.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/module.h 2007-12-21 15:36:12.000000000 -0500 +@@ -227,8 +227,17 @@ + MODULE_STATE_LIVE, + MODULE_STATE_COMING, + MODULE_STATE_GOING, ++ MODULE_STATE_GONE, + }; + ++#ifdef CONFIG_KGDB ++#define MAX_SECTNAME 31 ++struct mod_section { ++ void *address; ++ char name[MAX_SECTNAME + 1]; ++}; ++#endif ++ + /* Similar stuff for section attributes. */ + struct module_sect_attr + { +@@ -256,6 +265,13 @@ + /* Unique handle for this module */ + char name[MODULE_NAME_LEN]; + ++#ifdef CONFIG_KGDB ++ /* keep kgdb info at the beginning so that gdb doesn't have a chance to ++ * miss any fields */ ++ unsigned long num_sections; ++ struct mod_section *mod_sections; ++#endif ++ + /* Sysfs stuff.
*/ + struct module_kobject mkobj; + struct module_param_attrs *param_attrs; +diff -Nurb linux-2.6.22-570/include/linux/namei.h linux-2.6.22-591/include/linux/namei.h +--- linux-2.6.22-570/include/linux/namei.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/namei.h 2007-12-21 15:36:12.000000000 -0500 +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + struct vfsmount; + +@@ -81,9 +82,16 @@ + extern struct file *nameidata_to_filp(struct nameidata *nd, int flags); + extern void release_open_intent(struct nameidata *); + +-extern struct dentry * lookup_one_len(const char *, struct dentry *, int); ++extern struct dentry * lookup_one_len_nd(const char *, struct dentry *, ++ int, struct nameidata *); + extern struct dentry *lookup_one_len_kern(const char *, struct dentry *, int); + ++static inline struct dentry *lookup_one_len(const char *name, ++ struct dentry *dir, int len) ++{ ++ return lookup_one_len_nd(name, dir, len, NULL); ++} ++ + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); + +@@ -100,4 +108,16 @@ + return nd->saved_names[nd->depth]; + } + ++static inline void pathget(struct path *path) ++{ ++ mntget(path->mnt); ++ dget(path->dentry); ++} ++ ++static inline void pathput(struct path *path) ++{ ++ dput(path->dentry); ++ mntput(path->mnt); ++} ++ + #endif /* _LINUX_NAMEI_H */ +diff -Nurb linux-2.6.22-570/include/linux/net.h linux-2.6.22-591/include/linux/net.h +--- linux-2.6.22-570/include/linux/net.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/net.h 2007-12-21 15:36:14.000000000 -0500 +@@ -23,6 +23,7 @@ + + struct poll_table_struct; + struct inode; ++struct net; + + #define NPROTO 34 /* should be enough for now.. */ + +@@ -170,7 +171,7 @@ + + struct net_proto_family { + int family; +- int (*create)(struct socket *sock, int protocol); ++ int (*create)(struct net *net, struct socket *sock, int protocol); + struct module *owner; + }; + +diff -Nurb linux-2.6.22-570/include/linux/netdevice.h linux-2.6.22-591/include/linux/netdevice.h +--- linux-2.6.22-570/include/linux/netdevice.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/linux/netdevice.h 2007-12-21 15:36:14.000000000 -0500 +@@ -39,6 +39,7 @@ + #include + #include + ++struct net; + struct vlan_group; + struct ethtool_ops; + struct netpoll_info; +@@ -314,9 +315,10 @@ + /* Net device features */ + unsigned long features; + #define NETIF_F_SG 1 /* Scatter/gather IO. */ +-#define NETIF_F_IP_CSUM 2 /* Can checksum only TCP/UDP over IPv4. */ ++#define NETIF_F_IP_CSUM 2 /* Can checksum TCP/UDP over IPv4. */ + #define NETIF_F_NO_CSUM 4 /* Does not require checksum. F.e. loopack. */ + #define NETIF_F_HW_CSUM 8 /* Can checksum all the packets. */ ++#define NETIF_F_IPV6_CSUM 16 /* Can checksum TCP/UDP over IPV6 */ + #define NETIF_F_HIGHDMA 32 /* Can DMA to high memory. */ + #define NETIF_F_FRAGLIST 64 /* Scatter/gather IO. */ + #define NETIF_F_HW_VLAN_TX 128 /* Transmit VLAN hw acceleration */ +@@ -325,6 +327,7 @@ + #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ + #define NETIF_F_GSO 2048 /* Enable software GSO. */ + #define NETIF_F_LLTX 4096 /* LockLess TX */ ++#define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ + + /* Segmentation offload features */ + #define NETIF_F_GSO_SHIFT 16 +@@ -338,8 +341,11 @@ + /* List of features with software fallbacks. 
*/ + #define NETIF_F_GSO_SOFTWARE (NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6) + ++ + #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) +-#define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) ++#define NETIF_F_V4_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM) ++#define NETIF_F_V6_CSUM (NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM) ++#define NETIF_F_ALL_CSUM (NETIF_F_V4_CSUM | NETIF_F_V6_CSUM) + + struct net_device *next_sched; + +@@ -533,6 +539,9 @@ + void (*poll_controller)(struct net_device *dev); + #endif + ++ /* Network namespace this network device is inside */ ++ struct net *nd_net; ++ + /* bridge stuff */ + struct net_bridge_port *br_port; + +@@ -540,13 +549,16 @@ + struct device dev; + /* space for optional statistics and wireless sysfs groups */ + struct attribute_group *sysfs_groups[3]; ++ ++ /* rtnetlink link ops */ ++ const struct rtnl_link_ops *rtnl_link_ops; + }; + #define to_net_dev(d) container_of(d, struct net_device, dev) + + #define NETDEV_ALIGN 32 + #define NETDEV_ALIGN_CONST (NETDEV_ALIGN - 1) + +-static inline void *netdev_priv(struct net_device *dev) ++static inline void *netdev_priv(const struct net_device *dev) + { + return (char *)dev + ((sizeof(struct net_device) + + NETDEV_ALIGN_CONST) +@@ -576,45 +588,48 @@ + #include + #include + +-extern struct net_device loopback_dev; /* The loopback */ +-extern struct list_head dev_base_head; /* All devices */ + extern rwlock_t dev_base_lock; /* Device list lock */ + +-#define for_each_netdev(d) \ +- list_for_each_entry(d, &dev_base_head, dev_list) +-#define for_each_netdev_safe(d, n) \ +- list_for_each_entry_safe(d, n, &dev_base_head, dev_list) +-#define for_each_netdev_continue(d) \ +- list_for_each_entry_continue(d, &dev_base_head, dev_list) +-#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) +- +-static inline struct net_device *next_net_device(struct net_device *dev) +-{ +- struct list_head *lh; + +- lh = dev->dev_list.next; +- return lh == &dev_base_head ? NULL : net_device_entry(lh); +-} ++#define for_each_netdev(net, d) \ ++ list_for_each_entry(d, &(net)->dev_base_head, dev_list) ++#define for_each_netdev_safe(net, d, n) \ ++ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) ++#define for_each_netdev_continue(net, d) \ ++ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) ++#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) + +-static inline struct net_device *first_net_device(void) +-{ +- return list_empty(&dev_base_head) ? NULL : +- net_device_entry(dev_base_head.next); +-} ++#define next_net_device(d) \ ++({ \ ++ struct net_device *dev = d; \ ++ struct list_head *lh; \ ++ struct net *net; \ ++ \ ++ net = dev->nd_net; \ ++ lh = dev->dev_list.next; \ ++ lh == &net->dev_base_head ? NULL : net_device_entry(lh); \ ++}) ++ ++#define first_net_device(N) \ ++({ \ ++ struct net *NET = (N); \ ++ list_empty(&NET->dev_base_head) ? 
NULL : \ ++ net_device_entry(NET->dev_base_head.next); \ ++}) + + extern int netdev_boot_setup_check(struct net_device *dev); + extern unsigned long netdev_boot_base(const char *prefix, int unit); +-extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); +-extern struct net_device *dev_getfirstbyhwtype(unsigned short type); +-extern struct net_device *__dev_getfirstbyhwtype(unsigned short type); ++extern struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr); ++extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); ++extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type); + extern void dev_add_pack(struct packet_type *pt); + extern void dev_remove_pack(struct packet_type *pt); + extern void __dev_remove_pack(struct packet_type *pt); + +-extern struct net_device *dev_get_by_flags(unsigned short flags, ++extern struct net_device *dev_get_by_flags(struct net *net, unsigned short flags, + unsigned short mask); +-extern struct net_device *dev_get_by_name(const char *name); +-extern struct net_device *__dev_get_by_name(const char *name); ++extern struct net_device *dev_get_by_name(struct net *net, const char *name); ++extern struct net_device *__dev_get_by_name(struct net *net, const char *name); + extern int dev_alloc_name(struct net_device *dev, const char *name); + extern int dev_open(struct net_device *dev); + extern int dev_close(struct net_device *dev); +@@ -625,9 +640,9 @@ + extern void synchronize_net(void); + extern int register_netdevice_notifier(struct notifier_block *nb); + extern int unregister_netdevice_notifier(struct notifier_block *nb); +-extern int call_netdevice_notifiers(unsigned long val, void *v); +-extern struct net_device *dev_get_by_index(int ifindex); +-extern struct net_device *__dev_get_by_index(int ifindex); ++extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); ++extern struct net_device *dev_get_by_index(struct net *net, int ifindex); ++extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); + extern int dev_restart(struct net_device *dev); + #ifdef CONFIG_NETPOLL_TRAP + extern int netpoll_trap(void); +@@ -732,11 +747,13 @@ + #define HAVE_NETIF_RECEIVE_SKB 1 + extern int netif_receive_skb(struct sk_buff *skb); + extern int dev_valid_name(const char *name); +-extern int dev_ioctl(unsigned int cmd, void __user *); +-extern int dev_ethtool(struct ifreq *); ++extern int dev_ioctl(struct net *net, unsigned int cmd, void __user *); ++extern int dev_ethtool(struct net *net, struct ifreq *); + extern unsigned dev_get_flags(const struct net_device *); + extern int dev_change_flags(struct net_device *, unsigned); + extern int dev_change_name(struct net_device *, char *); ++extern int dev_change_net_namespace(struct net_device *, ++ struct net *, const char *); + extern int dev_set_mtu(struct net_device *, int); + extern int dev_set_mac_address(struct net_device *, + struct sockaddr *); +@@ -1006,7 +1023,7 @@ + extern void netdev_state_change(struct net_device *dev); + extern void netdev_features_change(struct net_device *dev); + /* Load a device via the kmod */ +-extern void dev_load(const char *name); ++extern void dev_load(struct net *net, const char *name); + extern void dev_mcast_init(void); + extern int netdev_max_backlog; + extern int weight_p; +diff -Nurb linux-2.6.22-570/include/linux/netfilter/x_tables.h linux-2.6.22-591/include/linux/netfilter/x_tables.h +--- 
linux-2.6.22-570/include/linux/netfilter/x_tables.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/netfilter/x_tables.h 2007-12-21 15:36:14.000000000 -0500 +@@ -289,7 +289,7 @@ + unsigned int size, const char *table, unsigned int hook, + unsigned short proto, int inv_proto); + +-extern int xt_register_table(struct xt_table *table, ++extern int xt_register_table(struct net *net, struct xt_table *table, + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo); + extern void *xt_unregister_table(struct xt_table *table); +@@ -306,7 +306,7 @@ + extern int xt_find_revision(int af, const char *name, u8 revision, int target, + int *err); + +-extern struct xt_table *xt_find_table_lock(int af, const char *name); ++extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name); + extern void xt_table_unlock(struct xt_table *t); + + extern int xt_proto_init(int af); +diff -Nurb linux-2.6.22-570/include/linux/netfilter.h linux-2.6.22-591/include/linux/netfilter.h +--- linux-2.6.22-570/include/linux/netfilter.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/netfilter.h 2007-12-21 15:36:14.000000000 -0500 +@@ -362,11 +362,6 @@ + #endif + } + +-#ifdef CONFIG_PROC_FS +-#include +-extern struct proc_dir_entry *proc_net_netfilter; +-#endif +- + #else /* !CONFIG_NETFILTER */ + #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) + #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) +diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h +--- linux-2.6.22-570/include/linux/netfilter_ipv4/ip_tables.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/netfilter_ipv4/ip_tables.h 2007-12-21 15:36:14.000000000 -0500 +@@ -292,7 +292,7 @@ + #include + extern void ipt_init(void) __init; + +-extern int ipt_register_table(struct xt_table *table, ++extern int ipt_register_table(struct net *net, struct xt_table *table, + const struct ipt_replace *repl); + extern void ipt_unregister_table(struct xt_table *table); + +diff -Nurb linux-2.6.22-570/include/linux/netfilter_ipv4.h linux-2.6.22-591/include/linux/netfilter_ipv4.h +--- linux-2.6.22-570/include/linux/netfilter_ipv4.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/netfilter_ipv4.h 2007-12-21 15:36:14.000000000 -0500 +@@ -75,7 +75,7 @@ + #define SO_ORIGINAL_DST 80 + + #ifdef __KERNEL__ +-extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); ++extern int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type); + extern int ip_xfrm_me_harder(struct sk_buff **pskb); + extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); +diff -Nurb linux-2.6.22-570/include/linux/netlink.h linux-2.6.22-591/include/linux/netlink.h +--- linux-2.6.22-570/include/linux/netlink.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/linux/netlink.h 2007-12-21 15:36:14.000000000 -0500 +@@ -21,12 +21,14 @@ + #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ + #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ + #define NETLINK_GENERIC 16 +-/* leave room for NETLINK_DM (DM Events) */ ++#define NETLINK_DM 17 /* Device Mapper */ + #define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */ + #define NETLINK_ECRYPTFS 19 + + #define MAX_LINKS 32 + ++struct net; ++ + struct sockaddr_nl + { + sa_family_t nl_family; /* AF_NETLINK */ +@@ 
-157,7 +159,8 @@ + #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) + + +-extern struct sock *netlink_kernel_create(int unit, unsigned int groups, ++extern struct sock *netlink_kernel_create(struct net *net, ++ int unit,unsigned int groups, + void (*input)(struct sock *sk, int len), + struct mutex *cb_mutex, + struct module *module); +@@ -204,6 +207,7 @@ + + struct netlink_notify + { ++ struct net *net; + int pid; + int protocol; + }; +diff -Nurb linux-2.6.22-570/include/linux/netpoll.h linux-2.6.22-591/include/linux/netpoll.h +--- linux-2.6.22-570/include/linux/netpoll.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/netpoll.h 2007-12-21 15:36:12.000000000 -0500 +@@ -16,7 +16,7 @@ + struct net_device *dev; + char dev_name[IFNAMSIZ]; + const char *name; +- void (*rx_hook)(struct netpoll *, int, char *, int); ++ void (*rx_hook)(struct netpoll *, int, char *, int, struct sk_buff *); + + u32 local_ip, remote_ip; + u16 local_port, remote_port; +diff -Nurb linux-2.6.22-570/include/linux/nfs4.h linux-2.6.22-591/include/linux/nfs4.h +--- linux-2.6.22-570/include/linux/nfs4.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs4.h 2007-12-21 15:36:12.000000000 -0500 +@@ -15,6 +15,7 @@ + + #include + ++#define NFS4_BITMAP_SIZE 2 + #define NFS4_VERIFIER_SIZE 8 + #define NFS4_STATEID_SIZE 16 + #define NFS4_FHSIZE 128 +diff -Nurb linux-2.6.22-570/include/linux/nfs4_mount.h linux-2.6.22-591/include/linux/nfs4_mount.h +--- linux-2.6.22-570/include/linux/nfs4_mount.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs4_mount.h 2007-12-21 15:36:12.000000000 -0500 +@@ -65,6 +65,7 @@ + #define NFS4_MOUNT_NOCTO 0x0010 /* 1 */ + #define NFS4_MOUNT_NOAC 0x0020 /* 1 */ + #define NFS4_MOUNT_STRICTLOCK 0x1000 /* 1 */ ++#define NFS4_MOUNT_UNSHARED 0x8000 /* 1 */ + #define NFS4_MOUNT_FLAGMASK 0xFFFF + + #endif +diff -Nurb linux-2.6.22-570/include/linux/nfs_fs.h linux-2.6.22-591/include/linux/nfs_fs.h +--- linux-2.6.22-570/include/linux/nfs_fs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs_fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -30,7 +30,9 @@ + #ifdef __KERNEL__ + + #include ++#include + #include ++#include + #include + #include + #include +@@ -69,9 +71,8 @@ + + struct nfs4_state; + struct nfs_open_context { +- atomic_t count; +- struct vfsmount *vfsmnt; +- struct dentry *dentry; ++ struct kref kref; ++ struct path path; + struct rpc_cred *cred; + struct nfs4_state *state; + fl_owner_t lockowner; +@@ -156,12 +157,9 @@ + * This is the list of dirty unwritten pages. 
+ */ + spinlock_t req_lock; +- struct list_head dirty; +- struct list_head commit; + struct radix_tree_root nfs_page_tree; + +- unsigned int ndirty, +- ncommit, ++ unsigned long ncommit, + npages; + + /* Open contexts for shared mmap writes */ +diff -Nurb linux-2.6.22-570/include/linux/nfs_fs_sb.h linux-2.6.22-591/include/linux/nfs_fs_sb.h +--- linux-2.6.22-570/include/linux/nfs_fs_sb.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs_fs_sb.h 2007-12-21 15:36:12.000000000 -0500 +@@ -16,7 +16,6 @@ + #define NFS_CS_INITING 1 /* busy initialising */ + int cl_nfsversion; /* NFS protocol version */ + unsigned long cl_res_state; /* NFS resources state */ +-#define NFS_CS_RPCIOD 0 /* - rpciod started */ + #define NFS_CS_CALLBACK 1 /* - callback started */ + #define NFS_CS_IDMAP 2 /* - idmap started */ + #define NFS_CS_RENEWD 3 /* - renewd started */ +diff -Nurb linux-2.6.22-570/include/linux/nfs_mount.h linux-2.6.22-591/include/linux/nfs_mount.h +--- linux-2.6.22-570/include/linux/nfs_mount.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/nfs_mount.h 2007-12-21 15:36:12.000000000 -0500 +@@ -62,6 +62,7 @@ + #define NFS_MOUNT_STRICTLOCK 0x1000 /* reserved for NFSv4 */ + #define NFS_MOUNT_SECFLAVOUR 0x2000 /* 5 */ + #define NFS_MOUNT_NORDIRPLUS 0x4000 /* 5 */ ++#define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ + #define NFS_MOUNT_TAGGED 0x8000 /* context tagging */ + #define NFS_MOUNT_FLAGMASK 0xFFFF + +diff -Nurb linux-2.6.22-570/include/linux/nfs_page.h linux-2.6.22-591/include/linux/nfs_page.h +--- linux-2.6.22-570/include/linux/nfs_page.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs_page.h 2007-12-21 15:36:12.000000000 -0500 +@@ -16,12 +16,13 @@ + #include + #include + +-#include ++#include + + /* + * Valid flags for the radix tree + */ +-#define NFS_PAGE_TAG_WRITEBACK 0 ++#define NFS_PAGE_TAG_LOCKED 0 ++#define NFS_PAGE_TAG_COMMIT 1 + + /* + * Valid flags for a dirty buffer +@@ -33,8 +34,7 @@ + + struct nfs_inode; + struct nfs_page { +- struct list_head wb_list, /* Defines state of page: */ +- *wb_list_head; /* read/write/commit */ ++ struct list_head wb_list; /* Defines state of page: */ + struct page *wb_page; /* page to read in/write out */ + struct nfs_open_context *wb_context; /* File state context info */ + atomic_t wb_complete; /* i/os we're waiting for */ +@@ -42,7 +42,7 @@ + unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ + wb_pgbase, /* Start of page data */ + wb_bytes; /* Length of request */ +- atomic_t wb_count; /* reference count */ ++ struct kref wb_kref; /* reference count */ + unsigned long wb_flags; + struct nfs_writeverf wb_verf; /* Commit cookie */ + }; +@@ -71,8 +71,8 @@ + extern void nfs_release_request(struct nfs_page *req); + + +-extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst, +- pgoff_t idx_start, unsigned int npages); ++extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, ++ pgoff_t idx_start, unsigned int npages, int tag); + extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, + struct inode *inode, + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), +@@ -84,12 +84,11 @@ + extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); + extern int nfs_wait_on_request(struct nfs_page *); + extern void nfs_unlock_request(struct nfs_page *req); +-extern int nfs_set_page_writeback_locked(struct nfs_page *req); +-extern void nfs_clear_page_writeback(struct nfs_page *req); 
++extern void nfs_clear_page_tag_locked(struct nfs_page *req); + + + /* +- * Lock the page of an asynchronous request without incrementing the wb_count ++ * Lock the page of an asynchronous request without getting a new reference + */ + static inline int + nfs_lock_request_dontget(struct nfs_page *req) +@@ -98,14 +97,14 @@ + } + + /* +- * Lock the page of an asynchronous request ++ * Lock the page of an asynchronous request and take a reference + */ + static inline int + nfs_lock_request(struct nfs_page *req) + { + if (test_and_set_bit(PG_BUSY, &req->wb_flags)) + return 0; +- atomic_inc(&req->wb_count); ++ kref_get(&req->wb_kref); + return 1; + } + +@@ -118,7 +117,6 @@ + nfs_list_add_request(struct nfs_page *req, struct list_head *head) + { + list_add_tail(&req->wb_list, head); +- req->wb_list_head = head; + } + + +@@ -132,7 +130,6 @@ + if (list_empty(&req->wb_list)) + return; + list_del_init(&req->wb_list); +- req->wb_list_head = NULL; + } + + static inline struct nfs_page * +diff -Nurb linux-2.6.22-570/include/linux/nfs_xdr.h linux-2.6.22-591/include/linux/nfs_xdr.h +--- linux-2.6.22-570/include/linux/nfs_xdr.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/nfs_xdr.h 2007-12-21 15:36:12.000000000 -0500 +@@ -144,6 +144,7 @@ + nfs4_stateid delegation; + __u32 do_recall; + __u64 maxsize; ++ __u32 attrset[NFS4_BITMAP_SIZE]; + }; + + /* +diff -Nurb linux-2.6.22-570/include/linux/nsproxy.h linux-2.6.22-591/include/linux/nsproxy.h +--- linux-2.6.22-570/include/linux/nsproxy.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/nsproxy.h 2007-12-21 15:36:14.000000000 -0500 +@@ -10,6 +10,12 @@ + struct ipc_namespace; + struct pid_namespace; + ++#ifdef CONFIG_CONTAINER_NS ++int ns_container_clone(struct task_struct *tsk); ++#else ++static inline int ns_container_clone(struct task_struct *tsk) { return 0; } ++#endif ++ + /* + * A structure to contain pointers to all per-process + * namespaces - fs (mount), uts, network, sysvipc, etc. +@@ -29,10 +35,12 @@ + struct ipc_namespace *ipc_ns; + struct mnt_namespace *mnt_ns; + struct pid_namespace *pid_ns; ++ struct user_namespace *user_ns; ++ struct net *net_ns; + }; + extern struct nsproxy init_nsproxy; + +-int copy_namespaces(int flags, struct task_struct *tsk); ++int copy_namespaces(unsigned long flags, struct task_struct *tsk); + struct nsproxy *copy_nsproxy(struct nsproxy *orig); + void get_task_namespaces(struct task_struct *tsk); + void free_nsproxy(struct nsproxy *ns); +diff -Nurb linux-2.6.22-570/include/linux/pageblock-flags.h linux-2.6.22-591/include/linux/pageblock-flags.h +--- linux-2.6.22-570/include/linux/pageblock-flags.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/pageblock-flags.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,52 @@ ++/* ++ * Macros for manipulating and testing flags related to a ++ * MAX_ORDER_NR_PAGES block of pages. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation version 2 of the License ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Copyright (C) IBM Corporation, 2006 ++ * ++ * Original author, Mel Gorman ++ * Major cleanups and reduction of bit operations, Andy Whitcroft ++ */ ++#ifndef PAGEBLOCK_FLAGS_H ++#define PAGEBLOCK_FLAGS_H ++ ++#include ++ ++/* Macro to aid the definition of ranges of bits */ ++#define PB_range(name, required_bits) \ ++ name, name ## _end = (name + required_bits) - 1 ++ ++/* Bit indices that affect a whole block of pages */ ++enum pageblock_bits { ++ PB_range(PB_migrate, 2), /* 2 bits required for migrate types */ ++ NR_PAGEBLOCK_BITS ++}; ++ ++/* Forward declaration */ ++struct page; ++ ++/* Declarations for getting and setting flags. See mm/page_alloc.c */ ++unsigned long get_pageblock_flags_group(struct page *page, ++ int start_bitidx, int end_bitidx); ++void set_pageblock_flags_group(struct page *page, unsigned long flags, ++ int start_bitidx, int end_bitidx); ++ ++#define get_pageblock_flags(page) \ ++ get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) ++#define set_pageblock_flags(page) \ ++ set_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1) ++ ++#endif /* PAGEBLOCK_FLAGS_H */ +diff -Nurb linux-2.6.22-570/include/linux/pci_ids.h linux-2.6.22-591/include/linux/pci_ids.h +--- linux-2.6.22-570/include/linux/pci_ids.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/linux/pci_ids.h 2007-12-21 15:36:12.000000000 -0500 +@@ -2003,6 +2003,7 @@ + + #define PCI_VENDOR_ID_ENE 0x1524 + #define PCI_DEVICE_ID_ENE_CB712_SD 0x0550 ++#define PCI_DEVICE_ID_ENE_CB712_SD_2 0x0551 + #define PCI_DEVICE_ID_ENE_1211 0x1211 + #define PCI_DEVICE_ID_ENE_1225 0x1225 + #define PCI_DEVICE_ID_ENE_1410 0x1410 +diff -Nurb linux-2.6.22-570/include/linux/pid_namespace.h linux-2.6.22-591/include/linux/pid_namespace.h +--- linux-2.6.22-570/include/linux/pid_namespace.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/pid_namespace.h 2007-12-21 15:36:12.000000000 -0500 +@@ -29,7 +29,7 @@ + kref_get(&ns->kref); + } + +-extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns); ++extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns); + extern void free_pid_ns(struct kref *kref); + + static inline void put_pid_ns(struct pid_namespace *ns) +diff -Nurb linux-2.6.22-570/include/linux/pnp.h linux-2.6.22-591/include/linux/pnp.h +--- linux-2.6.22-570/include/linux/pnp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/pnp.h 2007-12-21 15:36:12.000000000 -0500 +@@ -335,6 +335,10 @@ + int (*set)(struct pnp_dev *dev, struct pnp_resource_table *res); + int (*disable)(struct pnp_dev *dev); + ++ /* protocol specific suspend/resume */ ++ int (*suspend)(struct pnp_dev *dev, pm_message_t state); ++ int (*resume)(struct pnp_dev *dev); ++ + /* used by pnp layer only (look but don't touch) */ + unsigned char number; /* protocol number*/ + struct device dev; /* link to driver model */ +diff -Nurb linux-2.6.22-570/include/linux/prctl.h linux-2.6.22-591/include/linux/prctl.h +--- linux-2.6.22-570/include/linux/prctl.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/prctl.h 2007-12-21 15:36:12.000000000 -0500 +@@ -59,4 +59,8 @@ + # define PR_ENDIAN_LITTLE 1 /* True little endian mode */ + # define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */ + ++/* Get/set 
process seccomp mode */ ++#define PR_GET_SECCOMP 21 ++#define PR_SET_SECCOMP 22 ++ + #endif /* _LINUX_PRCTL_H */ +diff -Nurb linux-2.6.22-570/include/linux/proc_fs.h linux-2.6.22-591/include/linux/proc_fs.h +--- linux-2.6.22-570/include/linux/proc_fs.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/proc_fs.h 2007-12-21 15:36:14.000000000 -0500 +@@ -86,8 +86,6 @@ + + extern struct proc_dir_entry proc_root; + extern struct proc_dir_entry *proc_root_fs; +-extern struct proc_dir_entry *proc_net; +-extern struct proc_dir_entry *proc_net_stat; + extern struct proc_dir_entry *proc_bus; + extern struct proc_dir_entry *proc_root_driver; + extern struct proc_dir_entry *proc_root_kcore; +@@ -105,7 +103,6 @@ + unsigned long task_vsize(struct mm_struct *); + int task_statm(struct mm_struct *, int *, int *, int *, int *); + char *task_mem(struct mm_struct *, char *); +-void clear_refs_smap(struct mm_struct *mm); + + struct proc_dir_entry *de_get(struct proc_dir_entry *de); + void de_put(struct proc_dir_entry *de); +@@ -113,6 +110,10 @@ + extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode, + struct proc_dir_entry *parent); + extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent); ++static inline void remove_proc_pde(struct proc_dir_entry *pde) ++{ ++ return remove_proc_entry(pde->name, pde->parent); ++} + + extern struct vfsmount *proc_mnt; + extern int proc_fill_super(struct super_block *,void *,int); +@@ -182,42 +183,18 @@ + return res; + } + +-static inline struct proc_dir_entry *proc_net_create(const char *name, +- mode_t mode, get_info_t *get_info) +-{ +- return create_proc_info_entry(name,mode,proc_net,get_info); +-} +- +-static inline struct proc_dir_entry *proc_net_fops_create(const char *name, +- mode_t mode, const struct file_operations *fops) +-{ +- struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net); +- if (res) +- res->proc_fops = fops; +- return res; +-} +- +-static inline void proc_net_remove(const char *name) +-{ +- remove_proc_entry(name,proc_net); +-} +- + #else + + #define proc_root_driver NULL +-#define proc_net NULL + #define proc_bus NULL + +-#define proc_net_fops_create(name, mode, fops) ({ (void)(mode), NULL; }) +-#define proc_net_create(name, mode, info) ({ (void)(mode), NULL; }) +-static inline void proc_net_remove(const char *name) {} +- + static inline void proc_flush_task(struct task_struct *task) { } + + static inline struct proc_dir_entry *create_proc_entry(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } + + #define remove_proc_entry(name, parent) do {} while (0) ++#define remove_proc_pde(PDE) do {} while (0) + + static inline struct proc_dir_entry *proc_symlink(const char *name, + struct proc_dir_entry *parent,const char *dest) {return NULL;} +diff -Nurb linux-2.6.22-570/include/linux/raid/raid5.h linux-2.6.22-591/include/linux/raid/raid5.h +--- linux-2.6.22-570/include/linux/raid/raid5.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/raid/raid5.h 2007-12-21 15:36:12.000000000 -0500 +@@ -116,13 +116,46 @@ + * attach a request to an active stripe (add_stripe_bh()) + * lockdev attach-buffer unlockdev + * handle a stripe (handle_stripe()) +- * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io ++ * lockstripe clrSTRIPE_HANDLE ... ++ * (lockdev check-buffers unlockdev) .. ++ * change-state .. 
++ * record io/ops needed unlockstripe schedule io/ops + * release an active stripe (release_stripe()) + * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev + * + * The refcount counts each thread that have activated the stripe, + * plus raid5d if it is handling it, plus one for each active request +- * on a cached buffer. ++ * on a cached buffer, and plus one if the stripe is undergoing stripe ++ * operations. ++ * ++ * Stripe operations are performed outside the stripe lock; ++ * the stripe operations are: ++ * -copying data between the stripe cache and user application buffers ++ * -computing blocks to save a disk access, or to recover a missing block ++ * -updating the parity on a write operation (reconstruct write and ++ * read-modify-write) ++ * -checking parity correctness ++ * -running i/o to disk ++ * These operations are carried out by raid5_run_ops which uses the async_tx ++ * api to (optionally) offload operations to dedicated hardware engines. ++ * When requesting an operation handle_stripe sets the pending bit for the ++ * operation and increments the count. raid5_run_ops is then run whenever ++ * the count is non-zero. ++ * There are some critical dependencies between the operations that prevent some ++ * from being requested while another is in flight. ++ * 1/ Parity check operations destroy the in cache version of the parity block, ++ * so we prevent parity dependent operations like writes and compute_blocks ++ * from starting while a check is in progress. Some dma engines can perform ++ * the check without damaging the parity block, in these cases the parity ++ * block is re-marked up to date (assuming the check was successful) and is ++ * not re-read from disk. ++ * 2/ When a write operation is requested we immediately lock the affected ++ * blocks, and mark them as not up to date. This causes new read requests ++ * to be held off, as well as parity checks and compute block operations. ++ * 3/ Once a compute block operation has been requested handle_stripe treats ++ * that block as if it is up to date. raid5_run_ops guarantees that any ++ * operation that is dependent on the compute block result is initiated after ++ * the compute block completes. + */ + + struct stripe_head { +@@ -136,15 +169,46 @@ + spinlock_t lock; + int bm_seq; /* sequence number for bitmap flushes */ + int disks; /* disks in stripe */ ++ /* stripe_operations ++ * @pending - pending ops flags (set for request->issue->complete) ++ * @ack - submitted ops flags (set for issue->complete) ++ * @complete - completed ops flags (set for complete) ++ * @target - STRIPE_OP_COMPUTE_BLK target ++ * @count - raid5_run_ops is set to run when this is non-zero ++ */ ++ struct stripe_operations { ++ unsigned long pending; ++ unsigned long ack; ++ unsigned long complete; ++ int target; ++ int count; ++ u32 zero_sum_result; ++ } ops; + struct r5dev { + struct bio req; + struct bio_vec vec; + struct page *page; +- struct bio *toread, *towrite, *written; ++ struct bio *toread, *read, *towrite, *written; + sector_t sector; /* sector of this page */ + unsigned long flags; + } dev[1]; /* allocated with extra space depending of RAID geometry */ + }; ++ ++/* stripe_head_state - collects and tracks the dynamic state of a stripe_head ++ * for handle_stripe. 
It is only valid under spin_lock(sh->lock); ++ */ ++struct stripe_head_state { ++ int syncing, expanding, expanded; ++ int locked, uptodate, to_read, to_write, failed, written; ++ int to_fill, compute, req_compute, non_overwrite, dirty; ++ int failed_num; ++}; ++ ++/* r6_state - extra state data only relevant to r6 */ ++struct r6_state { ++ int p_failed, q_failed, qd_idx, failed_num[2]; ++}; ++ + /* Flags */ + #define R5_UPTODATE 0 /* page contains current data */ + #define R5_LOCKED 1 /* IO has been submitted on "req" */ +@@ -158,6 +222,15 @@ + #define R5_ReWrite 9 /* have tried to over-write the readerror */ + + #define R5_Expanded 10 /* This block now has post-expand data */ ++#define R5_Wantcompute 11 /* compute_block in progress treat as ++ * uptodate ++ */ ++#define R5_Wantfill 12 /* dev->toread contains a bio that needs ++ * filling ++ */ ++#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from ++ * other "towrites" ++ */ + /* + * Write method + */ +@@ -180,6 +253,24 @@ + #define STRIPE_EXPAND_SOURCE 10 + #define STRIPE_EXPAND_READY 11 + /* ++ * Operations flags (in issue order) ++ */ ++#define STRIPE_OP_BIOFILL 0 ++#define STRIPE_OP_COMPUTE_BLK 1 ++#define STRIPE_OP_PREXOR 2 ++#define STRIPE_OP_BIODRAIN 3 ++#define STRIPE_OP_POSTXOR 4 ++#define STRIPE_OP_CHECK 5 ++#define STRIPE_OP_IO 6 ++ ++/* modifiers to the base operations ++ * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back ++ * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check ++ */ ++#define STRIPE_OP_MOD_REPAIR_PD 7 ++#define STRIPE_OP_MOD_DMA_CHECK 8 ++ ++/* + * Plugging: + * + * To improve write throughput, we need to delay the handling of some +diff -Nurb linux-2.6.22-570/include/linux/raid/xor.h linux-2.6.22-591/include/linux/raid/xor.h +--- linux-2.6.22-570/include/linux/raid/xor.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/raid/xor.h 2007-12-21 15:36:12.000000000 -0500 +@@ -3,9 +3,10 @@ + + #include + +-#define MAX_XOR_BLOCKS 5 ++#define MAX_XOR_BLOCKS 4 + +-extern void xor_block(unsigned int count, unsigned int bytes, void **ptr); ++extern void xor_blocks(unsigned int count, unsigned int bytes, ++ void *dest, void **srcs); + + struct xor_block_template { + struct xor_block_template *next; +diff -Nurb linux-2.6.22-570/include/linux/reboot.h linux-2.6.22-591/include/linux/reboot.h +--- linux-2.6.22-570/include/linux/reboot.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/reboot.h 2007-12-21 15:36:12.000000000 -0500 +@@ -67,6 +67,11 @@ + + void ctrl_alt_del(void); + ++#define POWEROFF_CMD_PATH_LEN 256 ++extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; ++ ++extern int orderly_poweroff(bool force); ++ + /* + * Emergency restart, callable from an interrupt handler. 
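The orderly_poweroff() declaration in the reboot.h hunk above is only a prototype. As a rough illustration of how the pieces of this patch can fit together, the sketch below rebuilds a plausible body for it from the reworked call_usermodehelper() in the kmod.h hunk earlier and argv_split() from the string.h hunk later in this patch; the forced kernel_power_off() fallback and the environment strings are assumptions, not taken from this patch.

/* Illustrative sketch only -- not this patch's actual implementation. */
#include <linux/gfp.h>
#include <linux/kmod.h>
#include <linux/reboot.h>
#include <linux/string.h>

static int orderly_poweroff_sketch(bool force)
{
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	char **argv;
	int argc, ret = -ENOMEM;

	/* split the poweroff_cmd buffer declared above into an argv[] */
	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	if (argv) {
		/* UMH_NO_WAIT: fire and forget, see the kmod.h hunk */
		ret = call_usermodehelper(argv[0], argv, envp, UMH_NO_WAIT);
		argv_free(argv);
	}
	if (ret && force)
		kernel_power_off();	/* assumed last-resort fallback */
	return ret;
}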
+ */ +diff -Nurb linux-2.6.22-570/include/linux/revoked_fs_i.h linux-2.6.22-591/include/linux/revoked_fs_i.h +--- linux-2.6.22-570/include/linux/revoked_fs_i.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/revoked_fs_i.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,18 @@ ++#ifndef _LINUX_REVOKED_FS_I_H ++#define _LINUX_REVOKED_FS_I_H ++ ++struct revokefs_inode_info { ++ struct task_struct *owner; ++ struct file *file; ++ unsigned int fd; ++ struct inode vfs_inode; ++}; ++ ++static inline struct revokefs_inode_info *revokefs_i(struct inode *inode) ++{ ++ return container_of(inode, struct revokefs_inode_info, vfs_inode); ++} ++ ++void make_revoked_inode(struct inode *, int); ++ ++#endif +diff -Nurb linux-2.6.22-570/include/linux/rtnetlink.h linux-2.6.22-591/include/linux/rtnetlink.h +--- linux-2.6.22-570/include/linux/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/rtnetlink.h 2007-12-21 15:36:14.000000000 -0500 +@@ -261,7 +261,7 @@ + RTA_FLOW, + RTA_CACHEINFO, + RTA_SESSION, +- RTA_MP_ALGO, ++ RTA_MP_ALGO, /* no longer used */ + RTA_TABLE, + __RTA_MAX + }; +@@ -570,15 +570,21 @@ + } + + extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len); ++extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, ++ struct rtattr *rta, int len); + + #define rtattr_parse_nested(tb, max, rta) \ + rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) + +-extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); +-extern int rtnl_unicast(struct sk_buff *skb, u32 pid); +-extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, ++#define rtattr_parse_nested_compat(tb, max, rta, data, len) \ ++({ data = RTA_PAYLOAD(rta) >= len ? 
RTA_DATA(rta) : NULL; \ ++ __rtattr_parse_nested_compat(tb, max, rta, len); }) ++ ++extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); ++extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); ++extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); +-extern void rtnl_set_sk_err(u32 group, int error); ++extern void rtnl_set_sk_err(struct net *net, u32 group, int error); + extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); + extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, + u32 id, u32 ts, u32 tsage, long expires, +@@ -638,6 +644,18 @@ + ({ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ + (skb)->len; }) + ++#define RTA_NEST_COMPAT(skb, type, attrlen, data) \ ++({ struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \ ++ RTA_PUT(skb, type, attrlen, data); \ ++ RTA_NEST(skb, type); \ ++ __start; }) ++ ++#define RTA_NEST_COMPAT_END(skb, start) \ ++({ struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \ ++ (start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \ ++ RTA_NEST_END(skb, __nest); \ ++ (skb)->len; }) ++ + #define RTA_NEST_CANCEL(skb, start) \ + ({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ +diff -Nurb linux-2.6.22-570/include/linux/sched.h linux-2.6.22-591/include/linux/sched.h +--- linux-2.6.22-570/include/linux/sched.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/sched.h 2007-12-21 15:36:14.000000000 -0500 +@@ -26,7 +26,9 @@ + #define CLONE_STOPPED 0x02000000 /* Start in stopped state */ + #define CLONE_NEWUTS 0x04000000 /* New utsname group? */ + #define CLONE_NEWIPC 0x08000000 /* New ipcs */ ++#define CLONE_NEWUSER 0x20000000 /* New user namespace */ + #define CLONE_KTHREAD 0x10000000 /* clone a kernel thread */ ++#define CLONE_NEWNET 0x40000000 /* New network namespace */ + + /* + * Scheduling policies +@@ -266,6 +268,7 @@ + asmlinkage void schedule(void); + + struct nsproxy; ++struct user_namespace; + + /* Maximum number of active map areas.. 
This is a random (large) number */ + #define DEFAULT_MAX_MAP_COUNT 65536 +@@ -325,6 +328,27 @@ + (mm)->hiwater_vm = (mm)->total_vm; \ + } while (0) + ++extern void set_dumpable(struct mm_struct *mm, int value); ++extern int get_dumpable(struct mm_struct *mm); ++ ++/* mm flags */ ++/* dumpable bits */ ++#define MMF_DUMPABLE 0 /* core dump is permitted */ ++#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ ++#define MMF_DUMPABLE_BITS 2 ++ ++/* coredump filter bits */ ++#define MMF_DUMP_ANON_PRIVATE 2 ++#define MMF_DUMP_ANON_SHARED 3 ++#define MMF_DUMP_MAPPED_PRIVATE 4 ++#define MMF_DUMP_MAPPED_SHARED 5 ++#define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS ++#define MMF_DUMP_FILTER_BITS 4 ++#define MMF_DUMP_FILTER_MASK \ ++ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) ++#define MMF_DUMP_FILTER_DEFAULT \ ++ ((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED)) ++ + struct mm_struct { + struct vm_area_struct * mmap; /* list of VMAs */ + struct rb_root mm_rb; +@@ -383,7 +407,7 @@ + unsigned int token_priority; + unsigned int last_interval; + +- unsigned char dumpable:2; ++ unsigned long flags; /* Must use atomic bitops to access the bits */ + + /* coredumping support */ + int core_waiters; +@@ -757,9 +781,6 @@ + #endif + }; + +-extern int partition_sched_domains(cpumask_t *partition1, +- cpumask_t *partition2); +- + /* + * Maximum cache size the migration-costs auto-tuning code will + * search from: +@@ -770,8 +791,6 @@ + + + struct io_context; /* See blkdev.h */ +-struct cpuset; +- + #define NGROUPS_SMALL 32 + #define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t))) + struct group_info { +@@ -912,7 +931,7 @@ + unsigned int rt_priority; + cputime_t utime, stime; + unsigned long nvcsw, nivcsw; /* context switch counts */ +- struct timespec start_time; ++ struct timespec start_time, real_start_time; + /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ + unsigned long min_flt, maj_flt; + +@@ -1067,11 +1086,16 @@ + short il_next; + #endif + #ifdef CONFIG_CPUSETS +- struct cpuset *cpuset; + nodemask_t mems_allowed; + int cpuset_mems_generation; + int cpuset_mem_spread_rotor; + #endif ++#ifdef CONFIG_CONTAINERS ++ /* Container info protected by css_group_lock */ ++ struct css_group *containers; ++ /* cg_list protected by css_group_lock and tsk->alloc_lock */ ++ struct list_head cg_list; ++#endif + struct robust_list_head __user *robust_list; + #ifdef CONFIG_COMPAT + struct compat_robust_list_head __user *compat_robust_list; +@@ -1514,7 +1538,8 @@ + /* + * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring + * subscriptions and synchronises with wait4(). Also used in procfs. Also +- * pins the final release of task.io_context. Also protects ->cpuset. ++ * pins the final release of task.io_context. Also protects ->cpuset and ++ * ->container.subsys[]. + * + * Nests both inside and outside of read_lock(&tasklist_lock). 
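The MMF_* constants in the sched.h hunk above fold the old two-bit mm->dumpable field and the new coredump filter into the single atomic mm->flags word. The standalone C sketch below simply recomputes the resulting bit layout from those definitions; the asserted values are derived, not quoted from the patch.

/* Standalone recomputation of the MMF_* bit layout -- not kernel code. */
#include <assert.h>

#define MMF_DUMPABLE		0	/* core dump is permitted */
#define MMF_DUMP_SECURELY	1	/* core file readable only by root */
#define MMF_DUMPABLE_BITS	2

#define MMF_DUMP_ANON_PRIVATE	2
#define MMF_DUMP_ANON_SHARED	3
#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
#define MMF_DUMP_FILTER_BITS	4
#define MMF_DUMP_FILTER_MASK \
	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
#define MMF_DUMP_FILTER_DEFAULT \
	((1 << MMF_DUMP_ANON_PRIVATE) | (1 << MMF_DUMP_ANON_SHARED))

int main(void)
{
	/* dumpable state lives in bits 0-1, the filter in bits 2-5 */
	assert(MMF_DUMP_FILTER_MASK == 0x3c);
	/* default filter: dump anonymous private and shared mappings only */
	assert(MMF_DUMP_FILTER_DEFAULT == 0x0c);
	return 0;
}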
+ * It must not be nested with write_lock_irq(&tasklist_lock), +diff -Nurb linux-2.6.22-570/include/linux/seccomp.h linux-2.6.22-591/include/linux/seccomp.h +--- linux-2.6.22-570/include/linux/seccomp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/seccomp.h 2007-12-21 15:36:12.000000000 -0500 +@@ -4,8 +4,6 @@ + + #ifdef CONFIG_SECCOMP + +-#define NR_SECCOMP_MODES 1 +- + #include + #include + +@@ -23,6 +21,9 @@ + return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP)); + } + ++extern long prctl_get_seccomp(void); ++extern long prctl_set_seccomp(unsigned long); ++ + #else /* CONFIG_SECCOMP */ + + typedef struct { } seccomp_t; +@@ -34,6 +35,16 @@ + return 0; + } + ++static inline long prctl_get_seccomp(void) ++{ ++ return -EINVAL; ++} ++ ++static inline long prctl_set_seccomp(unsigned long arg2) ++{ ++ return -EINVAL; ++} ++ + #endif /* CONFIG_SECCOMP */ + + #endif /* _LINUX_SECCOMP_H */ +diff -Nurb linux-2.6.22-570/include/linux/security.h linux-2.6.22-591/include/linux/security.h +--- linux-2.6.22-570/include/linux/security.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/security.h 2007-12-21 15:36:12.000000000 -0500 +@@ -71,6 +71,7 @@ + extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); + extern int cap_netlink_recv(struct sk_buff *skb, int cap); + ++extern unsigned long mmap_min_addr; + /* + * Values used in the task_security_ops calls + */ +@@ -1241,8 +1242,9 @@ + int (*file_ioctl) (struct file * file, unsigned int cmd, + unsigned long arg); + int (*file_mmap) (struct file * file, +- unsigned long reqprot, +- unsigned long prot, unsigned long flags); ++ unsigned long reqprot, unsigned long prot, ++ unsigned long flags, unsigned long addr, ++ unsigned long addr_only); + int (*file_mprotect) (struct vm_area_struct * vma, + unsigned long reqprot, + unsigned long prot); +@@ -1814,9 +1816,12 @@ + + static inline int security_file_mmap (struct file *file, unsigned long reqprot, + unsigned long prot, +- unsigned long flags) ++ unsigned long flags, ++ unsigned long addr, ++ unsigned long addr_only) + { +- return security_ops->file_mmap (file, reqprot, prot, flags); ++ return security_ops->file_mmap (file, reqprot, prot, flags, addr, ++ addr_only); + } + + static inline int security_file_mprotect (struct vm_area_struct *vma, +@@ -2489,7 +2494,9 @@ + + static inline int security_file_mmap (struct file *file, unsigned long reqprot, + unsigned long prot, +- unsigned long flags) ++ unsigned long flags, ++ unsigned long addr, ++ unsigned long addr_only) + { + return 0; + } +diff -Nurb linux-2.6.22-570/include/linux/serial_8250.h linux-2.6.22-591/include/linux/serial_8250.h +--- linux-2.6.22-570/include/linux/serial_8250.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/serial_8250.h 2007-12-21 15:36:12.000000000 -0500 +@@ -57,6 +57,7 @@ + + int serial8250_register_port(struct uart_port *); + void serial8250_unregister_port(int line); ++void serial8250_unregister_by_port(struct uart_port *port); + void serial8250_suspend_port(int line); + void serial8250_resume_port(int line); + +diff -Nurb linux-2.6.22-570/include/linux/signal.h linux-2.6.22-591/include/linux/signal.h +--- linux-2.6.22-570/include/linux/signal.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/signal.h 2007-12-21 15:36:12.000000000 -0500 +@@ -238,12 +238,15 @@ + extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); + extern long do_sigpending(void __user *, unsigned long); + extern int 
sigprocmask(int, sigset_t *, sigset_t *); ++extern int show_unhandled_signals; + + struct pt_regs; + extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie); + + extern struct kmem_cache *sighand_cachep; + ++int unhandled_signal(struct task_struct *tsk, int sig); ++ + /* + * In POSIX a signal is sent either to a specific thread (Linux task) + * or to the process as a whole (Linux thread group). How the signal +diff -Nurb linux-2.6.22-570/include/linux/skbuff.h linux-2.6.22-591/include/linux/skbuff.h +--- linux-2.6.22-570/include/linux/skbuff.h 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/include/linux/skbuff.h 2007-12-21 15:36:12.000000000 -0500 +@@ -147,8 +147,8 @@ + + /* We divide dataref into two halves. The higher 16 bits hold references + * to the payload part of skb->data. The lower 16 bits hold references to +- * the entire skb->data. It is up to the users of the skb to agree on +- * where the payload starts. ++ * the entire skb->data. A clone of a headerless skb holds the length of ++ * the header in skb->hdr_len. + * + * All users must obey the rule that the skb->data reference count must be + * greater than or equal to the payload reference count. +@@ -206,6 +206,7 @@ + * @len: Length of actual data + * @data_len: Data length + * @mac_len: Length of link layer header ++ * @hdr_len: writable header length of cloned skb + * @csum: Checksum (must include start/offset pair) + * @csum_start: Offset from skb->head where checksumming should start + * @csum_offset: Offset from csum_start where checksum should be stored +@@ -260,8 +261,9 @@ + char cb[48]; + + unsigned int len, +- data_len, +- mac_len; ++ data_len; ++ __u16 mac_len, ++ hdr_len; + union { + __wsum csum; + struct { +@@ -1323,6 +1325,20 @@ + } + + /** ++ * skb_clone_writable - is the header of a clone writable ++ * @skb: buffer to check ++ * @len: length up to which to write ++ * ++ * Returns true if modifying the header part of the cloned buffer ++ * does not require the data to be copied.
++ */ ++static inline int skb_clone_writable(struct sk_buff *skb, int len) ++{ ++ return !skb_header_cloned(skb) && ++ skb_headroom(skb) + len <= skb->hdr_len; ++} ++ ++/** + * skb_cow - copy header of skb when it is required + * @skb: buffer to cow + * @headroom: needed headroom +diff -Nurb linux-2.6.22-570/include/linux/slab.h linux-2.6.22-591/include/linux/slab.h +--- linux-2.6.22-570/include/linux/slab.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/slab.h 2007-12-21 15:36:12.000000000 -0500 +@@ -26,12 +26,14 @@ + #define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ + #define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ + #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ +-#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ + #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ + #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ + #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ + #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ + ++/* The following flags affect the page allocator grouping pages by mobility */ ++#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ ++#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ + /* + * struct kmem_cache related prototypes + */ +diff -Nurb linux-2.6.22-570/include/linux/socket.h linux-2.6.22-591/include/linux/socket.h +--- linux-2.6.22-570/include/linux/socket.h 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/include/linux/socket.h 2007-12-21 15:36:14.000000000 -0500 +@@ -24,7 +24,6 @@ + #include /* pid_t */ + #include /* __user */ + +-extern int sysctl_somaxconn; + #ifdef CONFIG_PROC_FS + struct seq_file; + extern void socket_seq_show(struct seq_file *seq); +diff -Nurb linux-2.6.22-570/include/linux/string.h linux-2.6.22-591/include/linux/string.h +--- linux-2.6.22-570/include/linux/string.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/string.h 2007-12-21 15:36:12.000000000 -0500 +@@ -105,8 +105,12 @@ + #endif + + extern char *kstrdup(const char *s, gfp_t gfp); ++extern char *kstrndup(const char *s, size_t len, gfp_t gfp); + extern void *kmemdup(const void *src, size_t len, gfp_t gfp); + ++extern char **argv_split(gfp_t gfp, const char *str, int *argcp); ++extern void argv_free(char **argv); ++ + #ifdef __cplusplus + } + #endif +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth.h linux-2.6.22-591/include/linux/sunrpc/auth.h +--- linux-2.6.22-570/include/linux/sunrpc/auth.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/sunrpc/auth.h 2007-12-21 15:36:12.000000000 -0500 +@@ -16,6 +16,7 @@ + #include + + #include ++#include + + /* size of the nodename buffer */ + #define UNX_MAXNODENAME 32 +@@ -31,22 +32,28 @@ + /* + * Client user credentials + */ ++struct rpc_auth; ++struct rpc_credops; + struct rpc_cred { + struct hlist_node cr_hash; /* hash chain */ +- struct rpc_credops * cr_ops; +- unsigned long cr_expire; /* when to gc */ +- atomic_t cr_count; /* ref count */ +- unsigned short cr_flags; /* various flags */ ++ struct list_head cr_lru; /* lru garbage collection */ ++ struct rcu_head cr_rcu; ++ struct rpc_auth * cr_auth; ++ const struct rpc_credops *cr_ops; + #ifdef RPC_DEBUG + unsigned long cr_magic; /* 0x0f4aa4f0 */ + #endif ++ unsigned long cr_expire; /* when to gc */ ++ unsigned long cr_flags; /* various flags */ ++ atomic_t cr_count; /* ref 
count */ + + uid_t cr_uid; + + /* per-flavor data */ + }; +-#define RPCAUTH_CRED_NEW 0x0001 +-#define RPCAUTH_CRED_UPTODATE 0x0002 ++#define RPCAUTH_CRED_NEW 0 ++#define RPCAUTH_CRED_UPTODATE 1 ++#define RPCAUTH_CRED_HASHED 2 + + #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 + +@@ -57,10 +64,10 @@ + #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) + struct rpc_cred_cache { + struct hlist_head hashtable[RPC_CREDCACHE_NR]; +- unsigned long nextgc; /* next garbage collection */ +- unsigned long expire; /* cache expiry interval */ ++ spinlock_t lock; + }; + ++struct rpc_authops; + struct rpc_auth { + unsigned int au_cslack; /* call cred size estimate */ + /* guess at number of u32's auth adds before +@@ -70,7 +77,7 @@ + unsigned int au_verfsize; + + unsigned int au_flags; /* various flags */ +- struct rpc_authops * au_ops; /* operations */ ++ const struct rpc_authops *au_ops; /* operations */ + rpc_authflavor_t au_flavor; /* pseudoflavor (note may + * differ from the flavor in + * au_ops->au_flavor in gss +@@ -116,17 +123,19 @@ + void *, __be32 *, void *); + }; + +-extern struct rpc_authops authunix_ops; +-extern struct rpc_authops authnull_ops; +-#ifdef CONFIG_SUNRPC_SECURE +-extern struct rpc_authops authdes_ops; +-#endif ++extern const struct rpc_authops authunix_ops; ++extern const struct rpc_authops authnull_ops; ++ ++void __init rpc_init_authunix(void); ++void __init rpcauth_init_module(void); ++void __exit rpcauth_remove_module(void); + +-int rpcauth_register(struct rpc_authops *); +-int rpcauth_unregister(struct rpc_authops *); ++int rpcauth_register(const struct rpc_authops *); ++int rpcauth_unregister(const struct rpc_authops *); + struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *); +-void rpcauth_destroy(struct rpc_auth *); ++void rpcauth_release(struct rpc_auth *); + struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int); ++void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); + struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); + struct rpc_cred * rpcauth_bindcred(struct rpc_task *); + void rpcauth_holdcred(struct rpc_task *); +@@ -139,8 +148,9 @@ + int rpcauth_refreshcred(struct rpc_task *); + void rpcauth_invalcred(struct rpc_task *); + int rpcauth_uptodatecred(struct rpc_task *); +-int rpcauth_init_credcache(struct rpc_auth *, unsigned long); +-void rpcauth_free_credcache(struct rpc_auth *); ++int rpcauth_init_credcache(struct rpc_auth *); ++void rpcauth_destroy_credcache(struct rpc_auth *); ++void rpcauth_clear_credcache(struct rpc_cred_cache *); + + static inline + struct rpc_cred * get_rpccred(struct rpc_cred *cred) +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth_gss.h linux-2.6.22-591/include/linux/sunrpc/auth_gss.h +--- linux-2.6.22-570/include/linux/sunrpc/auth_gss.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sunrpc/auth_gss.h 2007-12-21 15:36:12.000000000 -0500 +@@ -85,11 +85,6 @@ + struct gss_upcall_msg *gc_upcall; + }; + +-#define gc_uid gc_base.cr_uid +-#define gc_count gc_base.cr_count +-#define gc_flags gc_base.cr_flags +-#define gc_expire gc_base.cr_expire +- + #endif /* __KERNEL__ */ + #endif /* _LINUX_SUNRPC_AUTH_GSS_H */ + +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/clnt.h linux-2.6.22-591/include/linux/sunrpc/clnt.h +--- linux-2.6.22-570/include/linux/sunrpc/clnt.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/sunrpc/clnt.h 2007-12-21 15:36:12.000000000 -0500 +@@ -24,8 +24,10 @@ 
+ * The high-level client handle + */ + struct rpc_clnt { +- atomic_t cl_count; /* Number of clones */ +- atomic_t cl_users; /* number of references */ ++ struct kref cl_kref; /* Number of references */ ++ struct list_head cl_clients; /* Global list of clients */ ++ struct list_head cl_tasks; /* List of tasks */ ++ spinlock_t cl_lock; /* spinlock */ + struct rpc_xprt * cl_xprt; /* transport */ + struct rpc_procinfo * cl_procinfo; /* procedure info */ + u32 cl_prog, /* RPC program number */ +@@ -41,10 +43,7 @@ + unsigned int cl_softrtry : 1,/* soft timeouts */ + cl_intr : 1,/* interruptible */ + cl_discrtry : 1,/* disconnect before retry */ +- cl_autobind : 1,/* use getport() */ +- cl_oneshot : 1,/* dispose after use */ +- cl_dead : 1,/* abandoned */ +- cl_tag : 1;/* context tagging */ ++ cl_autobind : 1;/* use getport() */ + + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + +@@ -111,17 +110,15 @@ + #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) + #define RPC_CLNT_CREATE_INTR (1UL << 1) + #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) +-#define RPC_CLNT_CREATE_ONESHOT (1UL << 3) +-#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 4) +-#define RPC_CLNT_CREATE_NOPING (1UL << 5) +-#define RPC_CLNT_CREATE_DISCRTRY (1UL << 6) ++#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) ++#define RPC_CLNT_CREATE_NOPING (1UL << 4) ++#define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) + + struct rpc_clnt *rpc_create(struct rpc_create_args *args); + struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, + struct rpc_program *, int); + struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); +-int rpc_shutdown_client(struct rpc_clnt *); +-int rpc_destroy_client(struct rpc_clnt *); ++void rpc_shutdown_client(struct rpc_clnt *); + void rpc_release_client(struct rpc_clnt *); + int rpcb_register(u32, u32, int, unsigned short, int *); + void rpcb_getport(struct rpc_task *); +@@ -133,13 +130,14 @@ + void *calldata); + int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, + int flags); ++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, ++ int flags); + void rpc_restart_call(struct rpc_task *); + void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); + void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); + void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); + size_t rpc_max_payload(struct rpc_clnt *); + void rpc_force_rebind(struct rpc_clnt *); +-int rpc_ping(struct rpc_clnt *clnt, int flags); + size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); + char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); + +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/gss_api.h linux-2.6.22-591/include/linux/sunrpc/gss_api.h +--- linux-2.6.22-570/include/linux/sunrpc/gss_api.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sunrpc/gss_api.h 2007-12-21 15:36:12.000000000 -0500 +@@ -77,7 +77,7 @@ + struct module *gm_owner; + struct xdr_netobj gm_oid; + char *gm_name; +- struct gss_api_ops *gm_ops; ++ const struct gss_api_ops *gm_ops; + /* pseudoflavors supported by this mechanism: */ + int gm_pf_num; + struct pf_desc * gm_pfs; +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h +--- linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sunrpc/rpc_pipe_fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -23,9 +23,11 @@ + void *private; + struct list_head pipe; + struct list_head in_upcall; ++ struct 
list_head in_downcall; + int pipelen; + int nreaders; + int nwriters; ++ int nkern_readwriters; + wait_queue_head_t waitq; + #define RPC_PIPE_WAIT_FOR_OPEN 1 + int flags; +diff -Nurb linux-2.6.22-570/include/linux/sunrpc/sched.h linux-2.6.22-591/include/linux/sunrpc/sched.h +--- linux-2.6.22-570/include/linux/sunrpc/sched.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sunrpc/sched.h 2007-12-21 15:36:12.000000000 -0500 +@@ -110,11 +110,6 @@ + if (!list_empty(head) && \ + ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) + +-/* .. and walking list of all tasks */ +-#define alltask_for_each(task, pos, head) \ +- list_for_each(pos, head) \ +- if ((task=list_entry(pos, struct rpc_task, tk_task)),1) +- + typedef void (*rpc_action)(struct rpc_task *); + + struct rpc_call_ops { +diff -Nurb linux-2.6.22-570/include/linux/syscalls.h linux-2.6.22-591/include/linux/syscalls.h +--- linux-2.6.22-570/include/linux/syscalls.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/syscalls.h 2007-12-21 15:36:12.000000000 -0500 +@@ -110,6 +110,9 @@ + asmlinkage long sys_capset(cap_user_header_t header, + const cap_user_data_t data); + asmlinkage long sys_personality(u_long personality); ++asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, ++ loff_t offset, loff_t nbytes); ++ + + asmlinkage long sys_sigpending(old_sigset_t __user *set); + asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set, +@@ -612,7 +615,11 @@ + asmlinkage long sys_timerfd(int ufd, int clockid, int flags, + const struct itimerspec __user *utmr); + asmlinkage long sys_eventfd(unsigned int count); ++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); + + int kernel_execve(const char *filename, char *const argv[], char *const envp[]); + ++asmlinkage long sys_revokeat(int dfd, const char __user *filename); ++asmlinkage long sys_frevoke(unsigned int fd); ++ + #endif +diff -Nurb linux-2.6.22-570/include/linux/sysctl.h linux-2.6.22-591/include/linux/sysctl.h +--- linux-2.6.22-570/include/linux/sysctl.h 2007-12-21 15:36:02.000000000 -0500 ++++ linux-2.6.22-591/include/linux/sysctl.h 2007-12-21 15:36:14.000000000 -0500 +@@ -31,6 +31,7 @@ + + struct file; + struct completion; ++struct net; + + #define CTL_MAXNAME 10 /* how many path components do we allow in a + call to sysctl? In other words, what is +@@ -166,6 +167,7 @@ + KERN_MAX_LOCK_DEPTH=74, + KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */ + KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */ ++ KERN_POWEROFF_CMD=77, /* string: poweroff command line */ + }; + + +@@ -208,6 +210,7 @@ + VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ + VM_VDSO_ENABLED=34, /* map VDSO into new processes? 
*/ + VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ ++ VM_HUGETLB_TREAT_MOVABLE=36, /* Allocate hugepages from ZONE_MOVABLE */ + + /* s390 vm cmm sysctls */ + VM_CMM_PAGES=1111, +@@ -843,6 +846,9 @@ + }; + + /* CTL_DEBUG names: */ ++enum { ++ DEBUG_UNHANDLED_SIGNALS = 1, ++}; + + /* CTL_DEV names: */ + enum { +@@ -980,6 +986,7 @@ + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen); + ++extern ctl_handler sysctl_data; + extern ctl_handler sysctl_string; + extern ctl_handler sysctl_intvec; + extern ctl_handler sysctl_jiffies; +@@ -1056,6 +1063,12 @@ + + void unregister_sysctl_table(struct ctl_table_header * table); + ++#ifdef CONFIG_NET ++extern struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table); ++extern void unregister_net_sysctl_table(struct ctl_table_header *header); ++extern ctl_table net_root_table[]; ++#endif ++ + #else /* __KERNEL__ */ + + #endif /* __KERNEL__ */ +diff -Nurb linux-2.6.22-570/include/linux/sysdev.h linux-2.6.22-591/include/linux/sysdev.h +--- linux-2.6.22-570/include/linux/sysdev.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/sysdev.h 2007-12-21 15:36:12.000000000 -0500 +@@ -101,8 +101,7 @@ + + #define _SYSDEV_ATTR(_name,_mode,_show,_store) \ + { \ +- .attr = { .name = __stringify(_name), .mode = _mode, \ +- .owner = THIS_MODULE }, \ ++ .attr = { .name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + } +diff -Nurb linux-2.6.22-570/include/linux/sysfs.h linux-2.6.22-591/include/linux/sysfs.h +--- linux-2.6.22-570/include/linux/sysfs.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/sysfs.h 2007-12-21 15:36:14.000000000 -0500 +@@ -19,9 +19,11 @@ + + struct kobject; + struct module; +-struct nameidata; +-struct dentry; + ++/* FIXME ++ * The *owner field is no longer used, but leave around ++ * until the tree gets cleaned up fully. 
++ */ + struct attribute { + const char * name; + struct module * owner; +@@ -41,13 +43,13 @@ + */ + + #define __ATTR(_name,_mode,_show,_store) { \ +- .attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE }, \ ++ .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + } + + #define __ATTR_RO(_name) { \ +- .attr = { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE }, \ ++ .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_show, \ + } + +@@ -61,8 +63,10 @@ + struct attribute attr; + size_t size; + void *private; +- ssize_t (*read)(struct kobject *, char *, loff_t, size_t); +- ssize_t (*write)(struct kobject *, char *, loff_t, size_t); ++ ssize_t (*read)(struct kobject *, struct bin_attribute *, ++ char *, loff_t, size_t); ++ ssize_t (*write)(struct kobject *, struct bin_attribute *, ++ char *, loff_t, size_t); + int (*mmap)(struct kobject *, struct bin_attribute *attr, + struct vm_area_struct *vma); + }; +@@ -72,12 +76,23 @@ + ssize_t (*store)(struct kobject *,struct attribute *,const char *, size_t); + }; + ++struct shadow_dir_operations { ++ const void *(*current_tag)(void); ++ const void *(*kobject_tag)(struct kobject *kobj); ++}; ++ ++#define SYSFS_TYPE_MASK 0x00ff + #define SYSFS_ROOT 0x0001 + #define SYSFS_DIR 0x0002 + #define SYSFS_KOBJ_ATTR 0x0004 + #define SYSFS_KOBJ_BIN_ATTR 0x0008 + #define SYSFS_KOBJ_LINK 0x0020 +-#define SYSFS_NOT_PINNED (SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK) ++#define SYSFS_SHADOW_DIR 0x0040 ++#define SYSFS_COPY_NAME (SYSFS_DIR | SYSFS_KOBJ_LINK) ++ ++#define SYSFS_FLAG_MASK ~SYSFS_TYPE_MASK ++#define SYSFS_FLAG_REMOVED 0x0100 ++#define SYSFS_FLAG_SHADOWED 0x0200 + + #ifdef CONFIG_SYSFS + +@@ -85,13 +100,13 @@ + void (*func)(void *), void *data, struct module *owner); + + extern int __must_check +-sysfs_create_dir(struct kobject *, struct dentry *); ++sysfs_create_dir(struct kobject *); + + extern void + sysfs_remove_dir(struct kobject *); + + extern int __must_check +-sysfs_rename_dir(struct kobject *, struct dentry *, const char *new_name); ++sysfs_rename_dir(struct kobject *kobj, const char *new_name); + + extern int __must_check + sysfs_move_dir(struct kobject *, struct kobject *); +@@ -114,6 +129,13 @@ + extern void + sysfs_remove_link(struct kobject *, const char * name); + ++extern int ++sysfs_rename_link(struct kobject *kobj, struct kobject *target, ++ const char *old_name, const char *new_name); ++ ++extern void ++sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name); ++ + int __must_check sysfs_create_bin_file(struct kobject *kobj, + struct bin_attribute *attr); + void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); +@@ -128,11 +150,7 @@ + + void sysfs_notify(struct kobject * k, char *dir, char *attr); + +- +-extern int sysfs_make_shadowed_dir(struct kobject *kobj, +- void * (*follow_link)(struct dentry *, struct nameidata *)); +-extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj); +-extern void sysfs_remove_shadow_dir(struct dentry *dir); ++int sysfs_enable_shadowing(struct kobject *, const struct shadow_dir_operations *); + + extern int __must_check sysfs_init(void); + +@@ -144,7 +162,7 @@ + return -ENOSYS; + } + +-static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow) ++static inline int sysfs_create_dir(struct kobject * kobj) + { + return 0; + } +@@ -154,9 +172,7 @@ + ; + } + +-static inline int sysfs_rename_dir(struct kobject * k, +- struct 
dentry *new_parent, +- const char *new_name) ++static inline int sysfs_rename_dir(struct kobject * kobj, const char *new_name) + { + return 0; + } +@@ -195,6 +211,17 @@ + ; + } + ++static inline int ++sysfs_rename_link(struct kobject * k, struct kobject *t, ++ const char *old_name, const char * new_name) ++{ ++ return 0; ++} ++ ++static inline void ++sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name) ++{ ++} + + static inline int sysfs_create_bin_file(struct kobject * k, struct bin_attribute * a) + { +@@ -231,8 +258,8 @@ + { + } + +-static inline int sysfs_make_shadowed_dir(struct kobject *kobj, +- void * (*follow_link)(struct dentry *, struct nameidata *)) ++static inline int sysfs_enable_shadowing(struct kobject *kobj, ++ const struct shadow_dir_operations *shadow_ops) + { + return 0; + } +diff -Nurb linux-2.6.22-570/include/linux/taskstats.h linux-2.6.22-591/include/linux/taskstats.h +--- linux-2.6.22-570/include/linux/taskstats.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/taskstats.h 2007-12-21 15:36:12.000000000 -0500 +@@ -31,7 +31,7 @@ + */ + + +-#define TASKSTATS_VERSION 4 ++#define TASKSTATS_VERSION 5 + #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +@@ -149,6 +149,9 @@ + __u64 read_bytes; /* bytes of read I/O */ + __u64 write_bytes; /* bytes of write I/O */ + __u64 cancelled_write_bytes; /* bytes of cancelled write I/O */ ++ ++ __u64 nvcsw; /* voluntary_ctxt_switches */ ++ __u64 nivcsw; /* nonvoluntary_ctxt_switches */ + }; + + +diff -Nurb linux-2.6.22-570/include/linux/tick.h linux-2.6.22-591/include/linux/tick.h +--- linux-2.6.22-570/include/linux/tick.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/tick.h 2007-12-21 15:36:12.000000000 -0500 +@@ -40,6 +40,7 @@ + * @idle_sleeps: Number of idle calls, where the sched tick was stopped + * @idle_entrytime: Time when the idle call was entered + * @idle_sleeptime: Sum of the time slept in idle with sched tick stopped ++ * @sleep_length: Duration of the current idle sleep + */ + struct tick_sched { + struct hrtimer sched_timer; +@@ -52,6 +53,7 @@ + unsigned long idle_sleeps; + ktime_t idle_entrytime; + ktime_t idle_sleeptime; ++ ktime_t sleep_length; + unsigned long last_jiffies; + unsigned long next_jiffies; + ktime_t idle_expires; +@@ -100,10 +102,18 @@ + extern void tick_nohz_stop_sched_tick(void); + extern void tick_nohz_restart_sched_tick(void); + extern void tick_nohz_update_jiffies(void); ++extern ktime_t tick_nohz_get_sleep_length(void); ++extern unsigned long tick_nohz_get_idle_jiffies(void); + # else + static inline void tick_nohz_stop_sched_tick(void) { } + static inline void tick_nohz_restart_sched_tick(void) { } + static inline void tick_nohz_update_jiffies(void) { } ++static inline ktime_t tick_nohz_get_sleep_length(void) ++{ ++ ktime_t len = { .tv64 = NSEC_PER_SEC/HZ }; ++ ++ return len; ++} + # endif /* !NO_HZ */ + + #endif +diff -Nurb linux-2.6.22-570/include/linux/time.h linux-2.6.22-591/include/linux/time.h +--- linux-2.6.22-570/include/linux/time.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/linux/time.h 2007-12-21 15:36:14.000000000 -0500 +@@ -116,6 +116,8 @@ + extern unsigned int alarm_setitimer(unsigned int seconds); + extern int do_getitimer(int which, struct itimerval *value); + extern void getnstimeofday(struct timespec *tv); ++extern void getboottime(struct timespec *ts); ++extern void monotonic_to_bootbased(struct timespec *ts); + + extern struct timespec timespec_trunc(struct 
timespec t, unsigned gran); + extern int timekeeping_is_continuous(void); +diff -Nurb linux-2.6.22-570/include/linux/union_fs.h linux-2.6.22-591/include/linux/union_fs.h +--- linux-2.6.22-570/include/linux/union_fs.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/union_fs.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,29 @@ ++/* ++ * Copyright (c) 2003-2007 Erez Zadok ++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek ++ * Copyright (c) 2003-2007 Stony Brook University ++ * Copyright (c) 2003-2007 The Research Foundation of SUNY ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++#ifndef _LINUX_UNION_FS_H ++#define _LINUX_UNION_FS_H ++ ++#define UNIONFS_VERSION "2.0" ++/* ++ * DEFINITIONS FOR USER AND KERNEL CODE: ++ */ ++# define UNIONFS_IOCTL_INCGEN _IOR(0x15, 11, int) ++# define UNIONFS_IOCTL_QUERYFILE _IOR(0x15, 15, int) ++ ++/* We don't support normal remount, but unionctl uses it. */ ++# define UNIONFS_REMOUNT_MAGIC 0x4a5a4380 ++ ++/* should be at least LAST_USED_UNIONFS_PERMISSION<<1 */ ++#define MAY_NFSRO 16 ++ ++#endif /* _LINUX_UNION_FS_H */ ++ +diff -Nurb linux-2.6.22-570/include/linux/unwind.h linux-2.6.22-591/include/linux/unwind.h +--- linux-2.6.22-570/include/linux/unwind.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/unwind.h 2007-12-21 15:36:12.000000000 -0500 +@@ -14,6 +14,63 @@ + + struct module; + ++#ifdef CONFIG_STACK_UNWIND ++ ++#include ++ ++#ifndef ARCH_UNWIND_SECTION_NAME ++#define ARCH_UNWIND_SECTION_NAME ".eh_frame" ++#endif ++ ++/* ++ * Initialize unwind support. ++ */ ++extern void unwind_init(void); ++extern void unwind_setup(void); ++ ++#ifdef CONFIG_MODULES ++ ++extern void *unwind_add_table(struct module *, ++ const void *table_start, ++ unsigned long table_size); ++ ++extern void unwind_remove_table(void *handle, int init_only); ++ ++#endif ++ ++extern int unwind_init_frame_info(struct unwind_frame_info *, ++ struct task_struct *, ++ /*const*/ struct pt_regs *); ++ ++/* ++ * Prepare to unwind a blocked task. ++ */ ++extern int unwind_init_blocked(struct unwind_frame_info *, ++ struct task_struct *); ++ ++/* ++ * Prepare to unwind the currently running thread. ++ */ ++extern int unwind_init_running(struct unwind_frame_info *, ++ asmlinkage int (*callback)(struct unwind_frame_info *, ++ void *arg), ++ void *arg); ++ ++/* ++ * Unwind to the previous frame. Returns 0 if successful, negative ++ * number in case of an error. ++ */ ++extern int unwind(struct unwind_frame_info *); ++ ++/* ++ * Unwind until the return pointer is in user-land (or until an error ++ * occurs). Returns 0 if successful, negative number in case of ++ * error.
++ */ ++extern int unwind_to_user(struct unwind_frame_info *); ++ ++#else ++ + struct unwind_frame_info {}; + + static inline void unwind_init(void) {} +@@ -28,12 +85,12 @@ + return NULL; + } + ++#endif ++ + static inline void unwind_remove_table(void *handle, int init_only) + { + } + +-#endif +- + static inline int unwind_init_frame_info(struct unwind_frame_info *info, + struct task_struct *tsk, + const struct pt_regs *regs) +@@ -65,4 +122,6 @@ + return -ENOSYS; + } + ++#endif ++ + #endif /* _LINUX_UNWIND_H */ +diff -Nurb linux-2.6.22-570/include/linux/usb.h linux-2.6.22-591/include/linux/usb.h +--- linux-2.6.22-570/include/linux/usb.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/usb.h 2007-12-21 15:36:14.000000000 -0500 +@@ -146,6 +146,10 @@ + * active alternate setting */ + unsigned num_altsetting; /* number of alternate settings */ + ++ /* If there is an interface association descriptor then it will list ++ * the associated interfaces */ ++ struct usb_interface_assoc_descriptor *intf_assoc; ++ + int minor; /* minor number this interface is + * bound to */ + enum usb_interface_condition condition; /* state of binding */ +@@ -175,6 +179,7 @@ + + /* this maximum is arbitrary */ + #define USB_MAXINTERFACES 32 ++#define USB_MAXIADS USB_MAXINTERFACES/2 + + /** + * struct usb_interface_cache - long-term representation of a device interface +@@ -245,6 +250,11 @@ + struct usb_config_descriptor desc; + + char *string; /* iConfiguration string, if present */ ++ ++ /* List of any Interface Association Descriptors in this ++ * configuration. */ ++ struct usb_interface_assoc_descriptor *intf_assoc[USB_MAXIADS]; ++ + /* the interfaces associated with this configuration, + * stored in no particular order */ + struct usb_interface *interface[USB_MAXINTERFACES]; +diff -Nurb linux-2.6.22-570/include/linux/user_namespace.h linux-2.6.22-591/include/linux/user_namespace.h +--- linux-2.6.22-570/include/linux/user_namespace.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/linux/user_namespace.h 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,61 @@ ++#ifndef _LINUX_USER_NAMESPACE_H ++#define _LINUX_USER_NAMESPACE_H ++ ++#include ++#include ++#include ++#include ++ ++#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 
3 : 8) ++#define UIDHASH_SZ (1 << UIDHASH_BITS) ++ ++struct user_namespace { ++ struct kref kref; ++ struct list_head uidhash_table[UIDHASH_SZ]; ++ struct user_struct *root_user; ++}; ++ ++extern struct user_namespace init_user_ns; ++ ++#ifdef CONFIG_USER_NS ++ ++static inline struct user_namespace *get_user_ns(struct user_namespace *ns) ++{ ++ if (ns) ++ kref_get(&ns->kref); ++ return ns; ++} ++ ++extern struct user_namespace *copy_user_ns(int flags, ++ struct user_namespace *old_ns); ++extern void free_user_ns(struct kref *kref); ++ ++static inline void put_user_ns(struct user_namespace *ns) ++{ ++ if (ns) ++ kref_put(&ns->kref, free_user_ns); ++} ++ ++#else ++ ++static inline struct user_namespace *get_user_ns(struct user_namespace *ns) ++{ ++ return &init_user_ns; ++} ++ ++static inline struct user_namespace *copy_user_ns(int flags, ++ struct user_namespace *old_ns) ++{ ++ if (flags & CLONE_NEWUSER) ++ return ERR_PTR(-EINVAL); ++ ++ return NULL; ++} ++ ++static inline void put_user_ns(struct user_namespace *ns) ++{ ++} ++ ++#endif ++ ++#endif /* _LINUX_USER_NAMESPACE_H */ +diff -Nurb linux-2.6.22-570/include/linux/utsname.h linux-2.6.22-591/include/linux/utsname.h +--- linux-2.6.22-570/include/linux/utsname.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/utsname.h 2007-12-21 15:36:12.000000000 -0500 +@@ -48,26 +48,14 @@ + kref_get(&ns->kref); + } + +-#ifdef CONFIG_UTS_NS +-extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns); ++extern struct uts_namespace *copy_utsname(unsigned long flags, ++ struct uts_namespace *ns); + extern void free_uts_ns(struct kref *kref); + + static inline void put_uts_ns(struct uts_namespace *ns) + { + kref_put(&ns->kref, free_uts_ns); + } +-#else +-static inline struct uts_namespace *copy_utsname(int flags, +- struct uts_namespace *ns) +-{ +- return ns; +-} +- +-static inline void put_uts_ns(struct uts_namespace *ns) +-{ +-} +-#endif +- + static inline struct new_utsname *utsname(void) + { + return &current->nsproxy->uts_ns->name; +diff -Nurb linux-2.6.22-570/include/linux/vmalloc.h linux-2.6.22-591/include/linux/vmalloc.h +--- linux-2.6.22-570/include/linux/vmalloc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/linux/vmalloc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -65,9 +65,10 @@ + unsigned long flags, int node, + gfp_t gfp_mask); + extern struct vm_struct *remove_vm_area(void *addr); ++ + extern int map_vm_area(struct vm_struct *area, pgprot_t prot, + struct page ***pages); +-extern void unmap_vm_area(struct vm_struct *area); ++extern void unmap_kernel_range(unsigned long addr, unsigned long size); + + /* + * Internals. Don't use..
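/*
 * [Editor's aside, not part of the patch] The user_namespace interface
 * introduced above is a plain kref-counted object. A minimal sketch of how
 * a caller is expected to use it, assuming only the helpers declared in
 * user_namespace.h; the function name attach_user_ns() is a hypothetical
 * illustration, not code from this patch:
 */
#include <linux/err.h>
#include <linux/user_namespace.h>

static struct user_namespace *attach_user_ns(int flags,
					     struct user_namespace *old_ns)
{
	struct user_namespace *new_ns;

	/* copy_user_ns() returns a fresh namespace (already holding one
	 * kref reference), an ERR_PTR() on failure, or NULL when the old
	 * namespace should simply be shared. */
	new_ns = copy_user_ns(flags, old_ns);
	if (new_ns)
		return new_ns;
	return get_user_ns(old_ns);	/* share: take another reference */
}
/* The matching release is put_user_ns(), which drops the kref and lets
 * free_user_ns() run once the last reference is gone. */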
+diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-591/include/net/addrconf.h +--- linux-2.6.22-570/include/net/addrconf.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/net/addrconf.h 2007-12-21 15:36:12.000000000 -0500 +@@ -61,7 +61,7 @@ + extern int ipv6_chk_addr(struct in6_addr *addr, + struct net_device *dev, + int strict); +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + extern int ipv6_chk_home_addr(struct in6_addr *addr); + #endif + extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, +diff -Nurb linux-2.6.22-570/include/net/af_unix.h linux-2.6.22-591/include/net/af_unix.h +--- linux-2.6.22-570/include/net/af_unix.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/net/af_unix.h 2007-12-21 15:36:14.000000000 -0500 +@@ -91,12 +91,11 @@ + #define unix_sk(__sk) ((struct unix_sock *)__sk) + + #ifdef CONFIG_SYSCTL +-extern int sysctl_unix_max_dgram_qlen; +-extern void unix_sysctl_register(void); +-extern void unix_sysctl_unregister(void); ++extern void unix_sysctl_register(struct net *net); ++extern void unix_sysctl_unregister(struct net *net); + #else +-static inline void unix_sysctl_register(void) {} +-static inline void unix_sysctl_unregister(void) {} ++static inline void unix_sysctl_register(struct net *net) {} ++static inline void unix_sysctl_unregister(struct net *net) {} + #endif + #endif + #endif +diff -Nurb linux-2.6.22-570/include/net/arp.h linux-2.6.22-591/include/net/arp.h +--- linux-2.6.22-570/include/net/arp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/arp.h 2007-12-21 15:36:14.000000000 -0500 +@@ -11,7 +11,7 @@ + + extern void arp_init(void); + extern int arp_find(unsigned char *haddr, struct sk_buff *skb); +-extern int arp_ioctl(unsigned int cmd, void __user *arg); ++extern int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg); + extern void arp_send(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, + unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th); +diff -Nurb linux-2.6.22-570/include/net/dst.h linux-2.6.22-591/include/net/dst.h +--- linux-2.6.22-570/include/net/dst.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/dst.h 2007-12-21 15:36:12.000000000 -0500 +@@ -47,7 +47,6 @@ + #define DST_NOXFRM 2 + #define DST_NOPOLICY 4 + #define DST_NOHASH 8 +-#define DST_BALANCED 0x10 + unsigned long expires; + + unsigned short header_len; /* more space at head required */ +diff -Nurb linux-2.6.22-570/include/net/fib_rules.h linux-2.6.22-591/include/net/fib_rules.h +--- linux-2.6.22-570/include/net/fib_rules.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/fib_rules.h 2007-12-21 15:36:14.000000000 -0500 +@@ -56,12 +56,12 @@ + int (*fill)(struct fib_rule *, struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *); +- u32 (*default_pref)(void); ++ u32 (*default_pref)(struct fib_rules_ops *ops); + size_t (*nlmsg_payload)(struct fib_rule *); + + /* Called after modifications to the rules set, must flush + * the route cache if one exists. 
*/ +- void (*flush_cache)(void); ++ void (*flush_cache)(struct fib_rules_ops *ops); + + int nlgroup; + const struct nla_policy *policy; +@@ -101,8 +101,8 @@ + return frh->table; + } + +-extern int fib_rules_register(struct fib_rules_ops *); +-extern int fib_rules_unregister(struct fib_rules_ops *); ++extern int fib_rules_register(struct net *net, struct fib_rules_ops *); ++extern int fib_rules_unregister(struct net *net, struct fib_rules_ops *); + + extern int fib_rules_lookup(struct fib_rules_ops *, + struct flowi *, int flags, +diff -Nurb linux-2.6.22-570/include/net/flow.h linux-2.6.22-591/include/net/flow.h +--- linux-2.6.22-570/include/net/flow.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/flow.h 2007-12-21 15:36:14.000000000 -0500 +@@ -8,9 +8,11 @@ + #define _NET_FLOW_H + + #include ++#include + #include + + struct flowi { ++ struct net *fl_net; + int oif; + int iif; + __u32 mark; +@@ -67,20 +69,16 @@ + + __be32 spi; + +-#ifdef CONFIG_IPV6_MIP6 + struct { + __u8 type; + } mht; +-#endif + } uli_u; + #define fl_ip_sport uli_u.ports.sport + #define fl_ip_dport uli_u.ports.dport + #define fl_icmp_type uli_u.icmpt.type + #define fl_icmp_code uli_u.icmpt.code + #define fl_ipsec_spi uli_u.spi +-#ifdef CONFIG_IPV6_MIP6 + #define fl_mh_type uli_u.mht.type +-#endif + __u32 secid; /* used by xfrm; see secid.txt */ + } __attribute__((__aligned__(BITS_PER_LONG/8))); + +diff -Nurb linux-2.6.22-570/include/net/inet6_hashtables.h linux-2.6.22-591/include/net/inet6_hashtables.h +--- linux-2.6.22-570/include/net/inet6_hashtables.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/inet6_hashtables.h 2007-12-21 15:36:14.000000000 -0500 +@@ -62,31 +62,31 @@ + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum, +- const int dif); ++ const int dif, struct net *net); + + extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, +- const int dif); ++ const int dif, struct net *net); + + static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum, +- const int dif) ++ const int dif, struct net *net) + { + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, +- daddr, hnum, dif); ++ daddr, hnum, dif, net); + if (sk) + return sk; + +- return inet6_lookup_listener(hashinfo, daddr, hnum, dif); ++ return inet6_lookup_listener(hashinfo, daddr, hnum, dif, net); + } + + extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const __be16 sport, + const struct in6_addr *daddr, const __be16 dport, +- const int dif); ++ const int dif, struct net *net); + #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ + #endif /* _INET6_HASHTABLES_H */ +diff -Nurb linux-2.6.22-570/include/net/inet_hashtables.h linux-2.6.22-591/include/net/inet_hashtables.h +--- linux-2.6.22-570/include/net/inet_hashtables.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/inet_hashtables.h 2007-12-21 15:36:14.000000000 -0500 +@@ -75,6 +75,7 @@ + * ports are created in O(1) time? I thought so. 
;-) -DaveM + */ + struct inet_bind_bucket { ++ struct net *net; + unsigned short port; + signed short fastreuse; + struct hlist_node node; +@@ -138,34 +139,35 @@ + extern struct inet_bind_bucket * + inet_bind_bucket_create(struct kmem_cache *cachep, + struct inet_bind_hashbucket *head, ++ struct net *net, + const unsigned short snum); + extern void inet_bind_bucket_destroy(struct kmem_cache *cachep, + struct inet_bind_bucket *tb); + +-static inline int inet_bhashfn(const __u16 lport, const int bhash_size) ++static inline int inet_bhashfn(struct net *net, const __u16 lport, const int bhash_size) + { +- return lport & (bhash_size - 1); ++ return (((unsigned long)net) ^ lport) & (bhash_size - 1); + } + + extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + + /* These can have wildcards, don't try too hard. */ +-static inline int inet_lhashfn(const unsigned short num) ++static inline int inet_lhashfn(struct net *net, const unsigned short num) + { +- return num & (INET_LHTABLE_SIZE - 1); ++ return (((unsigned long)net) ^ num) & (INET_LHTABLE_SIZE - 1); + } + + static inline int inet_sk_listen_hashfn(const struct sock *sk) + { +- return inet_lhashfn(inet_sk(sk)->num); ++ return inet_lhashfn(sk->sk_net, inet_sk(sk)->num); + } + + /* Caller must disable local BH processing. */ + static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) + { +- const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); ++ const int bhash = inet_bhashfn(sk->sk_net, inet_sk(child)->num, table->bhash_size); + struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_bucket *tb; + +@@ -274,12 +276,13 @@ + extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const __be32 daddr, + const unsigned short hnum, +- const int dif); ++ const int dif, struct net *net); + + static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, +- __be32 daddr, __be16 dport, int dif) ++ __be32 daddr, __be16 dport, ++ int dif, struct net *net) + { +- return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); ++ return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif, net); + } + + /* Socket demux engine toys. 
*/ +@@ -313,30 +316,34 @@ + (((__force __u64)(__be32)(__daddr)) << 32) | \ + ((__force __u64)(__be32)(__saddr))); + #endif /* __BIG_ENDIAN */ +-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ ++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ ++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ ++ ((__sk)->sk_net == __net)) ++#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\ + (((__sk)->sk_hash == (__hash)) && \ + ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ +- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ ++ ((__sk)->sk_net == __net)) + #else /* 32-bit arch */ + #define INET_ADDR_COOKIE(__name, __saddr, __daddr) +-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif) \ ++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ +- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ ++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ ++ ((__sk)->sk_net == __net)) ++#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __net) \ + (((__sk)->sk_hash == (__hash)) && \ + (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ +- (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) ++ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \ ++ ((__sk)->sk_net == __net)) + #endif /* 64-bit arch */ + + /* +@@ -349,7 +356,7 @@ + __inet_lookup_established(struct inet_hashinfo *hashinfo, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const u16 hnum, +- const int dif) ++ const int dif, struct net *net) + { + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); +@@ -358,19 +365,19 @@ + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ +- unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); ++ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + + prefetch(head->chain.first); + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { +- if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) ++ if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ + sk_for_each(sk, node, &head->twchain) { +- if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) ++ if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net)) + goto hit; + } + sk = NULL; +@@ -386,32 +393,32 @@ + inet_lookup_established(struct inet_hashinfo *hashinfo, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, +- const int dif) ++ const int dif, struct net *net) + { + return __inet_lookup_established(hashinfo, saddr, sport, daddr, +- ntohs(dport), dif); ++ ntohs(dport), dif, net); + } + + static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, +- const int dif) ++ const int dif, struct net *net) + { + u16 hnum = ntohs(dport); + struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, +- hnum, dif); +- return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); ++ hnum, dif, net); ++ return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif, net); + } + + static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, +- const int dif) ++ const int dif, struct net *net) + { + struct sock *sk; + + local_bh_disable(); +- sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); ++ sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif, net); + local_bh_enable(); + + return sk; +diff -Nurb linux-2.6.22-570/include/net/inet_sock.h linux-2.6.22-591/include/net/inet_sock.h +--- linux-2.6.22-570/include/net/inet_sock.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/inet_sock.h 2007-12-21 15:36:14.000000000 -0500 +@@ -171,10 +171,12 @@ + extern u32 inet_ehash_secret; + extern void build_ehash_secret(void); + +-static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, ++static inline unsigned int inet_ehashfn(struct net *net, ++ const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport) + { +- return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr, ++ return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr ^ ++ (__force __u32) ((unsigned long)net), + ((__u32) lport) << 16 | (__force __u32)fport, + inet_ehash_secret); + } +@@ -187,7 +189,7 @@ + const __be32 faddr = inet->daddr; + const __be16 fport = inet->dport; + +- return inet_ehashfn(laddr, lport, faddr, fport); ++ return inet_ehashfn(sk->sk_net, laddr, lport, faddr, fport); + } + + #endif /* _INET_SOCK_H */ +diff -Nurb linux-2.6.22-570/include/net/inet_timewait_sock.h linux-2.6.22-591/include/net/inet_timewait_sock.h +--- linux-2.6.22-570/include/net/inet_timewait_sock.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/net/inet_timewait_sock.h 2007-12-21 15:36:14.000000000 -0500 +@@ -115,6 +115,7 @@ + #define tw_refcnt __tw_common.skc_refcnt + #define tw_hash __tw_common.skc_hash + #define tw_prot __tw_common.skc_prot ++#define tw_net __tw_common.skc_net + #define tw_xid __tw_common.skc_xid + #define tw_vx_info __tw_common.skc_vx_info + #define tw_nid __tw_common.skc_nid +diff -Nurb linux-2.6.22-570/include/net/inetpeer.h linux-2.6.22-591/include/net/inetpeer.h +--- linux-2.6.22-570/include/net/inetpeer.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/inetpeer.h 2007-12-21 15:36:14.000000000 -0500 +@@ -15,6 +15,8 @@ + #include + #include + ++struct net; ++ + struct inet_peer + { + /* group together avl_left,avl_right,v4daddr to 
speedup lookups */ +@@ -22,7 +24,11 @@ + __be32 v4daddr; /* peer's address */ + __u16 avl_height; + __u16 ip_id_count; /* IP ID for the next packet */ +- struct inet_peer *unused_next, **unused_prevp; ++ union { ++ struct inet_peer *unused_next; ++ struct net *net; ++ } u; ++ struct inet_peer **unused_prevp; + __u32 dtime; /* the time of last use of not + * referenced entries */ + atomic_t refcnt; +@@ -34,7 +40,7 @@ + void inet_initpeers(void) __init; + + /* can be called with or without local BH being disabled */ +-struct inet_peer *inet_getpeer(__be32 daddr, int create); ++struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create); + + /* can be called from BH context or outside */ + extern void inet_putpeer(struct inet_peer *p); +diff -Nurb linux-2.6.22-570/include/net/ip.h linux-2.6.22-591/include/net/ip.h +--- linux-2.6.22-570/include/net/ip.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/ip.h 2007-12-21 15:36:14.000000000 -0500 +@@ -149,13 +149,6 @@ + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, + unsigned int len); + +-struct ipv4_config +-{ +- int log_martians; +- int no_pmtu_disc; +-}; +- +-extern struct ipv4_config ipv4_config; + DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); + #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) + #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) +@@ -171,27 +164,6 @@ + extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign); + extern void snmp_mib_free(void *ptr[2]); + +-extern int sysctl_local_port_range[2]; +-extern int sysctl_ip_default_ttl; +-extern int sysctl_ip_nonlocal_bind; +- +-/* From ip_fragment.c */ +-extern int sysctl_ipfrag_high_thresh; +-extern int sysctl_ipfrag_low_thresh; +-extern int sysctl_ipfrag_time; +-extern int sysctl_ipfrag_secret_interval; +-extern int sysctl_ipfrag_max_dist; +- +-/* From inetpeer.c */ +-extern int inet_peer_threshold; +-extern int inet_peer_minttl; +-extern int inet_peer_maxttl; +-extern int inet_peer_gc_mintime; +-extern int inet_peer_gc_maxtime; +- +-/* From ip_output.c */ +-extern int sysctl_ip_dynaddr; +- + extern void ipfrag_init(void); + + #ifdef CONFIG_INET +@@ -332,8 +304,6 @@ + }; + + struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); +-extern int ip_frag_nqueues; +-extern atomic_t ip_frag_mem; + + /* + * Functions provided by ip_forward.c +@@ -392,5 +362,6 @@ + #endif + + extern struct ctl_table ipv4_table[]; ++extern struct ctl_table multi_ipv4_table[]; + + #endif /* _IP_H */ +diff -Nurb linux-2.6.22-570/include/net/ip_fib.h linux-2.6.22-591/include/net/ip_fib.h +--- linux-2.6.22-570/include/net/ip_fib.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/ip_fib.h 2007-12-21 15:36:14.000000000 -0500 +@@ -39,7 +39,6 @@ + int fc_mx_len; + int fc_mp_len; + u32 fc_flow; +- u32 fc_mp_alg; + u32 fc_nlflags; + struct nl_info fc_nlinfo; + }; +@@ -89,6 +88,7 @@ + #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED + u32 fib_mp_alg; + #endif ++ struct net * fib_net; + struct fib_nh fib_nh[0]; + #define fib_dev fib_nh[0].nh_dev + }; +@@ -103,10 +103,6 @@ + unsigned char nh_sel; + unsigned char type; + unsigned char scope; +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- __be32 network; +- __be32 netmask; +-#endif + struct fib_info *fi; + #ifdef CONFIG_IP_MULTIPLE_TABLES + struct fib_rule *r; +@@ -145,14 +141,6 @@ + #define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) + #define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) + +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 
+-#define FIB_RES_NETWORK(res) ((res).network) +-#define FIB_RES_NETMASK(res) ((res).netmask) +-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +-#define FIB_RES_NETWORK(res) (0) +-#define FIB_RES_NETMASK(res) (0) +-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ +- + struct fib_table { + struct hlist_node tb_hlist; + u32 tb_id; +@@ -171,43 +159,43 @@ + + #ifndef CONFIG_IP_MULTIPLE_TABLES + +-extern struct fib_table *ip_fib_local_table; +-extern struct fib_table *ip_fib_main_table; +- +-static inline struct fib_table *fib_get_table(u32 id) ++static inline struct fib_table *fib_get_table(struct net *net, u32 id) + { + if (id != RT_TABLE_LOCAL) +- return ip_fib_main_table; +- return ip_fib_local_table; ++ return net->ip_fib_main_table; ++ return net->ip_fib_local_table; + } + +-static inline struct fib_table *fib_new_table(u32 id) ++static inline struct fib_table *fib_new_table(struct net *net, u32 id) + { +- return fib_get_table(id); ++ return fib_get_table(net, id); + } + + static inline int fib_lookup(const struct flowi *flp, struct fib_result *res) + { +- if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) && +- ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res)) ++ struct net *net = flp->fl_net; ++ struct fib_table *local_table = net->ip_fib_local_table; ++ struct fib_table *main_table = net->ip_fib_main_table; ++ if (local_table->tb_lookup(local_table, flp, res) && ++ main_table->tb_lookup(main_table, flp, res)) + return -ENETUNREACH; + return 0; + } + + static inline void fib_select_default(const struct flowi *flp, struct fib_result *res) + { ++ struct net *net = flp->fl_net; ++ struct fib_table *main_table = net->ip_fib_main_table; + if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) +- ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res); ++ main_table->tb_select_default(main_table, flp, res); + } + + #else /* CONFIG_IP_MULTIPLE_TABLES */ +-#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) +-#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) + + extern int fib_lookup(struct flowi *flp, struct fib_result *res); + +-extern struct fib_table *fib_new_table(u32 id); +-extern struct fib_table *fib_get_table(u32 id); ++extern struct fib_table *fib_new_table(struct net *net, u32 id); ++extern struct fib_table *fib_get_table(struct net *net, u32 id); + extern void fib_select_default(const struct flowi *flp, struct fib_result *res); + + #endif /* CONFIG_IP_MULTIPLE_TABLES */ +@@ -223,15 +211,17 @@ + + /* Exported by fib_semantics.c */ + extern int ip_fib_check_default(__be32 gw, struct net_device *dev); +-extern int fib_sync_down(__be32 local, struct net_device *dev, int force); ++extern int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force); + extern int fib_sync_up(struct net_device *dev); + extern __be32 __fib_res_prefsrc(struct fib_result *res); + + /* Exported by fib_hash.c */ + extern struct fib_table *fib_hash_init(u32 id); ++extern void fib_hash_exit(struct fib_table *tb); + + #ifdef CONFIG_IP_MULTIPLE_TABLES +-extern void __init fib4_rules_init(void); ++extern void fib4_rules_init(struct net * net); ++extern void fib4_rules_exit(struct net * net); + + #ifdef CONFIG_NET_CLS_ROUTE + extern u32 fib_rules_tclass(struct fib_result *res); +@@ -274,8 +264,11 @@ + } + + #ifdef CONFIG_PROC_FS +-extern int fib_proc_init(void); +-extern void fib_proc_exit(void); ++extern int fib_proc_init(struct net * net); ++extern void fib_proc_exit(struct net * net); + #endif + ++extern int fib_info_init(struct net *net); 
++extern void fib_info_exit(struct net *net); ++ + #endif /* _NET_FIB_H */ +diff -Nurb linux-2.6.22-570/include/net/ip_mp_alg.h linux-2.6.22-591/include/net/ip_mp_alg.h +--- linux-2.6.22-570/include/net/ip_mp_alg.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/ip_mp_alg.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,96 +0,0 @@ +-/* ip_mp_alg.h: IPV4 multipath algorithm support. +- * +- * Copyright (C) 2004, 2005 Einar Lueck +- * Copyright (C) 2005 David S. Miller +- */ +- +-#ifndef _NET_IP_MP_ALG_H +-#define _NET_IP_MP_ALG_H +- +-#include +-#include +-#include +- +-struct fib_nh; +- +-struct ip_mp_alg_ops { +- void (*mp_alg_select_route)(const struct flowi *flp, +- struct rtable *rth, struct rtable **rp); +- void (*mp_alg_flush)(void); +- void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask, +- unsigned char prefixlen, +- const struct fib_nh *nh); +- void (*mp_alg_remove)(struct rtable *rth); +-}; +- +-extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg); +-extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg); +- +-extern struct ip_mp_alg_ops *ip_mp_alg_table[]; +- +-static inline int multipath_select_route(const struct flowi *flp, +- struct rtable *rth, +- struct rtable **rp) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; +- +- /* mp_alg_select_route _MUST_ be implemented */ +- if (ops && (rth->u.dst.flags & DST_BALANCED)) { +- ops->mp_alg_select_route(flp, rth, rp); +- return 1; +- } +-#endif +- return 0; +-} +- +-static inline void multipath_flush(void) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- int i; +- +- for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) { +- struct ip_mp_alg_ops *ops = ip_mp_alg_table[i]; +- +- if (ops && ops->mp_alg_flush) +- ops->mp_alg_flush(); +- } +-#endif +-} +- +-static inline void multipath_set_nhinfo(struct rtable *rth, +- __be32 network, __be32 netmask, +- unsigned char prefixlen, +- const struct fib_nh *nh) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; +- +- if (ops && ops->mp_alg_set_nhinfo) +- ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh); +-#endif +-} +- +-static inline void multipath_remove(struct rtable *rth) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg]; +- +- if (ops && ops->mp_alg_remove && +- (rth->u.dst.flags & DST_BALANCED)) +- ops->mp_alg_remove(rth); +-#endif +-} +- +-static inline int multipath_comparekeys(const struct flowi *flp1, +- const struct flowi *flp2) +-{ +- return flp1->fl4_dst == flp2->fl4_dst && +- flp1->fl4_src == flp2->fl4_src && +- flp1->oif == flp2->oif && +- flp1->mark == flp2->mark && +- !((flp1->fl4_tos ^ flp2->fl4_tos) & +- (IPTOS_RT_MASK | RTO_ONLINK)); +-} +- +-#endif /* _NET_IP_MP_ALG_H */ +diff -Nurb linux-2.6.22-570/include/net/llc_conn.h linux-2.6.22-591/include/net/llc_conn.h +--- linux-2.6.22-570/include/net/llc_conn.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/llc_conn.h 2007-12-21 15:36:14.000000000 -0500 +@@ -93,7 +93,7 @@ + return skb->cb[sizeof(skb->cb) - 1]; + } + +-extern struct sock *llc_sk_alloc(int family, gfp_t priority, ++extern struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, + struct proto *prot); + extern void llc_sk_free(struct sock *sk); + +diff -Nurb linux-2.6.22-570/include/net/mip6.h linux-2.6.22-591/include/net/mip6.h +--- 
linux-2.6.22-570/include/net/mip6.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/mip6.h 2007-12-21 15:36:12.000000000 -0500 +@@ -54,8 +54,4 @@ + #define IP6_MH_TYPE_BERROR 7 /* Binding Error */ + #define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR + +-extern int mip6_init(void); +-extern void mip6_fini(void); +-extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); +- + #endif +diff -Nurb linux-2.6.22-570/include/net/neighbour.h linux-2.6.22-591/include/net/neighbour.h +--- linux-2.6.22-570/include/net/neighbour.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/neighbour.h 2007-12-21 15:36:14.000000000 -0500 +@@ -34,6 +34,7 @@ + + struct neigh_parms + { ++ struct net *net; + struct net_device *dev; + struct neigh_parms *next; + int (*neigh_setup)(struct neighbour *); +@@ -126,6 +127,7 @@ + struct pneigh_entry + { + struct pneigh_entry *next; ++ struct net *net; + struct net_device *dev; + u8 flags; + u8 key[0]; +@@ -187,6 +189,7 @@ + const void *pkey, + struct net_device *dev); + extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, ++ struct net *net, + const void *pkey); + extern struct neighbour * neigh_create(struct neigh_table *tbl, + const void *pkey, +@@ -205,21 +208,24 @@ + struct net_device *dev); + + extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); ++extern struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net); + extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); + extern void neigh_parms_destroy(struct neigh_parms *parms); + extern unsigned long neigh_rand_reach_time(unsigned long base); + + extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, + struct sk_buff *skb); +-extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat); +-extern int pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev); ++extern struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat); ++extern int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev); + + extern void neigh_app_ns(struct neighbour *n); + extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); + extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); + extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); + +-struct neigh_seq_state { ++struct neigh_seq_state ++{ ++ struct net *net; + struct neigh_table *tbl; + void *(*neigh_sub_iter)(struct neigh_seq_state *state, + struct neighbour *n, loff_t *pos); +diff -Nurb linux-2.6.22-570/include/net/net_namespace.h linux-2.6.22-591/include/net/net_namespace.h +--- linux-2.6.22-570/include/net/net_namespace.h 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/include/net/net_namespace.h 2007-12-21 15:36:14.000000000 -0500 +@@ -0,0 +1,236 @@ ++/* ++ * Operations on the network namespace ++ */ ++#ifndef __NET_NET_NAMESPACE_H ++#define __NET_NET_NAMESPACE_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct sock; ++struct xt_af_pernet; ++struct ipv4_devconf; ++struct neigh_parms; ++struct inet_peer; ++struct xt_table; ++struct net { ++ atomic_t count; /* To decided when the network namespace ++ * should go ++ */ ++ atomic_t use_count; /* 
For references we destroy on demand */ ++ struct list_head list; /* list of network namespace structures */ ++ struct work_struct work; /* work struct for freeing */ ++ ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *proc_net; ++ struct proc_dir_entry *proc_net_stat; ++ struct proc_dir_entry proc_net_root; ++# ifdef CONFIG_NETFILTER ++ struct proc_dir_entry *proc_net_netfilter; ++# endif ++#endif ++#ifdef CONFIG_SYSCTL ++ struct ctl_table_header net_table_header; ++#endif ++ struct net_device loopback_dev; /* The loopback */ ++ struct list_head dev_base_head; /* All devices */ ++ ++ struct hlist_head *dev_name_head; ++ struct hlist_head *dev_index_head; ++ ++ struct sock * rtnl; /* rtnetlink socket */ ++ ++ ++ /* core netfilter */ ++ struct xt_af_pernet * xtn; ++ ++ /* core fib_rules */ ++ struct list_head rules_ops; ++ spinlock_t rules_mod_lock; ++ ++#ifdef CONFIG_XFRM ++ u32 sysctl_xfrm_aevent_etime; ++ u32 sysctl_xfrm_aevent_rseqth; ++ int sysctl_xfrm_larval_drop; ++ u32 sysctl_xfrm_acq_expires; ++#endif /* CONFIG_XFRM */ ++ ++ int sysctl_somaxconn; ++ ++#ifdef CONFIG_PACKET ++ /* List of all packet sockets. */ ++ rwlock_t packet_sklist_lock; ++ struct hlist_head packet_sklist; ++#endif /* CONFIG_PACKET */ ++#ifdef CONFIG_UNIX ++ int sysctl_unix_max_dgram_qlen; ++ void * unix_sysctl; ++#endif /* CONFIG_UNIX */ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ void * fib4_table; ++#endif /* CONFIG_IP_MULTIPLE_TABLES */ ++#ifdef CONFIG_IP_FIB_HASH ++ int fn_hash_last_dflt; ++#endif ++#ifdef CONFIG_IP_FIB_TRIE ++ int trie_last_dflt; ++#endif ++#ifndef CONFIG_IP_MULTIPLE_TABLES ++ struct fib_table *ip_fib_local_table; ++ struct fib_table *ip_fib_main_table; ++#endif ++ struct hlist_head *ip_fib_table_hash; ++ struct sock *nlfl; ++ ++ /* fib_semantics */ ++ struct hlist_head *fib_info_hash; ++ struct hlist_head *fib_info_laddrhash; ++ unsigned int fib_info_hash_size; ++ unsigned int fib_info_cnt; ++ struct hlist_head *fib_info_devhash; ++ ++ /* af_inet.c */ ++ int sysctl_ip_nonlocal_bind; /* __read_mostly */ ++ int sysctl_ip_default_ttl; /* __read_mostly */ ++ int sysctl_ipfrag_high_thresh; ++ int sysctl_ipfrag_low_thresh; ++ int sysctl_ipfrag_time; ++ int sysctl_ipfrag_secret_interval; ++ int sysctl_ipfrag_max_dist; ++ int sysctl_ipv4_no_pmtu_disc; ++ int sysctl_local_port_range[2]; ++ int sysctl_ip_dynaddr; ++ int sysctl_tcp_timestamps; /* __read_mostly */ ++ int sysctl_tcp_window_scaling; /* __read_mostly */ ++ /* inetpeer.c */ ++ int inet_peer_threshold; ++ int inet_peer_minttl; ++ int inet_peer_maxttl; ++ int inet_peer_gc_mintime; ++ int inet_peer_gc_maxtime; ++ ++ /* devinet */ ++ struct ipv4_devconf *ipv4_devconf; ++ struct ipv4_devconf *ipv4_devconf_dflt; ++ ++ /* arp.c */ ++ struct neigh_parms *arp_neigh_parms_default; ++ ++ /* icmp.c */ ++ struct socket **__icmp_socket; ++ ++ /* inetpeer.c */ ++ struct inet_peer *peer_root; ++ int peer_total; ++ struct inet_peer *inet_peer_unused_head; ++ struct inet_peer **inet_peer_unused_tailp; ++ struct timer_list peer_periodic_timer; ++ ++ /* ip_fragment.c */ ++ struct hlist_head *ipq_hash; ++ u32 ipfrag_hash_rnd; ++ struct list_head ipq_lru_list; ++ int ip_frag_nqueues; ++ atomic_t ip_frag_mem; ++ struct timer_list ipfrag_secret_timer; ++ ++ /* udp.c */ ++ int udp_port_rover; ++ ++ /* iptable_filter.c */ ++ struct xt_table *ip_packet_filter; ++}; ++ ++extern struct net init_net; ++extern struct list_head net_namespace_list; ++ ++extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns); ++extern void __put_net(struct net *net); ++ 
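The two counters declared in struct net above split namespace lifetime in the way the inline helpers just below expose it: count decides when the namespace itself goes away, while use_count tracks references the teardown path may destroy on demand. A minimal sketch of the intended pattern for a consumer that caches a namespace pointer; struct foo_state and the foo_* functions are hypothetical, only get_net()/put_net() come from this header:

#include <linux/slab.h>
#include <net/net_namespace.h>

/* Hypothetical per-net consumer: pins the namespace for as long as it
 * holds a pointer into it, the same way sk_clone() takes get_net() on
 * nsk->sk_net elsewhere in this patch. */
struct foo_state {
	struct net *net;		/* counted reference */
};

static struct foo_state *foo_attach(struct net *net)
{
	struct foo_state *st = kmalloc(sizeof(*st), GFP_KERNEL);

	if (st == NULL)
		return NULL;
	st->net = get_net(net);	/* net->count++, namespace cannot go away */
	return st;
}

static void foo_detach(struct foo_state *st)
{
	put_net(st->net);	/* last put triggers __put_net() teardown */
	kfree(st);
}
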
++static inline struct net *get_net(struct net *net) ++{ ++ atomic_inc(&net->count); ++ return net; ++} ++ ++static inline void put_net(struct net *net) ++{ ++ if (atomic_dec_and_test(&net->count)) ++ __put_net(net); ++} ++ ++static inline struct net *hold_net(struct net *net) ++{ ++ atomic_inc(&net->use_count); ++ return net; ++} ++ ++static inline void release_net(struct net *net) ++{ ++ atomic_dec(&net->use_count); ++} ++ ++extern void net_lock(void); ++extern void net_unlock(void); ++ ++#define for_each_net(VAR) \ ++ list_for_each_entry(VAR, &net_namespace_list, list) ++ ++ ++struct pernet_operations { ++ struct list_head list; ++ int (*init)(struct net *net); ++ void (*exit)(struct net *net); ++}; ++ ++extern int register_pernet_subsys(struct pernet_operations *); ++extern void unregister_pernet_subsys(struct pernet_operations *); ++extern int register_pernet_device(struct pernet_operations *); ++extern void unregister_pernet_device(struct pernet_operations *); ++ ++#ifdef CONFIG_PROC_FS ++static inline struct net *PDE_NET(struct proc_dir_entry *pde) ++{ ++ return pde->parent->data; ++} ++ ++static inline struct net *PROC_NET(const struct inode *inode) ++{ ++ return PDE_NET(PDE(inode)); ++} ++ ++static inline struct proc_dir_entry *proc_net_create(struct net *net, ++ const char *name, mode_t mode, get_info_t *get_info) ++{ ++ return create_proc_info_entry(name,mode, net->proc_net, get_info); ++} ++ ++static inline struct proc_dir_entry *proc_net_fops_create(struct net *net, ++ const char *name, mode_t mode, const struct file_operations *fops) ++{ ++ struct proc_dir_entry *res = ++ create_proc_entry(name, mode, net->proc_net); ++ if (res) ++ res->proc_fops = fops; ++ return res; ++} ++ ++static inline void proc_net_remove(struct net *net, const char *name) ++{ ++ remove_proc_entry(name, net->proc_net); ++} ++ ++#else ++ ++#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) ++#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) ++static inline void proc_net_remove(struct net *net, const char *name) {} ++ ++#endif /* CONFIG_PROC_FS */ ++ ++#endif /* __NET_NET_NAMESPACE_H */ +diff -Nurb linux-2.6.22-570/include/net/netlink.h linux-2.6.22-591/include/net/netlink.h +--- linux-2.6.22-570/include/net/netlink.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/netlink.h 2007-12-21 15:36:14.000000000 -0500 +@@ -118,6 +118,9 @@ + * Nested Attributes Construction: + * nla_nest_start(skb, type) start a nested attribute + * nla_nest_end(skb, nla) finalize a nested attribute ++ * nla_nest_compat_start(skb, type, start a nested compat attribute ++ * len, data) ++ * nla_nest_compat_end(skb, type) finalize a nested compat attribute + * nla_nest_cancel(skb, nla) cancel nested attribute construction + * + * Attribute Length Calculations: +@@ -152,6 +155,7 @@ + * nla_find_nested() find attribute in nested attributes + * nla_parse() parse and validate stream of attrs + * nla_parse_nested() parse nested attribuets ++ * nla_parse_nested_compat() parse nested compat attributes + * nla_for_each_attr() loop over all attributes + * nla_for_each_nested() loop over the nested attributes + *========================================================================= +@@ -170,6 +174,7 @@ + NLA_FLAG, + NLA_MSECS, + NLA_NESTED, ++ NLA_NESTED_COMPAT, + NLA_NUL_STRING, + NLA_BINARY, + __NLA_TYPE_MAX, +@@ -190,6 +195,7 @@ + * NLA_NUL_STRING Maximum length of string (excluding NUL) + * NLA_FLAG Unused + * NLA_BINARY Maximum length of attribute payload ++ * 
NLA_NESTED_COMPAT Exact length of structure payload + * All other Exact length of attribute payload + * + * Example: +@@ -212,6 +218,7 @@ + struct nl_info { + struct nlmsghdr *nlh; + u32 pid; ++ struct net *net; + }; + + extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, +@@ -733,6 +740,39 @@ + { + return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy); + } ++ ++/** ++ * nla_parse_nested_compat - parse nested compat attributes ++ * @tb: destination array with maxtype+1 elements ++ * @maxtype: maximum attribute type to be expected ++ * @nla: attribute containing the nested attributes ++ * @data: pointer to point to contained structure ++ * @len: length of contained structure ++ * @policy: validation policy ++ * ++ * Parse a nested compat attribute. The compat attribute contains a structure ++ * and optionally a set of nested attributes. On success the data pointer ++ * points to the nested data and tb contains the parsed attributes ++ * (see nla_parse). ++ */ ++static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, ++ struct nlattr *nla, ++ const struct nla_policy *policy, ++ int len) ++{ ++ if (nla_len(nla) < len) ++ return -1; ++ if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr)) ++ return nla_parse_nested(tb, maxtype, ++ nla_data(nla) + NLA_ALIGN(len), ++ policy); ++ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); ++ return 0; ++} ++ ++#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \ ++({ data = nla_len(nla) >= len ? nla_data(nla) : NULL; \ ++ __nla_parse_nested_compat(tb, maxtype, nla, policy, len); }) + /** + * nla_put_u8 - Add a u16 netlink attribute to a socket buffer + * @skb: socket buffer to add attribute to +@@ -965,6 +1005,51 @@ + } + + /** ++ * nla_nest_compat_start - Start a new level of nested compat attributes ++ * @skb: socket buffer to add attributes to ++ * @attrtype: attribute type of container ++ * @attrlen: length of structure ++ * @data: pointer to structure ++ * ++ * Start a nested compat attribute that contains both a structure and ++ * a set of nested attributes. ++ * ++ * Returns the container attribute ++ */ ++static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb, ++ int attrtype, int attrlen, ++ const void *data) ++{ ++ struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb); ++ ++ if (nla_put(skb, attrtype, attrlen, data) < 0) ++ return NULL; ++ if (nla_nest_start(skb, attrtype) == NULL) { ++ nlmsg_trim(skb, start); ++ return NULL; ++ } ++ return start; ++} ++ ++/** ++ * nla_nest_compat_end - Finalize nesting of compat attributes ++ * @skb: socket buffer the attribtues are stored in ++ * @start: container attribute ++ * ++ * Corrects the container attribute header to include the all ++ * appeneded attributes. ++ * ++ * Returns the total data length of the skb. 
++ */ ++static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) ++{ ++ struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len); ++ ++ start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start; ++ return nla_nest_end(skb, nest); ++} ++ ++/** + * nla_nest_cancel - Cancel nesting of attributes + * @skb: socket buffer the message is stored in + * @start: container attribute +diff -Nurb linux-2.6.22-570/include/net/pkt_cls.h linux-2.6.22-591/include/net/pkt_cls.h +--- linux-2.6.22-570/include/net/pkt_cls.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/pkt_cls.h 2007-12-21 15:36:14.000000000 -0500 +@@ -2,6 +2,7 @@ + #define __NET_PKT_CLS_H + + #include ++#include + #include + #include + +@@ -357,7 +358,7 @@ + if (indev[0]) { + if (!skb->iif) + return 0; +- dev = __dev_get_by_index(skb->iif); ++ dev = __dev_get_by_index(&init_net, skb->iif); + if (!dev || strcmp(indev, dev->name)) + return 0; + } +diff -Nurb linux-2.6.22-570/include/net/protocol.h linux-2.6.22-591/include/net/protocol.h +--- linux-2.6.22-570/include/net/protocol.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/protocol.h 2007-12-21 15:36:14.000000000 -0500 +@@ -86,6 +86,7 @@ + #define INET_PROTOSW_REUSE 0x01 /* Are ports automatically reusable? */ + #define INET_PROTOSW_PERMANENT 0x02 /* Permanent protocols are unremovable. */ + #define INET_PROTOSW_ICSK 0x04 /* Is this an inet_connection_sock? */ ++#define INET_PROTOSW_NETNS 0x08 /* Multiple namespaces support? */ + + extern struct net_protocol *inet_protocol_base; + extern struct net_protocol *inet_protos[MAX_INET_PROTOS]; +diff -Nurb linux-2.6.22-570/include/net/raw.h linux-2.6.22-591/include/net/raw.h +--- linux-2.6.22-570/include/net/raw.h 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/include/net/raw.h 2007-12-21 15:36:14.000000000 -0500 +@@ -34,7 +34,7 @@ + extern rwlock_t raw_v4_lock; + + +-extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, ++extern struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, + int dif, int tag); + +diff -Nurb linux-2.6.22-570/include/net/rawv6.h linux-2.6.22-591/include/net/rawv6.h +--- linux-2.6.22-570/include/net/rawv6.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/rawv6.h 2007-12-21 15:36:12.000000000 -0500 +@@ -3,6 +3,8 @@ + + #ifdef __KERNEL__ + ++#include ++ + #define RAWV6_HTABLE_SIZE MAX_INET_PROTOS + extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; + extern rwlock_t raw_v6_lock; +@@ -23,6 +25,13 @@ + int type, int code, + int offset, __be32 info); + ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) ++int rawv6_mh_filter_register(int (*filter)(struct sock *sock, ++ struct sk_buff *skb)); ++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, ++ struct sk_buff *skb)); ++#endif ++ + #endif + + #endif +diff -Nurb linux-2.6.22-570/include/net/route.h linux-2.6.22-591/include/net/route.h +--- linux-2.6.22-570/include/net/route.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/net/route.h 2007-12-21 15:36:14.000000000 -0500 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -66,7 +67,6 @@ + + unsigned rt_flags; + __u16 rt_type; +- __u16 rt_multipath_alg; + + __be32 rt_dst; /* Path destination */ + __be32 rt_src; /* Path source */ +@@ -123,9 +123,9 @@ + extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); + 
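The nest_compat helpers added above exist so one attribute can carry a legacy fixed struct followed by new-style nested attributes. A minimal sketch of both directions of that call pattern, assuming an invented message layout (struct foo_hdr, the FOO_ATTR_* enum and foo_policy are illustrative, not part of this patch):

#include <net/netlink.h>

struct foo_hdr {
	__u32 id;			/* legacy fixed-layout payload */
};

enum { FOO_ATTR_UNSPEC, FOO_ATTR_DATA, FOO_ATTR_NAME, __FOO_ATTR_MAX };
#define FOO_ATTR_MAX (__FOO_ATTR_MAX - 1)

static const struct nla_policy foo_policy[FOO_ATTR_MAX + 1] = {
	[FOO_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = 15 },
};

static int foo_fill(struct sk_buff *skb, u32 id, const char *name)
{
	struct foo_hdr hdr = { .id = id };
	struct nlattr *start;

	/* writes the struct, then opens a nest of the same attribute type */
	start = nla_nest_compat_start(skb, FOO_ATTR_DATA, sizeof(hdr), &hdr);
	if (start == NULL)
		return -EMSGSIZE;

	if (nla_put_string(skb, FOO_ATTR_NAME, name) < 0) {
		nla_nest_cancel(skb, start);	/* trims back past 'start' */
		return -EMSGSIZE;
	}

	/* closes the nest and widens the outer compat attribute header */
	nla_nest_compat_end(skb, start);
	return 0;
}

static int foo_parse(struct nlattr *nla)
{
	struct nlattr *tb[FOO_ATTR_MAX + 1];
	struct foo_hdr *hdr;

	/* 'hdr' ends up pointing at the fixed struct, tb[] at the rest */
	if (nla_parse_nested_compat(tb, FOO_ATTR_MAX, nla, foo_policy,
				    hdr, sizeof(*hdr)) < 0)
		return -EINVAL;
	return hdr->id;
}
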
extern void ip_rt_send_redirect(struct sk_buff *skb); + +-extern unsigned inet_addr_type(__be32 addr); ++extern unsigned inet_addr_type(struct net *net, __be32 addr); + extern void ip_rt_multicast_event(struct in_device *); +-extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); ++extern int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg); + extern void ip_rt_get_source(u8 *src, struct rtable *rt); + extern int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb); + +@@ -154,7 +154,8 @@ + __be16 sport, __be16 dport, struct sock *sk, + int flags) + { +- struct flowi fl = { .oif = oif, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = oif, + .nl_u = { .ip4_u = { .daddr = dst, + .saddr = src, + .tos = tos } }, +@@ -199,6 +200,7 @@ + struct flowi fl; + + memcpy(&fl, &(*rp)->fl, sizeof(fl)); ++ fl.fl_net = sk->sk_net; + fl.fl_ip_sport = sport; + fl.fl_ip_dport = dport; + fl.proto = protocol; +diff -Nurb linux-2.6.22-570/include/net/rtnetlink.h linux-2.6.22-591/include/net/rtnetlink.h +--- linux-2.6.22-570/include/net/rtnetlink.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/rtnetlink.h 2007-12-21 15:36:12.000000000 -0500 +@@ -22,4 +22,62 @@ + return AF_UNSPEC; + } + ++/** ++ * struct rtnl_link_ops - rtnetlink link operations ++ * ++ * @list: Used internally ++ * @kind: Identifier ++ * @maxtype: Highest device specific netlink attribute number ++ * @policy: Netlink policy for device specific attribute validation ++ * @validate: Optional validation function for netlink/changelink parameters ++ * @priv_size: sizeof net_device private space ++ * @setup: net_device setup function ++ * @newlink: Function for configuring and registering a new device ++ * @changelink: Function for changing parameters of an existing device ++ * @dellink: Function to remove a device ++ * @get_size: Function to calculate required room for dumping device ++ * specific netlink attributes ++ * @fill_info: Function to dump device specific netlink attributes ++ * @get_xstats_size: Function to calculate required room for dumping devic ++ * specific statistics ++ * @fill_xstats: Function to dump device specific statistics ++ */ ++struct rtnl_link_ops { ++ struct list_head list; ++ ++ const char *kind; ++ ++ size_t priv_size; ++ void (*setup)(struct net_device *dev); ++ ++ int maxtype; ++ const struct nla_policy *policy; ++ int (*validate)(struct nlattr *tb[], ++ struct nlattr *data[]); ++ ++ int (*newlink)(struct net_device *dev, ++ struct nlattr *tb[], ++ struct nlattr *data[]); ++ int (*changelink)(struct net_device *dev, ++ struct nlattr *tb[], ++ struct nlattr *data[]); ++ void (*dellink)(struct net_device *dev); ++ ++ size_t (*get_size)(const struct net_device *dev); ++ int (*fill_info)(struct sk_buff *skb, ++ const struct net_device *dev); ++ ++ size_t (*get_xstats_size)(const struct net_device *dev); ++ int (*fill_xstats)(struct sk_buff *skb, ++ const struct net_device *dev); ++}; ++ ++extern int __rtnl_link_register(struct rtnl_link_ops *ops); ++extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); ++ ++extern int rtnl_link_register(struct rtnl_link_ops *ops); ++extern void rtnl_link_unregister(struct rtnl_link_ops *ops); ++ ++#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind) ++ + #endif +diff -Nurb linux-2.6.22-570/include/net/sock.h linux-2.6.22-591/include/net/sock.h +--- linux-2.6.22-570/include/net/sock.h 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/include/net/sock.h 2007-12-21 15:36:14.000000000 -0500 +@@ -55,6 
+55,7 @@ + #include + #include + #include ++#include + + /* + * This structure really needs to be cleaned up. +@@ -105,6 +106,7 @@ + * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables + * @skc_prot: protocol handlers inside a network family ++ * @skc_net: reference to the network namespace of this socket + * + * This is the minimal network layer representation of sockets, the header + * for struct sock and struct inet_timewait_sock. +@@ -119,6 +121,7 @@ + atomic_t skc_refcnt; + unsigned int skc_hash; + struct proto *skc_prot; ++ struct net *skc_net; + xid_t skc_xid; + struct vx_info *skc_vx_info; + nid_t skc_nid; +@@ -199,6 +202,7 @@ + #define sk_refcnt __sk_common.skc_refcnt + #define sk_hash __sk_common.skc_hash + #define sk_prot __sk_common.skc_prot ++#define sk_net __sk_common.skc_net + #define sk_xid __sk_common.skc_xid + #define sk_vx_info __sk_common.skc_vx_info + #define sk_nid __sk_common.skc_nid +@@ -781,7 +785,7 @@ + SINGLE_DEPTH_NESTING) + #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) + +-extern struct sock *sk_alloc(int family, ++extern struct sock *sk_alloc(struct net *net, int family, + gfp_t priority, + struct proto *prot, int zero_it); + extern void sk_free(struct sock *sk); +@@ -1010,6 +1014,7 @@ + #endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); ++ get_net(nsk->sk_net); + #ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +@@ -1373,6 +1378,7 @@ + + #ifdef CONFIG_SYSCTL + extern struct ctl_table core_table[]; ++extern struct ctl_table multi_core_table[]; + #endif + + extern int sysctl_optmem_max; +diff -Nurb linux-2.6.22-570/include/net/tcp.h linux-2.6.22-591/include/net/tcp.h +--- linux-2.6.22-570/include/net/tcp.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/net/tcp.h 2007-12-21 15:36:14.000000000 -0500 +@@ -191,8 +191,6 @@ + extern struct inet_timewait_death_row tcp_death_row; + + /* sysctl variables for tcp */ +-extern int sysctl_tcp_timestamps; +-extern int sysctl_tcp_window_scaling; + extern int sysctl_tcp_sack; + extern int sysctl_tcp_fin_timeout; + extern int sysctl_tcp_keepalive_time; +@@ -1293,6 +1291,7 @@ + }; + + struct tcp_iter_state { ++ struct net *net; + sa_family_t family; + enum tcp_seq_states state; + struct sock *syn_wait_sk; +@@ -1300,8 +1299,8 @@ + struct seq_operations seq_ops; + }; + +-extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); +-extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); ++extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); ++extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); + + extern struct request_sock_ops tcp_request_sock_ops; + +diff -Nurb linux-2.6.22-570/include/net/tipc/tipc_port.h linux-2.6.22-591/include/net/tipc/tipc_port.h +--- linux-2.6.22-570/include/net/tipc/tipc_port.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/tipc/tipc_port.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports + * +- * Copyright (c) 1994-2006, Ericsson AB +- * Copyright (c) 2005, Wind River Systems ++ * Copyright (c) 1994-2007, Ericsson AB ++ * Copyright (c) 2005-2007, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -55,6 +55,7 @@ + * @conn_unacked: number of unacknowledged messages received from peer port + * @published: non-zero if port has one or more associated names + * @congested: non-zero if cannot send because of link or port congestion ++ * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + */ +@@ -68,6 +69,7 @@ + u32 conn_unacked; + int published; + u32 congested; ++ u32 max_pkt; + u32 ref; + struct tipc_msg phdr; + }; +diff -Nurb linux-2.6.22-570/include/net/udp.h linux-2.6.22-591/include/net/udp.h +--- linux-2.6.22-570/include/net/udp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/udp.h 2007-12-21 15:36:14.000000000 -0500 +@@ -160,6 +160,7 @@ + }; + + struct udp_iter_state { ++ struct net *net; + sa_family_t family; + struct hlist_head *hashtable; + int bucket; +@@ -167,8 +168,8 @@ + }; + + #ifdef CONFIG_PROC_FS +-extern int udp_proc_register(struct udp_seq_afinfo *afinfo); +-extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); ++extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo); ++extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo); + + extern int udp4_proc_init(void); + extern void udp4_proc_exit(void); +diff -Nurb linux-2.6.22-570/include/net/wext.h linux-2.6.22-591/include/net/wext.h +--- linux-2.6.22-570/include/net/wext.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/net/wext.h 2007-12-21 15:36:14.000000000 -0500 +@@ -5,16 +5,23 @@ + * wireless extensions interface to the core code + */ + ++struct net; ++ + #ifdef CONFIG_WIRELESS_EXT +-extern int wext_proc_init(void); +-extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, ++extern int wext_proc_init(struct net *net); ++extern void wext_proc_exit(struct net *net); ++extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg); + #else +-static inline int wext_proc_init(void) ++static inline int wext_proc_init(struct net *net) + { + return 0; + } +-static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, ++static inline void wext_proc_exit(struct net *net) ++{ ++ return; ++} ++static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg) + { + return -EINVAL; +diff -Nurb linux-2.6.22-570/include/net/xfrm.h linux-2.6.22-591/include/net/xfrm.h +--- linux-2.6.22-570/include/net/xfrm.h 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/include/net/xfrm.h 2007-12-21 15:36:14.000000000 -0500 +@@ -19,13 +19,21 @@ + #include + #include + ++#define XFRM_PROTO_ESP 50 ++#define XFRM_PROTO_AH 51 ++#define XFRM_PROTO_COMP 108 ++#define XFRM_PROTO_IPIP 4 ++#define XFRM_PROTO_IPV6 41 ++#define XFRM_PROTO_ROUTING IPPROTO_ROUTING ++#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS ++ + #define XFRM_ALIGN8(len) (((len) + 7) & ~7) + #define MODULE_ALIAS_XFRM_MODE(family, encap) \ + MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap)) ++#define MODULE_ALIAS_XFRM_TYPE(family, proto) \ ++ MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto)) + + extern struct sock *xfrm_nl; +-extern u32 sysctl_xfrm_aevent_etime; +-extern u32 sysctl_xfrm_aevent_rseqth; + + extern struct mutex xfrm_cfg_mutex; + +@@ -509,11 +517,9 @@ + case IPPROTO_ICMPV6: + port = htons(fl->fl_icmp_type); 
+ break; +-#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + port = htons(fl->fl_mh_type); + break; +-#endif + default: + port = 0; /*XXX*/ + } +diff -Nurb linux-2.6.22-570/include/scsi/iscsi_if.h linux-2.6.22-591/include/scsi/iscsi_if.h +--- linux-2.6.22-570/include/scsi/iscsi_if.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/iscsi_if.h 2007-12-21 15:36:12.000000000 -0500 +@@ -48,6 +48,7 @@ + ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT = UEVENT_BASE + 14, + + ISCSI_UEVENT_TGT_DSCVR = UEVENT_BASE + 15, ++ ISCSI_UEVENT_SET_HOST_PARAM = UEVENT_BASE + 16, + + /* up events */ + ISCSI_KEVENT_RECV_PDU = KEVENT_BASE + 1, +@@ -71,6 +72,8 @@ + /* messages u -> k */ + struct msg_create_session { + uint32_t initial_cmdsn; ++ uint16_t cmds_max; ++ uint16_t queue_depth; + } c_session; + struct msg_destroy_session { + uint32_t sid; +@@ -136,6 +139,11 @@ + */ + uint32_t enable; + } tgt_dscvr; ++ struct msg_set_host_param { ++ uint32_t host_no; ++ uint32_t param; /* enum iscsi_host_param */ ++ uint32_t len; ++ } set_host_param; + } u; + union { + /* messages k -> u */ +@@ -223,6 +231,11 @@ + ISCSI_PARAM_CONN_PORT, + ISCSI_PARAM_CONN_ADDRESS, + ++ ISCSI_PARAM_USERNAME, ++ ISCSI_PARAM_USERNAME_IN, ++ ISCSI_PARAM_PASSWORD, ++ ISCSI_PARAM_PASSWORD_IN, ++ + /* must always be last */ + ISCSI_PARAM_MAX, + }; +@@ -249,6 +262,24 @@ + #define ISCSI_SESS_RECOVERY_TMO (1 << ISCSI_PARAM_SESS_RECOVERY_TMO) + #define ISCSI_CONN_PORT (1 << ISCSI_PARAM_CONN_PORT) + #define ISCSI_CONN_ADDRESS (1 << ISCSI_PARAM_CONN_ADDRESS) ++#define ISCSI_USERNAME (1 << ISCSI_PARAM_USERNAME) ++#define ISCSI_USERNAME_IN (1 << ISCSI_PARAM_USERNAME_IN) ++#define ISCSI_PASSWORD (1 << ISCSI_PARAM_PASSWORD) ++#define ISCSI_PASSWORD_IN (1 << ISCSI_PARAM_PASSWORD_IN) ++ ++/* iSCSI HBA params */ ++enum iscsi_host_param { ++ ISCSI_HOST_PARAM_HWADDRESS, ++ ISCSI_HOST_PARAM_INITIATOR_NAME, ++ ISCSI_HOST_PARAM_NETDEV_NAME, ++ ISCSI_HOST_PARAM_IPADDRESS, ++ ISCSI_HOST_PARAM_MAX, ++}; ++ ++#define ISCSI_HOST_HWADDRESS (1 << ISCSI_HOST_PARAM_HWADDRESS) ++#define ISCSI_HOST_INITIATOR_NAME (1 << ISCSI_HOST_PARAM_INITIATOR_NAME) ++#define ISCSI_HOST_NETDEV_NAME (1 << ISCSI_HOST_PARAM_NETDEV_NAME) ++#define ISCSI_HOST_IPADDRESS (1 << ISCSI_HOST_PARAM_IPADDRESS) + + #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle) + #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr) +@@ -272,6 +303,9 @@ + #define CAP_MULTI_CONN 0x40 + #define CAP_TEXT_NEGO 0x80 + #define CAP_MARKERS 0x100 ++#define CAP_FW_DB 0x200 ++#define CAP_SENDTARGETS_OFFLOAD 0x400 ++#define CAP_DATA_PATH_OFFLOAD 0x800 + + /* + * These flags describes reason of stop_conn() call +diff -Nurb linux-2.6.22-570/include/scsi/libiscsi.h linux-2.6.22-591/include/scsi/libiscsi.h +--- linux-2.6.22-570/include/scsi/libiscsi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/libiscsi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -48,9 +48,8 @@ + #define debug_scsi(fmt...) 
+ #endif + +-#define ISCSI_XMIT_CMDS_MAX 128 /* must be power of 2 */ +-#define ISCSI_MGMT_CMDS_MAX 32 /* must be power of 2 */ +-#define ISCSI_CONN_MAX 1 ++#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* must be power of 2 */ ++#define ISCSI_MGMT_CMDS_MAX 16 /* must be power of 2 */ + + #define ISCSI_MGMT_ITT_OFFSET 0xa00 + +@@ -73,6 +72,8 @@ + #define ISCSI_AGE_SHIFT 28 + #define ISCSI_AGE_MASK (0xf << ISCSI_AGE_SHIFT) + ++#define ISCSI_ADDRESS_BUF_LEN 64 ++ + struct iscsi_mgmt_task { + /* + * Becuae LLDs allocate their hdr differently, this is a pointer to +@@ -80,7 +81,7 @@ + */ + struct iscsi_hdr *hdr; + char *data; /* mgmt payload */ +- int data_count; /* counts data to be sent */ ++ unsigned data_count; /* counts data to be sent */ + uint32_t itt; /* this ITT */ + void *dd_data; /* driver/transport data */ + struct list_head running; +@@ -90,6 +91,7 @@ + ISCSI_TASK_COMPLETED, + ISCSI_TASK_PENDING, + ISCSI_TASK_RUNNING, ++ ISCSI_TASK_ABORTING, + }; + + struct iscsi_cmd_task { +@@ -99,16 +101,14 @@ + */ + struct iscsi_cmd *hdr; + int itt; /* this ITT */ +- int datasn; /* DataSN */ + + uint32_t unsol_datasn; +- int imm_count; /* imm-data (bytes) */ +- int unsol_count; /* unsolicited (bytes)*/ ++ unsigned imm_count; /* imm-data (bytes) */ ++ unsigned unsol_count; /* unsolicited (bytes)*/ + /* offset in unsolicited stream (bytes); */ +- int unsol_offset; +- int data_count; /* remaining Data-Out */ ++ unsigned unsol_offset; ++ unsigned data_count; /* remaining Data-Out */ + struct scsi_cmnd *sc; /* associated SCSI cmd*/ +- int total_length; + struct iscsi_conn *conn; /* used connection */ + struct iscsi_mgmt_task *mtask; /* tmf mtask in progr */ + +@@ -152,18 +152,11 @@ + struct iscsi_cmd_task *ctask; /* xmit ctask in progress */ + + /* xmit */ +- struct kfifo *immqueue; /* immediate xmit queue */ + struct kfifo *mgmtqueue; /* mgmt (control) xmit queue */ + struct list_head mgmt_run_list; /* list of control tasks */ + struct list_head xmitqueue; /* data-path cmd queue */ + struct list_head run_list; /* list of cmds in progress */ + struct work_struct xmitwork; /* per-conn. 
xmit workqueue */ +- /* +- * serializes connection xmit, access to kfifos: +- * xmitqueue, immqueue, mgmtqueue +- */ +- struct mutex xmitmutex; +- + unsigned long suspend_tx; /* suspend Tx */ + unsigned long suspend_rx; /* suspend Rx */ + +@@ -174,8 +167,8 @@ + int tmabort_state; /* see TMABORT_INITIAL, etc.*/ + + /* negotiated params */ +- int max_recv_dlength; /* initiator_max_recv_dsl*/ +- int max_xmit_dlength; /* target_max_recv_dsl */ ++ unsigned max_recv_dlength; /* initiator_max_recv_dsl*/ ++ unsigned max_xmit_dlength; /* target_max_recv_dsl */ + int hdrdgst_en; + int datadgst_en; + int ifmarker_en; +@@ -183,6 +176,12 @@ + /* values userspace uses to id a conn */ + int persistent_port; + char *persistent_address; ++ /* remote portal currently connected to */ ++ int portal_port; ++ char portal_address[ISCSI_ADDRESS_BUF_LEN]; ++ /* local address */ ++ int local_port; ++ char local_address[ISCSI_ADDRESS_BUF_LEN]; + + /* MIB-statistics */ + uint64_t txdata_octets; +@@ -213,18 +212,25 @@ + + /* configuration */ + int initial_r2t_en; +- int max_r2t; ++ unsigned max_r2t; + int imm_data_en; +- int first_burst; +- int max_burst; ++ unsigned first_burst; ++ unsigned max_burst; + int time2wait; + int time2retain; + int pdu_inorder_en; + int dataseq_inorder_en; + int erl; + int tpgt; ++ char *username; ++ char *username_in; ++ char *password; ++ char *password_in; + char *targetname; +- ++ char *initiatorname; ++ /* hw address or netdev iscsi connection is bound to */ ++ char *hwaddress; ++ char *netdev; + /* control data */ + struct iscsi_transport *tt; + struct Scsi_Host *host; +@@ -255,12 +261,22 @@ + extern int iscsi_queuecommand(struct scsi_cmnd *sc, + void (*done)(struct scsi_cmnd *)); + ++ ++/* ++ * iSCSI host helpers. ++ */ ++extern int iscsi_host_set_param(struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf, ++ int buflen); ++extern int iscsi_host_get_param(struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf); ++ + /* + * session management + */ + extern struct iscsi_cls_session * + iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *, +- int, int, uint32_t, uint32_t *); ++ uint16_t, uint16_t, int, int, uint32_t, uint32_t *); + extern void iscsi_session_teardown(struct iscsi_cls_session *); + extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *); + extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *); +@@ -289,8 +305,7 @@ + /* + * pdu and task processing + */ +-extern int iscsi_check_assign_cmdsn(struct iscsi_session *, +- struct iscsi_nopin *); ++extern void iscsi_update_cmdsn(struct iscsi_session *, struct iscsi_nopin *); + extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *, + struct iscsi_data *hdr); + extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *, +diff -Nurb linux-2.6.22-570/include/scsi/scsi_cmnd.h linux-2.6.22-591/include/scsi/scsi_cmnd.h +--- linux-2.6.22-570/include/scsi/scsi_cmnd.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/scsi_cmnd.h 2007-12-21 15:36:12.000000000 -0500 +@@ -135,4 +135,24 @@ + extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t); + extern void scsi_free_sgtable(struct scatterlist *, int); + ++extern int scsi_dma_map(struct scsi_cmnd *cmd); ++extern void scsi_dma_unmap(struct scsi_cmnd *cmd); ++ ++#define scsi_sg_count(cmd) ((cmd)->use_sg) ++#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer) ++#define scsi_bufflen(cmd) ((cmd)->request_bufflen) ++ 
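The scsi_dma_map()/scsi_sglist() accessors added above (together with scsi_set_resid() and the scsi_for_each_sg() iterator just below) are the intended way for a LLD to reach a command's data buffer without poking use_sg/request_buffer directly. A minimal queuecommand sketch under that assumption; the foo_hw_* helpers are hypothetical stand-ins for a real driver's ring handling, and the scsi_dma_map() return convention (segment count, 0 for no data, negative on failure) is assumed rather than shown in this patch:

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

/* hypothetical hardware helpers, stand-ins for a real LLD's ring code */
extern void foo_hw_add_segment(struct scsi_cmnd *, dma_addr_t, unsigned int);
extern void foo_hw_fire(struct scsi_cmnd *, void (*)(struct scsi_cmnd *));

static int foo_queuecommand(struct scsi_cmnd *cmd,
			    void (*done)(struct scsi_cmnd *))
{
	struct scatterlist *sg;
	int i, nseg;

	/* maps scsi_sglist(cmd) for DMA in one call */
	nseg = scsi_dma_map(cmd);
	if (nseg < 0)
		return SCSI_MLQUEUE_HOST_BUSY;

	scsi_for_each_sg(cmd, sg, nseg, i)
		foo_hw_add_segment(cmd, sg_dma_address(sg), sg_dma_len(sg));

	foo_hw_fire(cmd, done);
	return 0;
}

/* completion path: undo the mapping, report any underrun via resid */
static void foo_complete(struct scsi_cmnd *cmd, unsigned int not_xferred,
			 void (*done)(struct scsi_cmnd *))
{
	scsi_dma_unmap(cmd);
	scsi_set_resid(cmd, not_xferred);
	done(cmd);
}
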
++static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid) ++{ ++ cmd->resid = resid; ++} ++ ++static inline int scsi_get_resid(struct scsi_cmnd *cmd) ++{ ++ return cmd->resid; ++} ++ ++#define scsi_for_each_sg(cmd, sg, nseg, __i) \ ++ for (__i = 0, sg = scsi_sglist(cmd); __i < (nseg); __i++, (sg)++) ++ + #endif /* _SCSI_SCSI_CMND_H */ +diff -Nurb linux-2.6.22-570/include/scsi/scsi_device.h linux-2.6.22-591/include/scsi/scsi_device.h +--- linux-2.6.22-570/include/scsi/scsi_device.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/scsi_device.h 2007-12-21 15:36:12.000000000 -0500 +@@ -287,6 +287,7 @@ + extern void scsi_target_unblock(struct device *); + extern void scsi_remove_target(struct device *); + extern void int_to_scsilun(unsigned int, struct scsi_lun *); ++extern int scsilun_to_int(struct scsi_lun *); + extern const char *scsi_device_state_name(enum scsi_device_state); + extern int scsi_is_sdev_device(const struct device *); + extern int scsi_is_target_device(const struct device *); +diff -Nurb linux-2.6.22-570/include/scsi/scsi_host.h linux-2.6.22-591/include/scsi/scsi_host.h +--- linux-2.6.22-570/include/scsi/scsi_host.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/scsi_host.h 2007-12-21 15:36:12.000000000 -0500 +@@ -339,12 +339,6 @@ + enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + + /* +- * suspend support +- */ +- int (*resume)(struct scsi_device *); +- int (*suspend)(struct scsi_device *, pm_message_t state); +- +- /* + * Name of proc directory + */ + char *proc_name; +@@ -677,6 +671,10 @@ + #define shost_printk(prefix, shost, fmt, a...) \ + dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a) + ++static inline void *shost_priv(struct Scsi_Host *shost) ++{ ++ return (void *)shost->hostdata; ++} + + int scsi_is_host_device(const struct device *); + +diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_fc.h linux-2.6.22-591/include/scsi/scsi_transport_fc.h +--- linux-2.6.22-570/include/scsi/scsi_transport_fc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/scsi_transport_fc.h 2007-12-21 15:36:12.000000000 -0500 +@@ -19,7 +19,7 @@ + * + * ======== + * +- * Copyright (C) 2004-2005 James Smart, Emulex Corporation ++ * Copyright (C) 2004-2007 James Smart, Emulex Corporation + * Rewrite for host, target, device, and remote port attributes, + * statistics, and service functions... + * +@@ -62,8 +62,10 @@ + FC_PORTTYPE_NLPORT, /* (Public) Loop w/ FLPort */ + FC_PORTTYPE_LPORT, /* (Private) Loop w/o FLPort */ + FC_PORTTYPE_PTP, /* Point to Point w/ another NPort */ ++ FC_PORTTYPE_NPIV, /* VPORT based on NPIV */ + }; + ++ + /* + * fc_port_state: If you alter this, you also need to alter scsi_transport_fc.c + * (for the ascii descriptions). +@@ -84,6 +86,25 @@ + + + /* ++ * fc_vport_state: If you alter this, you also need to alter ++ * scsi_transport_fc.c (for the ascii descriptions). ++ */ ++enum fc_vport_state { ++ FC_VPORT_UNKNOWN, ++ FC_VPORT_ACTIVE, ++ FC_VPORT_DISABLED, ++ FC_VPORT_LINKDOWN, ++ FC_VPORT_INITIALIZING, ++ FC_VPORT_NO_FABRIC_SUPP, ++ FC_VPORT_NO_FABRIC_RSCS, ++ FC_VPORT_FABRIC_LOGOUT, ++ FC_VPORT_FABRIC_REJ_WWN, ++ FC_VPORT_FAILED, ++}; ++ ++ ++ ++/* + * FC Classes of Service + * Note: values are not enumerated, as they can be "or'd" together + * for reporting (e.g. report supported_classes). 
If you alter this list, +@@ -124,18 +145,116 @@ + }; + + /* +- * FC Remote Port Roles ++ * FC Port Roles + * Note: values are not enumerated, as they can be "or'd" together + * for reporting (e.g. report roles). If you alter this list, + * you also need to alter scsi_transport_fc.c (for the ascii descriptions). + */ +-#define FC_RPORT_ROLE_UNKNOWN 0x00 +-#define FC_RPORT_ROLE_FCP_TARGET 0x01 +-#define FC_RPORT_ROLE_FCP_INITIATOR 0x02 +-#define FC_RPORT_ROLE_IP_PORT 0x04 ++#define FC_PORT_ROLE_UNKNOWN 0x00 ++#define FC_PORT_ROLE_FCP_TARGET 0x01 ++#define FC_PORT_ROLE_FCP_INITIATOR 0x02 ++#define FC_PORT_ROLE_IP_PORT 0x04 ++ ++/* The following are for compatibility */ ++#define FC_RPORT_ROLE_UNKNOWN FC_PORT_ROLE_UNKNOWN ++#define FC_RPORT_ROLE_FCP_TARGET FC_PORT_ROLE_FCP_TARGET ++#define FC_RPORT_ROLE_FCP_INITIATOR FC_PORT_ROLE_FCP_INITIATOR ++#define FC_RPORT_ROLE_IP_PORT FC_PORT_ROLE_IP_PORT ++ ++ ++/* Macro for use in defining Virtual Port attributes */ ++#define FC_VPORT_ATTR(_name,_mode,_show,_store) \ ++struct class_device_attribute class_device_attr_vport_##_name = \ ++ __ATTR(_name,_mode,_show,_store) + + + /* ++ * FC Virtual Port Attributes ++ * ++ * This structure exists for each FC port is a virtual FC port. Virtual ++ * ports share the physical link with the Physical port. Each virtual ++ * ports has a unique presense on the SAN, and may be instantiated via ++ * NPIV, Virtual Fabrics, or via additional ALPAs. As the vport is a ++ * unique presense, each vport has it's own view of the fabric, ++ * authentication priviledge, and priorities. ++ * ++ * A virtual port may support 1 or more FC4 roles. Typically it is a ++ * FCP Initiator. It could be a FCP Target, or exist sole for an IP over FC ++ * roles. FC port attributes for the vport will be reported on any ++ * fc_host class object allocated for an FCP Initiator. ++ * ++ * -- ++ * ++ * Fixed attributes are not expected to change. The driver is ++ * expected to set these values after receiving the fc_vport structure ++ * via the vport_create() call from the transport. ++ * The transport fully manages all get functions w/o driver interaction. ++ * ++ * Dynamic attributes are expected to change. The driver participates ++ * in all get/set operations via functions provided by the driver. ++ * ++ * Private attributes are transport-managed values. They are fully ++ * managed by the transport w/o driver interaction. 
++ */ ++ ++#define FC_VPORT_SYMBOLIC_NAMELEN 64 ++struct fc_vport { ++ /* Fixed Attributes */ ++ ++ /* Dynamic Attributes */ ++ ++ /* Private (Transport-managed) Attributes */ ++ enum fc_vport_state vport_state; ++ enum fc_vport_state vport_last_state; ++ u64 node_name; ++ u64 port_name; ++ u32 roles; ++ u32 vport_id; /* Admin Identifier for the vport */ ++ enum fc_port_type vport_type; ++ char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN]; ++ ++ /* exported data */ ++ void *dd_data; /* Used for driver-specific storage */ ++ ++ /* internal data */ ++ struct Scsi_Host *shost; /* Physical Port Parent */ ++ unsigned int channel; ++ u32 number; ++ u8 flags; ++ struct list_head peers; ++ struct device dev; ++ struct work_struct vport_delete_work; ++} __attribute__((aligned(sizeof(unsigned long)))); ++ ++/* bit field values for struct fc_vport "flags" field: */ ++#define FC_VPORT_CREATING 0x01 ++#define FC_VPORT_DELETING 0x02 ++#define FC_VPORT_DELETED 0x04 ++#define FC_VPORT_DEL 0x06 /* Any DELETE state */ ++ ++#define dev_to_vport(d) \ ++ container_of(d, struct fc_vport, dev) ++#define transport_class_to_vport(classdev) \ ++ dev_to_vport(classdev->dev) ++#define vport_to_shost(v) \ ++ (v->shost) ++#define vport_to_shost_channel(v) \ ++ (v->channel) ++#define vport_to_parent(v) \ ++ (v->dev.parent) ++ ++ ++/* Error return codes for vport_create() callback */ ++#define VPCERR_UNSUPPORTED -ENOSYS /* no driver/adapter ++ support */ ++#define VPCERR_BAD_WWN -ENOTUNIQ /* driver validation ++ of WWNs failed */ ++#define VPCERR_NO_FABRIC_SUPP -EOPNOTSUPP /* Fabric connection ++ is loop or the ++ Fabric Port does ++ not support NPIV */ ++ ++/* + * fc_rport_identifiers: This set of data contains all elements + * to uniquely identify a remote FC port. The driver uses this data + * to report the existence of a remote FC port in the topology. 
Internally, +@@ -149,6 +268,7 @@ + u32 roles; + }; + ++ + /* Macro for use in defining Remote Port attributes */ + #define FC_RPORT_ATTR(_name,_mode,_show,_store) \ + struct class_device_attribute class_device_attr_rport_##_name = \ +@@ -343,6 +463,7 @@ + u8 supported_fc4s[FC_FC4_LIST_SIZE]; + u32 supported_speeds; + u32 maxframe_size; ++ u16 max_npiv_vports; + char serial_number[FC_SERIAL_NUMBER_SIZE]; + + /* Dynamic Attributes */ +@@ -361,8 +482,11 @@ + /* internal data */ + struct list_head rports; + struct list_head rport_bindings; ++ struct list_head vports; + u32 next_rport_number; + u32 next_target_id; ++ u32 next_vport_number; ++ u16 npiv_vports_inuse; + + /* work queues for rport state manipulation */ + char work_q_name[KOBJ_NAME_LEN]; +@@ -388,6 +512,8 @@ + (((struct fc_host_attrs *)(x)->shost_data)->supported_speeds) + #define fc_host_maxframe_size(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->maxframe_size) ++#define fc_host_max_npiv_vports(x) \ ++ (((struct fc_host_attrs *)(x)->shost_data)->max_npiv_vports) + #define fc_host_serial_number(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->serial_number) + #define fc_host_port_id(x) \ +@@ -412,10 +538,16 @@ + (((struct fc_host_attrs *)(x)->shost_data)->rports) + #define fc_host_rport_bindings(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->rport_bindings) ++#define fc_host_vports(x) \ ++ (((struct fc_host_attrs *)(x)->shost_data)->vports) + #define fc_host_next_rport_number(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) + #define fc_host_next_target_id(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) ++#define fc_host_next_vport_number(x) \ ++ (((struct fc_host_attrs *)(x)->shost_data)->next_vport_number) ++#define fc_host_npiv_vports_inuse(x) \ ++ (((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse) + #define fc_host_work_q_name(x) \ + (((struct fc_host_attrs *)(x)->shost_data)->work_q_name) + #define fc_host_work_q(x) \ +@@ -452,8 +584,14 @@ + void (*dev_loss_tmo_callbk)(struct fc_rport *); + void (*terminate_rport_io)(struct fc_rport *); + ++ void (*set_vport_symbolic_name)(struct fc_vport *); ++ int (*vport_create)(struct fc_vport *, bool); ++ int (*vport_disable)(struct fc_vport *, bool); ++ int (*vport_delete)(struct fc_vport *); ++ + /* allocation lengths for host-specific data */ + u32 dd_fcrport_size; ++ u32 dd_fcvport_size; + + /* + * The driver sets these to tell the transport class it +@@ -512,7 +650,7 @@ + + switch (rport->port_state) { + case FC_PORTSTATE_ONLINE: +- if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) ++ if (rport->roles & FC_PORT_ROLE_FCP_TARGET) + result = 0; + else if (rport->flags & FC_RPORT_DEVLOSS_PENDING) + result = DID_IMM_RETRY << 16; +@@ -549,6 +687,27 @@ + wwn[7] = inm & 0xff; + } + ++/** ++ * fc_vport_set_state() - called to set a vport's state. Saves the old state, ++ * excepting the transitory states of initializing and sending the ELS ++ * traffic to instantiate the vport on the link. ++ * ++ * Assumes the driver has surrounded this with the proper locking to ensure ++ * a coherent state change. 
++ * ++ * @vport: virtual port whose state is changing ++ * @new_state: new state ++ **/ ++static inline void ++fc_vport_set_state(struct fc_vport *vport, enum fc_vport_state new_state) ++{ ++ if ((new_state != FC_VPORT_UNKNOWN) && ++ (new_state != FC_VPORT_INITIALIZING)) ++ vport->vport_last_state = vport->vport_state; ++ vport->vport_state = new_state; ++} ++ ++ + struct scsi_transport_template *fc_attach_transport( + struct fc_function_template *); + void fc_release_transport(struct scsi_transport_template *); +@@ -567,5 +726,6 @@ + * be sure to read the Vendor Type and ID formatting requirements + * specified in scsi_netlink.h + */ ++int fc_vport_terminate(struct fc_vport *vport); + + #endif /* SCSI_TRANSPORT_FC_H */ +diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h +--- linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/include/scsi/scsi_transport_iscsi.h 2007-12-21 15:36:12.000000000 -0500 +@@ -79,7 +79,8 @@ + char *name; + unsigned int caps; + /* LLD sets this to indicate what values it can export to sysfs */ +- unsigned int param_mask; ++ uint64_t param_mask; ++ uint64_t host_param_mask; + struct scsi_host_template *host_template; + /* LLD connection data size */ + int conndata_size; +@@ -89,7 +90,8 @@ + unsigned int max_conn; + unsigned int max_cmd_len; + struct iscsi_cls_session *(*create_session) (struct iscsi_transport *it, +- struct scsi_transport_template *t, uint32_t sn, uint32_t *hn); ++ struct scsi_transport_template *t, uint16_t, uint16_t, ++ uint32_t sn, uint32_t *hn); + void (*destroy_session) (struct iscsi_cls_session *session); + struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess, + uint32_t cid); +@@ -105,14 +107,18 @@ + enum iscsi_param param, char *buf); + int (*get_session_param) (struct iscsi_cls_session *session, + enum iscsi_param param, char *buf); ++ int (*get_host_param) (struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf); ++ int (*set_host_param) (struct Scsi_Host *shost, ++ enum iscsi_host_param param, char *buf, ++ int buflen); + int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, + char *data, uint32_t data_size); + void (*get_stats) (struct iscsi_cls_conn *conn, + struct iscsi_stats *stats); + void (*init_cmd_task) (struct iscsi_cmd_task *ctask); + void (*init_mgmt_task) (struct iscsi_conn *conn, +- struct iscsi_mgmt_task *mtask, +- char *data, uint32_t data_size); ++ struct iscsi_mgmt_task *mtask); + int (*xmit_cmd_task) (struct iscsi_conn *conn, + struct iscsi_cmd_task *ctask); + void (*cleanup_cmd_task) (struct iscsi_conn *conn, +@@ -124,7 +130,7 @@ + uint64_t *ep_handle); + int (*ep_poll) (uint64_t ep_handle, int timeout_ms); + void (*ep_disconnect) (uint64_t ep_handle); +- int (*tgt_dscvr) (enum iscsi_tgt_dscvr type, uint32_t host_no, ++ int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type, + uint32_t enable, struct sockaddr *dst_addr); + }; + +diff -Nurb linux-2.6.22-570/init/Kconfig linux-2.6.22-591/init/Kconfig +--- linux-2.6.22-570/init/Kconfig 2007-12-21 15:36:05.000000000 -0500 ++++ linux-2.6.22-591/init/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -120,15 +120,6 @@ + section 6.4 of the Linux Programmer's Guide, available from + . + +-config IPC_NS +- bool "IPC Namespaces" +- depends on SYSVIPC +- default n +- help +- Support ipc namespaces. This allows containers, i.e. 
virtual +- environments, to use ipc namespaces to provide different ipc +- objects for different servers. If unsure, say N. +- + config SYSVIPC_SYSCTL + bool + depends on SYSVIPC +@@ -218,13 +209,14 @@ + + Say N if unsure. + +-config UTS_NS +- bool "UTS Namespaces" ++config USER_NS ++ bool "User Namespaces (EXPERIMENTAL)" + default n ++ depends on EXPERIMENTAL + help +- Support uts namespaces. This allows containers, i.e. +- vservers, to use uts namespaces to provide different +- uts info for different servers. If unsure, say N. ++ Support user namespaces. This allows containers, i.e. ++ vservers, to use user namespaces to provide different ++ user info for different servers. If unsure, say N. + + config AUDIT + bool "Auditing support" +@@ -298,9 +290,23 @@ + depends on !OOM_PANIC + default y + ++config CONTAINERS ++ bool ++ ++config CONTAINER_DEBUG ++ bool "Example debug container subsystem" ++ select CONTAINERS ++ help ++ This option enables a simple container subsystem that ++ exports useful debugging information about the containers ++ framework ++ ++ Say N if unsure ++ + config CPUSETS + bool "Cpuset support" + depends on SMP ++ select CONTAINERS + help + This option will let you create and manage CPUSETs which + allow dynamically partitioning a system into sets of CPUs and +@@ -329,6 +335,27 @@ + If you are using a distro that was released in 2006 or later, + it should be safe to say N here. + ++config CONTAINER_CPUACCT ++ bool "Simple CPU accounting container subsystem" ++ select CONTAINERS ++ help ++ Provides a simple Resource Controller for monitoring the ++ total CPU consumed by the tasks in a container ++ ++config CONTAINER_NS ++ bool "Namespace container subsystem" ++ select CONTAINERS ++ help ++ Provides a simple namespace container subsystem to ++ provide hierarchical naming of sets of namespaces, ++ for instance virtual servers and checkpoint/restart ++ jobs. ++ ++config PROC_PID_CPUSET ++ bool "Include legacy /proc//cpuset file" ++ depends on CPUSETS ++ default y ++ + config RELAY + bool "Kernel->user space relay support (formerly relayfs)" + help +@@ -605,6 +632,33 @@ + + endchoice + ++config PROC_SMAPS ++ default y ++ bool "Enable /proc/pid/smaps support" if EMBEDDED && PROC_FS && MMU ++ help ++ The /proc/pid/smaps interface reports a process's private and ++ shared memory per mapping. Disabling this interface will reduce ++ the size of the kernel for small machines. ++ ++config PROC_CLEAR_REFS ++ default y ++ bool "Enable /proc/pid/clear_refs support" if EMBEDDED && PROC_FS && MMU ++ help ++ The /proc/pid/clear_refs interface allows clearing the ++ referenced bits on a process's memory maps to allow monitoring ++ working set size. Disabling this interface will reduce ++ the size of the kernel for small machines. ++ ++config PROC_PAGEMAP ++ default y ++ bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU ++ help ++ The /proc/pid/pagemap interface allows reading the ++ kernel's virtual memory to page frame mapping to determine which ++ individual pages a process has mapped and which pages it shares ++ with other processes. Disabling this interface will reduce the ++ size of the kernel for small machines. 
++ + endmenu # General setup + + config RT_MUTEXES +@@ -620,6 +674,19 @@ + default 0 if BASE_FULL + default 1 if !BASE_FULL + ++config PAGE_GROUP_BY_MOBILITY ++ bool "Group pages based on their mobility in the page allocator" ++ def_bool y ++ help ++ The standard allocator will fragment memory over time which means ++ that high order allocations will fail even if kswapd is running. If ++ this option is set, the allocator will try and group page types ++ based on their ability to migrate or reclaim. This is a best effort ++ attempt at lowering fragmentation which a few workloads care about. ++ The loss is a more complex allocator that may perform slower. If ++ you are interested in working with large pages, say Y and set ++ /proc/sys/vm/min_free_bytes to 16374. Otherwise say N ++ + menu "Loadable module support" + + config MODULES +diff -Nurb linux-2.6.22-570/init/do_mounts_initrd.c linux-2.6.22-591/init/do_mounts_initrd.c +--- linux-2.6.22-570/init/do_mounts_initrd.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/init/do_mounts_initrd.c 2007-12-21 15:36:12.000000000 -0500 +@@ -56,12 +56,9 @@ + sys_chroot("."); + + pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD); +- if (pid > 0) { +- while (pid != sys_wait4(-1, NULL, 0, NULL)) { +- try_to_freeze(); ++ if (pid > 0) ++ while (pid != sys_wait4(-1, NULL, 0, NULL)) + yield(); +- } +- } + + /* move initrd to rootfs' /old */ + sys_fchdir(old_fd); +diff -Nurb linux-2.6.22-570/init/main.c linux-2.6.22-591/init/main.c +--- linux-2.6.22-570/init/main.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/init/main.c 2007-12-21 15:36:12.000000000 -0500 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -502,6 +503,7 @@ + char * command_line; + extern struct kernel_param __start___param[], __stop___param[]; + ++ container_init_early(); + smp_setup_processor_id(); + + /* +@@ -627,6 +629,7 @@ + #ifdef CONFIG_PROC_FS + proc_root_init(); + #endif ++ container_init(); + cpuset_init(); + taskstats_init_early(); + delayacct_init(); +diff -Nurb linux-2.6.22-570/ipc/msg.c linux-2.6.22-591/ipc/msg.c +--- linux-2.6.22-570/ipc/msg.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/ipc/msg.c 2007-12-21 15:36:12.000000000 -0500 +@@ -88,7 +88,7 @@ + static int sysvipc_msg_proc_show(struct seq_file *s, void *it); + #endif + +-static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) ++static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) + { + ns->ids[IPC_MSG_IDS] = ids; + ns->msg_ctlmax = MSGMAX; +@@ -97,7 +97,6 @@ + ipc_init_ids(ids, ns->msg_ctlmni); + } + +-#ifdef CONFIG_IPC_NS + int msg_init_ns(struct ipc_namespace *ns) + { + struct ipc_ids *ids; +@@ -129,7 +128,6 @@ + kfree(ns->ids[IPC_MSG_IDS]); + ns->ids[IPC_MSG_IDS] = NULL; + } +-#endif + + void __init msg_init(void) + { +diff -Nurb linux-2.6.22-570/ipc/sem.c linux-2.6.22-591/ipc/sem.c +--- linux-2.6.22-570/ipc/sem.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/ipc/sem.c 2007-12-21 15:36:12.000000000 -0500 +@@ -123,7 +123,7 @@ + #define sc_semopm sem_ctls[2] + #define sc_semmni sem_ctls[3] + +-static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) ++static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) + { + ns->ids[IPC_SEM_IDS] = ids; + ns->sc_semmsl = SEMMSL; +@@ -134,7 +134,6 @@ + ipc_init_ids(ids, ns->sc_semmni); + } + +-#ifdef CONFIG_IPC_NS + int sem_init_ns(struct ipc_namespace *ns) + { + struct ipc_ids *ids; +@@ -166,7 
+165,6 @@ + kfree(ns->ids[IPC_SEM_IDS]); + ns->ids[IPC_SEM_IDS] = NULL; + } +-#endif + + void __init sem_init (void) + { +diff -Nurb linux-2.6.22-570/ipc/shm.c linux-2.6.22-591/ipc/shm.c +--- linux-2.6.22-570/ipc/shm.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/ipc/shm.c 2007-12-21 15:36:12.000000000 -0500 +@@ -79,7 +79,7 @@ + static int sysvipc_shm_proc_show(struct seq_file *s, void *it); + #endif + +-static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) ++static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids) + { + ns->ids[IPC_SHM_IDS] = ids; + ns->shm_ctlmax = SHMMAX; +@@ -100,7 +100,6 @@ + shm_destroy(ns, shp); + } + +-#ifdef CONFIG_IPC_NS + int shm_init_ns(struct ipc_namespace *ns) + { + struct ipc_ids *ids; +@@ -132,7 +131,6 @@ + kfree(ns->ids[IPC_SHM_IDS]); + ns->ids[IPC_SHM_IDS] = NULL; + } +-#endif + + void __init shm_init (void) + { +@@ -234,13 +232,13 @@ + mutex_unlock(&shm_ids(ns).mutex); + } + +-static struct page *shm_nopage(struct vm_area_struct *vma, +- unsigned long address, int *type) ++static struct page *shm_fault(struct vm_area_struct *vma, ++ struct fault_data *fdata) + { + struct file *file = vma->vm_file; + struct shm_file_data *sfd = shm_file_data(file); + +- return sfd->vm_ops->nopage(vma, address, type); ++ return sfd->vm_ops->fault(vma, fdata); + } + + #ifdef CONFIG_NUMA +@@ -279,6 +277,7 @@ + if (ret != 0) + return ret; + sfd->vm_ops = vma->vm_ops; ++ BUG_ON(!sfd->vm_ops->fault); + vma->vm_ops = &shm_vm_ops; + shm_open(vma); + +@@ -337,7 +336,7 @@ + static struct vm_operations_struct shm_vm_ops = { + .open = shm_open, /* callback for a new vm-area open */ + .close = shm_close, /* callback for when the vm-area is released */ +- .nopage = shm_nopage, ++ .fault = shm_fault, + #if defined(CONFIG_NUMA) + .set_policy = shm_set_policy, + .get_policy = shm_get_policy, +diff -Nurb linux-2.6.22-570/ipc/util.c linux-2.6.22-591/ipc/util.c +--- linux-2.6.22-570/ipc/util.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/ipc/util.c 2007-12-21 15:36:12.000000000 -0500 +@@ -52,7 +52,6 @@ + }, + }; + +-#ifdef CONFIG_IPC_NS + static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns) + { + int err; +@@ -114,14 +113,6 @@ + atomic_dec(&vs_global_ipc_ns); + kfree(ns); + } +-#else +-struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns) +-{ +- if (flags & CLONE_NEWIPC) +- return ERR_PTR(-EINVAL); +- return ns; +-} +-#endif + + /** + * ipc_init - initialise IPC subsystem +@@ -149,7 +140,7 @@ + * array itself. 
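
With the CONFIG_IPC_NS conditionals dropped in the hunks above, per-namespace IPC setup and teardown are always compiled in. What an ipc namespace actually buys is easiest to show from userspace; a small demo, not part of the patch, which needs root and a kernel with CLONE_NEWIPC (present since 2.6.19):

    /* ipcns_demo.c: SysV ids do not cross ipc namespaces. */
    #define _GNU_SOURCE
    #include <stdio.h>
    #include <sched.h>
    #include <sys/ipc.h>
    #include <sys/shm.h>

    int main(void)
    {
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

        printf("shmid in parent namespace: %d\n", id);
        if (unshare(CLONE_NEWIPC) != 0) {
            perror("unshare(CLONE_NEWIPC)");
            return 1;
        }
        /* The old id is invisible here, so this fails with EINVAL.
         * (A real program would clean up the parent-side segment.) */
        if (shmctl(id, IPC_RMID, NULL) != 0)
            perror("shmctl in new namespace");
        return 0;
    }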
+ */ + +-void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size) ++void ipc_init_ids(struct ipc_ids* ids, int size) + { + int i; + +diff -Nurb linux-2.6.22-570/ipc/util.h linux-2.6.22-591/ipc/util.h +--- linux-2.6.22-570/ipc/util.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/ipc/util.h 2007-12-21 15:36:12.000000000 -0500 +@@ -41,12 +41,8 @@ + }; + + struct seq_file; +-#ifdef CONFIG_IPC_NS +-#define __ipc_init +-#else +-#define __ipc_init __init +-#endif +-void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size); ++ ++void ipc_init_ids(struct ipc_ids *ids, int size); + #ifdef CONFIG_PROC_FS + void __init ipc_init_proc_interface(const char *path, const char *header, + int ids, int (*show)(struct seq_file *, void *)); +diff -Nurb linux-2.6.22-570/kernel/Makefile linux-2.6.22-591/kernel/Makefile +--- linux-2.6.22-570/kernel/Makefile 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -4,11 +4,12 @@ + + obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ + exit.o itimer.o time.o softirq.o resource.o \ +- sysctl.o capability.o ptrace.o timer.o user.o \ ++ sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ + signal.o sys.o kmod.o workqueue.o pid.o \ + rcupdate.o extable.o params.o posix-timers.o \ + kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ +- hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o ++ hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \ ++ utsname.o + + obj-y += vserver/ + +@@ -33,16 +34,22 @@ + obj-$(CONFIG_UID16) += uid16.o + obj-$(CONFIG_MODULES) += module.o + obj-$(CONFIG_KALLSYMS) += kallsyms.o ++obj-$(CONFIG_STACK_UNWIND) += unwind.o + obj-$(CONFIG_PM) += power/ + obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o + obj-$(CONFIG_KEXEC) += kexec.o + obj-$(CONFIG_COMPAT) += compat.o ++obj-$(CONFIG_CONTAINERS) += container.o ++obj-$(CONFIG_CONTAINER_DEBUG) += container_debug.o + obj-$(CONFIG_CPUSETS) += cpuset.o ++obj-$(CONFIG_CONTAINER_CPUACCT) += cpu_acct.o ++obj-$(CONFIG_CONTAINER_NS) += ns_container.o + obj-$(CONFIG_IKCONFIG) += configs.o + obj-$(CONFIG_STOP_MACHINE) += stop_machine.o + obj-$(CONFIG_AUDIT) += audit.o auditfilter.o + obj-$(CONFIG_AUDITSYSCALL) += auditsc.o + obj-$(CONFIG_KPROBES) += kprobes.o ++obj-$(CONFIG_KGDB) += kgdb.o + obj-$(CONFIG_SYSFS) += ksysfs.o + obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o + obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ +@@ -50,7 +57,6 @@ + obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o + obj-$(CONFIG_RELAY) += relay.o + obj-$(CONFIG_SYSCTL) += utsname_sysctl.o +-obj-$(CONFIG_UTS_NS) += utsname.o + obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o + obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o + +diff -Nurb linux-2.6.22-570/kernel/audit.c linux-2.6.22-591/kernel/audit.c +--- linux-2.6.22-570/kernel/audit.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/audit.c 2007-12-21 15:36:15.000000000 -0500 +@@ -391,6 +391,7 @@ + { + struct sk_buff *skb; + ++ set_freezable(); + while (!kthread_should_stop()) { + skb = skb_dequeue(&audit_skb_queue); + wake_up(&audit_backlog_wait); +@@ -794,8 +795,8 @@ + + printk(KERN_INFO "audit: initializing netlink socket (%s)\n", + audit_default ? 
"enabled" : "disabled"); +- audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, +- NULL, THIS_MODULE); ++ audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0, ++ audit_receive, NULL, THIS_MODULE); + if (!audit_sock) + audit_panic("cannot initialize netlink socket"); + else +diff -Nurb linux-2.6.22-570/kernel/auditsc.c linux-2.6.22-591/kernel/auditsc.c +--- linux-2.6.22-570/kernel/auditsc.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/kernel/auditsc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1500,6 +1500,7 @@ + context->names[idx].ino = (unsigned long)-1; + } + } ++EXPORT_SYMBOL(__audit_inode_child); + + /** + * auditsc_get_stamp - get local copies of audit_context values +diff -Nurb linux-2.6.22-570/kernel/container.c linux-2.6.22-591/kernel/container.c +--- linux-2.6.22-570/kernel/container.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/container.c 2007-12-21 15:36:15.000000000 -0500 +@@ -0,0 +1,2545 @@ ++/* ++ * kernel/container.c ++ * ++ * Generic process-grouping system. ++ * ++ * Based originally on the cpuset system, extracted by Paul Menage ++ * Copyright (C) 2006 Google, Inc ++ * ++ * Copyright notices from the original cpuset code: ++ * -------------------------------------------------- ++ * Copyright (C) 2003 BULL SA. ++ * Copyright (C) 2004-2006 Silicon Graphics, Inc. ++ * ++ * Portions derived from Patrick Mochel's sysfs code. ++ * sysfs is Copyright (c) 2001-3 Patrick Mochel ++ * ++ * 2003-10-10 Written by Simon Derr. ++ * 2003-10-22 Updates by Stephen Hemminger. ++ * 2004 May-July Rework by Paul Jackson. ++ * --------------------------------------------------- ++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file COPYING in the main directory of the Linux ++ * distribution for more details. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++static DEFINE_MUTEX(container_mutex); ++ ++/* Generate an array of container subsystem pointers */ ++#define SUBSYS(_x) &_x ## _subsys, ++ ++static struct container_subsys *subsys[] = { ++#include ++}; ++ ++/* A containerfs_root represents the root of a container hierarchy, ++ * and may be associated with a superblock to form an active ++ * hierarchy */ ++struct containerfs_root { ++ struct super_block *sb; ++ ++ /* The bitmask of subsystems attached to this hierarchy */ ++ unsigned long subsys_bits; ++ ++ /* A list running through the attached subsystems */ ++ struct list_head subsys_list; ++ ++ /* The root container for this hierarchy */ ++ struct container top_container; ++ ++ /* Tracks how many containers are currently defined in hierarchy.*/ ++ int number_of_containers; ++ ++ /* A list running through the mounted hierarchies */ ++ struct list_head root_list; ++ ++ /* The path to use for release notifications. No locking ++ * between setting and use - so if userspace updates this ++ * while subcontainers exist, you could miss a ++ * notification. 
We ensure that it's always a valid ++ * NUL-terminated string */ ++ char release_agent_path[PATH_MAX]; ++}; ++ ++ ++/* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the ++ * subsystems that are otherwise unattached - it never has more than a ++ * single container, and all tasks are part of that container. */ ++ ++static struct containerfs_root rootnode; ++ ++/* The list of hierarchy roots */ ++ ++static LIST_HEAD(roots); ++static int root_count; ++ ++/* dummytop is a shorthand for the dummy hierarchy's top container */ ++#define dummytop (&rootnode.top_container) ++ ++/* This flag indicates whether tasks in the fork and exit paths should ++ * take callback_mutex and check for fork/exit handlers to call. This ++ * avoids us having to do extra work in the fork/exit path if none of the ++ * subsystems need to be called. ++ */ ++static int need_forkexit_callback; ++ ++/* bits in struct container flags field */ ++enum { ++ /* Container is dead */ ++ CONT_REMOVED, ++ /* Container has previously had a child container or a task, ++ * but no longer (only if CONT_NOTIFY_ON_RELEASE is set) */ ++ CONT_RELEASABLE, ++ /* Container requires release notifications to userspace */ ++ CONT_NOTIFY_ON_RELEASE, ++}; ++ ++/* convenient tests for these bits */ ++inline int container_is_removed(const struct container *cont) ++{ ++ return test_bit(CONT_REMOVED, &cont->flags); ++} ++ ++inline int container_is_releasable(const struct container *cont) ++{ ++ const int bits = ++ (1 << CONT_RELEASABLE) | ++ (1 << CONT_NOTIFY_ON_RELEASE); ++ return (cont->flags & bits) == bits; ++} ++ ++inline int notify_on_release(const struct container *cont) ++{ ++ return test_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags); ++} ++ ++/* for_each_subsys() allows you to iterate on each subsystem attached to ++ * an active hierarchy */ ++#define for_each_subsys(_root, _ss) \ ++list_for_each_entry(_ss, &_root->subsys_list, sibling) ++ ++/* for_each_root() allows you to iterate across the active hierarchies */ ++#define for_each_root(_root) \ ++list_for_each_entry(_root, &roots, root_list) ++ ++/* the list of containers eligible for automatic release */ ++static LIST_HEAD(release_list); ++static void container_release_agent(struct work_struct *work); ++static DECLARE_WORK(release_agent_work, container_release_agent); ++static void check_for_release(struct container *cont); ++ ++/* Link structure for associating css_group objects with containers */ ++struct cg_container_link { ++ /* ++ * List running through cg_container_links associated with a ++ * container, anchored on container->css_groups ++ */ ++ struct list_head cont_link_list; ++ /* ++ * List running through cg_container_links pointing at a ++ * single css_group object, anchored on css_group->cg_links ++ */ ++ struct list_head cg_link_list; ++ struct css_group *cg; ++}; ++ ++/* The default css_group - used by init and its children prior to any ++ * hierarchies being mounted. It contains a pointer to the root state ++ * for each subsystem. Also used to anchor the list of css_groups. Not ++ * reference-counted, to improve performance when child containers ++ * haven't been created. ++ */ ++ ++static struct css_group init_css_group; ++static struct cg_container_link init_css_group_link; ++ ++/* css_group_lock protects the list of css_group objects, and the ++ * chain of tasks off each css_group. 
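
The CONT_NOTIFY_ON_RELEASE and CONT_RELEASABLE bits above drive the release-agent machinery (check_for_release() and the release_agent control file defined further down). From userspace the mechanism is armed through two files; a sketch, where the /containers mount point is invented for the example and /sbin/container_release_agent is the helper named in the locking comment below:

    /* arm_release.c: ask for a callback when jail1 becomes empty. */
    #include <stdio.h>

    static int write_str(const char *path, const char *val)
    {
        FILE *f = fopen(path, "w");

        if (!f)
            return -1;
        fprintf(f, "%s", val);
        return fclose(f);
    }

    int main(void)
    {
        /* root-level file: program run when a marked container empties */
        write_str("/containers/release_agent",
                  "/sbin/container_release_agent");
        /* per-container flag tested by notify_on_release() */
        write_str("/containers/jail1/notify_on_release", "1");
        return 0;
    }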
Nests inside task->alloc_lock */ ++static DEFINE_RWLOCK(css_group_lock); ++static int css_group_count; ++ ++ ++/* When we create or destroy a css_group, the operation simply ++ * takes/releases a reference count on all the containers referenced ++ * by subsystems in this css_group. This can end up multiple-counting ++ * some containers, but that's OK - the ref-count is just a ++ * busy/not-busy indicator; ensuring that we only count each container ++ * once would require taking a global lock to ensure that no ++ * subsystems moved between hierarchies while we were doing so. ++ * ++ * Possible TODO: decide at boot time based on the number of ++ * registered subsystems and the number of CPUs or NUMA nodes whether ++ * it's better for performance to ref-count every subsystem, or to ++ * take a global lock and only add one ref count to each hierarchy. ++ */ ++ ++/* ++ * unlink a css_group from the list and free it ++ */ ++static void unlink_css_group(struct css_group *cg) ++{ ++ write_lock(&css_group_lock); ++ list_del(&cg->list); ++ css_group_count--; ++ while (!list_empty(&cg->cg_links)) { ++ struct cg_container_link *link; ++ link = list_entry(cg->cg_links.next, ++ struct cg_container_link, cg_link_list); ++ list_del(&link->cg_link_list); ++ list_del(&link->cont_link_list); ++ kfree(link); ++ } ++ write_unlock(&css_group_lock); ++} ++ ++static void release_css_group(struct kref *k) ++{ ++ int i; ++ struct css_group *cg = container_of(k, struct css_group, ref); ++ ++ BUG_ON(!mutex_is_locked(&container_mutex)); ++ unlink_css_group(cg); ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container *cont = cg->subsys[i]->container; ++ if (atomic_dec_and_test(&cont->count) && ++ container_is_releasable(cont)) { ++ check_for_release(cont); ++ } ++ } ++ kfree(cg); ++} ++ ++/* ++ * In the task exit path we want to avoid taking container_mutex ++ * unless absolutely necessary, so the release process is slightly ++ * different. ++ */ ++static void release_css_group_taskexit(struct kref *k) ++{ ++ int i; ++ struct css_group *cg = container_of(k, struct css_group, ref); ++ ++ unlink_css_group(cg); ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container *cont = cg->subsys[i]->container; ++ if (notify_on_release(cont)) { ++ mutex_lock(&container_mutex); ++ set_bit(CONT_RELEASABLE, &cont->flags); ++ if (atomic_dec_and_test(&cont->count)) ++ check_for_release(cont); ++ mutex_unlock(&container_mutex); ++ } else { ++ atomic_dec(&cont->count); ++ } ++ } ++ kfree(cg); ++} ++ ++/* ++ * refcounted get/put for css_group objects ++ */ ++static inline void get_css_group(struct css_group *cg) ++{ ++ kref_get(&cg->ref); ++} ++ ++static inline void put_css_group(struct css_group *cg) ++{ ++ kref_put(&cg->ref, release_css_group); ++} ++ ++static inline void put_css_group_taskexit(struct css_group *cg) ++{ ++ kref_put(&cg->ref, release_css_group_taskexit); ++} ++ ++/* ++ * find_existing_css_group() is a helper for ++ * find_css_group(), and checks to see whether an existing ++ * css_group is suitable. 
This currently walks a linked-list for ++ * simplicity; a later patch will use a hash table for better ++ * performance ++ * ++ * oldcg: the container group that we're using before the container ++ * transition ++ * ++ * cont: the container that we're moving into ++ * ++ * template: location in which to build the desired set of subsystem ++ * state objects for the new container group ++ */ ++ ++static struct css_group *find_existing_css_group( ++ struct css_group *oldcg, ++ struct container *cont, ++ struct container_subsys_state *template[]) ++{ ++ int i; ++ struct containerfs_root *root = cont->root; ++ struct list_head *l = &init_css_group.list; ++ ++ /* Build the set of subsystem state objects that we want to ++ * see in the new css_group */ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ if (root->subsys_bits & (1ull << i)) { ++ /* Subsystem is in this hierarchy. So we want ++ * the subsystem state from the new ++ * container */ ++ template[i] = cont->subsys[i]; ++ } else { ++ /* Subsystem is not in this hierarchy, so we ++ * don't want to change the subsystem state */ ++ template[i] = oldcg->subsys[i]; ++ } ++ } ++ ++ /* Look through existing container groups to find one to reuse */ ++ do { ++ struct css_group *cg = ++ list_entry(l, struct css_group, list); ++ ++ if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { ++ /* All subsystems matched */ ++ return cg; ++ } ++ /* Try the next container group */ ++ l = l->next; ++ } while (l != &init_css_group.list); ++ ++ /* No existing container group matched */ ++ return NULL; ++} ++ ++/* ++ * allocate_cg_links() allocates "count" cg_container_link structures ++ * and chains them on tmp through their cont_link_list fields. Returns 0 on ++ * success or a negative error ++ */ ++ ++static int allocate_cg_links(int count, struct list_head *tmp) ++{ ++ struct cg_container_link *link; ++ int i; ++ INIT_LIST_HEAD(tmp); ++ for (i = 0; i < count; i++) { ++ link = kmalloc(sizeof(*link), GFP_KERNEL); ++ if (!link) { ++ while (!list_empty(tmp)) { ++ link = list_entry(tmp->next, ++ struct cg_container_link, ++ cont_link_list); ++ list_del(&link->cont_link_list); ++ kfree(link); ++ } ++ return -ENOMEM; ++ } ++ list_add(&link->cont_link_list, tmp); ++ } ++ return 0; ++} ++ ++/* ++ * find_css_group() takes an existing container group and a ++ * container object, and returns a css_group object that's ++ * equivalent to the old group, but with the given container ++ * substituted into the appropriate hierarchy.
Must be called with ++ * container_mutex held ++ */ ++ ++static struct css_group *find_css_group( ++ struct css_group *oldcg, struct container *cont) ++{ ++ struct css_group *res; ++ struct container_subsys_state *template[CONTAINER_SUBSYS_COUNT]; ++ int i; ++ ++ struct list_head tmp_cg_links; ++ struct cg_container_link *link; ++ ++ /* First see if we already have a container group that matches ++ * the desired set */ ++ write_lock(&css_group_lock); ++ res = find_existing_css_group(oldcg, cont, template); ++ if (res) ++ get_css_group(res); ++ write_unlock(&css_group_lock); ++ ++ if (res) ++ return res; ++ ++ res = kmalloc(sizeof(*res), GFP_KERNEL); ++ if (!res) ++ return NULL; ++ ++ /* Allocate all the cg_container_link objects that we'll need */ ++ if (allocate_cg_links(root_count, &tmp_cg_links) < 0) { ++ kfree(res); ++ return NULL; ++ } ++ ++ kref_init(&res->ref); ++ INIT_LIST_HEAD(&res->cg_links); ++ INIT_LIST_HEAD(&res->tasks); ++ ++ /* Copy the set of subsystem state objects generated in ++ * find_existing_css_group() */ ++ memcpy(res->subsys, template, sizeof(res->subsys)); ++ ++ write_lock(&css_group_lock); ++ /* Add reference counts and links from the new css_group. */ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container *cont = res->subsys[i]->container; ++ struct container_subsys *ss = subsys[i]; ++ atomic_inc(&cont->count); ++ /* ++ * We want to add a link once per container, so we ++ * only do it for the first subsystem in each ++ * hierarchy ++ */ ++ if (ss->root->subsys_list.next == &ss->sibling) { ++ BUG_ON(list_empty(&tmp_cg_links)); ++ link = list_entry(tmp_cg_links.next, ++ struct cg_container_link, ++ cont_link_list); ++ list_del(&link->cont_link_list); ++ list_add(&link->cont_link_list, &cont->css_groups); ++ link->cg = res; ++ list_add(&link->cg_link_list, &res->cg_links); ++ } ++ } ++ if (list_empty(&rootnode.subsys_list)) { ++ link = list_entry(tmp_cg_links.next, ++ struct cg_container_link, ++ cont_link_list); ++ list_del(&link->cont_link_list); ++ list_add(&link->cont_link_list, &dummytop->css_groups); ++ link->cg = res; ++ list_add(&link->cg_link_list, &res->cg_links); ++ } ++ ++ BUG_ON(!list_empty(&tmp_cg_links)); ++ ++ /* Link this container group into the list */ ++ list_add(&res->list, &init_css_group.list); ++ css_group_count++; ++ INIT_LIST_HEAD(&res->tasks); ++ write_unlock(&css_group_lock); ++ ++ return res; ++} ++ ++/* ++ * There is one global container mutex. We also require taking ++ * task_lock() when dereferencing a task's container subsys pointers. ++ * See "The task_lock() exception", at the end of this comment. ++ * ++ * A task must hold container_mutex to modify containers. ++ * ++ * Any task can increment and decrement the count field without lock. ++ * So in general, code holding container_mutex can't rely on the count ++ * field not changing. However, if the count goes to zero, then only ++ * attach_task() can increment it again. Because a count of zero ++ * means that no tasks are currently attached, therefore there is no ++ * way a task attached to that container can fork (the other way to ++ * increment the count). So code holding container_mutex can safely ++ * assume that if the count is zero, it will stay zero. Similarly, if ++ * a task holds container_mutex on a container with zero count, it ++ * knows that the container won't be removed, as container_rmdir() ++ * needs that mutex. 
++ * ++ * The container_common_file_write handler for operations that modify ++ * the container hierarchy holds container_mutex across the entire operation, ++ * single threading all such container modifications across the system. ++ * ++ * The fork and exit callbacks container_fork() and container_exit(), don't ++ * (usually) take container_mutex. These are the two most performance ++ * critical pieces of code here. The exception occurs on container_exit(), ++ * when a task in a notify_on_release container exits. Then container_mutex ++ * is taken, and if the container count is zero, a usermode call is made ++ * to /sbin/container_release_agent with the name of the container (path ++ * relative to the root of container file system) as the argument. ++ * ++ * A container can only be deleted if both its 'count' of using tasks ++ * is zero, and its list of 'children' containers is empty. Since all ++ * tasks in the system use _some_ container, and since there is always at ++ * least one task in the system (init, pid == 1), therefore, top_container ++ * always has either children containers and/or using tasks. So we don't ++ * need a special hack to ensure that top_container cannot be deleted. ++ * ++ * The task_lock() exception ++ * ++ * The need for this exception arises from the action of ++ * attach_task(), which overwrites one task's container pointer with ++ * another. It does so using container_mutex; however, there are ++ * several performance critical places that need to reference ++ * task->container without the expense of grabbing a system global ++ * mutex. Therefore except as noted below, when dereferencing or, as ++ * in attach_task(), modifying a task's container pointer we use ++ * task_lock(), which acts on a spinlock (task->alloc_lock) already in ++ * the task_struct routinely used for such matters. ++ * ++ * P.S. One more locking exception. RCU is used to guard the ++ * update of a task's container pointer by attach_task() ++ */ ++ ++/** ++ * container_lock - lock out any changes to container structures ++ * ++ */ ++ ++void container_lock(void) ++{ ++ mutex_lock(&container_mutex); ++} ++ ++/** ++ * container_unlock - release lock on container changes ++ * ++ * Undo the lock taken in a previous container_lock() call. ++ */ ++ ++void container_unlock(void) ++{ ++ mutex_unlock(&container_mutex); ++} ++ ++/* ++ * A couple of forward declarations required, due to cyclic reference loop: ++ * container_mkdir -> container_create -> container_populate_dir -> ++ * container_add_file -> container_create_file -> container_dir_inode_operations ++ * -> container_mkdir.
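
The task_lock() rule spelled out in the big comment above, reduced to code. This helper is not in the patch; it only illustrates how a hot path would pin a task's container state without touching container_mutex, using the patch's own task_container() and container_is_removed():

    /* Illustrative only: safe read of tsk's container state. */
    static int example_container_removed(struct task_struct *tsk,
                                         int subsys_id)
    {
        int removed;

        task_lock(tsk);  /* attach_task() takes the same lock to switch it */
        removed = container_is_removed(task_container(tsk, subsys_id));
        task_unlock(tsk);
        /* the container pointer itself is only stable under the lock */
        return removed;
    }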
++ */ ++ ++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode); ++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry); ++static int container_populate_dir(struct container *cont); ++static struct inode_operations container_dir_inode_operations; ++static struct file_operations proc_containerstats_operations; ++ ++static struct inode *container_new_inode(mode_t mode, struct super_block *sb) ++{ ++ struct inode *inode = new_inode(sb); ++ static struct backing_dev_info container_backing_dev_info = { ++ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, ++ }; ++ ++ if (inode) { ++ inode->i_mode = mode; ++ inode->i_uid = current->fsuid; ++ inode->i_gid = current->fsgid; ++ inode->i_blocks = 0; ++ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; ++ inode->i_mapping->backing_dev_info = &container_backing_dev_info; ++ } ++ return inode; ++} ++ ++static void container_diput(struct dentry *dentry, struct inode *inode) ++{ ++ /* is dentry a directory ? if so, kfree() associated container */ ++ if (S_ISDIR(inode->i_mode)) { ++ struct container *cont = dentry->d_fsdata; ++ BUG_ON(!(container_is_removed(cont))); ++ kfree(cont); ++ } ++ iput(inode); ++} ++ ++static struct dentry *container_get_dentry(struct dentry *parent, ++ const char *name) ++{ ++ struct dentry *d = lookup_one_len(name, parent, strlen(name)); ++ static struct dentry_operations container_dops = { ++ .d_iput = container_diput, ++ }; ++ ++ if (!IS_ERR(d)) ++ d->d_op = &container_dops; ++ return d; ++} ++ ++static void remove_dir(struct dentry *d) ++{ ++ struct dentry *parent = dget(d->d_parent); ++ ++ d_delete(d); ++ simple_rmdir(parent->d_inode, d); ++ dput(parent); ++} ++ ++static void container_clear_directory(struct dentry *dentry) ++{ ++ struct list_head *node; ++ ++ BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); ++ spin_lock(&dcache_lock); ++ node = dentry->d_subdirs.next; ++ while (node != &dentry->d_subdirs) { ++ struct dentry *d = list_entry(node, struct dentry, d_u.d_child); ++ list_del_init(node); ++ if (d->d_inode) { ++ /* This should never be called on a container ++ * directory with child containers */ ++ BUG_ON(d->d_inode->i_mode & S_IFDIR); ++ d = dget_locked(d); ++ spin_unlock(&dcache_lock); ++ d_delete(d); ++ simple_unlink(dentry->d_inode, d); ++ dput(d); ++ spin_lock(&dcache_lock); ++ } ++ node = dentry->d_subdirs.next; ++ } ++ spin_unlock(&dcache_lock); ++} ++ ++/* ++ * NOTE : the dentry must have been dget()'ed ++ */ ++static void container_d_remove_dir(struct dentry *dentry) ++{ ++ container_clear_directory(dentry); ++ ++ spin_lock(&dcache_lock); ++ list_del_init(&dentry->d_u.d_child); ++ spin_unlock(&dcache_lock); ++ remove_dir(dentry); ++} ++ ++static int rebind_subsystems(struct containerfs_root *root, ++ unsigned long final_bits) ++{ ++ unsigned long added_bits, removed_bits; ++ struct container *cont = &root->top_container; ++ int i; ++ ++ removed_bits = root->subsys_bits & ~final_bits; ++ added_bits = final_bits & ~root->subsys_bits; ++ /* Check that any added subsystems are currently free */ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ unsigned long long bit = 1ull << i; ++ struct container_subsys *ss = subsys[i]; ++ if (!(bit & added_bits)) ++ continue; ++ if (ss->root != &rootnode) { ++ /* Subsystem isn't free */ ++ return -EBUSY; ++ } ++ } ++ ++ /* Currently we don't handle adding/removing subsystems when ++ * any subcontainers exist. 
This is theoretically supportable ++ * but involves complex error handling, so it's being left until ++ * later */ ++ if (!list_empty(&cont->children)) ++ return -EBUSY; ++ ++ /* Process each subsystem */ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ unsigned long bit = 1UL << i; ++ if (bit & added_bits) { ++ /* We're binding this subsystem to this hierarchy */ ++ BUG_ON(cont->subsys[i]); ++ BUG_ON(!dummytop->subsys[i]); ++ BUG_ON(dummytop->subsys[i]->container != dummytop); ++ cont->subsys[i] = dummytop->subsys[i]; ++ cont->subsys[i]->container = cont; ++ list_add(&ss->sibling, &root->subsys_list); ++ rcu_assign_pointer(ss->root, root); ++ if (ss->bind) ++ ss->bind(ss, cont); ++ ++ } else if (bit & removed_bits) { ++ /* We're removing this subsystem */ ++ BUG_ON(cont->subsys[i] != dummytop->subsys[i]); ++ BUG_ON(cont->subsys[i]->container != cont); ++ if (ss->bind) ++ ss->bind(ss, dummytop); ++ dummytop->subsys[i]->container = dummytop; ++ cont->subsys[i] = NULL; ++ rcu_assign_pointer(subsys[i]->root, &rootnode); ++ list_del(&ss->sibling); ++ } else if (bit & final_bits) { ++ /* Subsystem state should already exist */ ++ BUG_ON(!cont->subsys[i]); ++ } else { ++ /* Subsystem state shouldn't exist */ ++ BUG_ON(cont->subsys[i]); ++ } ++ } ++ root->subsys_bits = final_bits; ++ synchronize_rcu(); ++ ++ return 0; ++} ++ ++/* ++ * Release the last use of a hierarchy. Will never be called when ++ * there are active subcontainers since each subcontainer bumps the ++ * value of sb->s_active. ++ */ ++static void container_put_super(struct super_block *sb) ++{ ++ struct containerfs_root *root = sb->s_fs_info; ++ struct container *cont = &root->top_container; ++ int ret; ++ ++ root->sb = NULL; ++ sb->s_fs_info = NULL; ++ ++ mutex_lock(&container_mutex); ++ ++ BUG_ON(root->number_of_containers != 1); ++ BUG_ON(!list_empty(&cont->children)); ++ BUG_ON(!list_empty(&cont->sibling)); ++ BUG_ON(!root->subsys_bits); ++ ++ /* Rebind all subsystems back to the default hierarchy */ ++ ret = rebind_subsystems(root, 0); ++ BUG_ON(ret); ++ ++ write_lock(&css_group_lock); ++ while (!list_empty(&cont->css_groups)) { ++ struct cg_container_link *link; ++ link = list_entry(cont->css_groups.next, ++ struct cg_container_link, cont_link_list); ++ list_del(&link->cg_link_list); ++ list_del(&link->cont_link_list); ++ kfree(link); ++ } ++ write_unlock(&css_group_lock); ++ ++ list_del(&root->root_list); ++ root_count--; ++ kfree(root); ++ mutex_unlock(&container_mutex); ++} ++ ++static int container_show_options(struct seq_file *seq, struct vfsmount *vfs) ++{ ++ struct containerfs_root *root = vfs->mnt_sb->s_fs_info; ++ struct container_subsys *ss; ++ ++ for_each_subsys(root, ss) ++ seq_printf(seq, ",%s", ss->name); ++ return 0; ++} ++ ++/* Convert a hierarchy specifier into a bitmask. 
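
That specifier is simply the option string handed to mount(2). A userspace sketch of creating a hierarchy bound to two subsystems; the /containers mount point and the subsystem names ("cpuset", "cpuacct") are assumptions that depend on what the kernel was built with:

    /* mount_hier.c: mount -t container -o cpuset,cpuacct none /containers */
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
        /* omitting options (or passing "all") binds every subsystem */
        if (mount("none", "/containers", "container", 0,
                  "cpuset,cpuacct")) {
            perror("mount");
            return 1;
        }
        return 0;
    }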
LL=container_mutex */ ++static int parse_containerfs_options(char *opts, unsigned long *bits) ++{ ++ char *token, *o = opts ?: "all"; ++ ++ *bits = 0; ++ ++ while ((token = strsep(&o, ",")) != NULL) { ++ if (!*token) ++ return -EINVAL; ++ if (!strcmp(token, "all")) { ++ *bits = (1 << CONTAINER_SUBSYS_COUNT) - 1; ++ } else { ++ struct container_subsys *ss; ++ int i; ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ ss = subsys[i]; ++ if (!strcmp(token, ss->name)) { ++ *bits |= 1 << i; ++ break; ++ } ++ } ++ if (i == CONTAINER_SUBSYS_COUNT) ++ return -ENOENT; ++ } ++ } ++ ++ /* We can't have an empty hierarchy */ ++ if (!*bits) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++static int container_remount(struct super_block *sb, int *flags, char *data) ++{ ++ int ret = 0; ++ unsigned long subsys_bits; ++ struct containerfs_root *root = sb->s_fs_info; ++ struct container *cont = &root->top_container; ++ ++ mutex_lock(&cont->dentry->d_inode->i_mutex); ++ mutex_lock(&container_mutex); ++ ++ /* See what subsystems are wanted */ ++ ret = parse_containerfs_options(data, &subsys_bits); ++ if (ret) ++ goto out_unlock; ++ ++ ret = rebind_subsystems(root, subsys_bits); ++ ++ /* (re)populate subsystem files */ ++ if (!ret) ++ container_populate_dir(cont); ++ ++ out_unlock: ++ mutex_unlock(&container_mutex); ++ mutex_unlock(&cont->dentry->d_inode->i_mutex); ++ return ret; ++} ++ ++static struct super_operations container_ops = { ++ .statfs = simple_statfs, ++ .drop_inode = generic_delete_inode, ++ .put_super = container_put_super, ++ .show_options = container_show_options, ++ .remount_fs = container_remount, ++}; ++ ++static int container_fill_super(struct super_block *sb, void *options, ++ int unused_silent) ++{ ++ struct inode *inode; ++ struct dentry *root; ++ struct containerfs_root *hroot = options; ++ ++ sb->s_blocksize = PAGE_CACHE_SIZE; ++ sb->s_blocksize_bits = PAGE_CACHE_SHIFT; ++ sb->s_magic = CONTAINER_SUPER_MAGIC; ++ sb->s_op = &container_ops; ++ ++ inode = container_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb); ++ if (!inode) ++ return -ENOMEM; ++ ++ inode->i_op = &simple_dir_inode_operations; ++ inode->i_fop = &simple_dir_operations; ++ inode->i_op = &container_dir_inode_operations; ++ /* directories start off with i_nlink == 2 (for "." 
entry) */ ++ inc_nlink(inode); ++ ++ root = d_alloc_root(inode); ++ if (!root) { ++ iput(inode); ++ return -ENOMEM; ++ } ++ sb->s_root = root; ++ root->d_fsdata = &hroot->top_container; ++ hroot->top_container.dentry = root; ++ ++ strcpy(hroot->release_agent_path, ""); ++ sb->s_fs_info = hroot; ++ hroot->sb = sb; ++ ++ return 0; ++} ++ ++static void init_container_root(struct containerfs_root *root) ++{ ++ struct container *cont = &root->top_container; ++ INIT_LIST_HEAD(&root->subsys_list); ++ root->number_of_containers = 1; ++ cont->root = root; ++ cont->top_container = cont; ++ INIT_LIST_HEAD(&cont->sibling); ++ INIT_LIST_HEAD(&cont->children); ++ INIT_LIST_HEAD(&cont->css_groups); ++ INIT_LIST_HEAD(&cont->release_list); ++ list_add(&root->root_list, &roots); ++ root_count++; ++} ++ ++static int container_get_sb(struct file_system_type *fs_type, ++ int flags, const char *unused_dev_name, ++ void *data, struct vfsmount *mnt) ++{ ++ unsigned long subsys_bits = 0; ++ int ret = 0; ++ struct containerfs_root *root = NULL; ++ int use_existing = 0; ++ ++ mutex_lock(&container_mutex); ++ ++ /* First find the desired set of resource controllers */ ++ ret = parse_containerfs_options(data, &subsys_bits); ++ if (ret) ++ goto out_unlock; ++ ++ /* See if we already have a hierarchy containing this set */ ++ ++ for_each_root(root) { ++ /* We match - use this hierarchy */ ++ if (root->subsys_bits == subsys_bits) { ++ use_existing = 1; ++ break; ++ } ++ /* We clash - fail */ ++ if (root->subsys_bits & subsys_bits) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ } ++ ++ if (!use_existing) { ++ /* We need a new root */ ++ struct list_head tmp_cg_links, *l; ++ root = kzalloc(sizeof(*root), GFP_KERNEL); ++ if (!root) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ /* We're accessing css_group_count without locking ++ * here, but that's OK - it can only be increased by ++ * someone holding container_lock, and that's us.
The ++ * worst that can happen is that we have some link ++ * structures left over */ ++ ret = allocate_cg_links(css_group_count, &tmp_cg_links); ++ if (ret < 0) { ++ kfree(root); ++ goto out_unlock; ++ } ++ init_container_root(root); ++ ++ /* Link the top container in this hierarchy into all ++ * the css_group objects */ ++ write_lock(&css_group_lock); ++ l = &init_css_group.list; ++ do { ++ struct css_group *cg; ++ struct cg_container_link *link; ++ cg = list_entry(l, struct css_group, list); ++ BUG_ON(list_empty(&tmp_cg_links)); ++ link = list_entry(tmp_cg_links.next, ++ struct cg_container_link, ++ cont_link_list); ++ list_del(&link->cont_link_list); ++ link->cg = cg; ++ list_add(&link->cont_link_list, ++ &root->top_container.css_groups); ++ list_add(&link->cg_link_list, &cg->cg_links); ++ l = l->next; ++ } while (l != &init_css_group.list); ++ write_unlock(&css_group_lock); ++ ++ while (!list_empty(&tmp_cg_links)) { ++ /* Probably shouldn't happen */ ++ struct cg_container_link *link; ++ printk(KERN_INFO "Freeing unused cg_container_link\n"); ++ link = list_entry(tmp_cg_links.next, ++ struct cg_container_link, ++ cont_link_list); ++ list_del(&link->cont_link_list); ++ kfree(link); ++ } ++ } ++ ++ if (!root->sb) { ++ /* We need a new superblock for this container combination */ ++ struct container *cont = &root->top_container; ++ ++ BUG_ON(root->subsys_bits); ++ ret = get_sb_nodev(fs_type, flags, root, ++ container_fill_super, mnt); ++ if (ret) ++ goto out_unlock; ++ ++ BUG_ON(!list_empty(&cont->sibling)); ++ BUG_ON(!list_empty(&cont->children)); ++ BUG_ON(root->number_of_containers != 1); ++ ++ ret = rebind_subsystems(root, subsys_bits); ++ ++ /* It's safe to nest i_mutex inside container_mutex in ++ * this case, since no-one else can be accessing this ++ * directory yet */ ++ mutex_lock(&cont->dentry->d_inode->i_mutex); ++ container_populate_dir(cont); ++ mutex_unlock(&cont->dentry->d_inode->i_mutex); ++ BUG_ON(ret); ++ } else { ++ /* Reuse the existing superblock */ ++ down_write(&(root->sb->s_umount)); ++ ret = simple_set_mnt(mnt, root->sb); ++ if (!ret) ++ atomic_inc(&root->sb->s_active); ++ } ++ ++ out_unlock: ++ mutex_unlock(&container_mutex); ++ return ret; ++} ++ ++static struct file_system_type container_fs_type = { ++ .name = "container", ++ .get_sb = container_get_sb, ++ .kill_sb = kill_litter_super, ++}; ++ ++static inline struct container *__d_cont(struct dentry *dentry) ++{ ++ return dentry->d_fsdata; ++} ++ ++static inline struct cftype *__d_cft(struct dentry *dentry) ++{ ++ return dentry->d_fsdata; ++} ++ ++/* ++ * Called with container_mutex held. Writes path of container into buf. ++ * Returns 0 on success, -errno on error. 
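
A typical caller of the path-building function declared by this comment, shaped like the release-agent code later in the file; illustrative only, not part of the patch:

    /* Illustrative only: render a container's path under the mutex. */
    static void example_log_path(struct container *cont)
    {
        char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);

        if (!buf)
            return;
        container_lock();
        if (container_path(cont, buf, PAGE_SIZE) == 0)
            printk(KERN_DEBUG "container at %s\n", buf);
        container_unlock();
        kfree(buf);
    }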
++ */ ++int container_path(const struct container *cont, char *buf, int buflen) ++{ ++ char *start; ++ ++ start = buf + buflen; ++ ++ *--start = '\0'; ++ for (;;) { ++ int len = cont->dentry->d_name.len; ++ if ((start -= len) < buf) ++ return -ENAMETOOLONG; ++ memcpy(start, cont->dentry->d_name.name, len); ++ cont = cont->parent; ++ if (!cont) ++ break; ++ if (!cont->parent) ++ continue; ++ if (--start < buf) ++ return -ENAMETOOLONG; ++ *start = '/'; ++ } ++ memmove(buf, start, buf + buflen - start); ++ return 0; ++} ++ ++static void get_first_subsys(const struct container *cont, ++ struct container_subsys_state **css, int *subsys_id) ++{ ++ const struct containerfs_root *root = cont->root; ++ const struct container_subsys *test_ss; ++ BUG_ON(list_empty(&root->subsys_list)); ++ test_ss = list_entry(root->subsys_list.next, ++ struct container_subsys, sibling); ++ if (css) { ++ *css = cont->subsys[test_ss->subsys_id]; ++ BUG_ON(!*css); ++ } ++ if (subsys_id) ++ *subsys_id = test_ss->subsys_id; ++} ++ ++/* ++ * Attach task 'tsk' to container 'cont' ++ * ++ * Call holding container_mutex. May take task_lock of ++ * the task 'pid' during call. ++ */ ++static int attach_task(struct container *cont, struct task_struct *tsk) ++{ ++ int retval = 0; ++ struct container_subsys *ss; ++ struct container *oldcont; ++ struct css_group *cg = tsk->containers; ++ struct css_group *newcg; ++ struct containerfs_root *root = cont->root; ++ int subsys_id; ++ ++ get_first_subsys(cont, NULL, &subsys_id); ++ ++ /* Nothing to do if the task is already in that container */ ++ oldcont = task_container(tsk, subsys_id); ++ if (cont == oldcont) ++ return 0; ++ ++ for_each_subsys(root, ss) { ++ if (ss->can_attach) { ++ retval = ss->can_attach(ss, cont, tsk); ++ if (retval) { ++ return retval; ++ } ++ } ++ } ++ ++ /* Locate or allocate a new css_group for this task, ++ * based on its final set of containers */ ++ newcg = find_css_group(cg, cont); ++ if (!newcg) { ++ return -ENOMEM; ++ } ++ ++ task_lock(tsk); ++ if (tsk->flags & PF_EXITING) { ++ task_unlock(tsk); ++ put_css_group(newcg); ++ return -ESRCH; ++ } ++ rcu_assign_pointer(tsk->containers, newcg); ++ if (!list_empty(&tsk->cg_list)) { ++ write_lock(&css_group_lock); ++ list_del(&tsk->cg_list); ++ list_add(&tsk->cg_list, &newcg->tasks); ++ write_unlock(&css_group_lock); ++ } ++ task_unlock(tsk); ++ ++ for_each_subsys(root, ss) { ++ if (ss->attach) { ++ ss->attach(ss, cont, oldcont, tsk); ++ } ++ } ++ set_bit(CONT_RELEASABLE, &oldcont->flags); ++ synchronize_rcu(); ++ put_css_group(cg); ++ return 0; ++} ++ ++/* ++ * Attach task with pid 'pid' to container 'cont'. 
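
attach_task() above is driven from userspace by writing a pid into a container's tasks file, via the attach-by-pid helper whose comment continues below. A sketch, with the /containers/jail1 path invented for the example:

    /* attach_self.c: move the calling process into a container. */
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        FILE *f = fopen("/containers/jail1/tasks", "w");

        if (!f) {
            perror("fopen");
            return 1;
        }
        /* a pid of 0 means "the writing task itself" */
        fprintf(f, "%d", (int)getpid());
        return fclose(f);
    }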
Call with ++ * container_mutex, may take task_lock of task ++ */ ++static int attach_task_by_pid(struct container *cont, char *pidbuf) ++{ ++ pid_t pid; ++ struct task_struct *tsk; ++ int ret; ++ ++ if (sscanf(pidbuf, "%d", &pid) != 1) ++ return -EIO; ++ ++ if (pid) { ++ rcu_read_lock(); ++ tsk = find_task_by_pid(pid); ++ if (!tsk || tsk->flags & PF_EXITING) { ++ rcu_read_unlock(); ++ return -ESRCH; ++ } ++ get_task_struct(tsk); ++ rcu_read_unlock(); ++ ++ if ((current->euid) && (current->euid != tsk->uid) ++ && (current->euid != tsk->suid)) { ++ put_task_struct(tsk); ++ return -EACCES; ++ } ++ } else { ++ tsk = current; ++ get_task_struct(tsk); ++ } ++ ++ ret = attach_task(cont, tsk); ++ put_task_struct(tsk); ++ return ret; ++} ++ ++/* The various types of files and directories in a container file system */ ++ ++enum container_filetype { ++ FILE_ROOT, ++ FILE_DIR, ++ FILE_TASKLIST, ++ FILE_NOTIFY_ON_RELEASE, ++ FILE_RELEASABLE, ++ FILE_RELEASE_AGENT, ++}; ++ ++static ssize_t container_common_file_write(struct container *cont, ++ struct cftype *cft, ++ struct file *file, ++ const char __user *userbuf, ++ size_t nbytes, loff_t *unused_ppos) ++{ ++ enum container_filetype type = cft->private; ++ char *buffer; ++ int retval = 0; ++ ++ if (nbytes >= PATH_MAX) ++ return -E2BIG; ++ ++ /* +1 for nul-terminator */ ++ buffer = kmalloc(nbytes + 1, GFP_KERNEL); ++ if (buffer == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(buffer, userbuf, nbytes)) { ++ retval = -EFAULT; ++ goto out1; ++ } ++ buffer[nbytes] = 0; /* nul-terminate */ ++ ++ mutex_lock(&container_mutex); ++ ++ if (container_is_removed(cont)) { ++ retval = -ENODEV; ++ goto out2; ++ } ++ ++ switch (type) { ++ case FILE_TASKLIST: ++ retval = attach_task_by_pid(cont, buffer); ++ break; ++ case FILE_NOTIFY_ON_RELEASE: ++ clear_bit(CONT_RELEASABLE, &cont->flags); ++ if (simple_strtoul(buffer, NULL, 10) != 0) ++ set_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags); ++ else ++ clear_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags); ++ break; ++ case FILE_RELEASE_AGENT: ++ { ++ struct containerfs_root *root = cont->root; ++ if (nbytes < sizeof(root->release_agent_path)) { ++ /* We never write anything other than '\0' ++ * into the last char of release_agent_path, ++ * so it always remains a NUL-terminated ++ * string */ ++ strncpy(root->release_agent_path, buffer, nbytes); ++ root->release_agent_path[nbytes] = 0; ++ } else { ++ retval = -ENOSPC; ++ } ++ break; ++ } ++ default: ++ retval = -EINVAL; ++ goto out2; ++ } ++ ++ if (retval == 0) ++ retval = nbytes; ++out2: ++ mutex_unlock(&container_mutex); ++out1: ++ kfree(buffer); ++ return retval; ++} ++ ++static ssize_t container_file_write(struct file *file, const char __user *buf, ++ size_t nbytes, loff_t *ppos) ++{ ++ struct cftype *cft = __d_cft(file->f_dentry); ++ struct container *cont = __d_cont(file->f_dentry->d_parent); ++ ++ if (!cft) ++ return -ENODEV; ++ if (!cft->write) ++ return -EINVAL; ++ ++ return cft->write(cont, cft, file, buf, nbytes, ppos); ++} ++ ++static ssize_t container_read_uint(struct container *cont, struct cftype *cft, ++ struct file *file, ++ char __user *buf, size_t nbytes, ++ loff_t *ppos) ++{ ++ char tmp[64]; ++ u64 val = cft->read_uint(cont, cft); ++ int len = sprintf(tmp, "%llu\n", (unsigned long long) val); ++ ++ return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); ++} ++ ++static ssize_t container_common_file_read(struct container *cont, ++ struct cftype *cft, ++ struct file *file, ++ char __user *buf, ++ size_t nbytes, loff_t *ppos) ++{ ++ enum container_filetype 
type = cft->private; ++ char *page; ++ ssize_t retval = 0; ++ char *s; ++ ++ if (!(page = (char *)__get_free_page(GFP_KERNEL))) ++ return -ENOMEM; ++ ++ s = page; ++ ++ switch (type) { ++ case FILE_RELEASE_AGENT: ++ { ++ struct containerfs_root *root; ++ size_t n; ++ mutex_lock(&container_mutex); ++ root = cont->root; ++ n = strnlen(root->release_agent_path, ++ sizeof(root->release_agent_path)); ++ n = min(n, (size_t) PAGE_SIZE); ++ strncpy(s, root->release_agent_path, n); ++ mutex_unlock(&container_mutex); ++ s += n; ++ break; ++ } ++ default: ++ retval = -EINVAL; ++ goto out; ++ } ++ *s++ = '\n'; ++ ++ retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page); ++out: ++ free_page((unsigned long)page); ++ return retval; ++} ++ ++static ssize_t container_file_read(struct file *file, char __user *buf, ++ size_t nbytes, loff_t *ppos) ++{ ++ struct cftype *cft = __d_cft(file->f_dentry); ++ struct container *cont = __d_cont(file->f_dentry->d_parent); ++ ++ if (!cft) ++ return -ENODEV; ++ ++ if (cft->read) ++ return cft->read(cont, cft, file, buf, nbytes, ppos); ++ if (cft->read_uint) ++ return container_read_uint(cont, cft, file, buf, nbytes, ppos); ++ return -EINVAL; ++} ++ ++static int container_file_open(struct inode *inode, struct file *file) ++{ ++ int err; ++ struct cftype *cft; ++ ++ err = generic_file_open(inode, file); ++ if (err) ++ return err; ++ ++ cft = __d_cft(file->f_dentry); ++ if (!cft) ++ return -ENODEV; ++ if (cft->open) ++ err = cft->open(inode, file); ++ else ++ err = 0; ++ ++ return err; ++} ++ ++static int container_file_release(struct inode *inode, struct file *file) ++{ ++ struct cftype *cft = __d_cft(file->f_dentry); ++ if (cft->release) ++ return cft->release(inode, file); ++ return 0; ++} ++ ++/* ++ * container_rename - Only allow simple rename of directories in place. ++ */ ++static int container_rename(struct inode *old_dir, struct dentry *old_dentry, ++ struct inode *new_dir, struct dentry *new_dentry) ++{ ++ if (!S_ISDIR(old_dentry->d_inode->i_mode)) ++ return -ENOTDIR; ++ if (new_dentry->d_inode) ++ return -EEXIST; ++ if (old_dir != new_dir) ++ return -EIO; ++ return simple_rename(old_dir, old_dentry, new_dir, new_dentry); ++} ++ ++static struct file_operations container_file_operations = { ++ .read = container_file_read, ++ .write = container_file_write, ++ .llseek = generic_file_llseek, ++ .open = container_file_open, ++ .release = container_file_release, ++}; ++ ++static struct inode_operations container_dir_inode_operations = { ++ .lookup = simple_lookup, ++ .mkdir = container_mkdir, ++ .rmdir = container_rmdir, ++ .rename = container_rename, ++}; ++ ++static int container_create_file(struct dentry *dentry, int mode, ++ struct super_block *sb) ++{ ++ struct inode *inode; ++ ++ if (!dentry) ++ return -ENOENT; ++ if (dentry->d_inode) ++ return -EEXIST; ++ ++ inode = container_new_inode(mode, sb); ++ if (!inode) ++ return -ENOMEM; ++ ++ if (S_ISDIR(mode)) { ++ inode->i_op = &container_dir_inode_operations; ++ inode->i_fop = &simple_dir_operations; ++ ++ /* start off with i_nlink == 2 (for "." entry) */ ++ inc_nlink(inode); ++ ++ /* start with the directory inode held, so that we can ++ * populate it without racing with another mkdir */ ++ mutex_lock(&inode->i_mutex); ++ } else if (S_ISREG(mode)) { ++ inode->i_size = 0; ++ inode->i_fop = &container_file_operations; ++ } ++ ++ d_instantiate(dentry, inode); ++ dget(dentry); /* Extra count - pin the dentry in core */ ++ return 0; ++} ++ ++/* ++ * container_create_dir - create a directory for an object. 
++ * cont: the container we create the directory for. ++ * It must have a valid ->parent field ++ * And we are going to fill its ->dentry field. ++ * name: The name to give to the container directory. Will be copied. ++ * mode: mode to set on new directory. ++ */ ++static int container_create_dir(struct container *cont, struct dentry *dentry, ++ int mode) ++{ ++ struct dentry *parent; ++ int error = 0; ++ ++ parent = cont->parent->dentry; ++ if (IS_ERR(dentry)) ++ return PTR_ERR(dentry); ++ error = container_create_file(dentry, S_IFDIR | mode, cont->root->sb); ++ if (!error) { ++ dentry->d_fsdata = cont; ++ inc_nlink(parent->d_inode); ++ cont->dentry = dentry; ++ } ++ dput(dentry); ++ ++ return error; ++} ++ ++int container_add_file(struct container *cont, const struct cftype *cft) ++{ ++ struct dentry *dir = cont->dentry; ++ struct dentry *dentry; ++ int error; ++ ++ BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); ++ dentry = container_get_dentry(dir, cft->name); ++ if (!IS_ERR(dentry)) { ++ error = container_create_file(dentry, 0644 | S_IFREG, ++ cont->root->sb); ++ if (!error) ++ dentry->d_fsdata = (void *)cft; ++ dput(dentry); ++ } else ++ error = PTR_ERR(dentry); ++ return error; ++} ++ ++int container_add_files(struct container *cont, const struct cftype cft[], ++ int count) ++{ ++ int i, err; ++ for (i = 0; i < count; i++) { ++ err = container_add_file(cont, &cft[i]); ++ if (err) ++ return err; ++ } ++ return 0; ++} ++ ++/* Count the number of tasks in a container. */ ++ ++int container_task_count(const struct container *cont) ++{ ++ int count = 0; ++ struct list_head *l; ++ ++ read_lock(&css_group_lock); ++ l = cont->css_groups.next; ++ while (l != &cont->css_groups) { ++ struct cg_container_link *link = ++ list_entry(l, struct cg_container_link, cont_link_list); ++ count += atomic_read(&link->cg->ref.refcount); ++ l = l->next; ++ } ++ read_unlock(&css_group_lock); ++ return count; ++} ++ ++/* Advance a list_head iterator pointing at a cg_container_link's */ ++static void container_advance_iter(struct container *cont, ++ struct container_iter *it) ++{ ++ struct list_head *l = it->cg_link; ++ struct cg_container_link *link; ++ struct css_group *cg; ++ ++ /* Advance to the next non-empty css_group */ ++ do { ++ l = l->next; ++ if (l == &cont->css_groups) { ++ it->cg_link = NULL; ++ return; ++ } ++ link = list_entry(l, struct cg_container_link, cont_link_list); ++ cg = link->cg; ++ } while (list_empty(&cg->tasks)); ++ it->cg_link = l; ++ it->task = cg->tasks.next; ++} ++ ++void container_iter_start(struct container *cont, struct container_iter *it) ++{ ++ read_lock(&css_group_lock); ++ it->cg_link = &cont->css_groups; ++ container_advance_iter(cont, it); ++} ++ ++struct task_struct *container_iter_next(struct container *cont, ++ struct container_iter *it) ++{ ++ struct task_struct *res; ++ struct list_head *l = it->task; ++ ++ /* If the iterator cg is NULL, we have no tasks */ ++ if (!it->cg_link) ++ return NULL; ++ res = list_entry(l, struct task_struct, cg_list); ++ /* Advance iterator to find next entry */ ++ l = l->next; ++ if (l == &res->containers->tasks) { ++ /* We reached the end of this task list - move on to ++ * the next cg_container_link */ ++ container_advance_iter(cont, it); ++ } else { ++ it->task = l; ++ } ++ return res; ++} ++ ++void container_iter_end(struct container *cont, struct container_iter *it) ++{ ++ read_unlock(&css_group_lock); ++} ++ ++/* ++ * Stuff for reading the 'tasks' file. 
++ * ++ * Reading this file can return large amounts of data if a container has ++ * *lots* of attached tasks. So it may need several calls to read(), ++ * but we cannot guarantee that the information we produce is correct ++ * unless we produce it entirely atomically. ++ * ++ * Upon tasks file open(), a struct ctr_struct is allocated, that ++ * will have a pointer to an array (also allocated here). The struct ++ * ctr_struct * is stored in file->private_data. Its resources will ++ * be freed by release() when the file is closed. The array is used ++ * to sprintf the PIDs and then used by read(). ++ */ ++struct ctr_struct { ++ char *buf; ++ int bufsz; ++}; ++ ++/* ++ * Load into 'pidarray' up to 'npids' of the tasks using container ++ * 'cont'. Return actual number of pids loaded. No need to ++ * task_lock(p) when reading out p->container, since we're in an RCU ++ * read section, so the css_group can't go away, and is ++ * immutable after creation. ++ */ ++static int pid_array_load(pid_t *pidarray, int npids, struct container *cont) ++{ ++ int n = 0; ++ struct container_iter it; ++ struct task_struct *tsk; ++ container_iter_start(cont, &it); ++ while ((tsk = container_iter_next(cont, &it))) { ++ if (unlikely(n == npids)) ++ break; ++ pidarray[n++] = pid_nr(task_pid(tsk)); ++ } ++ container_iter_end(cont, &it); ++ return n; ++} ++ ++static int cmppid(const void *a, const void *b) ++{ ++ return *(pid_t *)a - *(pid_t *)b; ++} ++ ++/* ++ * Convert array 'a' of 'npids' pid_t's to a string of newline separated ++ * decimal pids in 'buf'. Don't write more than 'sz' chars, but return ++ * count 'cnt' of how many chars would be written if buf were large enough. ++ */ ++static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) ++{ ++ int cnt = 0; ++ int i; ++ ++ for (i = 0; i < npids; i++) ++ cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); ++ return cnt; ++} ++ ++/* ++ * Handle an open on 'tasks' file. Prepare a buffer listing the ++ * process id's of tasks currently attached to the container being opened. ++ * ++ * Does not require any specific container mutexes, and does not take any. ++ */ ++static int container_tasks_open(struct inode *unused, struct file *file) ++{ ++ struct container *cont = __d_cont(file->f_dentry->d_parent); ++ struct ctr_struct *ctr; ++ pid_t *pidarray; ++ int npids; ++ char c; ++ ++ if (!(file->f_mode & FMODE_READ)) ++ return 0; ++ ++ ctr = kmalloc(sizeof(*ctr), GFP_KERNEL); ++ if (!ctr) ++ goto err0; ++ ++ /* ++ * If container gets more users after we read count, we won't have ++ * enough space - tough. This race is indistinguishable to the ++ * caller from the case that the additional container users didn't ++ * show up until sometime later on. 
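
Read back, the same tasks file yields one pid per line, produced by the pid_array_to_buf() path above. A matching userspace sketch, path again invented:

    /* list_tasks.c: print every pid attached to a container. */
    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/containers/jail1/tasks", "r");
        int pid;

        if (!f) {
            perror("fopen");
            return 1;
        }
        while (fscanf(f, "%d", &pid) == 1)
            printf("%d\n", pid);
        fclose(f);
        return 0;
    }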
++ */ ++ npids = container_task_count(cont); ++ if (npids) { ++ pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); ++ if (!pidarray) ++ goto err1; ++ ++ npids = pid_array_load(pidarray, npids, cont); ++ sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); ++ ++ /* Call pid_array_to_buf() twice, first just to get bufsz */ ++ ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1; ++ ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL); ++ if (!ctr->buf) ++ goto err2; ++ ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids); ++ ++ kfree(pidarray); ++ } else { ++ ctr->buf = 0; ++ ctr->bufsz = 0; ++ } ++ file->private_data = ctr; ++ return 0; ++ ++err2: ++ kfree(pidarray); ++err1: ++ kfree(ctr); ++err0: ++ return -ENOMEM; ++} ++ ++static ssize_t container_tasks_read(struct container *cont, ++ struct cftype *cft, ++ struct file *file, char __user *buf, ++ size_t nbytes, loff_t *ppos) ++{ ++ struct ctr_struct *ctr = file->private_data; ++ ++ return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); ++} ++ ++static int container_tasks_release(struct inode *unused_inode, ++ struct file *file) ++{ ++ struct ctr_struct *ctr; ++ ++ if (file->f_mode & FMODE_READ) { ++ ctr = file->private_data; ++ kfree(ctr->buf); ++ kfree(ctr); ++ } ++ return 0; ++} ++ ++static u64 container_read_notify_on_release(struct container *cont, ++ struct cftype *cft) ++{ ++ return notify_on_release(cont); ++} ++ ++static u64 container_read_releasable(struct container *cont, struct cftype *cft) ++{ ++ return test_bit(CONT_RELEASABLE, &cont->flags); ++} ++ ++/* ++ * for the common functions, 'private' gives the type of file ++ */ ++static struct cftype files[] = { ++ { ++ .name = "tasks", ++ .open = container_tasks_open, ++ .read = container_tasks_read, ++ .write = container_common_file_write, ++ .release = container_tasks_release, ++ .private = FILE_TASKLIST, ++ }, ++ ++ { ++ .name = "notify_on_release", ++ .read_uint = container_read_notify_on_release, ++ .write = container_common_file_write, ++ .private = FILE_NOTIFY_ON_RELEASE, ++ }, ++ ++ { ++ .name = "releasable", ++ .read_uint = container_read_releasable, ++ .private = FILE_RELEASABLE, ++ } ++}; ++ ++static struct cftype cft_release_agent = { ++ .name = "release_agent", ++ .read = container_common_file_read, ++ .write = container_common_file_write, ++ .private = FILE_RELEASE_AGENT, ++}; ++ ++static int container_populate_dir(struct container *cont) ++{ ++ int err; ++ struct container_subsys *ss; ++ ++ /* First clear out any existing files */ ++ container_clear_directory(cont->dentry); ++ ++ err = container_add_files(cont, files, ARRAY_SIZE(files)); ++ if (err < 0) ++ return err; ++ ++ if (cont == cont->top_container) { ++ if ((err = container_add_file(cont, &cft_release_agent)) < 0) ++ return err; ++ } ++ ++ for_each_subsys(cont->root, ss) { ++ if (ss->populate && (err = ss->populate(ss, cont)) < 0) ++ return err; ++ } ++ ++ return 0; ++} ++ ++static void init_container_css(struct container_subsys *ss, ++ struct container *cont) ++{ ++ struct container_subsys_state *css = cont->subsys[ss->subsys_id]; ++ css->container = cont; ++ atomic_set(&css->refcnt, 0); ++} ++ ++/* ++ * container_create - create a container ++ * parent: container that will be parent of the new container. ++ * name: name of the new container. Will be strcpy'ed. 
++ * mode: mode to set on new inode ++ * ++ * Must be called with the mutex on the parent inode held ++ */ ++ ++static long container_create(struct container *parent, struct dentry *dentry, ++ int mode) ++{ ++ struct container *cont; ++ struct containerfs_root *root = parent->root; ++ int err = 0; ++ struct container_subsys *ss; ++ struct super_block *sb = root->sb; ++ ++ cont = kzalloc(sizeof(*cont), GFP_KERNEL); ++ if (!cont) ++ return -ENOMEM; ++ ++ /* Grab a reference on the superblock so the hierarchy doesn't ++ * get deleted on unmount if there are child containers. This ++ * can be done outside container_mutex, since the sb can't ++ * disappear while someone has an open control file on the ++ * fs */ ++ atomic_inc(&sb->s_active); ++ ++ mutex_lock(&container_mutex); ++ ++ cont->flags = 0; ++ INIT_LIST_HEAD(&cont->sibling); ++ INIT_LIST_HEAD(&cont->children); ++ INIT_LIST_HEAD(&cont->css_groups); ++ INIT_LIST_HEAD(&cont->release_list); ++ ++ cont->parent = parent; ++ cont->root = parent->root; ++ cont->top_container = parent->top_container; ++ ++ for_each_subsys(root, ss) { ++ err = ss->create(ss, cont); ++ if (err) ++ goto err_destroy; ++ init_container_css(ss, cont); ++ } ++ ++ list_add(&cont->sibling, &cont->parent->children); ++ root->number_of_containers++; ++ ++ err = container_create_dir(cont, dentry, mode); ++ if (err < 0) ++ goto err_remove; ++ ++ /* The container directory was pre-locked for us */ ++ BUG_ON(!mutex_is_locked(&cont->dentry->d_inode->i_mutex)); ++ ++ err = container_populate_dir(cont); ++ /* If err < 0, we have a half-filled directory - oh well ;) */ ++ ++ mutex_unlock(&container_mutex); ++ mutex_unlock(&cont->dentry->d_inode->i_mutex); ++ ++ return 0; ++ ++ err_remove: ++ ++ list_del(&cont->sibling); ++ root->number_of_containers--; ++ ++ err_destroy: ++ ++ for_each_subsys(root, ss) { ++ if (cont->subsys[ss->subsys_id]) ++ ss->destroy(ss, cont); ++ } ++ ++ mutex_unlock(&container_mutex); ++ ++ /* Release the reference count that we took on the superblock */ ++ deactivate_super(sb); ++ ++ kfree(cont); ++ return err; ++} ++ ++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode) ++{ ++ struct container *c_parent = dentry->d_parent->d_fsdata; ++ ++ /* the vfs holds inode->i_mutex already */ ++ return container_create(c_parent, dentry, mode | S_IFDIR); ++} ++ ++static inline int container_has_css_refs(struct container *cont) ++{ ++ /* Check the reference count on each subsystem. 
Since we ++ * already established that there are no tasks in the ++ * container, if the css refcount is also 0, then there should ++ * be no outstanding references, so the subsystem is safe to ++ * destroy */ ++ struct container_subsys *ss; ++ for_each_subsys(cont->root, ss) { ++ struct container_subsys_state *css; ++ css = cont->subsys[ss->subsys_id]; ++ if (atomic_read(&css->refcnt)) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry) ++{ ++ struct container *cont = dentry->d_fsdata; ++ struct dentry *d; ++ struct container *parent; ++ struct container_subsys *ss; ++ struct super_block *sb; ++ struct containerfs_root *root; ++ ++ /* the vfs holds both inode->i_mutex already */ ++ ++ mutex_lock(&container_mutex); ++ if (atomic_read(&cont->count) != 0) { ++ mutex_unlock(&container_mutex); ++ return -EBUSY; ++ } ++ if (!list_empty(&cont->children)) { ++ mutex_unlock(&container_mutex); ++ return -EBUSY; ++ } ++ ++ parent = cont->parent; ++ root = cont->root; ++ sb = root->sb; ++ ++ if (container_has_css_refs(cont)) { ++ mutex_unlock(&container_mutex); ++ return -EBUSY; ++ } ++ ++ for_each_subsys(root, ss) { ++ if (cont->subsys[ss->subsys_id]) ++ ss->destroy(ss, cont); ++ } ++ ++ set_bit(CONT_REMOVED, &cont->flags); ++ /* delete my sibling from parent->children */ ++ list_del(&cont->sibling); ++ spin_lock(&cont->dentry->d_lock); ++ d = dget(cont->dentry); ++ cont->dentry = NULL; ++ spin_unlock(&d->d_lock); ++ ++ container_d_remove_dir(d); ++ dput(d); ++ root->number_of_containers--; ++ ++ if (!list_empty(&cont->release_list)) ++ list_del(&cont->release_list); ++ set_bit(CONT_RELEASABLE, &parent->flags); ++ check_for_release(parent); ++ ++ mutex_unlock(&container_mutex); ++ /* Drop the active superblock reference that we took when we ++ * created the container */ ++ deactivate_super(sb); ++ return 0; ++} ++ ++static void container_init_subsys(struct container_subsys *ss) ++{ ++ int retval; ++ struct container_subsys_state *css; ++ struct list_head *l; ++ printk(KERN_ERR "Initializing container subsys %s\n", ss->name); ++ ++ /* Create the top container state for this subsystem */ ++ ss->root = &rootnode; ++ retval = ss->create(ss, dummytop); ++ BUG_ON(retval); ++ BUG_ON(!dummytop->subsys[ss->subsys_id]); ++ init_container_css(ss, dummytop); ++ css = dummytop->subsys[ss->subsys_id]; ++ ++ /* Update all container groups to contain a subsys ++ * pointer to this state - since the subsystem is ++ * newly registered, all tasks and hence all container ++ * groups are in the subsystem's top container. */ ++ write_lock(&css_group_lock); ++ l = &init_css_group.list; ++ do { ++ struct css_group *cg = ++ list_entry(l, struct css_group, list); ++ cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; ++ l = l->next; ++ } while (l != &init_css_group.list); ++ write_unlock(&css_group_lock); ++ ++ /* If this subsystem requested that it be notified with fork ++ * events, we should send it one now for every process in the ++ * system */ ++ if (ss->fork) { ++ struct task_struct *g, *p; ++ ++ read_lock(&tasklist_lock); ++ do_each_thread(g, p) { ++ ss->fork(ss, p); ++ } while_each_thread(g, p); ++ read_unlock(&tasklist_lock); ++ } ++ ++ need_forkexit_callback |= ss->fork || ss->exit; ++ ++ ss->active = 1; ++} ++ ++/** ++ * container_init_early - initialize containers at system boot, and ++ * initialize any subsystems that request early init. 
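For reference, the minimal shape of a subsystem that these init paths can handle is sketched below; container_init_subsys() above requires working create() and destroy() hooks, and a subsystem opts into the early pass with the early_init flag. The foo_subsys_id and other foo_* names are illustrative placeholders, not symbols defined by this patch.

static int foo_create(struct container_subsys *ss, struct container *cont)
{
	/* Allocate the per-container state that init_container_css()
	 * expects to find in cont->subsys[]. */
	struct container_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
	if (!css)
		return -ENOMEM;
	cont->subsys[foo_subsys_id] = css;
	return 0;
}

static void foo_destroy(struct container_subsys *ss, struct container *cont)
{
	kfree(cont->subsys[foo_subsys_id]);
}

struct container_subsys foo_subsys = {
	.name		= "foo",
	.create		= foo_create,
	.destroy	= foo_destroy,
	.subsys_id	= foo_subsys_id,
	.early_init	= 1,	/* set up from container_init_early() */
};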
++ */ ++int __init container_init_early(void) ++{ ++ int i; ++ kref_init(&init_css_group.ref); ++ kref_get(&init_css_group.ref); ++ INIT_LIST_HEAD(&init_css_group.list); ++ INIT_LIST_HEAD(&init_css_group.cg_links); ++ INIT_LIST_HEAD(&init_css_group.tasks); ++ css_group_count = 1; ++ init_container_root(&rootnode); ++ init_task.containers = &init_css_group; ++ ++ init_css_group_link.cg = &init_css_group; ++ list_add(&init_css_group_link.cont_link_list, ++ &rootnode.top_container.css_groups); ++ list_add(&init_css_group_link.cg_link_list, ++ &init_css_group.cg_links); ++ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ ++ BUG_ON(!ss->name); ++ BUG_ON(strlen(ss->name) > MAX_CONTAINER_TYPE_NAMELEN); ++ BUG_ON(!ss->create); ++ BUG_ON(!ss->destroy); ++ if (ss->subsys_id != i) { ++ printk(KERN_ERR "Subsys %s id == %d\n", ++ ss->name, ss->subsys_id); ++ BUG(); ++ } ++ ++ if (ss->early_init) ++ container_init_subsys(ss); ++ } ++ return 0; ++} ++ ++/** ++ * container_init - register container filesystem and /proc file, and ++ * initialize any subsystems that didn't request early init. ++ */ ++int __init container_init(void) ++{ ++ int err; ++ int i; ++ struct proc_dir_entry *entry; ++ ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ if (!ss->early_init) ++ container_init_subsys(ss); ++ } ++ ++ err = register_filesystem(&container_fs_type); ++ if (err < 0) ++ goto out; ++ ++ entry = create_proc_entry("containers", 0, NULL); ++ if (entry) ++ entry->proc_fops = &proc_containerstats_operations; ++ ++out: ++ return err; ++} ++ ++/* ++ * proc_container_show() ++ * - Print task's container paths into seq_file, one line for each hierarchy ++ * - Used for /proc//container. ++ * - No need to task_lock(tsk) on this tsk->container reference, as it ++ * doesn't really matter if tsk->container changes after we read it, ++ * and we take container_mutex, keeping attach_task() from changing it ++ * anyway. No need to check that tsk->container != NULL, thanks to ++ * the_top_container_hack in container_exit(), which sets an exiting tasks ++ * container to top_container. ++ */ ++ ++/* TODO: Use a proper seq_file iterator */ ++static int proc_container_show(struct seq_file *m, void *v) ++{ ++ struct pid *pid; ++ struct task_struct *tsk; ++ char *buf; ++ int retval; ++ struct containerfs_root *root; ++ ++ retval = -ENOMEM; ++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!buf) ++ goto out; ++ ++ retval = -ESRCH; ++ pid = m->private; ++ tsk = get_pid_task(pid, PIDTYPE_PID); ++ if (!tsk) ++ goto out_free; ++ ++ retval = 0; ++ ++ mutex_lock(&container_mutex); ++ ++ for_each_root(root) { ++ struct container_subsys *ss; ++ struct container *cont; ++ int subsys_id; ++ int count = 0; ++ ++ /* Skip this hierarchy if it has no active subsystems */ ++ if (!root->subsys_bits) ++ continue; ++ for_each_subsys(root, ss) ++ seq_printf(m, "%s%s", count++ ? 
"," : "", ss->name); ++ seq_putc(m, ':'); ++ get_first_subsys(&root->top_container, NULL, &subsys_id); ++ cont = task_container(tsk, subsys_id); ++ retval = container_path(cont, buf, PAGE_SIZE); ++ if (retval < 0) ++ goto out_unlock; ++ seq_puts(m, buf); ++ seq_putc(m, '\n'); ++ } ++ ++out_unlock: ++ mutex_unlock(&container_mutex); ++ put_task_struct(tsk); ++out_free: ++ kfree(buf); ++out: ++ return retval; ++} ++ ++static int container_open(struct inode *inode, struct file *file) ++{ ++ struct pid *pid = PROC_I(inode)->pid; ++ return single_open(file, proc_container_show, pid); ++} ++ ++struct file_operations proc_container_operations = { ++ .open = container_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++/* Display information about each subsystem and each hierarchy */ ++static int proc_containerstats_show(struct seq_file *m, void *v) ++{ ++ int i; ++ struct containerfs_root *root; ++ ++ mutex_lock(&container_mutex); ++ seq_puts(m, "Hierarchies:\n"); ++ for_each_root(root) { ++ struct container_subsys *ss; ++ int first = 1; ++ seq_printf(m, "%p: bits=%lx containers=%d (", root, ++ root->subsys_bits, root->number_of_containers); ++ for_each_subsys(root, ss) { ++ seq_printf(m, "%s%s", first ? "" : ", ", ss->name); ++ first = false; ++ } ++ seq_putc(m, ')'); ++ if (root->sb) { ++ seq_printf(m, " s_active=%d", ++ atomic_read(&root->sb->s_active)); ++ } ++ seq_putc(m, '\n'); ++ } ++ seq_puts(m, "Subsystems:\n"); ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ seq_printf(m, "%d: name=%s hierarchy=%p\n", ++ i, ss->name, ss->root); ++ } ++ seq_printf(m, "Container groups: %d\n", css_group_count); ++ mutex_unlock(&container_mutex); ++ return 0; ++} ++ ++static int containerstats_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, proc_containerstats_show, 0); ++} ++ ++static struct file_operations proc_containerstats_operations = { ++ .open = containerstats_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ ++/** ++ * container_fork - attach newly forked task to its parents container. ++ * @tsk: pointer to task_struct of forking parent process. ++ * ++ * Description: A task inherits its parent's container at fork(). ++ * ++ * A pointer to the shared css_group was automatically copied in ++ * fork.c by dup_task_struct(). However, we ignore that copy, since ++ * it was not made under the protection of RCU or container_mutex, so ++ * might no longer be a valid container pointer. attach_task() might ++ * have already changed current->containers, allowing the previously ++ * referenced container group to be removed and freed. ++ * ++ * At the point that container_fork() is called, 'current' is the parent ++ * task, and the passed argument 'child' points to the child task. ++ */ ++void container_fork(struct task_struct *child) ++{ ++ write_lock(&css_group_lock); ++ child->containers = current->containers; ++ get_css_group(child->containers); ++ list_add(&child->cg_list, &child->containers->tasks); ++ write_unlock(&css_group_lock); ++} ++ ++/** ++ * container_fork_callbacks - called on a new task very soon before ++ * adding it to the tasklist. 
No need to take any locks since no-one ++ * can be operating on this task ++ */ ++void container_fork_callbacks(struct task_struct *child) ++{ ++ if (need_forkexit_callback) { ++ int i; ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ if (ss->fork) ++ ss->fork(ss, child); ++ } ++ } ++} ++ ++/** ++ * container_exit - detach container from exiting task ++ * @tsk: pointer to task_struct of exiting process ++ * ++ * Description: Detach container from @tsk and release it. ++ * ++ * Note that containers marked notify_on_release force every task in ++ * them to take the global container_mutex mutex when exiting. ++ * This could impact scaling on very large systems. Be reluctant to ++ * use notify_on_release containers where very high task exit scaling ++ * is required on large systems. ++ * ++ * the_top_container_hack: ++ * ++ * Set the exiting tasks container to the root container (top_container). ++ * ++ * We call container_exit() while the task is still competent to ++ * handle notify_on_release(), then leave the task attached to the ++ * root container in each hierarchy for the remainder of its exit. ++ * ++ * To do this properly, we would increment the reference count on ++ * top_container, and near the very end of the kernel/exit.c do_exit() ++ * code we would add a second container function call, to drop that ++ * reference. This would just create an unnecessary hot spot on ++ * the top_container reference count, to no avail. ++ * ++ * Normally, holding a reference to a container without bumping its ++ * count is unsafe. The container could go away, or someone could ++ * attach us to a different container, decrementing the count on ++ * the first container that we never incremented. But in this case, ++ * top_container isn't going away, and either task has PF_EXITING set, ++ * which wards off any attach_task() attempts, or task is a failed ++ * fork, never visible to attach_task. ++ * ++ */ ++void container_exit(struct task_struct *tsk, int run_callbacks) ++{ ++ int i; ++ struct css_group *cg = NULL; ++ ++ if (run_callbacks && need_forkexit_callback) { ++ for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) { ++ struct container_subsys *ss = subsys[i]; ++ if (ss->exit) ++ ss->exit(ss, tsk); ++ } ++ } ++ ++ /* Reassign the task to the init_css_group. 
*/ ++ task_lock(tsk); ++ write_lock(&css_group_lock); ++ list_del(&tsk->cg_list); ++ write_unlock(&css_group_lock); ++ ++ cg = tsk->containers; ++ tsk->containers = &init_css_group; ++ task_unlock(tsk); ++ if (cg) ++ put_css_group_taskexit(cg); ++} ++ ++/** ++ * container_clone - duplicate the current container in the hierarchy ++ * that the given subsystem is attached to, and move this task into ++ * the new child ++ */ ++int container_clone(struct task_struct *tsk, struct container_subsys *subsys) ++{ ++ struct dentry *dentry; ++ int ret = 0; ++ char nodename[MAX_CONTAINER_TYPE_NAMELEN]; ++ struct container *parent, *child; ++ struct inode *inode; ++ struct css_group *cg; ++ struct containerfs_root *root; ++ struct container_subsys *ss; ++ ++ /* We shouldn't be called by an unregistered subsystem */ ++ BUG_ON(!subsys->active); ++ ++ /* First figure out what hierarchy and container we're dealing ++ * with, and pin them so we can drop container_mutex */ ++ mutex_lock(&container_mutex); ++ again: ++ root = subsys->root; ++ if (root == &rootnode) { ++ printk(KERN_INFO ++ "Not cloning container for unused subsystem %s\n", ++ subsys->name); ++ mutex_unlock(&container_mutex); ++ return 0; ++ } ++ cg = tsk->containers; ++ parent = task_container(tsk, subsys->subsys_id); ++ ++ snprintf(nodename, MAX_CONTAINER_TYPE_NAMELEN, "node_%d", tsk->pid); ++ ++ /* Pin the hierarchy */ ++ atomic_inc(&parent->root->sb->s_active); ++ ++ /* Keep the container alive */ ++ get_css_group(cg); ++ mutex_unlock(&container_mutex); ++ ++ /* Now do the VFS work to create a container */ ++ inode = parent->dentry->d_inode; ++ ++ /* Hold the parent directory mutex across this operation to ++ * stop anyone else deleting the new container */ ++ mutex_lock(&inode->i_mutex); ++ dentry = container_get_dentry(parent->dentry, nodename); ++ if (IS_ERR(dentry)) { ++ printk(KERN_INFO ++ "Couldn't allocate dentry for %s: %ld\n", nodename, ++ PTR_ERR(dentry)); ++ ret = PTR_ERR(dentry); ++ goto out_release; ++ } ++ ++ /* Create the container directory, which also creates the container */ ++ ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755, NULL); ++ child = __d_cont(dentry); ++ dput(dentry); ++ if (ret) { ++ printk(KERN_INFO ++ "Failed to create container %s: %d\n", nodename, ++ ret); ++ goto out_release; ++ } ++ ++ if (!child) { ++ printk(KERN_INFO ++ "Couldn't find new container %s\n", nodename); ++ ret = -ENOMEM; ++ goto out_release; ++ } ++ ++ /* The container now exists. Retake container_mutex and check ++ * that we're still in the same state that we thought we ++ * were. */ ++ mutex_lock(&container_mutex); ++ if ((root != subsys->root) || ++ (parent != task_container(tsk, subsys->subsys_id))) { ++ /* Aargh, we raced ... */ ++ mutex_unlock(&inode->i_mutex); ++ put_css_group(cg); ++ ++ deactivate_super(parent->root->sb); ++ /* The container is still accessible in the VFS, but ++ * we're not going to try to rmdir() it at this ++ * point. */ ++ printk(KERN_INFO ++ "Race in container_clone() - leaking container %s\n", ++ nodename); ++ goto again; ++ } ++ ++ /* do any required auto-setup */ ++ for_each_subsys(root, ss) { ++ if (ss->post_clone) ++ ss->post_clone(ss, child); ++ } ++ ++ /* All seems fine. 
Finish by moving the task into the new container */ ++ ret = attach_task(child, tsk); ++ mutex_unlock(&container_mutex); ++ ++ out_release: ++ mutex_unlock(&inode->i_mutex); ++ ++ mutex_lock(&container_mutex); ++ put_css_group(cg); ++ mutex_unlock(&container_mutex); ++ deactivate_super(parent->root->sb); ++ return ret; ++} ++ ++/* See if "cont" is a descendant of the current task's container in ++ * the appropriate hierarchy */ ++ ++int container_is_descendant(const struct container *cont) ++{ ++ int ret; ++ struct container *target; ++ int subsys_id; ++ ++ if (cont == dummytop) ++ return 1; ++ get_first_subsys(cont, NULL, &subsys_id); ++ target = task_container(current, subsys_id); ++ while (cont != target && cont!= cont->top_container) { ++ cont = cont->parent; ++ } ++ ret = (cont == target); ++ return ret; ++} ++ ++static void check_for_release(struct container *cont) ++{ ++ BUG_ON(!mutex_is_locked(&container_mutex)); ++ if (container_is_releasable(cont) && !atomic_read(&cont->count) ++ && list_empty(&cont->children) && !container_has_css_refs(cont)) { ++ /* Container is currently removeable. If it's not ++ * already queued for a userspace notification, queue ++ * it now */ ++ if (list_empty(&cont->release_list)) { ++ list_add(&cont->release_list, &release_list); ++ schedule_work(&release_agent_work); ++ } ++ } ++} ++ ++void css_put(struct container_subsys_state *css) ++{ ++ struct container *cont = css->container; ++ if (notify_on_release(cont)) { ++ mutex_lock(&container_mutex); ++ set_bit(CONT_RELEASABLE, &cont->flags); ++ if (atomic_dec_and_test(&css->refcnt)) { ++ check_for_release(cont); ++ } ++ mutex_unlock(&container_mutex); ++ } else { ++ atomic_dec(&css->refcnt); ++ } ++} ++ ++void container_set_release_agent_path(struct container_subsys *ss, ++ const char *path) ++{ ++ mutex_lock(&container_mutex); ++ strcpy(ss->root->release_agent_path, path); ++ mutex_unlock(&container_mutex); ++} ++ ++/* ++ * Notify userspace when a container is released, by running the ++ * configured release agent with the name of the container (path ++ * relative to the root of container file system) as the argument. ++ * ++ * Most likely, this user command will try to rmdir this container. ++ * ++ * This races with the possibility that some other task will be ++ * attached to this container before it is removed, or that some other ++ * user task will 'mkdir' a child container of this container. That's ok. ++ * The presumed 'rmdir' will fail quietly if this container is no longer ++ * unused, and this container will be reprieved from its death sentence, ++ * to continue to serve a useful existence. Next time it's released, ++ * we will get notified again, if it still has 'notify_on_release' set. ++ * ++ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which ++ * means only wait until the task is successfully execve()'d. The ++ * separate release agent task is forked by call_usermodehelper(), ++ * then control in this thread returns here, without waiting for the ++ * release agent task. We don't bother to wait because the caller of ++ * this routine has no use for the exit status of the release agent ++ * task, so no sense holding our caller up for that. 
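A release agent itself is just an ordinary executable that receives the released container's path, relative to the container filesystem root, as its single argument. A minimal sketch of one follows; the /containers mount point, like the program itself, is an illustrative assumption and not part of the patch.

#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	char path[4096];

	if (argc != 2)
		return 1;
	/* argv[1] is the container path relative to the fs root,
	 * e.g. "/node_1234"; prepend the assumed mount point. */
	snprintf(path, sizeof(path), "/containers%s", argv[1]);
	/* The presumed action described above: remove the container. */
	if (rmdir(path) != 0) {
		perror("rmdir");
		return 1;
	}
	return 0;
}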
++ * ++ */ ++ ++static void container_release_agent(struct work_struct *work) ++{ ++ BUG_ON(work != &release_agent_work); ++ mutex_lock(&container_mutex); ++ while (!list_empty(&release_list)) { ++ char *argv[3], *envp[3]; ++ int i; ++ char *pathbuf; ++ struct container *cont = list_entry(release_list.next, ++ struct container, ++ release_list); ++ list_del_init(&cont->release_list); ++ ++ pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (!pathbuf) ++ continue; ++ ++ if (container_path(cont, pathbuf, PAGE_SIZE) < 0) { ++ kfree(pathbuf); ++ continue; ++ } ++ ++ i = 0; ++ argv[i++] = cont->root->release_agent_path; ++ argv[i++] = (char *)pathbuf; ++ argv[i] = NULL; ++ ++ i = 0; ++ /* minimal command environment */ ++ envp[i++] = "HOME=/"; ++ envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; ++ envp[i] = NULL; ++ ++ /* Drop the lock while we invoke the usermode helper, ++ * since the exec could involve hitting disk and hence ++ * be a slow process */ ++ mutex_unlock(&container_mutex); ++ call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC); ++ kfree(pathbuf); ++ mutex_lock(&container_mutex); ++ } ++ mutex_unlock(&container_mutex); ++} +diff -Nurb linux-2.6.22-570/kernel/container_debug.c linux-2.6.22-591/kernel/container_debug.c +--- linux-2.6.22-570/kernel/container_debug.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/container_debug.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,89 @@ ++/* ++ * kernel/ccontainer_debug.c - Example container subsystem that ++ * exposes debug info ++ * ++ * Copyright (C) Google Inc, 2007 ++ * ++ * Developed by Paul Menage (menage@google.com) ++ * ++ */ ++ ++#include ++#include ++ ++static int debug_create(struct container_subsys *ss, struct container *cont) ++{ ++ struct container_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); ++ if (!css) ++ return -ENOMEM; ++ cont->subsys[debug_subsys_id] = css; ++ return 0; ++} ++ ++static void debug_destroy(struct container_subsys *ss, struct container *cont) ++{ ++ kfree(cont->subsys[debug_subsys_id]); ++} ++ ++static u64 container_refcount_read(struct container *cont, struct cftype *cft) ++{ ++ return atomic_read(&cont->count); ++} ++ ++static u64 taskcount_read(struct container *cont, struct cftype *cft) ++{ ++ u64 count; ++ container_lock(); ++ count = container_task_count(cont); ++ container_unlock(); ++ return count; ++} ++ ++static u64 current_css_group_read(struct container *cont, struct cftype *cft) ++{ ++ return (u64) current->containers; ++} ++ ++static u64 current_css_group_refcount_read(struct container *cont, ++ struct cftype *cft) ++{ ++ u64 count; ++ rcu_read_lock(); ++ count = atomic_read(¤t->containers->ref.refcount); ++ rcu_read_unlock(); ++ return count; ++} ++ ++static struct cftype files[] = { ++ { ++ .name = "debug.container_refcount", ++ .read_uint = container_refcount_read, ++ }, ++ { ++ .name = "debug.taskcount", ++ .read_uint = taskcount_read, ++ }, ++ ++ { ++ .name = "debug.current_css_group", ++ .read_uint = current_css_group_read, ++ }, ++ ++ { ++ .name = "debug.current_css_group_refcount", ++ .read_uint = current_css_group_refcount_read, ++ }, ++}; ++ ++static int debug_populate(struct container_subsys *ss, struct container *cont) ++{ ++ return container_add_files(cont, files, ARRAY_SIZE(files)); ++} ++ ++struct container_subsys debug_subsys = { ++ .name = "debug", ++ .create = debug_create, ++ .destroy = debug_destroy, ++ .populate = debug_populate, ++ .subsys_id = debug_subsys_id, ++}; +diff -Nurb linux-2.6.22-570/kernel/cpu_acct.c 
linux-2.6.22-591/kernel/cpu_acct.c +--- linux-2.6.22-570/kernel/cpu_acct.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/cpu_acct.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,185 @@ ++/* ++ * kernel/cpu_acct.c - CPU accounting container subsystem ++ * ++ * Copyright (C) Google Inc, 2006 ++ * ++ * Developed by Paul Menage (menage@google.com) and Balbir Singh ++ * (balbir@in.ibm.com) ++ * ++ */ ++ ++/* ++ * Example container subsystem for reporting total CPU usage of tasks in a ++ * container, along with percentage load over a time interval ++ */ ++ ++#include ++#include ++#include ++#include ++ ++struct cpuacct { ++ struct container_subsys_state css; ++ spinlock_t lock; ++ /* total time used by this class */ ++ cputime64_t time; ++ ++ /* time when next load calculation occurs */ ++ u64 next_interval_check; ++ ++ /* time used in current period */ ++ cputime64_t current_interval_time; ++ ++ /* time used in last period */ ++ cputime64_t last_interval_time; ++}; ++ ++struct container_subsys cpuacct_subsys; ++ ++static inline struct cpuacct *container_ca(struct container *cont) ++{ ++ return container_of(container_subsys_state(cont, cpuacct_subsys_id), ++ struct cpuacct, css); ++} ++ ++static inline struct cpuacct *task_ca(struct task_struct *task) ++{ ++ return container_of(task_subsys_state(task, cpuacct_subsys_id), ++ struct cpuacct, css); ++} ++ ++#define INTERVAL (HZ * 10) ++ ++static inline u64 next_interval_boundary(u64 now) { ++ /* calculate the next interval boundary beyond the ++ * current time */ ++ do_div(now, INTERVAL); ++ return (now + 1) * INTERVAL; ++} ++ ++static int cpuacct_create(struct container_subsys *ss, struct container *cont) ++{ ++ struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); ++ if (!ca) ++ return -ENOMEM; ++ spin_lock_init(&ca->lock); ++ ca->next_interval_check = next_interval_boundary(get_jiffies_64()); ++ cont->subsys[cpuacct_subsys_id] = &ca->css; ++ return 0; ++} ++ ++static void cpuacct_destroy(struct container_subsys *ss, ++ struct container *cont) ++{ ++ kfree(container_ca(cont)); ++} ++ ++/* Lazily update the load calculation if necessary. 
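For the arithmetic above: do_div() replaces its 64-bit first argument with the quotient and returns the remainder, so next_interval_boundary() rounds up to the next multiple of INTERVAL. A userspace analogue, assuming HZ == 1000 so that INTERVAL is 10000 jiffies:

#include <stdio.h>
#include <stdint.h>

#define INTERVAL 10000ULL	/* HZ * 10, with HZ assumed to be 1000 */

/* Plain division stands in for what do_div() leaves in its first arg. */
static uint64_t next_boundary(uint64_t now)
{
	return (now / INTERVAL + 1) * INTERVAL;
}

int main(void)
{
	/* 23456 lies in [20000, 30000), so the next boundary is 30000. */
	printf("%llu\n", (unsigned long long)next_boundary(23456));
	return 0;
}

The same units feed load_read() below: 2500 jiffies used out of a 10000-jiffy interval reads back as 25 after the *100 / INTERVAL scaling.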
Called with ca locked */ ++static void cpuusage_update(struct cpuacct *ca) ++{ ++ u64 now = get_jiffies_64(); ++ /* If we're not due for an update, return */ ++ if (ca->next_interval_check > now) ++ return; ++ ++ if (ca->next_interval_check <= (now - INTERVAL)) { ++ /* If it's been more than an interval since the last ++ * check, then catch up - the last interval must have ++ * been zero load */ ++ ca->last_interval_time = 0; ++ ca->next_interval_check = next_interval_boundary(now); ++ } else { ++ /* If a steal takes the last interval time negative, ++ * then we just ignore it */ ++ if ((s64)ca->current_interval_time > 0) { ++ ca->last_interval_time = ca->current_interval_time; ++ } else { ++ ca->last_interval_time = 0; ++ } ++ ca->next_interval_check += INTERVAL; ++ } ++ ca->current_interval_time = 0; ++} ++ ++static u64 cpuusage_read(struct container *cont, ++ struct cftype *cft) ++{ ++ struct cpuacct *ca = container_ca(cont); ++ u64 time; ++ ++ spin_lock_irq(&ca->lock); ++ cpuusage_update(ca); ++ time = cputime64_to_jiffies64(ca->time); ++ spin_unlock_irq(&ca->lock); ++ ++ /* Convert 64-bit jiffies to seconds */ ++ time *= 1000; ++ do_div(time, HZ); ++ return time; ++} ++ ++static u64 load_read(struct container *cont, ++ struct cftype *cft) ++{ ++ struct cpuacct *ca = container_ca(cont); ++ u64 time; ++ ++ /* Find the time used in the previous interval */ ++ spin_lock_irq(&ca->lock); ++ cpuusage_update(ca); ++ time = cputime64_to_jiffies64(ca->last_interval_time); ++ spin_unlock_irq(&ca->lock); ++ ++ /* Convert time to a percentage, to give the load in the ++ * previous period */ ++ time *= 100; ++ do_div(time, INTERVAL); ++ ++ return time; ++} ++ ++static struct cftype files[] = { ++ { ++ .name = "cpuacct.usage", ++ .read_uint = cpuusage_read, ++ }, ++ { ++ .name = "cpuacct.load", ++ .read_uint = load_read, ++ } ++}; ++ ++static int cpuacct_populate(struct container_subsys *ss, ++ struct container *cont) ++{ ++ return container_add_files(cont, files, ARRAY_SIZE(files)); ++} ++ ++void cpuacct_charge(struct task_struct *task, cputime_t cputime) ++{ ++ ++ struct cpuacct *ca; ++ unsigned long flags; ++ ++ if (!cpuacct_subsys.active) ++ return; ++ rcu_read_lock(); ++ ca = task_ca(task); ++ if (ca) { ++ spin_lock_irqsave(&ca->lock, flags); ++ cpuusage_update(ca); ++ ca->time = cputime64_add(ca->time, cputime); ++ ca->current_interval_time = ++ cputime64_add(ca->current_interval_time, cputime); ++ spin_unlock_irqrestore(&ca->lock, flags); ++ } ++ rcu_read_unlock(); ++} ++ ++struct container_subsys cpuacct_subsys = { ++ .name = "cpuacct", ++ .create = cpuacct_create, ++ .destroy = cpuacct_destroy, ++ .populate = cpuacct_populate, ++ .subsys_id = cpuacct_subsys_id, ++}; +diff -Nurb linux-2.6.22-570/kernel/cpuset.c linux-2.6.22-591/kernel/cpuset.c +--- linux-2.6.22-570/kernel/cpuset.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/cpuset.c 2007-12-21 15:36:12.000000000 -0500 +@@ -5,6 +5,7 @@ + * + * Copyright (C) 2003 BULL SA. + * Copyright (C) 2004-2006 Silicon Graphics, Inc. ++ * Copyright (C) 2006 Google, Inc + * + * Portions derived from Patrick Mochel's sysfs code. + * sysfs is Copyright (c) 2001-3 Patrick Mochel +@@ -12,6 +13,7 @@ + * 2003-10-10 Written by Simon Derr. + * 2003-10-22 Updates by Stephen Hemminger. + * 2004 May-July Rework by Paul Jackson. ++ * 2006 Rework by Paul Menage to use generic containers + * + * This file is subject to the terms and conditions of the GNU General Public + * License. 
See the file COPYING in the main directory of the Linux +@@ -53,8 +55,6 @@ + #include + #include + +-#define CPUSET_SUPER_MAGIC 0x27e0eb +- + /* + * Tracks how many cpusets are currently defined in system. + * When there is only one cpuset (the root cpuset) we can +@@ -62,6 +62,10 @@ + */ + int number_of_cpusets __read_mostly; + ++/* Retrieve the cpuset from a container */ ++struct container_subsys cpuset_subsys; ++struct cpuset; ++ + /* See "Frequency meter" comments, below. */ + + struct fmeter { +@@ -72,24 +76,13 @@ + }; + + struct cpuset { ++ struct container_subsys_state css; ++ + unsigned long flags; /* "unsigned long" so bitops work */ + cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ + nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ + +- /* +- * Count is atomic so can incr (fork) or decr (exit) without a lock. +- */ +- atomic_t count; /* count tasks using this cpuset */ +- +- /* +- * We link our 'sibling' struct into our parents 'children'. +- * Our children link their 'sibling' into our 'children'. +- */ +- struct list_head sibling; /* my parents children */ +- struct list_head children; /* my children */ +- + struct cpuset *parent; /* my parent */ +- struct dentry *dentry; /* cpuset fs entry */ + + /* + * Copy of global cpuset_mems_generation as of the most +@@ -100,13 +93,32 @@ + struct fmeter fmeter; /* memory_pressure filter */ + }; + ++/* Update the cpuset for a container */ ++static inline void set_container_cs(struct container *cont, struct cpuset *cs) ++{ ++ cont->subsys[cpuset_subsys_id] = &cs->css; ++} ++ ++/* Retrieve the cpuset for a container */ ++static inline struct cpuset *container_cs(struct container *cont) ++{ ++ return container_of(container_subsys_state(cont, cpuset_subsys_id), ++ struct cpuset, css); ++} ++ ++/* Retrieve the cpuset for a task */ ++static inline struct cpuset *task_cs(struct task_struct *task) ++{ ++ return container_of(task_subsys_state(task, cpuset_subsys_id), ++ struct cpuset, css); ++} ++ ++ + /* bits in struct cpuset flags field */ + typedef enum { + CS_CPU_EXCLUSIVE, + CS_MEM_EXCLUSIVE, + CS_MEMORY_MIGRATE, +- CS_REMOVED, +- CS_NOTIFY_ON_RELEASE, + CS_SPREAD_PAGE, + CS_SPREAD_SLAB, + } cpuset_flagbits_t; +@@ -122,16 +134,6 @@ + return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); + } + +-static inline int is_removed(const struct cpuset *cs) +-{ +- return test_bit(CS_REMOVED, &cs->flags); +-} +- +-static inline int notify_on_release(const struct cpuset *cs) +-{ +- return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); +-} +- + static inline int is_memory_migrate(const struct cpuset *cs) + { + return test_bit(CS_MEMORY_MIGRATE, &cs->flags); +@@ -172,14 +174,8 @@ + .flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)), + .cpus_allowed = CPU_MASK_ALL, + .mems_allowed = NODE_MASK_ALL, +- .count = ATOMIC_INIT(0), +- .sibling = LIST_HEAD_INIT(top_cpuset.sibling), +- .children = LIST_HEAD_INIT(top_cpuset.children), + }; + +-static struct vfsmount *cpuset_mount; +-static struct super_block *cpuset_sb; +- + /* + * We have two global cpuset mutexes below. They can nest. + * It is ok to first take manage_mutex, then nest callback_mutex. We also +@@ -263,297 +259,36 @@ + * the routine cpuset_update_task_memory_state(). 
+ */ + +-static DEFINE_MUTEX(manage_mutex); + static DEFINE_MUTEX(callback_mutex); + +-/* +- * A couple of forward declarations required, due to cyclic reference loop: +- * cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file +- * -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir. +- */ +- +-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode); +-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry); +- +-static struct backing_dev_info cpuset_backing_dev_info = { +- .ra_pages = 0, /* No readahead */ +- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +-}; +- +-static struct inode *cpuset_new_inode(mode_t mode) +-{ +- struct inode *inode = new_inode(cpuset_sb); +- +- if (inode) { +- inode->i_mode = mode; +- inode->i_uid = current->fsuid; +- inode->i_gid = current->fsgid; +- inode->i_blocks = 0; +- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +- inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info; +- } +- return inode; +-} +- +-static void cpuset_diput(struct dentry *dentry, struct inode *inode) +-{ +- /* is dentry a directory ? if so, kfree() associated cpuset */ +- if (S_ISDIR(inode->i_mode)) { +- struct cpuset *cs = dentry->d_fsdata; +- BUG_ON(!(is_removed(cs))); +- kfree(cs); +- } +- iput(inode); +-} +- +-static struct dentry_operations cpuset_dops = { +- .d_iput = cpuset_diput, +-}; +- +-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name) +-{ +- struct dentry *d = lookup_one_len(name, parent, strlen(name)); +- if (!IS_ERR(d)) +- d->d_op = &cpuset_dops; +- return d; +-} +- +-static void remove_dir(struct dentry *d) +-{ +- struct dentry *parent = dget(d->d_parent); +- +- d_delete(d); +- simple_rmdir(parent->d_inode, d); +- dput(parent); +-} +- +-/* +- * NOTE : the dentry must have been dget()'ed +- */ +-static void cpuset_d_remove_dir(struct dentry *dentry) +-{ +- struct list_head *node; +- +- spin_lock(&dcache_lock); +- node = dentry->d_subdirs.next; +- while (node != &dentry->d_subdirs) { +- struct dentry *d = list_entry(node, struct dentry, d_u.d_child); +- list_del_init(node); +- if (d->d_inode) { +- d = dget_locked(d); +- spin_unlock(&dcache_lock); +- d_delete(d); +- simple_unlink(dentry->d_inode, d); +- dput(d); +- spin_lock(&dcache_lock); +- } +- node = dentry->d_subdirs.next; +- } +- list_del_init(&dentry->d_u.d_child); +- spin_unlock(&dcache_lock); +- remove_dir(dentry); +-} +- +-static struct super_operations cpuset_ops = { +- .statfs = simple_statfs, +- .drop_inode = generic_delete_inode, +-}; +- +-static int cpuset_fill_super(struct super_block *sb, void *unused_data, +- int unused_silent) +-{ +- struct inode *inode; +- struct dentry *root; +- +- sb->s_blocksize = PAGE_CACHE_SIZE; +- sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = CPUSET_SUPER_MAGIC; +- sb->s_op = &cpuset_ops; +- cpuset_sb = sb; +- +- inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR); +- if (inode) { +- inode->i_op = &simple_dir_inode_operations; +- inode->i_fop = &simple_dir_operations; +- /* directories start off with i_nlink == 2 (for "." entry) */ +- inc_nlink(inode); +- } else { +- return -ENOMEM; +- } +- +- root = d_alloc_root(inode); +- if (!root) { +- iput(inode); +- return -ENOMEM; +- } +- sb->s_root = root; +- return 0; +-} +- ++/* This is ugly, but preserves the userspace API for existing cpuset ++ * users. 
If someone tries to mount the "cpuset" filesystem, we ++ * silently switch it to mount "container" instead */ + static int cpuset_get_sb(struct file_system_type *fs_type, + int flags, const char *unused_dev_name, + void *data, struct vfsmount *mnt) + { +- return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); ++ struct file_system_type *container_fs = get_fs_type("container"); ++ int ret = -ENODEV; ++ if (container_fs) { ++ ret = container_fs->get_sb(container_fs, flags, ++ unused_dev_name, ++ "cpuset", mnt); ++ put_filesystem(container_fs); ++ if (!ret) { ++ container_set_release_agent_path( ++ &cpuset_subsys, ++ "/sbin/cpuset_release_agent"); ++ } ++ } ++ return ret; + } + + static struct file_system_type cpuset_fs_type = { + .name = "cpuset", + .get_sb = cpuset_get_sb, +- .kill_sb = kill_litter_super, +-}; +- +-/* struct cftype: +- * +- * The files in the cpuset filesystem mostly have a very simple read/write +- * handling, some common function will take care of it. Nevertheless some cases +- * (read tasks) are special and therefore I define this structure for every +- * kind of file. +- * +- * +- * When reading/writing to a file: +- * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata +- * - the 'cftype' of the file is file->f_path.dentry->d_fsdata +- */ +- +-struct cftype { +- char *name; +- int private; +- int (*open) (struct inode *inode, struct file *file); +- ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes, +- loff_t *ppos); +- int (*write) (struct file *file, const char __user *buf, size_t nbytes, +- loff_t *ppos); +- int (*release) (struct inode *inode, struct file *file); + }; + +-static inline struct cpuset *__d_cs(struct dentry *dentry) +-{ +- return dentry->d_fsdata; +-} +- +-static inline struct cftype *__d_cft(struct dentry *dentry) +-{ +- return dentry->d_fsdata; +-} +- +-/* +- * Call with manage_mutex held. Writes path of cpuset into buf. +- * Returns 0 on success, -errno on error. +- */ +- +-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen) +-{ +- char *start; +- +- start = buf + buflen; +- +- *--start = '\0'; +- for (;;) { +- int len = cs->dentry->d_name.len; +- if ((start -= len) < buf) +- return -ENAMETOOLONG; +- memcpy(start, cs->dentry->d_name.name, len); +- cs = cs->parent; +- if (!cs) +- break; +- if (!cs->parent) +- continue; +- if (--start < buf) +- return -ENAMETOOLONG; +- *start = '/'; +- } +- memmove(buf, start, buf + buflen - start); +- return 0; +-} +- +-/* +- * Notify userspace when a cpuset is released, by running +- * /sbin/cpuset_release_agent with the name of the cpuset (path +- * relative to the root of cpuset file system) as the argument. +- * +- * Most likely, this user command will try to rmdir this cpuset. +- * +- * This races with the possibility that some other task will be +- * attached to this cpuset before it is removed, or that some other +- * user task will 'mkdir' a child cpuset of this cpuset. That's ok. +- * The presumed 'rmdir' will fail quietly if this cpuset is no longer +- * unused, and this cpuset will be reprieved from its death sentence, +- * to continue to serve a useful existence. Next time it's released, +- * we will get notified again, if it still has 'notify_on_release' set. +- * +- * The final arg to call_usermodehelper() is 0, which means don't +- * wait. The separate /sbin/cpuset_release_agent task is forked by +- * call_usermodehelper(), then control in this thread returns here, +- * without waiting for the release agent task. 
We don't bother to +- * wait because the caller of this routine has no use for the exit +- * status of the /sbin/cpuset_release_agent task, so no sense holding +- * our caller up for that. +- * +- * When we had only one cpuset mutex, we had to call this +- * without holding it, to avoid deadlock when call_usermodehelper() +- * allocated memory. With two locks, we could now call this while +- * holding manage_mutex, but we still don't, so as to minimize +- * the time manage_mutex is held. +- */ +- +-static void cpuset_release_agent(const char *pathbuf) +-{ +- char *argv[3], *envp[3]; +- int i; +- +- if (!pathbuf) +- return; +- +- i = 0; +- argv[i++] = "/sbin/cpuset_release_agent"; +- argv[i++] = (char *)pathbuf; +- argv[i] = NULL; +- +- i = 0; +- /* minimal command environment */ +- envp[i++] = "HOME=/"; +- envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; +- envp[i] = NULL; +- +- call_usermodehelper(argv[0], argv, envp, 0); +- kfree(pathbuf); +-} +- +-/* +- * Either cs->count of using tasks transitioned to zero, or the +- * cs->children list of child cpusets just became empty. If this +- * cs is notify_on_release() and now both the user count is zero and +- * the list of children is empty, prepare cpuset path in a kmalloc'd +- * buffer, to be returned via ppathbuf, so that the caller can invoke +- * cpuset_release_agent() with it later on, once manage_mutex is dropped. +- * Call here with manage_mutex held. +- * +- * This check_for_release() routine is responsible for kmalloc'ing +- * pathbuf. The above cpuset_release_agent() is responsible for +- * kfree'ing pathbuf. The caller of these routines is responsible +- * for providing a pathbuf pointer, initialized to NULL, then +- * calling check_for_release() with manage_mutex held and the address +- * of the pathbuf pointer, then dropping manage_mutex, then calling +- * cpuset_release_agent() with pathbuf, as set by check_for_release(). +- */ +- +-static void check_for_release(struct cpuset *cs, char **ppathbuf) +-{ +- if (notify_on_release(cs) && atomic_read(&cs->count) == 0 && +- list_empty(&cs->children)) { +- char *buf; +- +- buf = kmalloc(PAGE_SIZE, GFP_KERNEL); +- if (!buf) +- return; +- if (cpuset_path(cs, buf, PAGE_SIZE) < 0) +- kfree(buf); +- else +- *ppathbuf = buf; +- } +-} +- + /* + * Return in *pmask the portion of a cpusets's cpus_allowed that + * are online. If none are online, walk up the cpuset hierarchy +@@ -651,20 +386,19 @@ + struct task_struct *tsk = current; + struct cpuset *cs; + +- if (tsk->cpuset == &top_cpuset) { ++ if (task_cs(tsk) == &top_cpuset) { + /* Don't need rcu for top_cpuset. It's never freed. 
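The cpuset_get_sb() hunk above is what keeps old userspace unchanged: the same mount call that worked before this patch still works, it just lands in the container filesystem with the cpuset subsystem bound and /sbin/cpuset_release_agent installed. A sketch of that unchanged caller, with /dev/cpuset as an assumed conventional mount point:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Mounting the legacy "cpuset" type; the kernel silently
	 * forwards this to the generic "container" filesystem. */
	if (mount("cpuset", "/dev/cpuset", "cpuset", 0, NULL) != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}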
*/ + my_cpusets_mem_gen = top_cpuset.mems_generation; + } else { + rcu_read_lock(); +- cs = rcu_dereference(tsk->cpuset); +- my_cpusets_mem_gen = cs->mems_generation; ++ my_cpusets_mem_gen = task_cs(current)->mems_generation; + rcu_read_unlock(); + } + + if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { + mutex_lock(&callback_mutex); + task_lock(tsk); +- cs = tsk->cpuset; /* Maybe changed when task not locked */ ++ cs = task_cs(tsk); /* Maybe changed when task not locked */ + guarantee_online_mems(cs, &tsk->mems_allowed); + tsk->cpuset_mems_generation = cs->mems_generation; + if (is_spread_page(cs)) +@@ -719,11 +453,12 @@ + + static int validate_change(const struct cpuset *cur, const struct cpuset *trial) + { ++ struct container *cont; + struct cpuset *c, *par; + + /* Each of our child cpusets must be a subset of us */ +- list_for_each_entry(c, &cur->children, sibling) { +- if (!is_cpuset_subset(c, trial)) ++ list_for_each_entry(cont, &cur->css.container->children, sibling) { ++ if (!is_cpuset_subset(container_cs(cont), trial)) + return -EBUSY; + } + +@@ -738,7 +473,8 @@ + return -EACCES; + + /* If either I or some sibling (!= me) is exclusive, we can't overlap */ +- list_for_each_entry(c, &par->children, sibling) { ++ list_for_each_entry(cont, &par->css.container->children, sibling) { ++ c = container_cs(cont); + if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && + c != cur && + cpus_intersects(trial->cpus_allowed, c->cpus_allowed)) +@@ -753,68 +489,13 @@ + } + + /* +- * For a given cpuset cur, partition the system as follows +- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any +- * exclusive child cpusets +- * b. All cpus in the current cpuset's cpus_allowed that are not part of any +- * exclusive child cpusets +- * Build these two partitions by calling partition_sched_domains +- * +- * Call with manage_mutex held. May nest a call to the +- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair. +- * Must not be called holding callback_mutex, because we must +- * not call lock_cpu_hotplug() while holding callback_mutex. +- */ +- +-static void update_cpu_domains(struct cpuset *cur) +-{ +- struct cpuset *c, *par = cur->parent; +- cpumask_t pspan, cspan; +- +- if (par == NULL || cpus_empty(cur->cpus_allowed)) +- return; +- +- /* +- * Get all cpus from parent's cpus_allowed not part of exclusive +- * children +- */ +- pspan = par->cpus_allowed; +- list_for_each_entry(c, &par->children, sibling) { +- if (is_cpu_exclusive(c)) +- cpus_andnot(pspan, pspan, c->cpus_allowed); +- } +- if (!is_cpu_exclusive(cur)) { +- cpus_or(pspan, pspan, cur->cpus_allowed); +- if (cpus_equal(pspan, cur->cpus_allowed)) +- return; +- cspan = CPU_MASK_NONE; +- } else { +- if (cpus_empty(pspan)) +- return; +- cspan = cur->cpus_allowed; +- /* +- * Get all cpus from current cpuset's cpus_allowed not part +- * of exclusive children +- */ +- list_for_each_entry(c, &cur->children, sibling) { +- if (is_cpu_exclusive(c)) +- cpus_andnot(cspan, cspan, c->cpus_allowed); +- } +- } +- +- lock_cpu_hotplug(); +- partition_sched_domains(&pspan, &cspan); +- unlock_cpu_hotplug(); +-} +- +-/* + * Call with manage_mutex held. May take callback_mutex during call. 
+ */ + + static int update_cpumask(struct cpuset *cs, char *buf) + { + struct cpuset trialcs; +- int retval, cpus_unchanged; ++ int retval; + + /* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */ + if (cs == &top_cpuset) +@@ -836,17 +517,15 @@ + } + cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map); + /* cpus_allowed cannot be empty for a cpuset with attached tasks. */ +- if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed)) ++ if (container_task_count(cs->css.container) && ++ cpus_empty(trialcs.cpus_allowed)) + return -ENOSPC; + retval = validate_change(cs, &trialcs); + if (retval < 0) + return retval; +- cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed); + mutex_lock(&callback_mutex); + cs->cpus_allowed = trialcs.cpus_allowed; + mutex_unlock(&callback_mutex); +- if (is_cpu_exclusive(cs) && !cpus_unchanged) +- update_cpu_domains(cs); + return 0; + } + +@@ -895,7 +574,7 @@ + do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); + + mutex_lock(&callback_mutex); +- guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed); ++ guarantee_online_mems(task_cs(tsk),&tsk->mems_allowed); + mutex_unlock(&callback_mutex); + } + +@@ -913,16 +592,19 @@ + * their mempolicies to the cpusets new mems_allowed. + */ + ++static void *cpuset_being_rebound; ++ + static int update_nodemask(struct cpuset *cs, char *buf) + { + struct cpuset trialcs; + nodemask_t oldmem; +- struct task_struct *g, *p; ++ struct task_struct *p; + struct mm_struct **mmarray; + int i, n, ntasks; + int migrate; + int fudge; + int retval; ++ struct container_iter it; + + /* top_cpuset.mems_allowed tracks node_online_map; it's read-only */ + if (cs == &top_cpuset) +@@ -949,7 +631,8 @@ + goto done; + } + /* mems_allowed cannot be empty for a cpuset with attached tasks. */ +- if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) { ++ if (container_task_count(cs->css.container) && ++ nodes_empty(trialcs.mems_allowed)) { + retval = -ENOSPC; + goto done; + } +@@ -962,7 +645,7 @@ + cs->mems_generation = cpuset_mems_generation++; + mutex_unlock(&callback_mutex); + +- set_cpuset_being_rebound(cs); /* causes mpol_copy() rebind */ ++ cpuset_being_rebound = cs; /* causes mpol_copy() rebind */ + + fudge = 10; /* spare mmarray[] slots */ + fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */ +@@ -976,37 +659,37 @@ + * enough mmarray[] w/o using GFP_ATOMIC. + */ + while (1) { +- ntasks = atomic_read(&cs->count); /* guess */ ++ ntasks = container_task_count(cs->css.container); /* guess */ + ntasks += fudge; + mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL); + if (!mmarray) + goto done; +- write_lock_irq(&tasklist_lock); /* block fork */ +- if (atomic_read(&cs->count) <= ntasks) ++ read_lock(&tasklist_lock); /* block fork */ ++ if (container_task_count(cs->css.container) <= ntasks) + break; /* got enough */ +- write_unlock_irq(&tasklist_lock); /* try again */ ++ read_unlock(&tasklist_lock); /* try again */ + kfree(mmarray); + } + + n = 0; + + /* Load up mmarray[] with mm reference for each task in cpuset. 
*/ +- do_each_thread(g, p) { ++ container_iter_start(cs->css.container, &it); ++ while ((p = container_iter_next(cs->css.container, &it))) { + struct mm_struct *mm; + + if (n >= ntasks) { + printk(KERN_WARNING + "Cpuset mempolicy rebind incomplete.\n"); +- continue; ++ break; + } +- if (p->cpuset != cs) +- continue; + mm = get_task_mm(p); + if (!mm) + continue; + mmarray[n++] = mm; +- } while_each_thread(g, p); +- write_unlock_irq(&tasklist_lock); ++ } ++ container_iter_end(cs->css.container, &it); ++ read_unlock(&tasklist_lock); + + /* + * Now that we've dropped the tasklist spinlock, we can +@@ -1033,12 +716,17 @@ + + /* We're done rebinding vma's to this cpusets new mems_allowed. */ + kfree(mmarray); +- set_cpuset_being_rebound(NULL); ++ cpuset_being_rebound = NULL; + retval = 0; + done: + return retval; + } + ++int current_cpuset_is_being_rebound(void) ++{ ++ return task_cs(current) == cpuset_being_rebound; ++} ++ + /* + * Call with manage_mutex held. + */ +@@ -1067,7 +755,7 @@ + { + int turning_on; + struct cpuset trialcs; +- int err, cpu_exclusive_changed; ++ int err; + + turning_on = (simple_strtoul(buf, NULL, 10) != 0); + +@@ -1080,14 +768,10 @@ + err = validate_change(cs, &trialcs); + if (err < 0) + return err; +- cpu_exclusive_changed = +- (is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs)); + mutex_lock(&callback_mutex); + cs->flags = trialcs.flags; + mutex_unlock(&callback_mutex); + +- if (cpu_exclusive_changed) +- update_cpu_domains(cs); + return 0; + } + +@@ -1189,85 +873,34 @@ + return val; + } + +-/* +- * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly +- * writing the path of the old cpuset in 'ppathbuf' if it needs to be +- * notified on release. +- * +- * Call holding manage_mutex. May take callback_mutex and task_lock of +- * the task 'pid' during call. +- */ +- +-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf) ++int cpuset_can_attach(struct container_subsys *ss, ++ struct container *cont, struct task_struct *tsk) + { +- pid_t pid; +- struct task_struct *tsk; +- struct cpuset *oldcs; +- cpumask_t cpus; +- nodemask_t from, to; +- struct mm_struct *mm; +- int retval; ++ struct cpuset *cs = container_cs(cont); + +- if (sscanf(pidbuf, "%d", &pid) != 1) +- return -EIO; + if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)) + return -ENOSPC; + +- if (pid) { +- read_lock(&tasklist_lock); +- +- tsk = find_task_by_pid(pid); +- if (!tsk || tsk->flags & PF_EXITING) { +- read_unlock(&tasklist_lock); +- return -ESRCH; +- } +- +- get_task_struct(tsk); +- read_unlock(&tasklist_lock); +- +- if ((current->euid) && (current->euid != tsk->uid) +- && (current->euid != tsk->suid)) { +- put_task_struct(tsk); +- return -EACCES; +- } +- } else { +- tsk = current; +- get_task_struct(tsk); +- } ++ return security_task_setscheduler(tsk, 0, NULL); ++} + +- retval = security_task_setscheduler(tsk, 0, NULL); +- if (retval) { +- put_task_struct(tsk); +- return retval; +- } ++void cpuset_attach(struct container_subsys *ss, ++ struct container *cont, struct container *oldcont, ++ struct task_struct *tsk) ++{ ++ cpumask_t cpus; ++ nodemask_t from, to; ++ struct mm_struct *mm; ++ struct cpuset *cs = container_cs(cont); ++ struct cpuset *oldcs = container_cs(oldcont); + + mutex_lock(&callback_mutex); +- +- task_lock(tsk); +- oldcs = tsk->cpuset; +- /* +- * After getting 'oldcs' cpuset ptr, be sure still not exiting. 
+- * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack +- * then fail this attach_task(), to avoid breaking top_cpuset.count. +- */ +- if (tsk->flags & PF_EXITING) { +- task_unlock(tsk); +- mutex_unlock(&callback_mutex); +- put_task_struct(tsk); +- return -ESRCH; +- } +- atomic_inc(&cs->count); +- rcu_assign_pointer(tsk->cpuset, cs); +- task_unlock(tsk); +- + guarantee_online_cpus(cs, &cpus); + set_cpus_allowed(tsk, cpus); ++ mutex_unlock(&callback_mutex); + + from = oldcs->mems_allowed; + to = cs->mems_allowed; +- +- mutex_unlock(&callback_mutex); +- + mm = get_task_mm(tsk); + if (mm) { + mpol_rebind_mm(mm, &to); +@@ -1276,40 +909,31 @@ + mmput(mm); + } + +- put_task_struct(tsk); +- synchronize_rcu(); +- if (atomic_dec_and_test(&oldcs->count)) +- check_for_release(oldcs, ppathbuf); +- return 0; + } + + /* The various types of files and directories in a cpuset file system */ + + typedef enum { +- FILE_ROOT, +- FILE_DIR, + FILE_MEMORY_MIGRATE, + FILE_CPULIST, + FILE_MEMLIST, + FILE_CPU_EXCLUSIVE, + FILE_MEM_EXCLUSIVE, +- FILE_NOTIFY_ON_RELEASE, + FILE_MEMORY_PRESSURE_ENABLED, + FILE_MEMORY_PRESSURE, + FILE_SPREAD_PAGE, + FILE_SPREAD_SLAB, +- FILE_TASKLIST, + } cpuset_filetype_t; + +-static ssize_t cpuset_common_file_write(struct file *file, ++static ssize_t cpuset_common_file_write(struct container *cont, ++ struct cftype *cft, ++ struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) + { +- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); +- struct cftype *cft = __d_cft(file->f_path.dentry); ++ struct cpuset *cs = container_cs(cont); + cpuset_filetype_t type = cft->private; + char *buffer; +- char *pathbuf = NULL; + int retval = 0; + + /* Crude upper limit on largest legitimate cpulist user might write. */ +@@ -1326,9 +950,9 @@ + } + buffer[nbytes] = 0; /* nul-terminate */ + +- mutex_lock(&manage_mutex); ++ container_lock(); + +- if (is_removed(cs)) { ++ if (container_is_removed(cont)) { + retval = -ENODEV; + goto out2; + } +@@ -1346,9 +970,6 @@ + case FILE_MEM_EXCLUSIVE: + retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer); + break; +- case FILE_NOTIFY_ON_RELEASE: +- retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer); +- break; + case FILE_MEMORY_MIGRATE: + retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); + break; +@@ -1366,9 +987,6 @@ + retval = update_flag(CS_SPREAD_SLAB, cs, buffer); + cs->mems_generation = cpuset_mems_generation++; + break; +- case FILE_TASKLIST: +- retval = attach_task(cs, buffer, &pathbuf); +- break; + default: + retval = -EINVAL; + goto out2; +@@ -1377,30 +995,12 @@ + if (retval == 0) + retval = nbytes; + out2: +- mutex_unlock(&manage_mutex); +- cpuset_release_agent(pathbuf); ++ container_unlock(); + out1: + kfree(buffer); + return retval; + } + +-static ssize_t cpuset_file_write(struct file *file, const char __user *buf, +- size_t nbytes, loff_t *ppos) +-{ +- ssize_t retval = 0; +- struct cftype *cft = __d_cft(file->f_path.dentry); +- if (!cft) +- return -ENODEV; +- +- /* special function ? */ +- if (cft->write) +- retval = cft->write(file, buf, nbytes, ppos); +- else +- retval = cpuset_common_file_write(file, buf, nbytes, ppos); +- +- return retval; +-} +- + /* + * These ascii lists should be read in a single call, by using a user + * buffer large enough to hold the entire map. 
If read in smaller +@@ -1435,17 +1035,19 @@ + return nodelist_scnprintf(page, PAGE_SIZE, mask); + } + +-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, ++static ssize_t cpuset_common_file_read(struct container *cont, ++ struct cftype *cft, ++ struct file *file, ++ char __user *buf, + size_t nbytes, loff_t *ppos) + { +- struct cftype *cft = __d_cft(file->f_path.dentry); +- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); ++ struct cpuset *cs = container_cs(cont); + cpuset_filetype_t type = cft->private; + char *page; + ssize_t retval = 0; + char *s; + +- if (!(page = (char *)__get_free_page(GFP_KERNEL))) ++ if (!(page = (char *)__get_free_page(GFP_TEMPORARY))) + return -ENOMEM; + + s = page; +@@ -1463,9 +1065,6 @@ + case FILE_MEM_EXCLUSIVE: + *s++ = is_mem_exclusive(cs) ? '1' : '0'; + break; +- case FILE_NOTIFY_ON_RELEASE: +- *s++ = notify_on_release(cs) ? '1' : '0'; +- break; + case FILE_MEMORY_MIGRATE: + *s++ = is_memory_migrate(cs) ? '1' : '0'; + break; +@@ -1493,390 +1092,140 @@ + return retval; + } + +-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes, +- loff_t *ppos) +-{ +- ssize_t retval = 0; +- struct cftype *cft = __d_cft(file->f_path.dentry); +- if (!cft) +- return -ENODEV; +- +- /* special function ? */ +- if (cft->read) +- retval = cft->read(file, buf, nbytes, ppos); +- else +- retval = cpuset_common_file_read(file, buf, nbytes, ppos); +- +- return retval; +-} +- +-static int cpuset_file_open(struct inode *inode, struct file *file) +-{ +- int err; +- struct cftype *cft; +- +- err = generic_file_open(inode, file); +- if (err) +- return err; +- +- cft = __d_cft(file->f_path.dentry); +- if (!cft) +- return -ENODEV; +- if (cft->open) +- err = cft->open(inode, file); +- else +- err = 0; +- +- return err; +-} +- +-static int cpuset_file_release(struct inode *inode, struct file *file) +-{ +- struct cftype *cft = __d_cft(file->f_path.dentry); +- if (cft->release) +- return cft->release(inode, file); +- return 0; +-} +- +-/* +- * cpuset_rename - Only allow simple rename of directories in place. +- */ +-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) +-{ +- if (!S_ISDIR(old_dentry->d_inode->i_mode)) +- return -ENOTDIR; +- if (new_dentry->d_inode) +- return -EEXIST; +- if (old_dir != new_dir) +- return -EIO; +- return simple_rename(old_dir, old_dentry, new_dir, new_dentry); +-} +- +-static const struct file_operations cpuset_file_operations = { +- .read = cpuset_file_read, +- .write = cpuset_file_write, +- .llseek = generic_file_llseek, +- .open = cpuset_file_open, +- .release = cpuset_file_release, +-}; +- +-static const struct inode_operations cpuset_dir_inode_operations = { +- .lookup = simple_lookup, +- .mkdir = cpuset_mkdir, +- .rmdir = cpuset_rmdir, +- .rename = cpuset_rename, +-}; +- +-static int cpuset_create_file(struct dentry *dentry, int mode) +-{ +- struct inode *inode; +- +- if (!dentry) +- return -ENOENT; +- if (dentry->d_inode) +- return -EEXIST; +- +- inode = cpuset_new_inode(mode); +- if (!inode) +- return -ENOMEM; +- +- if (S_ISDIR(mode)) { +- inode->i_op = &cpuset_dir_inode_operations; +- inode->i_fop = &simple_dir_operations; +- +- /* start off with i_nlink == 2 (for "." 
entry) */ +- inc_nlink(inode); +- } else if (S_ISREG(mode)) { +- inode->i_size = 0; +- inode->i_fop = &cpuset_file_operations; +- } +- +- d_instantiate(dentry, inode); +- dget(dentry); /* Extra count - pin the dentry in core */ +- return 0; +-} +- +-/* +- * cpuset_create_dir - create a directory for an object. +- * cs: the cpuset we create the directory for. +- * It must have a valid ->parent field +- * And we are going to fill its ->dentry field. +- * name: The name to give to the cpuset directory. Will be copied. +- * mode: mode to set on new directory. +- */ +- +-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode) +-{ +- struct dentry *dentry = NULL; +- struct dentry *parent; +- int error = 0; +- +- parent = cs->parent->dentry; +- dentry = cpuset_get_dentry(parent, name); +- if (IS_ERR(dentry)) +- return PTR_ERR(dentry); +- error = cpuset_create_file(dentry, S_IFDIR | mode); +- if (!error) { +- dentry->d_fsdata = cs; +- inc_nlink(parent->d_inode); +- cs->dentry = dentry; +- } +- dput(dentry); +- +- return error; +-} +- +-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft) +-{ +- struct dentry *dentry; +- int error; +- +- mutex_lock(&dir->d_inode->i_mutex); +- dentry = cpuset_get_dentry(dir, cft->name); +- if (!IS_ERR(dentry)) { +- error = cpuset_create_file(dentry, 0644 | S_IFREG); +- if (!error) +- dentry->d_fsdata = (void *)cft; +- dput(dentry); +- } else +- error = PTR_ERR(dentry); +- mutex_unlock(&dir->d_inode->i_mutex); +- return error; +-} +- +-/* +- * Stuff for reading the 'tasks' file. +- * +- * Reading this file can return large amounts of data if a cpuset has +- * *lots* of attached tasks. So it may need several calls to read(), +- * but we cannot guarantee that the information we produce is correct +- * unless we produce it entirely atomically. +- * +- * Upon tasks file open(), a struct ctr_struct is allocated, that +- * will have a pointer to an array (also allocated here). The struct +- * ctr_struct * is stored in file->private_data. Its resources will +- * be freed by release() when the file is closed. The array is used +- * to sprintf the PIDs and then used by read(). +- */ +- +-/* cpusets_tasks_read array */ +- +-struct ctr_struct { +- char *buf; +- int bufsz; +-}; +- +-/* +- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'. +- * Return actual number of pids loaded. No need to task_lock(p) +- * when reading out p->cpuset, as we don't really care if it changes +- * on the next cycle, and we are not going to try to dereference it. +- */ +-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs) +-{ +- int n = 0; +- struct task_struct *g, *p; +- +- read_lock(&tasklist_lock); +- +- do_each_thread(g, p) { +- if (p->cpuset == cs) { +- if (unlikely(n == npids)) +- goto array_full; +- pidarray[n++] = p->pid; +- } +- } while_each_thread(g, p); +- +-array_full: +- read_unlock(&tasklist_lock); +- return n; +-} +- +-static int cmppid(const void *a, const void *b) +-{ +- return *(pid_t *)a - *(pid_t *)b; +-} +- +-/* +- * Convert array 'a' of 'npids' pid_t's to a string of newline separated +- * decimal pids in 'buf'. Don't write more than 'sz' chars, but return +- * count 'cnt' of how many chars would be written if buf were large enough. +- */ +-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) +-{ +- int cnt = 0; +- int i; +- +- for (i = 0; i < npids; i++) +- cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]); +- return cnt; +-} +- +-/* +- * Handle an open on 'tasks' file. 
Prepare a buffer listing the +- * process id's of tasks currently attached to the cpuset being opened. +- * +- * Does not require any specific cpuset mutexes, and does not take any. +- */ +-static int cpuset_tasks_open(struct inode *unused, struct file *file) +-{ +- struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); +- struct ctr_struct *ctr; +- pid_t *pidarray; +- int npids; +- char c; +- +- if (!(file->f_mode & FMODE_READ)) +- return 0; +- +- ctr = kmalloc(sizeof(*ctr), GFP_KERNEL); +- if (!ctr) +- goto err0; +- +- /* +- * If cpuset gets more users after we read count, we won't have +- * enough space - tough. This race is indistinguishable to the +- * caller from the case that the additional cpuset users didn't +- * show up until sometime later on. +- */ +- npids = atomic_read(&cs->count); +- pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL); +- if (!pidarray) +- goto err1; +- +- npids = pid_array_load(pidarray, npids, cs); +- sort(pidarray, npids, sizeof(pid_t), cmppid, NULL); +- +- /* Call pid_array_to_buf() twice, first just to get bufsz */ +- ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1; +- ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL); +- if (!ctr->buf) +- goto err2; +- ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids); +- +- kfree(pidarray); +- file->private_data = ctr; +- return 0; +- +-err2: +- kfree(pidarray); +-err1: +- kfree(ctr); +-err0: +- return -ENOMEM; +-} +- +-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf, +- size_t nbytes, loff_t *ppos) +-{ +- struct ctr_struct *ctr = file->private_data; + +- return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz); +-} + +-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file) +-{ +- struct ctr_struct *ctr; + +- if (file->f_mode & FMODE_READ) { +- ctr = file->private_data; +- kfree(ctr->buf); +- kfree(ctr); +- } +- return 0; +-} + + /* + * for the common functions, 'private' gives the type of file + */ + +-static struct cftype cft_tasks = { +- .name = "tasks", +- .open = cpuset_tasks_open, +- .read = cpuset_tasks_read, +- .release = cpuset_tasks_release, +- .private = FILE_TASKLIST, +-}; +- + static struct cftype cft_cpus = { + .name = "cpus", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_CPULIST, + }; + + static struct cftype cft_mems = { + .name = "mems", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_MEMLIST, + }; + + static struct cftype cft_cpu_exclusive = { + .name = "cpu_exclusive", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_CPU_EXCLUSIVE, + }; + + static struct cftype cft_mem_exclusive = { + .name = "mem_exclusive", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_MEM_EXCLUSIVE, + }; + +-static struct cftype cft_notify_on_release = { +- .name = "notify_on_release", +- .private = FILE_NOTIFY_ON_RELEASE, +-}; +- + static struct cftype cft_memory_migrate = { + .name = "memory_migrate", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_MEMORY_MIGRATE, + }; + + static struct cftype cft_memory_pressure_enabled = { + .name = "memory_pressure_enabled", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_MEMORY_PRESSURE_ENABLED, + }; + + static struct cftype cft_memory_pressure = { + .name = "memory_pressure", ++ .read = cpuset_common_file_read, ++ .write = 
cpuset_common_file_write, + .private = FILE_MEMORY_PRESSURE, + }; + + static struct cftype cft_spread_page = { + .name = "memory_spread_page", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_SPREAD_PAGE, + }; + + static struct cftype cft_spread_slab = { + .name = "memory_spread_slab", ++ .read = cpuset_common_file_read, ++ .write = cpuset_common_file_write, + .private = FILE_SPREAD_SLAB, + }; + +-static int cpuset_populate_dir(struct dentry *cs_dentry) ++int cpuset_populate(struct container_subsys *ss, struct container *cont) + { + int err; + +- if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0) ++ if ((err = container_add_file(cont, &cft_cpus)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0) ++ if ((err = container_add_file(cont, &cft_mems)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0) ++ if ((err = container_add_file(cont, &cft_cpu_exclusive)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0) ++ if ((err = container_add_file(cont, &cft_mem_exclusive)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0) ++ if ((err = container_add_file(cont, &cft_memory_migrate)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0) ++ if ((err = container_add_file(cont, &cft_memory_pressure)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0) ++ if ((err = container_add_file(cont, &cft_spread_page)) < 0) + return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0) +- return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0) +- return err; +- if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0) ++ if ((err = container_add_file(cont, &cft_spread_slab)) < 0) + return err; ++ /* memory_pressure_enabled is in root cpuset only */ ++ if (err == 0 && !cont->parent) ++ err = container_add_file(cont, &cft_memory_pressure_enabled); + return 0; + } + + /* ++ * post_clone() is called at the end of container_clone(). ++ * 'container' was just created automatically as a result of ++ * a container_clone(), and the current task is about to ++ * be moved into 'container'. ++ * ++ * Currently we refuse to set up the container - thereby ++ * refusing the task to be entered, and as a result refusing ++ * the sys_unshare() or clone() which initiated it - if any ++ * sibling cpusets have exclusive cpus or mem. ++ * ++ * If this becomes a problem for some users who wish to ++ * allow that scenario, then cpuset_post_clone() could be ++ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive ++ * (and likewise for mems) to the new container. ++ */ ++void cpuset_post_clone(struct container_subsys *ss, ++ struct container *container) ++{ ++ struct container *parent, *child; ++ struct cpuset *cs, *parent_cs; ++ ++ parent = container->parent; ++ list_for_each_entry(child, &parent->children, sibling) { ++ cs = container_cs(child); ++ if (is_mem_exclusive(cs) || is_cpu_exclusive(cs)) ++ return; ++ } ++ cs = container_cs(container); ++ parent_cs = container_cs(parent); ++ ++ cs->mems_allowed = parent_cs->mems_allowed; ++ cs->cpus_allowed = parent_cs->cpus_allowed; ++ return; ++} ++ ++/* + * cpuset_create - create a cpuset + * parent: cpuset that will be parent of the new cpuset. + * name: name of the new cpuset. Will be strcpy'ed. 
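The hunks above are the crux of the control-file conversion: each cftype now names the shared cpuset_common_file_read()/cpuset_common_file_write() handlers directly, and cpuset_populate() registers the files through the generic container_add_file() instead of cpuset's private VFS code. A minimal sketch of the same pattern for some other subsystem, assuming only the container API this patch introduces (my_subsys_populate, cft_my_value, FILE_MY_VALUE and my_value_read are illustrative names, not part of the patch):

static struct cftype cft_my_value = {
	.name = "my_value",		/* file name inside each container dir */
	.read = my_value_read,		/* common handler, switched on ->private */
	.private = FILE_MY_VALUE,	/* tag the common handler dispatches on */
};

static int my_subsys_populate(struct container_subsys *ss,
			      struct container *cont)
{
	/* One container_add_file() per control file; bail on first error. */
	return container_add_file(cont, &cft_my_value);
}

Keying a single read/write handler off cft->private, as cpuset does, keeps the per-file boilerplate down to the cftype declaration itself.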
+@@ -1885,124 +1234,62 @@ + * Must be called with the mutex on the parent inode held + */ + +-static long cpuset_create(struct cpuset *parent, const char *name, int mode) ++int cpuset_create(struct container_subsys *ss, struct container *cont) + { + struct cpuset *cs; +- int err; ++ struct cpuset *parent; + ++ if (!cont->parent) { ++ /* This is early initialization for the top container */ ++ set_container_cs(cont, &top_cpuset); ++ top_cpuset.css.container = cont; ++ top_cpuset.mems_generation = cpuset_mems_generation++; ++ return 0; ++ } ++ parent = container_cs(cont->parent); + cs = kmalloc(sizeof(*cs), GFP_KERNEL); + if (!cs) + return -ENOMEM; + +- mutex_lock(&manage_mutex); + cpuset_update_task_memory_state(); + cs->flags = 0; +- if (notify_on_release(parent)) +- set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); + if (is_spread_page(parent)) + set_bit(CS_SPREAD_PAGE, &cs->flags); + if (is_spread_slab(parent)) + set_bit(CS_SPREAD_SLAB, &cs->flags); + cs->cpus_allowed = CPU_MASK_NONE; + cs->mems_allowed = NODE_MASK_NONE; +- atomic_set(&cs->count, 0); +- INIT_LIST_HEAD(&cs->sibling); +- INIT_LIST_HEAD(&cs->children); + cs->mems_generation = cpuset_mems_generation++; + fmeter_init(&cs->fmeter); + + cs->parent = parent; +- +- mutex_lock(&callback_mutex); +- list_add(&cs->sibling, &cs->parent->children); ++ set_container_cs(cont, cs); ++ cs->css.container = cont; + number_of_cpusets++; +- mutex_unlock(&callback_mutex); +- +- err = cpuset_create_dir(cs, name, mode); +- if (err < 0) +- goto err; +- +- /* +- * Release manage_mutex before cpuset_populate_dir() because it +- * will down() this new directory's i_mutex and if we race with +- * another mkdir, we might deadlock. +- */ +- mutex_unlock(&manage_mutex); +- +- err = cpuset_populate_dir(cs->dentry); +- /* If err < 0, we have a half-filled directory - oh well ;) */ + return 0; +-err: +- list_del(&cs->sibling); +- mutex_unlock(&manage_mutex); +- kfree(cs); +- return err; +-} +- +-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode) +-{ +- struct cpuset *c_parent = dentry->d_parent->d_fsdata; +- +- /* the vfs holds inode->i_mutex already */ +- return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR); + } + +-/* +- * Locking note on the strange update_flag() call below: +- * +- * If the cpuset being removed is marked cpu_exclusive, then simulate +- * turning cpu_exclusive off, which will call update_cpu_domains(). +- * The lock_cpu_hotplug() call in update_cpu_domains() must not be +- * made while holding callback_mutex. Elsewhere the kernel nests +- * callback_mutex inside lock_cpu_hotplug() calls. So the reverse +- * nesting would risk an ABBA deadlock. 
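+- * (Concretely: one path takes callback_mutex and then wants the cpu
+- * hotplug lock, while another holds the hotplug lock and then wants
+- * callback_mutex; each side then blocks forever on the lock the other
+- * already holds.)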
+- */ +- +-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) ++void cpuset_destroy(struct container_subsys *ss, struct container *cont) + { +- struct cpuset *cs = dentry->d_fsdata; +- struct dentry *d; +- struct cpuset *parent; +- char *pathbuf = NULL; +- +- /* the vfs holds both inode->i_mutex already */ ++ struct cpuset *cs = container_cs(cont); + +- mutex_lock(&manage_mutex); + cpuset_update_task_memory_state(); +- if (atomic_read(&cs->count) > 0) { +- mutex_unlock(&manage_mutex); +- return -EBUSY; +- } +- if (!list_empty(&cs->children)) { +- mutex_unlock(&manage_mutex); +- return -EBUSY; +- } +- if (is_cpu_exclusive(cs)) { +- int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0"); +- if (retval < 0) { +- mutex_unlock(&manage_mutex); +- return retval; +- } +- } +- parent = cs->parent; +- mutex_lock(&callback_mutex); +- set_bit(CS_REMOVED, &cs->flags); +- list_del(&cs->sibling); /* delete my sibling from parent->children */ +- spin_lock(&cs->dentry->d_lock); +- d = dget(cs->dentry); +- cs->dentry = NULL; +- spin_unlock(&d->d_lock); +- cpuset_d_remove_dir(d); +- dput(d); + number_of_cpusets--; +- mutex_unlock(&callback_mutex); +- if (list_empty(&parent->children)) +- check_for_release(parent, &pathbuf); +- mutex_unlock(&manage_mutex); +- cpuset_release_agent(pathbuf); +- return 0; ++ kfree(cs); + } + ++struct container_subsys cpuset_subsys = { ++ .name = "cpuset", ++ .create = cpuset_create, ++ .destroy = cpuset_destroy, ++ .can_attach = cpuset_can_attach, ++ .attach = cpuset_attach, ++ .populate = cpuset_populate, ++ .post_clone = cpuset_post_clone, ++ .subsys_id = cpuset_subsys_id, ++ .early_init = 1, ++}; ++ + /* + * cpuset_init_early - just enough so that the calls to + * cpuset_update_task_memory_state() in early init code +@@ -2011,13 +1298,11 @@ + + int __init cpuset_init_early(void) + { +- struct task_struct *tsk = current; +- +- tsk->cpuset = &top_cpuset; +- tsk->cpuset->mems_generation = cpuset_mems_generation++; ++ top_cpuset.mems_generation = cpuset_mems_generation++; + return 0; + } + ++ + /** + * cpuset_init - initialize cpusets at system boot + * +@@ -2026,8 +1311,7 @@ + + int __init cpuset_init(void) + { +- struct dentry *root; +- int err; ++ int err = 0; + + top_cpuset.cpus_allowed = CPU_MASK_ALL; + top_cpuset.mems_allowed = NODE_MASK_ALL; +@@ -2035,30 +1319,12 @@ + fmeter_init(&top_cpuset.fmeter); + top_cpuset.mems_generation = cpuset_mems_generation++; + +- init_task.cpuset = &top_cpuset; +- + err = register_filesystem(&cpuset_fs_type); + if (err < 0) +- goto out; +- cpuset_mount = kern_mount(&cpuset_fs_type); +- if (IS_ERR(cpuset_mount)) { +- printk(KERN_ERR "cpuset: could not mount!\n"); +- err = PTR_ERR(cpuset_mount); +- cpuset_mount = NULL; +- goto out; +- } +- root = cpuset_mount->mnt_sb->s_root; +- root->d_fsdata = &top_cpuset; +- inc_nlink(root->d_inode); +- top_cpuset.dentry = root; +- root->d_inode->i_op = &cpuset_dir_inode_operations; +- number_of_cpusets = 1; +- err = cpuset_populate_dir(root); +- /* memory_pressure_enabled is in root cpuset only */ +- if (err == 0) +- err = cpuset_add_file(root, &cft_memory_pressure_enabled); +-out: + return err; ++ ++ number_of_cpusets = 1; ++ return 0; + } + + /* +@@ -2084,10 +1350,12 @@ + + static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur) + { ++ struct container *cont; + struct cpuset *c; + + /* Each of our child cpusets mems must be online */ +- list_for_each_entry(c, &cur->children, sibling) { ++ list_for_each_entry(cont, &cur->css.container->children, sibling) { ++ c = 
container_cs(cont); + guarantee_online_cpus_mems_in_subtree(c); + if (!cpus_empty(c->cpus_allowed)) + guarantee_online_cpus(c, &c->cpus_allowed); +@@ -2114,7 +1382,7 @@ + + static void common_cpu_mem_hotplug_unplug(void) + { +- mutex_lock(&manage_mutex); ++ container_lock(); + mutex_lock(&callback_mutex); + + guarantee_online_cpus_mems_in_subtree(&top_cpuset); +@@ -2122,7 +1390,7 @@ + top_cpuset.mems_allowed = node_online_map; + + mutex_unlock(&callback_mutex); +- mutex_unlock(&manage_mutex); ++ container_unlock(); + } + + /* +@@ -2170,109 +1438,7 @@ + } + + /** +- * cpuset_fork - attach newly forked task to its parents cpuset. +- * @tsk: pointer to task_struct of forking parent process. +- * +- * Description: A task inherits its parent's cpuset at fork(). +- * +- * A pointer to the shared cpuset was automatically copied in fork.c +- * by dup_task_struct(). However, we ignore that copy, since it was +- * not made under the protection of task_lock(), so might no longer be +- * a valid cpuset pointer. attach_task() might have already changed +- * current->cpuset, allowing the previously referenced cpuset to +- * be removed and freed. Instead, we task_lock(current) and copy +- * its present value of current->cpuset for our freshly forked child. +- * +- * At the point that cpuset_fork() is called, 'current' is the parent +- * task, and the passed argument 'child' points to the child task. +- **/ + +-void cpuset_fork(struct task_struct *child) +-{ +- task_lock(current); +- child->cpuset = current->cpuset; +- atomic_inc(&child->cpuset->count); +- task_unlock(current); +-} +- +-/** +- * cpuset_exit - detach cpuset from exiting task +- * @tsk: pointer to task_struct of exiting process +- * +- * Description: Detach cpuset from @tsk and release it. +- * +- * Note that cpusets marked notify_on_release force every task in +- * them to take the global manage_mutex mutex when exiting. +- * This could impact scaling on very large systems. Be reluctant to +- * use notify_on_release cpusets where very high task exit scaling +- * is required on large systems. +- * +- * Don't even think about derefencing 'cs' after the cpuset use count +- * goes to zero, except inside a critical section guarded by manage_mutex +- * or callback_mutex. Otherwise a zero cpuset use count is a license to +- * any other task to nuke the cpuset immediately, via cpuset_rmdir(). +- * +- * This routine has to take manage_mutex, not callback_mutex, because +- * it is holding that mutex while calling check_for_release(), +- * which calls kmalloc(), so can't be called holding callback_mutex(). +- * +- * the_top_cpuset_hack: +- * +- * Set the exiting tasks cpuset to the root cpuset (top_cpuset). +- * +- * Don't leave a task unable to allocate memory, as that is an +- * accident waiting to happen should someone add a callout in +- * do_exit() after the cpuset_exit() call that might allocate. +- * If a task tries to allocate memory with an invalid cpuset, +- * it will oops in cpuset_update_task_memory_state(). +- * +- * We call cpuset_exit() while the task is still competent to +- * handle notify_on_release(), then leave the task attached to +- * the root cpuset (top_cpuset) for the remainder of its exit. +- * +- * To do this properly, we would increment the reference count on +- * top_cpuset, and near the very end of the kernel/exit.c do_exit() +- * code we would add a second cpuset function call, to drop that +- * reference. This would just create an unnecessary hot spot on +- * the top_cpuset reference count, to no avail. 
+- * +- * Normally, holding a reference to a cpuset without bumping its +- * count is unsafe. The cpuset could go away, or someone could +- * attach us to a different cpuset, decrementing the count on +- * the first cpuset that we never incremented. But in this case, +- * top_cpuset isn't going away, and either task has PF_EXITING set, +- * which wards off any attach_task() attempts, or task is a failed +- * fork, never visible to attach_task. +- * +- * Another way to do this would be to set the cpuset pointer +- * to NULL here, and check in cpuset_update_task_memory_state() +- * for a NULL pointer. This hack avoids that NULL check, for no +- * cost (other than this way too long comment ;). +- **/ +- +-void cpuset_exit(struct task_struct *tsk) +-{ +- struct cpuset *cs; +- +- task_lock(current); +- cs = tsk->cpuset; +- tsk->cpuset = &top_cpuset; /* the_top_cpuset_hack - see above */ +- task_unlock(current); +- +- if (notify_on_release(cs)) { +- char *pathbuf = NULL; +- +- mutex_lock(&manage_mutex); +- if (atomic_dec_and_test(&cs->count)) +- check_for_release(cs, &pathbuf); +- mutex_unlock(&manage_mutex); +- cpuset_release_agent(pathbuf); +- } else { +- atomic_dec(&cs->count); +- } +-} +- +-/** + * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. + * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. + * +@@ -2288,7 +1454,7 @@ + + mutex_lock(&callback_mutex); + task_lock(tsk); +- guarantee_online_cpus(tsk->cpuset, &mask); ++ guarantee_online_cpus(task_cs(tsk), &mask); + task_unlock(tsk); + mutex_unlock(&callback_mutex); + +@@ -2316,7 +1482,7 @@ + + mutex_lock(&callback_mutex); + task_lock(tsk); +- guarantee_online_mems(tsk->cpuset, &mask); ++ guarantee_online_mems(task_cs(tsk), &mask); + task_unlock(tsk); + mutex_unlock(&callback_mutex); + +@@ -2447,7 +1613,7 @@ + mutex_lock(&callback_mutex); + + task_lock(current); +- cs = nearest_exclusive_ancestor(current->cpuset); ++ cs = nearest_exclusive_ancestor(task_cs(current)); + task_unlock(current); + + allowed = node_isset(node, cs->mems_allowed); +@@ -2584,7 +1750,7 @@ + task_unlock(current); + goto done; + } +- cs1 = nearest_exclusive_ancestor(current->cpuset); ++ cs1 = nearest_exclusive_ancestor(task_cs(current)); + task_unlock(current); + + task_lock((struct task_struct *)p); +@@ -2592,7 +1758,7 @@ + task_unlock((struct task_struct *)p); + goto done; + } +- cs2 = nearest_exclusive_ancestor(p->cpuset); ++ cs2 = nearest_exclusive_ancestor(task_cs((struct task_struct *)p)); + task_unlock((struct task_struct *)p); + + overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); +@@ -2628,14 +1794,12 @@ + + void __cpuset_memory_pressure_bump(void) + { +- struct cpuset *cs; +- + task_lock(current); +- cs = current->cpuset; +- fmeter_markevent(&cs->fmeter); ++ fmeter_markevent(&task_cs(current)->fmeter); + task_unlock(current); + } + ++#ifdef CONFIG_PROC_PID_CPUSET + /* + * proc_cpuset_show() + * - Print tasks cpuset path into seq_file. 
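The hunks above all converge on one accessor: wherever the old code dereferenced tsk->cpuset directly, the new code goes through task_cs(). A plausible shape for that helper, sketched here on the assumption that the patch defines it earlier in cpuset.c on top of task_subsys_state() (the exact definition is outside this excerpt):

static inline struct cpuset *task_cs(struct task_struct *task)
{
	/* Fetch the task's cpuset-subsystem state and recover the
	 * enclosing struct cpuset from its embedded css member. */
	return container_of(task_subsys_state(task, cpuset_subsys_id),
			    struct cpuset, css);
}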
+@@ -2652,6 +1816,7 @@ + struct pid *pid; + struct task_struct *tsk; + char *buf; ++ struct container_subsys_state *css; + int retval; + + retval = -ENOMEM; +@@ -2666,15 +1831,15 @@ + goto out_free; + + retval = -EINVAL; +- mutex_lock(&manage_mutex); +- +- retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE); ++ container_lock(); ++ css = task_subsys_state(tsk, cpuset_subsys_id); ++ retval = container_path(css->container, buf, PAGE_SIZE); + if (retval < 0) + goto out_unlock; + seq_puts(m, buf); + seq_putc(m, '\n'); + out_unlock: +- mutex_unlock(&manage_mutex); ++ container_unlock(); + put_task_struct(tsk); + out_free: + kfree(buf); +@@ -2694,6 +1859,7 @@ + .llseek = seq_lseek, + .release = single_release, + }; ++#endif /* CONFIG_PROC_PID_CPUSET */ + + /* Display task cpus_allowed, mems_allowed in /proc//status file. */ + char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) +diff -Nurb linux-2.6.22-570/kernel/exit.c linux-2.6.22-591/kernel/exit.c +--- linux-2.6.22-570/kernel/exit.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/exit.c 2007-12-21 15:36:12.000000000 -0500 +@@ -31,7 +31,8 @@ + #include + #include + #include +-#include ++#include ++#include + #include + #include + #include +@@ -393,6 +394,11 @@ + * they would be locked into memory. + */ + exit_mm(current); ++ /* ++ * We don't want to have TIF_FREEZE set if the system-wide hibernation ++ * or suspend transition begins right now. ++ */ ++ current->flags |= PF_NOFREEZE; + + set_special_pids(1, 1); + proc_clear_tty(current); +@@ -875,6 +881,34 @@ + release_task(tsk); + } + ++#ifdef CONFIG_DEBUG_STACK_USAGE ++static void check_stack_usage(void) ++{ ++ static DEFINE_SPINLOCK(low_water_lock); ++ static int lowest_to_date = THREAD_SIZE; ++ unsigned long *n = end_of_stack(current); ++ unsigned long free; ++ ++ while (*n == 0) ++ n++; ++ free = (unsigned long)n - (unsigned long)end_of_stack(current); ++ ++ if (free >= lowest_to_date) ++ return; ++ ++ spin_lock(&low_water_lock); ++ if (free < lowest_to_date) { ++ printk(KERN_WARNING "%s used greatest stack depth: %lu bytes " ++ "left\n", ++ current->comm, free); ++ lowest_to_date = free; ++ } ++ spin_unlock(&low_water_lock); ++} ++#else ++static inline void check_stack_usage(void) {} ++#endif ++ + fastcall NORET_TYPE void do_exit(long code) + { + struct task_struct *tsk = current; +@@ -966,8 +1000,9 @@ + exit_sem(tsk); + __exit_files(tsk); + __exit_fs(tsk); ++ check_stack_usage(); + exit_thread(); +- cpuset_exit(tsk); ++ container_exit(tsk, 1); + exit_keys(tsk); + + if (group_dead && tsk->signal->leader) +diff -Nurb linux-2.6.22-570/kernel/fork.c linux-2.6.22-591/kernel/fork.c +--- linux-2.6.22-570/kernel/fork.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/fork.c 2007-12-21 15:36:15.000000000 -0500 +@@ -29,7 +29,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -342,6 +342,8 @@ + atomic_set(&mm->mm_count, 1); + init_rwsem(&mm->mmap_sem); + INIT_LIST_HEAD(&mm->mmlist); ++ mm->flags = (current->mm) ? 
current->mm->flags ++ : MMF_DUMP_FILTER_DEFAULT; + mm->core_waiters = 0; + mm->nr_ptes = 0; + __set_mm_counter(mm, file_rss, 0); +@@ -936,7 +938,7 @@ + { + unsigned long new_flags = p->flags; + +- new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE); ++ new_flags &= ~PF_SUPERPRIV; + new_flags |= PF_FORKNOEXEC; + if (!(clone_flags & CLONE_PTRACE)) + p->ptrace = 0; +@@ -977,6 +979,7 @@ + { + int retval; + struct task_struct *p = NULL; ++ int container_callbacks_done = 0; + struct vx_info *vxi; + struct nx_info *nxi; + +@@ -1061,11 +1064,6 @@ + delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ + copy_flags(clone_flags, p); + p->pid = pid_nr(pid); +- retval = -EFAULT; +- if (clone_flags & CLONE_PARENT_SETTID) +- if (put_user(p->pid, parent_tidptr)) +- goto bad_fork_cleanup_delays_binfmt; +- + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->sibling); + p->vfork_done = NULL; +@@ -1095,17 +1093,19 @@ + + p->lock_depth = -1; /* -1 = no lock */ + do_posix_clock_monotonic_gettime(&p->start_time); ++ p->real_start_time = p->start_time; ++ monotonic_to_bootbased(&p->real_start_time); + p->security = NULL; + p->io_context = NULL; + p->io_wait = NULL; + p->audit_context = NULL; +- cpuset_fork(p); ++ container_fork(p); + #ifdef CONFIG_NUMA + p->mempolicy = mpol_copy(p->mempolicy); + if (IS_ERR(p->mempolicy)) { + retval = PTR_ERR(p->mempolicy); + p->mempolicy = NULL; +- goto bad_fork_cleanup_cpuset; ++ goto bad_fork_cleanup_container; + } + mpol_fix_fork_child_flag(p); + #endif +@@ -1215,6 +1215,12 @@ + /* Perform scheduler related setup. Assign this task to a CPU. */ + sched_fork(p, clone_flags); + ++ /* Now that the task is set up, run container callbacks if ++ * necessary. We need to run them before the task is visible ++ * on the tasklist. */ ++ container_fork_callbacks(p); ++ container_callbacks_done = 1; ++ + /* Need tasklist lock for parent etc handling! */ + write_lock_irq(&tasklist_lock); + +@@ -1314,6 +1320,14 @@ + if (nxi) + claim_nx_info(nxi, p); + write_unlock_irq(&tasklist_lock); ++ ++ /* ++ * Now that we know the fork has succeeded, record the new ++ * TID. It's too late to back out if this fails. ++ */ ++ if (clone_flags & CLONE_PARENT_SETTID) ++ put_user(p->pid, parent_tidptr); ++ + proc_fork_connector(p); + return p; + +@@ -1341,10 +1355,9 @@ + bad_fork_cleanup_policy: + #ifdef CONFIG_NUMA + mpol_free(p->mempolicy); +-bad_fork_cleanup_cpuset: ++bad_fork_cleanup_container: + #endif +- cpuset_exit(p); +-bad_fork_cleanup_delays_binfmt: ++ container_exit(p, container_callbacks_done); + delayacct_tsk_free(p); + if (p->binfmt) + module_put(p->binfmt->module); +@@ -1661,7 +1674,7 @@ + err = -EINVAL; + if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND| + CLONE_VM|CLONE_FILES|CLONE_SYSVSEM| +- CLONE_NEWUTS|CLONE_NEWIPC)) ++ CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER)) + goto bad_unshare_out; + + if ((err = unshare_thread(unshare_flags))) +diff -Nurb linux-2.6.22-570/kernel/kgdb.c linux-2.6.22-591/kernel/kgdb.c +--- linux-2.6.22-570/kernel/kgdb.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/kgdb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,1866 @@ ++/* ++ * kernel/kgdb.c ++ * ++ * Maintainer: Jason Wessel ++ * ++ * Copyright (C) 2000-2001 VERITAS Software Corporation. ++ * Copyright (C) 2002-2004 Timesys Corporation ++ * Copyright (C) 2003-2004 Amit S. Kale ++ * Copyright (C) 2004 Pavel Machek ++ * Copyright (C) 2004-2006 Tom Rini ++ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. ++ * Copyright (C) 2005-2007 Wind River Systems, Inc. 
++ * ++ * Contributors at various stages not listed above: ++ * Jason Wessel ( jason.wessel@windriver.com ) ++ * George Anzinger ++ * Anurekh Saxena (anurekh.saxena@timesys.com) ++ * Lake Stevens Instrument Division (Glenn Engel) ++ * Jim Kingdon, Cygnus Support. ++ * ++ * Original KGDB stub: David Grothe , ++ * Tigran Aivazian ++ * ++ * This file is licensed under the terms of the GNU General Public License ++ * version 2. This program is licensed "as is" without any warranty of any ++ * kind, whether express or implied. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern int pid_max; ++/* How many times to count all of the waiting CPUs */ ++#define ROUNDUP_WAIT 640000 /* Arbitrary, increase if needed. */ ++#define BUF_THREAD_ID_SIZE 16 ++ ++/* ++ * kgdb_initialized with a value of 1 indicates that kgdb is set up and is ++ * all ready to serve breakpoints and other kernel exceptions. A value of ++ * -1 indicates that we have tried to initialize early, and need to try ++ * again later. ++ */ ++int kgdb_initialized; ++/* Is a host GDB connected to us? */ ++int kgdb_connected; ++/* Could we be about to try and access a bad memory location? If so we ++ * also need to flag that this has happened. */ ++int kgdb_may_fault; ++#ifdef CONFIG_PREEMPT ++static int kgdb_fault_preempt_count; ++#endif ++ ++/* All the KGDB handlers are installed */ ++int kgdb_from_module_registered = 0; ++/* Guard for recursive entry */ ++static int exception_level = 0; ++ ++/* We provide a kgdb_io_ops structure that may be overridden. */ ++struct kgdb_io __attribute__ ((weak)) kgdb_io_ops; ++ ++static struct kgdb_io kgdb_io_ops_prev[MAX_KGDB_IO_HANDLERS]; ++static int kgdb_io_handler_cnt = 0; ++ ++/* Export the following symbols for use with kernel modules */ ++EXPORT_SYMBOL(kgdb_io_ops); ++EXPORT_SYMBOL(kgdb_tasklet_breakpoint); ++EXPORT_SYMBOL(kgdb_connected); ++EXPORT_SYMBOL(kgdb_register_io_module); ++EXPORT_SYMBOL(kgdb_unregister_io_module); ++EXPORT_SYMBOL(debugger_active); ++ ++/* ++ * Holds information about breakpoints in a kernel. These breakpoints are ++ * added and removed by gdb. ++ */ ++struct kgdb_bkpt kgdb_break[MAX_BREAKPOINTS]; ++ ++struct kgdb_arch *kgdb_ops = &arch_kgdb_ops; ++ ++static const char hexchars[] = "0123456789abcdef"; ++ ++static spinlock_t slavecpulocks[NR_CPUS]; ++static atomic_t procindebug[NR_CPUS]; ++atomic_t kgdb_setting_breakpoint; ++EXPORT_SYMBOL(kgdb_setting_breakpoint); ++struct task_struct *kgdb_usethread, *kgdb_contthread; ++ ++int debugger_step; ++atomic_t debugger_active; ++ ++/* Our I/O buffers. */ ++static char remcom_in_buffer[BUFMAX]; ++static char remcom_out_buffer[BUFMAX]; ++/* Storage for the registers, in GDB format. */ ++static unsigned long gdb_regs[(NUMREGBYTES + sizeof(unsigned long) - 1) / ++ sizeof(unsigned long)]; ++/* Storage of registers for handling a fault. 
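++ * These are armed by kgdb_set_may_fault() below: if a memory access in
++ * the mem2hex/hex2mem helpers faults while kgdb_may_fault is set,
++ * kgdb_fault_setjmp() unwinds to this saved state instead of taking a
++ * recursive exception.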
*/ ++unsigned long kgdb_fault_jmp_regs[NUMCRITREGBYTES / sizeof(unsigned long)] ++ JMP_REGS_ALIGNMENT; ++static int kgdb_notify_reboot(struct notifier_block *this, ++ unsigned long code, void *x); ++struct debuggerinfo_struct { ++ void *debuggerinfo; ++ struct task_struct *task; ++} kgdb_info[NR_CPUS]; ++ ++/* to keep track of the CPU which is doing the single stepping */ ++atomic_t cpu_doing_single_step = ATOMIC_INIT(-1); ++ ++atomic_t kgdb_sync_softlockup[NR_CPUS] = {ATOMIC_INIT(0)}; ++ ++/* reboot notifier block */ ++static struct notifier_block kgdb_reboot_notifier = { ++ .notifier_call = kgdb_notify_reboot, ++ .next = NULL, ++ .priority = INT_MAX, ++}; ++ ++int __attribute__ ((weak)) ++ kgdb_validate_break_address(unsigned long addr) ++{ ++ int error = 0; ++ char tmp_variable[BREAK_INSTR_SIZE]; ++ error = kgdb_get_mem((char *)addr, tmp_variable, BREAK_INSTR_SIZE); ++ return error; ++} ++ ++int __attribute__ ((weak)) ++ kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr) ++{ ++ int error = 0; ++ if ((error = kgdb_get_mem((char *)addr, ++ saved_instr, BREAK_INSTR_SIZE)) < 0) ++ return error; ++ ++ if ((error = kgdb_set_mem((char *)addr, kgdb_ops->gdb_bpt_instr, ++ BREAK_INSTR_SIZE)) < 0) ++ return error; ++ return 0; ++} ++ ++int __attribute__ ((weak)) ++ kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle) ++{ ++ ++ int error = 0; ++ if ((error = kgdb_set_mem((char *)addr, (char *)bundle, ++ BREAK_INSTR_SIZE)) < 0) ++ return error; ++ return 0; ++} ++ ++unsigned long __attribute__ ((weak)) ++ kgdb_arch_pc(int exception, struct pt_regs *regs) ++{ ++ return instruction_pointer(regs); ++} ++ ++static int hex(char ch) ++{ ++ if ((ch >= 'a') && (ch <= 'f')) ++ return (ch - 'a' + 10); ++ if ((ch >= '0') && (ch <= '9')) ++ return (ch - '0'); ++ if ((ch >= 'A') && (ch <= 'F')) ++ return (ch - 'A' + 10); ++ return (-1); ++} ++ ++/* scan for the sequence $<data>#<checksum> */ ++static void get_packet(char *buffer) ++{ ++ unsigned char checksum; ++ unsigned char xmitcsum; ++ int count; ++ char ch; ++ if (!kgdb_io_ops.read_char) ++ return; ++ do { ++ /* Spin and wait around for the start character, ignore all ++ * other characters */ ++ while ((ch = (kgdb_io_ops.read_char())) != '$') ; ++ kgdb_connected = 1; ++ checksum = 0; ++ xmitcsum = -1; ++ ++ count = 0; ++ ++ /* now, read until a # or end of buffer is found */ ++ while (count < (BUFMAX - 1)) { ++ ch = kgdb_io_ops.read_char(); ++ if (ch == '#') ++ break; ++ checksum = checksum + ch; ++ buffer[count] = ch; ++ count = count + 1; ++ } ++ buffer[count] = 0; ++ ++ if (ch == '#') { ++ xmitcsum = hex(kgdb_io_ops.read_char()) << 4; ++ xmitcsum += hex(kgdb_io_ops.read_char()); ++ ++ if (checksum != xmitcsum) ++ /* failed checksum */ ++ kgdb_io_ops.write_char('-'); ++ else ++ /* successful transfer */ ++ kgdb_io_ops.write_char('+'); ++ if (kgdb_io_ops.flush) ++ kgdb_io_ops.flush(); ++ } ++ } while (checksum != xmitcsum); ++} ++ ++static void kgdb_set_may_fault(void) { ++ kgdb_may_fault = 1; ++#ifdef CONFIG_PREEMPT ++ kgdb_fault_preempt_count = preempt_count(); ++#endif ++} ++ ++static void kgdb_unset_may_fault(void) { ++ kgdb_may_fault = 0; ++#ifdef CONFIG_PREEMPT ++ preempt_count() = kgdb_fault_preempt_count; ++#endif ++} ++ ++/* ++ * Send the packet in buffer. ++ * Check for gdb connection if asked for. ++ */ ++static void put_packet(char *buffer) ++{ ++ unsigned char checksum; ++ int count; ++ char ch; ++ ++ if (!kgdb_io_ops.write_char) ++ return; ++ /* $<packet info>#<checksum>. 
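++ * For example, the register-read request 'g' goes out as $g#67: the
++ * checksum is the modulo-256 sum of the payload bytes (here just
++ * 'g' == 0x67) printed as two lowercase hex digits.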
*/ ++ while (1) { ++ kgdb_io_ops.write_char('$'); ++ checksum = 0; ++ count = 0; ++ ++ while ((ch = buffer[count])) { ++ kgdb_io_ops.write_char(ch); ++ checksum += ch; ++ count++; ++ } ++ ++ kgdb_io_ops.write_char('#'); ++ kgdb_io_ops.write_char(hexchars[checksum >> 4]); ++ kgdb_io_ops.write_char(hexchars[checksum % 16]); ++ if (kgdb_io_ops.flush) ++ kgdb_io_ops.flush(); ++ ++ /* Now see what we get in reply. */ ++ ch = kgdb_io_ops.read_char(); ++ ++ if (ch == 3) ++ ch = kgdb_io_ops.read_char(); ++ ++ /* If we get an ACK, we are done. */ ++ if (ch == '+') ++ return; ++ ++ /* If we get the start of another packet, this means ++ * that GDB is attempting to reconnect. We will NAK ++ * the packet being sent, and stop trying to send this ++ * packet. */ ++ if (ch == '$') { ++ kgdb_io_ops.write_char('-'); ++ if (kgdb_io_ops.flush) ++ kgdb_io_ops.flush(); ++ return; ++ } ++ } ++} ++ ++/* ++ * convert the memory pointed to by mem into hex, placing result in buf ++ * return a pointer to the last char put in buf (null). May return an error. ++ */ ++char *kgdb_mem2hex(char *mem, char *buf, int count) ++{ ++ kgdb_set_may_fault(); ++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { ++ kgdb_unset_may_fault(); ++ return ERR_PTR(-EINVAL); ++ } ++ /* Accessing some registers in a single load instruction is ++ * required to avoid bad side effects for some I/O registers. ++ */ ++ if ((count == 2) && (((long)mem & 1) == 0)) { ++ unsigned short tmp_s = *(unsigned short *)mem; ++ mem += 2; ++#ifdef __BIG_ENDIAN ++ *buf++ = hexchars[(tmp_s >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_s >> 8) & 0xf]; ++ *buf++ = hexchars[(tmp_s >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_s & 0xf]; ++#else ++ *buf++ = hexchars[(tmp_s >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_s & 0xf]; ++ *buf++ = hexchars[(tmp_s >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_s >> 8) & 0xf]; ++#endif ++ } else if ((count == 4) && (((long)mem & 3) == 0)) { ++ unsigned long tmp_l = *(unsigned int *)mem; ++ mem += 4; ++#ifdef __BIG_ENDIAN ++ *buf++ = hexchars[(tmp_l >> 28) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 24) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 20) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 16) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 8) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_l & 0xf]; ++#else ++ *buf++ = hexchars[(tmp_l >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_l & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 8) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 20) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 16) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 28) & 0xf]; ++ *buf++ = hexchars[(tmp_l >> 24) & 0xf]; ++#endif ++#ifdef CONFIG_64BIT ++ } else if ((count == 8) && (((long)mem & 7) == 0)) { ++ unsigned long long tmp_ll = *(unsigned long long *)mem; ++ mem += 8; ++#ifdef __BIG_ENDIAN ++ *buf++ = hexchars[(tmp_ll >> 60) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 56) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 52) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 48) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 44) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 40) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 36) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 32) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 28) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 24) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 20) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 16) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 8) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_ll & 0xf]; 
++#else ++ *buf++ = hexchars[(tmp_ll >> 4) & 0xf]; ++ *buf++ = hexchars[tmp_ll & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 12) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 8) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 20) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 16) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 28) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 24) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 36) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 32) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 44) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 40) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 52) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 48) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 60) & 0xf]; ++ *buf++ = hexchars[(tmp_ll >> 56) & 0xf]; ++#endif ++#endif ++ } else { ++ while (count-- > 0) { ++ unsigned char ch = *mem++; ++ *buf++ = hexchars[ch >> 4]; ++ *buf++ = hexchars[ch & 0xf]; ++ } ++ } ++ kgdb_unset_may_fault(); ++ *buf = 0; ++ return (buf); ++} ++ ++/* ++ * Copy the binary array pointed to by buf into mem. Fix $, #, and ++ * 0x7d escaped with 0x7d. Return a pointer to the character after ++ * the last byte written. ++ */ ++static char *kgdb_ebin2mem(char *buf, char *mem, int count) ++{ ++ kgdb_set_may_fault(); ++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { ++ kgdb_unset_may_fault(); ++ return ERR_PTR(-EINVAL); ++ } ++ for (; count > 0; count--, buf++) { ++ if (*buf == 0x7d) ++ *mem++ = *(++buf) ^ 0x20; ++ else ++ *mem++ = *buf; ++ } ++ kgdb_unset_may_fault(); ++ return mem; ++} ++ ++/* ++ * convert the hex array pointed to by buf into binary to be placed in mem ++ * return a pointer to the character AFTER the last byte written ++ * May return an error. ++ */ ++char *kgdb_hex2mem(char *buf, char *mem, int count) ++{ ++ kgdb_set_may_fault(); ++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { ++ kgdb_unset_may_fault(); ++ return ERR_PTR(-EINVAL); ++ } ++ if ((count == 2) && (((long)mem & 1) == 0)) { ++ unsigned short tmp_s = 0; ++#ifdef __BIG_ENDIAN ++ tmp_s |= hex(*buf++) << 12; ++ tmp_s |= hex(*buf++) << 8; ++ tmp_s |= hex(*buf++) << 4; ++ tmp_s |= hex(*buf++); ++#else ++ tmp_s |= hex(*buf++) << 4; ++ tmp_s |= hex(*buf++); ++ tmp_s |= hex(*buf++) << 12; ++ tmp_s |= hex(*buf++) << 8; ++#endif ++ *(unsigned short *)mem = tmp_s; ++ mem += 2; ++ } else if ((count == 4) && (((long)mem & 3) == 0)) { ++ unsigned long tmp_l = 0; ++#ifdef __BIG_ENDIAN ++ tmp_l |= hex(*buf++) << 28; ++ tmp_l |= hex(*buf++) << 24; ++ tmp_l |= hex(*buf++) << 20; ++ tmp_l |= hex(*buf++) << 16; ++ tmp_l |= hex(*buf++) << 12; ++ tmp_l |= hex(*buf++) << 8; ++ tmp_l |= hex(*buf++) << 4; ++ tmp_l |= hex(*buf++); ++#else ++ tmp_l |= hex(*buf++) << 4; ++ tmp_l |= hex(*buf++); ++ tmp_l |= hex(*buf++) << 12; ++ tmp_l |= hex(*buf++) << 8; ++ tmp_l |= hex(*buf++) << 20; ++ tmp_l |= hex(*buf++) << 16; ++ tmp_l |= hex(*buf++) << 28; ++ tmp_l |= hex(*buf++) << 24; ++#endif ++ *(unsigned long *)mem = tmp_l; ++ mem += 4; ++ } else { ++ int i; ++ for (i = 0; i < count; i++) { ++ unsigned char ch = hex(*buf++) << 4; ++ ch |= hex(*buf++); ++ *mem++ = ch; ++ } ++ } ++ kgdb_unset_may_fault(); ++ return (mem); ++} ++ ++/* ++ * While we find nice hex chars, build a long_val. ++ * Return number of chars processed. ++ */ ++int kgdb_hex2long(char **ptr, long *long_val) ++{ ++ int hex_val, num = 0; ++ ++ *long_val = 0; ++ ++ while (**ptr) { ++ hex_val = hex(**ptr); ++ if (hex_val >= 0) { ++ *long_val = (*long_val << 4) | hex_val; ++ num++; ++ } else ++ break; ++ ++ (*ptr)++; ++ } ++ ++ return (num); ++} ++ ++/* Write memory due to an 'M' or 'X' packet. 
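++ * For example, M c000b000,2:beef writes the bytes 0xbe 0xef at address
++ * 0xc000b000. An 'X' packet has the same addr,length: header but carries
++ * raw binary data with '$', '#' and 0x7d escaped by 0x7d (see
++ * kgdb_ebin2mem() above).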
*/ ++static char *write_mem_msg(int binary) ++{ ++ char *ptr = &remcom_in_buffer[1]; ++ unsigned long addr, length; ++ ++ if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' && ++ kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') { ++ if (binary) ++ ptr = kgdb_ebin2mem(ptr, (char *)addr, length); ++ else ++ ptr = kgdb_hex2mem(ptr, (char *)addr, length); ++ if (CACHE_FLUSH_IS_SAFE) ++ flush_icache_range(addr, addr + length + 1); ++ if (IS_ERR(ptr)) ++ return ptr; ++ return NULL; ++ } ++ ++ return ERR_PTR(-EINVAL); ++} ++ ++static inline char *pack_hex_byte(char *pkt, int byte) ++{ ++ *pkt++ = hexchars[(byte >> 4) & 0xf]; ++ *pkt++ = hexchars[(byte & 0xf)]; ++ return pkt; ++} ++ ++static inline void error_packet(char *pkt, int error) ++{ ++ error = -error; ++ pkt[0] = 'E'; ++ pkt[1] = hexchars[(error / 10)]; ++ pkt[2] = hexchars[(error % 10)]; ++ pkt[3] = '\0'; ++} ++ ++static char *pack_threadid(char *pkt, threadref * id) ++{ ++ char *limit; ++ unsigned char *altid; ++ ++ altid = (unsigned char *)id; ++ limit = pkt + BUF_THREAD_ID_SIZE; ++ while (pkt < limit) ++ pkt = pack_hex_byte(pkt, *altid++); ++ ++ return pkt; ++} ++ ++void int_to_threadref(threadref * id, int value) ++{ ++ unsigned char *scan; ++ int i = 4; ++ ++ scan = (unsigned char *)id; ++ while (i--) ++ *scan++ = 0; ++ *scan++ = (value >> 24) & 0xff; ++ *scan++ = (value >> 16) & 0xff; ++ *scan++ = (value >> 8) & 0xff; ++ *scan++ = (value & 0xff); ++} ++ ++static struct task_struct *getthread(struct pt_regs *regs, int tid) ++{ ++ if (init_pid_ns.last_pid == 0) ++ return current; ++ ++ if (num_online_cpus() && ++ (tid >= pid_max + num_online_cpus() + kgdb_ops->shadowth)) ++ return NULL; ++ ++ if (kgdb_ops->shadowth && (tid >= pid_max + num_online_cpus())) ++ return kgdb_get_shadow_thread(regs, tid - pid_max - ++ num_online_cpus()); ++ ++ if (tid >= pid_max) ++ return idle_task(tid - pid_max); ++ ++ if (!tid) ++ return NULL; ++ ++ return find_task_by_pid(tid); ++} ++ ++#ifdef CONFIG_SMP ++static void kgdb_wait(struct pt_regs *regs) ++{ ++ unsigned long flags; ++ int processor; ++ ++ local_irq_save(flags); ++ processor = raw_smp_processor_id(); ++ kgdb_info[processor].debuggerinfo = regs; ++ kgdb_info[processor].task = current; ++ atomic_set(&procindebug[processor], 1); ++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1); ++ ++ /* Wait till master processor goes completely into the debugger. 
++ * FIXME: this looks racy */ ++ while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])) { ++ int i = 10; /* an arbitrary number */ ++ ++ while (--i) ++ cpu_relax(); ++ } ++ ++ /* Wait till master processor is done with debugging */ ++ spin_lock(&slavecpulocks[processor]); ++ ++ kgdb_info[processor].debuggerinfo = NULL; ++ kgdb_info[processor].task = NULL; ++ ++ /* fix up hardware debug registers on local cpu */ ++ if (kgdb_ops->correct_hw_break) ++ kgdb_ops->correct_hw_break(); ++ /* Signal the master processor that we are done */ ++ atomic_set(&procindebug[processor], 0); ++ spin_unlock(&slavecpulocks[processor]); ++ local_irq_restore(flags); ++} ++#endif ++ ++int kgdb_get_mem(char *addr, unsigned char *buf, int count) ++{ ++ kgdb_set_may_fault(); ++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { ++ kgdb_unset_may_fault(); ++ return -EINVAL; ++ } ++ while (count) { ++ if ((unsigned long)addr < TASK_SIZE) { ++ kgdb_unset_may_fault(); ++ return -EINVAL; ++ } ++ *buf++ = *addr++; ++ count--; ++ } ++ kgdb_unset_may_fault(); ++ return 0; ++} ++ ++int kgdb_set_mem(char *addr, unsigned char *buf, int count) ++{ ++ kgdb_set_may_fault(); ++ if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) { ++ kgdb_unset_may_fault(); ++ return -EINVAL; ++ } ++ while (count) { ++ if ((unsigned long)addr < TASK_SIZE) { ++ kgdb_unset_may_fault(); ++ return -EINVAL; ++ } ++ *addr++ = *buf++; ++ count--; ++ } ++ kgdb_unset_may_fault(); ++ return 0; ++} ++int kgdb_activate_sw_breakpoints(void) ++{ ++ int i; ++ int error = 0; ++ unsigned long addr; ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if (kgdb_break[i].state != bp_set) ++ continue; ++ addr = kgdb_break[i].bpt_addr; ++ if ((error = kgdb_arch_set_breakpoint(addr, ++ kgdb_break[i].saved_instr))) ++ return error; ++ ++ if (CACHE_FLUSH_IS_SAFE) { ++ if (current->mm && addr < TASK_SIZE) ++ flush_cache_range(current->mm->mmap_cache, ++ addr, addr + BREAK_INSTR_SIZE); ++ else ++ flush_icache_range(addr, addr + ++ BREAK_INSTR_SIZE); ++ } ++ ++ kgdb_break[i].state = bp_active; ++ } ++ return 0; ++} ++ ++static int kgdb_set_sw_break(unsigned long addr) ++{ ++ int i, breakno = -1; ++ int error = 0; ++ if ((error = kgdb_validate_break_address(addr)) < 0) ++ return error; ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if ((kgdb_break[i].state == bp_set) && ++ (kgdb_break[i].bpt_addr == addr)) ++ return -EEXIST; ++ } ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if (kgdb_break[i].state == bp_removed && ++ kgdb_break[i].bpt_addr == addr) { ++ breakno = i; ++ break; ++ } ++ } ++ ++ if (breakno == -1) { ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if (kgdb_break[i].state == bp_none) { ++ breakno = i; ++ break; ++ } ++ } ++ } ++ if (breakno == -1) ++ return -E2BIG; ++ ++ kgdb_break[breakno].state = bp_set; ++ kgdb_break[breakno].type = bp_breakpoint; ++ kgdb_break[breakno].bpt_addr = addr; ++ ++ return 0; ++} ++ ++int kgdb_deactivate_sw_breakpoints(void) ++{ ++ int i; ++ int error = 0; ++ unsigned long addr; ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if (kgdb_break[i].state != bp_active) ++ continue; ++ addr = kgdb_break[i].bpt_addr; ++ if ((error = kgdb_arch_remove_breakpoint(addr, ++ kgdb_break[i].saved_instr))) ++ return error; ++ ++ if (CACHE_FLUSH_IS_SAFE && current->mm && ++ addr < TASK_SIZE) ++ flush_cache_range(current->mm->mmap_cache, ++ addr, addr + BREAK_INSTR_SIZE); ++ else if (CACHE_FLUSH_IS_SAFE) ++ flush_icache_range(addr, ++ addr + BREAK_INSTR_SIZE); ++ kgdb_break[i].state = bp_set; ++ } ++ return 0; ++} ++ ++static int 
kgdb_remove_sw_break(unsigned long addr) ++{ ++ int i; ++ ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if ((kgdb_break[i].state == bp_set) && ++ (kgdb_break[i].bpt_addr == addr)) { ++ kgdb_break[i].state = bp_removed; ++ return 0; ++ } ++ } ++ return -ENOENT; ++} ++ ++int kgdb_isremovedbreak(unsigned long addr) ++{ ++ int i; ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if ((kgdb_break[i].state == bp_removed) && ++ (kgdb_break[i].bpt_addr == addr)) { ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++int remove_all_break(void) ++{ ++ int i; ++ int error; ++ unsigned long addr; ++ ++ /* Clear memory breakpoints. */ ++ for (i = 0; i < MAX_BREAKPOINTS; i++) { ++ if (kgdb_break[i].state != bp_set) ++ continue; ++ addr = kgdb_break[i].bpt_addr; ++ if ((error = kgdb_arch_remove_breakpoint(addr, ++ kgdb_break[i].saved_instr))) ++ return error; ++ kgdb_break[i].state = bp_removed; ++ } ++ ++ /* Clear hardware breakpoints. */ ++ if (kgdb_ops->remove_all_hw_break) ++ kgdb_ops->remove_all_hw_break(); ++ ++ return 0; ++} ++ ++static inline int shadow_pid(int realpid) ++{ ++ if (realpid) { ++ return realpid; ++ } ++ return pid_max + raw_smp_processor_id(); ++} ++ ++static char gdbmsgbuf[BUFMAX + 1]; ++static void kgdb_msg_write(const char *s, int len) ++{ ++ int i; ++ int wcount; ++ char *bufptr; ++ ++ /* 'O'utput */ ++ gdbmsgbuf[0] = 'O'; ++ ++ /* Fill and send buffers... */ ++ while (len > 0) { ++ bufptr = gdbmsgbuf + 1; ++ ++ /* Calculate how many this time */ ++ if ((len << 1) > (BUFMAX - 2)) ++ wcount = (BUFMAX - 2) >> 1; ++ else ++ wcount = len; ++ ++ /* Pack in hex chars */ ++ for (i = 0; i < wcount; i++) ++ bufptr = pack_hex_byte(bufptr, s[i]); ++ *bufptr = '\0'; ++ ++ /* Move up */ ++ s += wcount; ++ len -= wcount; ++ ++ /* Write packet */ ++ put_packet(gdbmsgbuf); ++ } ++} ++ ++/* ++ * This function does all command processing for interfacing to gdb. ++ * ++ * Locking hierarchy: ++ * interface locks, if any (begin_session) ++ * kgdb lock (debugger_active) ++ * ++ * Note that since we can be in here prior to our cpumask being filled ++ * out, we err on the side of caution and loop over NR_CPUS instead ++ * of a for_each_online_cpu. ++ * ++ */ ++int kgdb_handle_exception(int ex_vector, int signo, int err_code, ++ struct pt_regs *linux_regs) ++{ ++ unsigned long length, addr; ++ char *ptr; ++ unsigned long flags; ++ unsigned i; ++ long threadid; ++ threadref thref; ++ struct task_struct *thread = NULL; ++ unsigned procid; ++ int numshadowth = num_online_cpus() + kgdb_ops->shadowth; ++ long kgdb_usethreadid = 0; ++ int error = 0, all_cpus_synced = 0; ++ struct pt_regs *shadowregs; ++ int processor = raw_smp_processor_id(); ++ void *local_debuggerinfo; ++ ++ /* Panic on recursive debugger calls. */ ++ if (atomic_read(&debugger_active) == raw_smp_processor_id() + 1) { ++ exception_level++; ++ addr = kgdb_arch_pc(ex_vector, linux_regs); ++ kgdb_deactivate_sw_breakpoints(); ++ if (kgdb_remove_sw_break(addr) == 0) { ++ /* If the breakpoint was removed OK at the place the ++ * exception occurred, try to recover and print a warning ++ * to the end user because the user planted a breakpoint ++ * in a place that KGDB needs in order to function. 
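++ * One example would be a breakpoint planted on
++ * kgdb_handle_exception() itself, which would re-trigger on
++ * every entry to the debugger.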
++ */ ++ exception_level = 0; ++ kgdb_skipexception(ex_vector, linux_regs); ++ kgdb_activate_sw_breakpoints(); ++ printk(KERN_CRIT "KGDB: re-enter exception: breakpoint removed\n"); ++ WARN_ON(1); ++ return 0; ++ } ++ remove_all_break(); ++ kgdb_skipexception(ex_vector, linux_regs); ++ if (exception_level > 1) ++ panic("Recursive entry to debugger"); ++ ++ printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints removed\n"); ++ panic("Recursive entry to debugger"); ++ return 0; ++ } ++ ++ acquirelock: ++ ++ /* ++ * Interrupts will be restored by the 'trap return' code, except when ++ * single stepping. ++ */ ++ local_irq_save(flags); ++ ++ /* Hold debugger_active */ ++ procid = raw_smp_processor_id(); ++ ++ while (cmpxchg(&atomic_read(&debugger_active), 0, (procid + 1)) != 0) { ++ int i = 25; /* an arbitrary number */ ++ ++ while (--i) ++ cpu_relax(); ++ ++ if (atomic_read(&cpu_doing_single_step) != -1 && ++ atomic_read(&cpu_doing_single_step) != procid) ++ udelay(1); ++ } ++ ++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1); ++ ++ /* ++ * Don't enter if the last instance of the exception handler wanted to ++ * come into the debugger again. ++ */ ++ if (atomic_read(&cpu_doing_single_step) != -1 && ++ atomic_read(&cpu_doing_single_step) != procid) { ++ atomic_set(&debugger_active, 0); ++ local_irq_restore(flags); ++ goto acquirelock; ++ } ++ ++ /* ++ * Don't enter if we have hit a removed breakpoint. ++ */ ++ if (kgdb_skipexception(ex_vector, linux_regs)) ++ goto kgdb_restore; ++ ++ /* ++ * Call the I/O drivers pre_exception routine ++ * if the I/O driver defined one ++ */ ++ if (kgdb_io_ops.pre_exception) ++ kgdb_io_ops.pre_exception(); ++ ++ kgdb_info[processor].debuggerinfo = linux_regs; ++ kgdb_info[processor].task = current; ++ ++ kgdb_disable_hw_debug(linux_regs); ++ ++ if (!debugger_step || !kgdb_contthread) ++ for (i = 0; i < NR_CPUS; i++) ++ spin_lock(&slavecpulocks[i]); ++ ++#ifdef CONFIG_SMP ++ /* Make sure we get the other CPUs */ ++ if (!debugger_step || !kgdb_contthread) ++ kgdb_roundup_cpus(flags); ++#endif ++ ++ /* spin_lock code is good enough as a barrier so we don't ++ * need one here */ ++ atomic_set(&procindebug[processor], 1); ++ ++ /* Wait a reasonable time for the other CPUs to be notified and ++ * be waiting for us. Very early on this could be imperfect ++ * as num_online_cpus() could be 0.*/ ++ for (i = 0; i < ROUNDUP_WAIT; i++) { ++ int cpu, num = 0; ++ for (cpu = 0; cpu < NR_CPUS; cpu++) { ++ if (atomic_read(&procindebug[cpu])) ++ num++; ++ } ++ if (num >= num_online_cpus()) { ++ all_cpus_synced = 1; ++ break; ++ } ++ } ++ ++ /* Clear the out buffer. */ ++ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); ++ ++ /* Master processor is completely in the debugger */ ++ kgdb_post_master_code(linux_regs, ex_vector, err_code); ++ kgdb_deactivate_sw_breakpoints(); ++ debugger_step = 0; ++ kgdb_contthread = NULL; ++ exception_level = 0; ++ ++ if (kgdb_connected) { ++ /* If we're still unable to roundup all of the CPUs, ++ * send an 'O' packet informing the user again. 
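++ * ('O' packets are kgdb's console-output channel: an 'O' followed
++ * by the message text hex-encoded two digits per byte, as built by
++ * kgdb_msg_write() above.)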
*/ ++ if (!all_cpus_synced) ++ kgdb_msg_write("Not all CPUs have been synced for " ++ "KGDB\n", 39); ++ /* Reply to host that an exception has occurred */ ++ ptr = remcom_out_buffer; ++ *ptr++ = 'T'; ++ *ptr++ = hexchars[(signo >> 4) % 16]; ++ *ptr++ = hexchars[signo % 16]; ++ ptr += strlen(strcpy(ptr, "thread:")); ++ int_to_threadref(&thref, shadow_pid(current->pid)); ++ ptr = pack_threadid(ptr, &thref); ++ *ptr++ = ';'; ++ ++ put_packet(remcom_out_buffer); ++ } ++ ++ kgdb_usethread = kgdb_info[processor].task; ++ kgdb_usethreadid = shadow_pid(kgdb_info[processor].task->pid); ++ ++ while (kgdb_io_ops.read_char) { ++ char *bpt_type; ++ error = 0; ++ ++ /* Clear the out buffer. */ ++ memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer)); ++ ++ get_packet(remcom_in_buffer); ++ ++ switch (remcom_in_buffer[0]) { ++ case '?': ++ /* We know that this packet is only sent ++ * during initial connect. So to be safe, ++ * we clear out our breakpoints now in case ++ * GDB is reconnecting. */ ++ remove_all_break(); ++ /* Also, if we haven't been able to round up all ++ * CPUs, send an 'O' packet informing the user ++ * as much. Only need to do this once. */ ++ if (!all_cpus_synced) ++ kgdb_msg_write("Not all CPUs have been " ++ "synced for KGDB\n", 39); ++ remcom_out_buffer[0] = 'S'; ++ remcom_out_buffer[1] = hexchars[signo >> 4]; ++ remcom_out_buffer[2] = hexchars[signo % 16]; ++ break; ++ ++ case 'g': /* return the value of the CPU registers */ ++ thread = kgdb_usethread; ++ ++ if (!thread) { ++ thread = kgdb_info[processor].task; ++ local_debuggerinfo = ++ kgdb_info[processor].debuggerinfo; ++ } else { ++ local_debuggerinfo = NULL; ++ for (i = 0; i < NR_CPUS; i++) { ++ /* Try to find the task on some other ++ * or possibly this node; if we do not ++ * find the matching task then we try ++ * to approximate the results. ++ */ ++ if (thread == kgdb_info[i].task) ++ local_debuggerinfo = ++ kgdb_info[i].debuggerinfo; ++ } ++ } ++ ++ /* All threads that don't have debuggerinfo should be ++ * in __schedule() sleeping, since all other CPUs ++ * are in kgdb_wait, and thus have debuggerinfo. */ ++ if (kgdb_ops->shadowth && ++ kgdb_usethreadid >= pid_max + num_online_cpus()) { ++ shadowregs = kgdb_shadow_regs(linux_regs, ++ kgdb_usethreadid - ++ pid_max - ++ num_online_cpus ++ ()); ++ if (!shadowregs) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ regs_to_gdb_regs(gdb_regs, shadowregs); ++ } else if (local_debuggerinfo) ++ regs_to_gdb_regs(gdb_regs, local_debuggerinfo); ++ else { ++ /* Pull stuff saved during ++ * switch_to; nothing else is ++ * accessible (or even particularly relevant). ++ * This should be enough for a stack trace. 
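++ * Only the callee-saved registers survive switch_to(), and
++ * those are exactly what unwinding the stack of a sleeping
++ * task requires.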
*/ ++ sleeping_thread_to_gdb_regs(gdb_regs, thread); ++ } ++ kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, ++ NUMREGBYTES); ++ break; ++ ++ /* set the value of the CPU registers - return OK */ ++ case 'G': ++ kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, ++ NUMREGBYTES); ++ ++ if (kgdb_usethread && kgdb_usethread != current) ++ error_packet(remcom_out_buffer, -EINVAL); ++ else { ++ gdb_regs_to_regs(gdb_regs, linux_regs); ++ strcpy(remcom_out_buffer, "OK"); ++ } ++ break; ++ ++ /* mAA..AA,LLLL Read LLLL bytes at address AA..AA */ ++ case 'm': ++ ptr = &remcom_in_buffer[1]; ++ if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' && ++ kgdb_hex2long(&ptr, &length) > 0) { ++ if (IS_ERR(ptr = kgdb_mem2hex((char *)addr, ++ remcom_out_buffer, ++ length))) ++ error_packet(remcom_out_buffer, ++ PTR_ERR(ptr)); ++ } else ++ error_packet(remcom_out_buffer, -EINVAL); ++ break; ++ ++ /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */ ++ case 'M': ++ if (IS_ERR(ptr = write_mem_msg(0))) ++ error_packet(remcom_out_buffer, PTR_ERR(ptr)); ++ else ++ strcpy(remcom_out_buffer, "OK"); ++ break; ++ /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */ ++ case 'X': ++ if (IS_ERR(ptr = write_mem_msg(1))) ++ error_packet(remcom_out_buffer, PTR_ERR(ptr)); ++ else ++ strcpy(remcom_out_buffer, "OK"); ++ break; ++ ++ /* kill or detach. KGDB should treat this like a ++ * continue. ++ */ ++ case 'D': ++ if ((error = remove_all_break()) < 0) { ++ error_packet(remcom_out_buffer, error); ++ } else { ++ strcpy(remcom_out_buffer, "OK"); ++ kgdb_connected = 0; ++ } ++ put_packet(remcom_out_buffer); ++ goto default_handle; ++ ++ case 'k': ++ /* Don't care about error from remove_all_break */ ++ remove_all_break(); ++ kgdb_connected = 0; ++ goto default_handle; ++ ++ /* Reboot */ ++ case 'R': ++ /* For now, only honor R0 */ ++ if (strcmp(remcom_in_buffer, "R0") == 0) { ++ printk(KERN_CRIT "Executing reboot\n"); ++ strcpy(remcom_out_buffer, "OK"); ++ put_packet(remcom_out_buffer); ++ emergency_sync(); ++ /* Execution should not return from ++ * machine_restart() ++ */ ++ machine_restart(NULL); ++ kgdb_connected = 0; ++ goto default_handle; ++ } ++ ++ /* query */ ++ case 'q': ++ switch (remcom_in_buffer[1]) { ++ case 's': ++ case 'f': ++ if (memcmp(remcom_in_buffer + 2, "ThreadInfo", ++ 10)) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ ++ /* ++ * If we have not yet completed in ++ * pidhash_init() there isn't much we ++ * can give back. 
++ */ ++ if (init_pid_ns.last_pid == 0) { ++ if (remcom_in_buffer[1] == 'f') ++ strcpy(remcom_out_buffer, ++ "m0000000000000001"); ++ break; ++ } ++ ++ if (remcom_in_buffer[1] == 'f') { ++ threadid = 1; ++ } ++ remcom_out_buffer[0] = 'm'; ++ ptr = remcom_out_buffer + 1; ++ for (i = 0; i < 17 && threadid < pid_max + ++ numshadowth; threadid++) { ++ thread = getthread(linux_regs, ++ threadid); ++ if (thread) { ++ int_to_threadref(&thref, ++ threadid); ++ pack_threadid(ptr, &thref); ++ ptr += 16; ++ *(ptr++) = ','; ++ i++; ++ } ++ } ++ *(--ptr) = '\0'; ++ break; ++ ++ case 'C': ++ /* Current thread id */ ++ strcpy(remcom_out_buffer, "QC"); ++ ++ threadid = shadow_pid(current->pid); ++ ++ int_to_threadref(&thref, threadid); ++ pack_threadid(remcom_out_buffer + 2, &thref); ++ break; ++ case 'T': ++ if (memcmp(remcom_in_buffer + 1, ++ "ThreadExtraInfo,", 16)) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ threadid = 0; ++ ptr = remcom_in_buffer + 17; ++ kgdb_hex2long(&ptr, &threadid); ++ if (!getthread(linux_regs, threadid)) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ if (threadid < pid_max) { ++ kgdb_mem2hex(getthread(linux_regs, ++ threadid)->comm, ++ remcom_out_buffer, 16); ++ } else if (threadid >= pid_max + ++ num_online_cpus()) { ++ kgdb_shadowinfo(linux_regs, ++ remcom_out_buffer, ++ threadid - pid_max - ++ num_online_cpus()); ++ } else { ++ static char tmpstr[23 + ++ BUF_THREAD_ID_SIZE]; ++ sprintf(tmpstr, "Shadow task %d" ++ " for pid 0", ++ (int)(threadid - pid_max)); ++ kgdb_mem2hex(tmpstr, remcom_out_buffer, ++ strlen(tmpstr)); ++ } ++ break; ++ } ++ break; ++ ++ /* task related */ ++ case 'H': ++ switch (remcom_in_buffer[1]) { ++ case 'g': ++ ptr = &remcom_in_buffer[2]; ++ kgdb_hex2long(&ptr, &threadid); ++ thread = getthread(linux_regs, threadid); ++ if (!thread && threadid > 0) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ kgdb_usethread = thread; ++ kgdb_usethreadid = threadid; ++ strcpy(remcom_out_buffer, "OK"); ++ break; ++ ++ case 'c': ++ ptr = &remcom_in_buffer[2]; ++ kgdb_hex2long(&ptr, &threadid); ++ if (!threadid) { ++ kgdb_contthread = NULL; ++ } else { ++ thread = getthread(linux_regs, ++ threadid); ++ if (!thread && threadid > 0) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ kgdb_contthread = thread; ++ } ++ strcpy(remcom_out_buffer, "OK"); ++ break; ++ } ++ break; ++ ++ /* Query thread status */ ++ case 'T': ++ ptr = &remcom_in_buffer[1]; ++ kgdb_hex2long(&ptr, &threadid); ++ thread = getthread(linux_regs, threadid); ++ if (thread) ++ strcpy(remcom_out_buffer, "OK"); ++ else ++ error_packet(remcom_out_buffer, -EINVAL); ++ break; ++ /* Since GDB-5.3, it's been drafted that '0' is a software ++ * breakpoint, '1' is a hardware breakpoint, so let's do ++ * that. ++ */ ++ case 'z': ++ case 'Z': ++ bpt_type = &remcom_in_buffer[1]; ++ ptr = &remcom_in_buffer[2]; ++ ++ if (kgdb_ops->set_hw_breakpoint && *bpt_type >= '1') { ++ /* Unsupported */ ++ if (*bpt_type > '4') ++ break; ++ } else if (*bpt_type != '0' && *bpt_type != '1') ++ /* Unsupported. */ ++ break; ++ /* Test if this is a hardware breakpoint, and ++ * if we support it. */ ++ if (*bpt_type == '1' && ++ !(kgdb_ops->flags & KGDB_HW_BREAKPOINT)) ++ /* Unsupported. 
*/ ++ break; ++ ++ if (*(ptr++) != ',') { ++ error_packet(remcom_out_buffer, -EINVAL); ++ break; ++ } else if (kgdb_hex2long(&ptr, &addr)) { ++ if (*(ptr++) != ',' || ++ !kgdb_hex2long(&ptr, &length)) { ++ error_packet(remcom_out_buffer, ++ -EINVAL); ++ break; ++ } ++ } else { ++ error_packet(remcom_out_buffer, -EINVAL); ++ break; ++ } ++ ++ if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0') ++ error = kgdb_set_sw_break(addr); ++ else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0') ++ error = kgdb_remove_sw_break(addr); ++ else if (remcom_in_buffer[0] == 'Z') ++ error = kgdb_ops->set_hw_breakpoint(addr, ++ (int)length, ++ *bpt_type); ++ else if (remcom_in_buffer[0] == 'z') ++ error = kgdb_ops->remove_hw_breakpoint(addr, ++ (int) ++ length, ++ *bpt_type); ++ ++ if (error == 0) ++ strcpy(remcom_out_buffer, "OK"); ++ else ++ error_packet(remcom_out_buffer, error); ++ ++ break; ++ case 'c': ++ case 's': ++ if (kgdb_contthread && kgdb_contthread != current) { ++ /* Can't switch threads in kgdb */ ++ error_packet(remcom_out_buffer, -EINVAL); ++ break; ++ } ++ kgdb_activate_sw_breakpoints(); ++ /* Fall through to default processing */ ++ default: ++ default_handle: ++ error = kgdb_arch_handle_exception(ex_vector, signo, ++ err_code, ++ remcom_in_buffer, ++ remcom_out_buffer, ++ linux_regs); ++ ++ if (error >= 0 || remcom_in_buffer[0] == 'D' || ++ remcom_in_buffer[0] == 'k') ++ goto kgdb_exit; ++ ++ } /* switch */ ++ ++ /* reply to the request */ ++ put_packet(remcom_out_buffer); ++ } ++ ++ kgdb_exit: ++ /* ++ * Call the I/O driver's post_exception routine ++ * if the I/O driver defined one. ++ */ ++ if (kgdb_io_ops.post_exception) ++ kgdb_io_ops.post_exception(); ++ ++ kgdb_info[processor].debuggerinfo = NULL; ++ kgdb_info[processor].task = NULL; ++ atomic_set(&procindebug[processor], 0); ++ ++ if (!debugger_step || !kgdb_contthread) { ++ for (i = 0; i < NR_CPUS; i++) ++ spin_unlock(&slavecpulocks[i]); ++ /* Wait till all the processors have quit ++ * from the debugger. */ ++ for (i = 0; i < NR_CPUS; i++) { ++ while (atomic_read(&procindebug[i])) { ++ int j = 10; /* an arbitrary number */ ++ ++ while (--j) ++ cpu_relax(); ++ } ++ } ++ } ++ ++#ifdef CONFIG_SMP ++ /* This delay has a real purpose. The problem is that if you ++ * are single-stepping, you are sending an NMI to all the ++ * other processors to stop them. Interrupts come in, but ++ * don't get handled. Then you let them go just long enough ++ * to get into their interrupt routines and use up some stack. ++ * You stop them again, and then do the same thing. After a ++ * while you blow the stack on the other processors. This ++ * delay gives some time for interrupts to be cleared out on ++ * the other processors. ++ */ ++ if (debugger_step) ++ mdelay(2); ++#endif ++ kgdb_restore: ++ /* Free debugger_active */ ++ atomic_set(&debugger_active, 0); ++ local_irq_restore(flags); ++ ++ return error; ++} ++ ++/* ++ * GDB places a breakpoint at this function to learn of dynamically ++ * loaded objects. It's not defined static so that only one instance with this ++ * name exists in the kernel. ++ */ ++ ++int module_event(struct notifier_block *self, unsigned long val, void *data) ++{ ++ return 0; ++} ++ ++static struct notifier_block kgdb_module_load_nb = { ++ .notifier_call = module_event, ++}; ++ ++void kgdb_nmihook(int cpu, void *regs) ++{ ++#ifdef CONFIG_SMP ++ if (!atomic_read(&procindebug[cpu]) && atomic_read(&debugger_active) != (cpu + 1)) ++ kgdb_wait((struct pt_regs *)regs); ++#endif ++} ++ ++/* ++ * This is called when a panic happens.
All we need to do is ++ * breakpoint(). ++ */ ++static int kgdb_panic_notify(struct notifier_block *self, unsigned long cmd, ++ void *ptr) ++{ ++ breakpoint(); ++ ++ return 0; ++} ++ ++static struct notifier_block kgdb_panic_notifier = { ++ .notifier_call = kgdb_panic_notify, ++}; ++ ++/* ++ * Initialization that needs to be done in either of our entry points. ++ */ ++static void __init kgdb_internal_init(void) ++{ ++ int i; ++ ++ /* Initialize our spinlocks. */ ++ for (i = 0; i < NR_CPUS; i++) ++ spin_lock_init(&slavecpulocks[i]); ++ ++ for (i = 0; i < MAX_BREAKPOINTS; i++) ++ kgdb_break[i].state = bp_none; ++ ++ /* Initialize the I/O handles */ ++ memset(&kgdb_io_ops_prev, 0, sizeof(kgdb_io_ops_prev)); ++ ++ /* We can't do much if this fails */ ++ register_module_notifier(&kgdb_module_load_nb); ++ ++ kgdb_initialized = 1; ++} ++ ++static void kgdb_register_for_panic(void) ++{ ++ /* Register for panics. */ ++ /* The registration is done in the kgdb_register_for_panic ++ * routine because KGDB should not try to handle a panic when ++ * there are no kgdb_io_ops set up. It is assumed that the ++ * kgdb_io_ops are set up at the time this method is called. ++ */ ++ if (!kgdb_from_module_registered) { ++ atomic_notifier_chain_register(&panic_notifier_list, ++ &kgdb_panic_notifier); ++ kgdb_from_module_registered = 1; ++ } ++} ++ ++static void kgdb_unregister_for_panic(void) ++{ ++ /* When this routine is called KGDB should unregister from the ++ * panic handler and clean up, making sure it is not handling any ++ * break exceptions at the time. ++ */ ++ if (kgdb_from_module_registered) { ++ kgdb_from_module_registered = 0; ++ atomic_notifier_chain_unregister(&panic_notifier_list, ++ &kgdb_panic_notifier); ++ } ++} ++ ++int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops) ++{ ++ ++ if (kgdb_connected) { ++ printk(KERN_ERR "kgdb: Cannot load I/O module while KGDB " ++ "connected.\n"); ++ return -EINVAL; ++ } ++ ++ /* Save the old values so they can be restored */ ++ if (kgdb_io_handler_cnt >= MAX_KGDB_IO_HANDLERS) { ++ printk(KERN_ERR "kgdb: No more I/O handles available.\n"); ++ return -EINVAL; ++ } ++ ++ /* Check to see if there is an existing driver and if so save its ++ * values. Also check to make sure the same driver was not trying ++ * to re-register. ++ */ ++ if (kgdb_io_ops.read_char != NULL && ++ kgdb_io_ops.read_char != local_kgdb_io_ops->read_char) { ++ memcpy(&kgdb_io_ops_prev[kgdb_io_handler_cnt], ++ &kgdb_io_ops, sizeof(struct kgdb_io)); ++ kgdb_io_handler_cnt++; ++ } ++ ++ /* Initialize the io values for this module */ ++ memcpy(&kgdb_io_ops, local_kgdb_io_ops, sizeof(struct kgdb_io)); ++ ++ /* Make the call to register kgdb if it is not initialized */ ++ kgdb_register_for_panic(); ++ ++ return 0; ++} ++ ++void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops) ++{ ++ int i; ++ ++ /* Unregister KGDB if there were no other prior io hooks, else ++ * restore the io hooks.
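A driver-side sketch of the registration that this stacking supports; read_char and init appear above, while the write_char slot and the polled UART helpers are assumptions made for illustration:

/* Hypothetical polled-UART hooks; only the registration call is real. */
extern int my_uart_poll_get(void);
extern void my_uart_poll_put(u8 c);
extern int my_uart_poll_init(void);

static struct kgdb_io my_kgdb_io = {
        .read_char      = my_uart_poll_get,
        .write_char     = my_uart_poll_put,     /* assumed field name */
        .init           = my_uart_poll_init,
};

static int __init my_debug_io_setup(void)
{
        /* Any previously installed ops are saved for restore, as above. */
        return kgdb_register_io_module(&my_kgdb_io);
}
module_init(my_debug_io_setup);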
++ */ ++ if (kgdb_io_handler_cnt > 0 && kgdb_io_ops_prev[0].read_char != NULL) { ++ /* First check if the hook that is in use is the one being ++ * removed */ ++ if (kgdb_io_ops.read_char == local_kgdb_io_ops->read_char) { ++ /* Set 'i' to the value of where the list should be ++ * shifted */ ++ i = kgdb_io_handler_cnt - 1; ++ memcpy(&kgdb_io_ops, &kgdb_io_ops_prev[i], ++ sizeof(struct kgdb_io)); ++ } else { ++ /* Simple case to remove an entry for an I/O handler ++ * that is not in use */ ++ for (i = 0; i < kgdb_io_handler_cnt; i++) { ++ if (kgdb_io_ops_prev[i].read_char == ++ local_kgdb_io_ops->read_char) ++ break; ++ } ++ } ++ ++ /* Shift all the entries in the handler array so it is ++ * ordered from oldest to newest. ++ */ ++ kgdb_io_handler_cnt--; ++ for (; i < kgdb_io_handler_cnt; i++) { ++ memcpy(&kgdb_io_ops_prev[i], &kgdb_io_ops_prev[i + 1], ++ sizeof(struct kgdb_io)); ++ } ++ /* Handle the case where we are on the last element and set it ++ * to NULL. */ ++ memset(&kgdb_io_ops_prev[kgdb_io_handler_cnt], 0, ++ sizeof(struct kgdb_io)); ++ ++ if (kgdb_connected) ++ printk(KERN_ERR "kgdb: WARNING: I/O method changed " ++ "while kgdb was connected.\n"); ++ } else { ++ /* KGDB is no longer able to communicate out, so ++ * unregister our hooks and reset state. */ ++ kgdb_unregister_for_panic(); ++ if (kgdb_connected) { ++ printk(KERN_CRIT "kgdb: I/O module was unloaded while " ++ "a debugging session was running. " ++ "KGDB will be reset.\n"); ++ if (remove_all_break() < 0) ++ printk(KERN_CRIT "kgdb: Reset failed.\n"); ++ kgdb_connected = 0; ++ } ++ memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io)); ++ } ++} ++ ++/* ++ * There are times we need to call a tasklet to cause a breakpoint ++ * as calling breakpoint() at that point might be fatal. We have to ++ * check that the exception stack is setup, as tasklets may be scheduled ++ * prior to this. When that happens, it is up to the architecture to ++ * schedule this when it is safe to run. ++ */ ++static void kgdb_tasklet_bpt(unsigned long ing) ++{ ++ if (CHECK_EXCEPTION_STACK()) ++ breakpoint(); ++} ++ ++DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0); ++ ++/* ++ * This function can be called very early, either via early_param() or ++ * an explicit breakpoint() early on. ++ */ ++static void __init kgdb_early_entry(void) ++{ ++ /* Let the architecture do any setup that it needs to. */ ++ kgdb_arch_init(); ++ ++ /* ++ * Don't try and do anything until the architecture is able to ++ * setup the exception stack. In this case, it is up to the ++ * architecture to hook in and look at us when they are ready. ++ */ ++ ++ if (!CHECK_EXCEPTION_STACK()) { ++ kgdb_initialized = -1; ++ /* any kind of break point is deferred to late_init */ ++ return; ++ } ++ ++ /* Now try the I/O. */ ++ /* For early entry kgdb_io_ops.init must be defined */ ++ if (!kgdb_io_ops.init || kgdb_io_ops.init()) { ++ /* Try again later. */ ++ kgdb_initialized = -1; ++ return; ++ } ++ ++ /* Finish up. */ ++ kgdb_internal_init(); ++ ++ /* KGDB can assume that if kgdb_io_ops.init was defined that the ++ * panic registration should be performed at this time. This means ++ * kgdb_io_ops.init did not come from a kernel module and was ++ * initialized statically by a built-in. ++ */ ++ if (kgdb_io_ops.init) ++ kgdb_register_for_panic(); ++} ++ ++/* ++ * This function will always be invoked to make sure that KGDB will grab ++ * what it needs to so that if something happens while the system is ++ * running, KGDB will get involved.
If kgdb_early_entry() has already ++ * been invoked, there is little we need to do. ++ */ ++static int __init kgdb_late_entry(void) ++{ ++ int need_break = 0; ++ ++ /* If kgdb_initialized is -1 then we were passed kgdbwait. */ ++ if (kgdb_initialized == -1) ++ need_break = 1; ++ ++ /* ++ * If we haven't tried to initialize KGDB yet, we need to call ++ * kgdb_arch_init before moving onto the I/O. ++ */ ++ if (!kgdb_initialized) ++ kgdb_arch_init(); ++ ++ if (kgdb_initialized != 1) { ++ if (kgdb_io_ops.init && kgdb_io_ops.init()) { ++ /* When KGDB allows I/O via modules and the core ++ * I/O init fails, KGDB must default to deferring the ++ * I/O setup, and appropriately print an error about ++ * it. ++ */ ++ printk(KERN_ERR "kgdb: Could not setup core I/O " ++ "for KGDB.\n"); ++ printk(KERN_INFO "kgdb: Deferring I/O setup to kernel " ++ "module.\n"); ++ memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io)); ++ } ++ ++ kgdb_internal_init(); ++ ++ /* KGDB can assume that if kgdb_io_ops.init was defined that ++ * panic registration should be performed at this time. This means ++ * kgdb_io_ops.init did not come from a kernel module and was ++ * initialized statically by a built-in. ++ */ ++ if (kgdb_io_ops.init) ++ kgdb_register_for_panic(); ++ } ++ ++ /* Register with the reboot notifier list */ ++ register_reboot_notifier(&kgdb_reboot_notifier); ++ ++ /* Now do any late init of the I/O. */ ++ if (kgdb_io_ops.late_init) ++ kgdb_io_ops.late_init(); ++ ++ if (need_break) { ++ printk(KERN_CRIT "kgdb: Waiting for connection from remote" ++ " gdb...\n"); ++ breakpoint(); ++ } ++ ++ return 0; ++} ++ ++late_initcall(kgdb_late_entry); ++ ++/* ++ * This function will generate a breakpoint exception. It is used at the ++ * beginning of a program to sync up with a debugger and can be used ++ * otherwise as a quick means to stop program execution and "break" into ++ * the debugger. ++ */ ++void breakpoint(void) ++{ ++ atomic_set(&kgdb_setting_breakpoint, 1); ++ wmb(); ++ BREAKPOINT(); ++ wmb(); ++ atomic_set(&kgdb_setting_breakpoint, 0); ++} ++ ++EXPORT_SYMBOL(breakpoint); ++ ++#ifdef CONFIG_MAGIC_SYSRQ ++static void sysrq_handle_gdb(int key, struct tty_struct *tty) ++{ ++ printk("Entering GDB stub\n"); ++ breakpoint(); ++} ++static struct sysrq_key_op sysrq_gdb_op = { ++ .handler = sysrq_handle_gdb, ++ .help_msg = "Gdb", ++ .action_msg = "GDB", ++}; ++ ++static int gdb_register_sysrq(void) ++{ ++ printk("Registering GDB sysrq handler\n"); ++ register_sysrq_key('g', &sysrq_gdb_op); ++ return 0; ++} ++ ++module_init(gdb_register_sysrq); ++#endif ++ ++static int kgdb_notify_reboot(struct notifier_block *this, ++ unsigned long code, void *x) ++{ ++ ++ unsigned long flags; ++ ++ /* If we're debugging, or KGDB has not connected, don't try ++ * and print. */ ++ if (!kgdb_connected || atomic_read(&debugger_active) != 0) ++ return 0; ++ if ((code == SYS_RESTART) || (code == SYS_HALT) || (code == SYS_POWER_OFF)){ ++ local_irq_save(flags); ++ put_packet("X00"); ++ local_irq_restore(flags); ++ } ++ return NOTIFY_DONE; ++} ++ ++#ifdef CONFIG_KGDB_CONSOLE ++void kgdb_console_write(struct console *co, const char *s, unsigned count) ++{ ++ unsigned long flags; ++ ++ /* If we're debugging, or KGDB has not connected, don't try ++ * and print.
*/ ++ if (!kgdb_connected || atomic_read(&debugger_active) != 0) ++ return; ++ ++ local_irq_save(flags); ++ kgdb_msg_write(s, count); ++ local_irq_restore(flags); ++} ++ ++struct console kgdbcons = { ++ .name = "kgdb", ++ .write = kgdb_console_write, ++ .flags = CON_PRINTBUFFER | CON_ENABLED, ++}; ++static int __init kgdb_console_init(void) ++{ ++ register_console(&kgdbcons); ++ return 0; ++} ++ ++console_initcall(kgdb_console_init); ++#endif ++ ++static int __init opt_kgdb_enter(char *str) ++{ ++ /* We've already done this by an explicit breakpoint() call. */ ++ if (kgdb_initialized) ++ return 0; ++ ++ kgdb_early_entry(); ++ if (kgdb_initialized == 1) ++ printk(KERN_CRIT "Waiting for connection from remote " ++ "gdb...\n"); ++ else { ++ printk(KERN_CRIT "KGDB cannot initialize I/O yet.\n"); ++ return 0; ++ } ++ ++ breakpoint(); ++ ++ return 0; ++} ++ ++early_param("kgdbwait", opt_kgdb_enter); +diff -Nurb linux-2.6.22-570/kernel/kmod.c linux-2.6.22-591/kernel/kmod.c +--- linux-2.6.22-570/kernel/kmod.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/kmod.c 2007-12-21 15:36:12.000000000 -0500 +@@ -119,9 +119,10 @@ + char **argv; + char **envp; + struct key *ring; +- int wait; ++ enum umh_wait wait; + int retval; + struct file *stdin; ++ void (*cleanup)(char **argv, char **envp); + }; + + /* +@@ -180,6 +181,14 @@ + do_exit(0); + } + ++void call_usermodehelper_freeinfo(struct subprocess_info *info) ++{ ++ if (info->cleanup) ++ (*info->cleanup)(info->argv, info->envp); ++ kfree(info); ++} ++EXPORT_SYMBOL(call_usermodehelper_freeinfo); ++ + /* Keventd can't block, but this (a child) can. */ + static int wait_for_helper(void *data) + { +@@ -216,8 +225,8 @@ + sub_info->retval = ret; + } + +- if (sub_info->wait < 0) +- kfree(sub_info); ++ if (sub_info->wait == UMH_NO_WAIT) ++ call_usermodehelper_freeinfo(sub_info); + else + complete(sub_info->complete); + return 0; +@@ -229,101 +238,102 @@ + struct subprocess_info *sub_info = + container_of(work, struct subprocess_info, work); + pid_t pid; +- int wait = sub_info->wait; ++ enum umh_wait wait = sub_info->wait; + + /* CLONE_VFORK: wait until the usermode helper has execve'd + * successfully We need the data structures to stay around + * until that is done. */ +- if (wait) ++ if (wait == UMH_WAIT_PROC) + pid = kernel_thread(wait_for_helper, sub_info, + CLONE_FS | CLONE_FILES | SIGCHLD); + else + pid = kernel_thread(____call_usermodehelper, sub_info, + CLONE_VFORK | SIGCHLD); + +- if (wait < 0) +- return; +- +- if (pid < 0) { ++ switch(wait) { ++ case UMH_NO_WAIT: ++ break; ++ ++ case UMH_WAIT_PROC: ++ if (pid > 0) ++ break; + sub_info->retval = pid; ++ /* FALLTHROUGH */ ++ ++ case UMH_WAIT_EXEC: + complete(sub_info->complete); +- } else if (!wait) +- complete(sub_info->complete); ++ } + } + + /** +- * call_usermodehelper_keys - start a usermode application +- * @path: pathname for the application +- * @argv: null-terminated argument list +- * @envp: null-terminated environment list +- * @session_keyring: session keyring for process (NULL for an empty keyring) +- * @wait: wait for the application to finish and return status. +- * when -1 don't wait at all, but you get no useful error back when +- * the program couldn't be exec'ed. This makes it safe to call +- * from interrupt context. ++ * call_usermodehelper_setup - prepare to call a usermode helper ++ * @path - path to usermode executable ++ * @argv - arg vector for process ++ * @envp - environment for process + * +- * Runs a user-space application. 
The application is started +- asynchronously if wait is not set, and runs as a child of keventd. +- * (ie. it runs with full root capabilities). +- * +- * Must be called from process context. Returns a negative error code +- * if program was not execed successfully, or 0. ++ * Returns either NULL on allocation failure, or a subprocess_info ++ * structure. This should be passed to call_usermodehelper_exec to ++ * exec the process and free the structure. + */ +-int call_usermodehelper_keys(char *path, char **argv, char **envp, +- struct key *session_keyring, int wait) ++struct subprocess_info *call_usermodehelper_setup(char *path, ++ char **argv, char **envp) + { +- DECLARE_COMPLETION_ONSTACK(done); + struct subprocess_info *sub_info; +- int retval; +- +- if (!khelper_wq) +- return -EBUSY; +- +- if (path[0] == '\0') +- return 0; +- + sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); + if (!sub_info) +- return -ENOMEM; ++ goto out; + + INIT_WORK(&sub_info->work, __call_usermodehelper); +- sub_info->complete = &done; + sub_info->path = path; + sub_info->argv = argv; + sub_info->envp = envp; +- sub_info->ring = session_keyring; +- sub_info->wait = wait; + +- queue_work(khelper_wq, &sub_info->work); +- if (wait < 0) /* task has freed sub_info */ +- return 0; +- wait_for_completion(&done); +- retval = sub_info->retval; +- kfree(sub_info); +- return retval; ++ out: ++ return sub_info; + } +-EXPORT_SYMBOL(call_usermodehelper_keys); ++EXPORT_SYMBOL(call_usermodehelper_setup); + +-int call_usermodehelper_pipe(char *path, char **argv, char **envp, +- struct file **filp) ++/** ++ * call_usermodehelper_setkeys - set the session keys for usermode helper ++ * @info: a subprocess_info returned by call_usermodehelper_setup ++ * @session_keyring: the session keyring for the process ++ */ ++void call_usermodehelper_setkeys(struct subprocess_info *info, ++ struct key *session_keyring) + { +- DECLARE_COMPLETION(done); +- struct subprocess_info sub_info = { +- .work = __WORK_INITIALIZER(sub_info.work, +- __call_usermodehelper), +- .complete = &done, +- .path = path, +- .argv = argv, +- .envp = envp, +- .retval = 0, +- }; +- struct file *f; ++ info->ring = session_keyring; ++} ++EXPORT_SYMBOL(call_usermodehelper_setkeys); + +- if (!khelper_wq) +- return -EBUSY; ++/** ++ * call_usermodehelper_setcleanup - set a cleanup function ++ * @info: a subprocess_info returned by call_usermodehelper_setup ++ * @cleanup: a cleanup function ++ * ++ * The cleanup function is called just before the subprocess_info is ++ * freed. This can be used for freeing the argv and envp. The ++ * function must be runnable in either a process context or the ++ * context in which call_usermodehelper_exec is called. ++ */ ++void call_usermodehelper_setcleanup(struct subprocess_info *info, ++ void (*cleanup)(char **argv, char **envp)) ++{ ++ info->cleanup = cleanup; ++} ++EXPORT_SYMBOL(call_usermodehelper_setcleanup); + +- if (path[0] == '\0') +- return 0; ++/** ++ * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin ++ * @sub_info: a subprocess_info returned by call_usermodehelper_setup ++ * @filp: set to the write-end of a pipe ++ * ++ * This constructs a pipe, and sets the read end to be the stdin of the ++ * subprocess, and returns the write-end in *@filp.
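A sketch of how a caller might use the pipe variant; the helper path and buffer are invented, and the set_fs() dance is the usual 2.6-era idiom for pushing a kernel buffer through f_op->write, not something this patch mandates:

/* Illustrative only: run a consumer and feed it through its stdin. */
static int my_feed_helper(const char *buf, size_t len)
{
        char *argv[] = { "/usr/local/bin/consumer", NULL };  /* hypothetical */
        char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };
        struct file *out;
        mm_segment_t old_fs;
        int err;

        err = call_usermodehelper_pipe(argv[0], argv, envp, &out);
        if (err)
                return err;

        old_fs = get_fs();
        set_fs(KERNEL_DS);
        out->f_op->write(out, (const char __user *)buf, len, &out->f_pos);
        set_fs(old_fs);
        filp_close(out, NULL);
        return 0;
}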
++ */ ++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info, ++ struct file **filp) ++{ ++ struct file *f; + + f = create_write_pipe(); + if (IS_ERR(f)) +@@ -335,11 +345,85 @@ + free_write_pipe(*filp); + return PTR_ERR(f); + } +- sub_info.stdin = f; ++ sub_info->stdin = f; ++ ++ return 0; ++} ++EXPORT_SYMBOL(call_usermodehelper_stdinpipe); + +- queue_work(khelper_wq, &sub_info.work); ++/** ++ * call_usermodehelper_exec - start a usermode application ++ * @sub_info: information about the subprocess ++ * @wait: wait for the application to finish and return status. ++ * when UMH_NO_WAIT, don't wait at all, but you get no useful error ++ * back when the program couldn't be exec'ed. This makes it safe to ++ * call from interrupt context. ++ * ++ * Runs a user-space application. The application is started ++ * asynchronously if wait is not set, and runs as a child of keventd. ++ * (ie. it runs with full root capabilities). ++ */ ++int call_usermodehelper_exec(struct subprocess_info *sub_info, ++ enum umh_wait wait) ++{ ++ DECLARE_COMPLETION_ONSTACK(done); ++ int retval; ++ ++ if (sub_info->path[0] == '\0') { ++ retval = 0; ++ goto out; ++ } ++ ++ if (!khelper_wq) { ++ retval = -EBUSY; ++ goto out; ++ } ++ ++ sub_info->complete = &done; ++ sub_info->wait = wait; ++ ++ queue_work(khelper_wq, &sub_info->work); ++ if (wait == UMH_NO_WAIT) /* task has freed sub_info */ ++ return 0; + wait_for_completion(&done); +- return sub_info.retval; ++ retval = sub_info->retval; ++ ++ out: ++ call_usermodehelper_freeinfo(sub_info); ++ return retval; ++} ++EXPORT_SYMBOL(call_usermodehelper_exec); ++ ++/** ++ * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin ++ * @path: path to usermode executable ++ * @argv: arg vector for process ++ * @envp: environment for process ++ * @filp: set to the write-end of a pipe ++ * ++ * This is a simple wrapper which executes a usermode-helper function ++ * with a pipe as stdin. It is implemented entirely in terms of ++ * lower-level call_usermodehelper_* functions.
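To see how the split-out primitives compose, here is a sketch of the old single-call entry point rebuilt on top of them (a hypothetical wrapper, not code from the patch):

static inline int my_usermodehelper_keys(char *path, char **argv,
                                         char **envp, struct key *ring,
                                         enum umh_wait wait)
{
        struct subprocess_info *info;

        info = call_usermodehelper_setup(path, argv, envp);
        if (info == NULL)
                return -ENOMEM;

        call_usermodehelper_setkeys(info, ring);

        /* call_usermodehelper_exec() frees info on every path. */
        return call_usermodehelper_exec(info, wait);
}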
++ */ ++int call_usermodehelper_pipe(char *path, char **argv, char **envp, ++ struct file **filp) ++{ ++ struct subprocess_info *sub_info; ++ int ret; ++ ++ sub_info = call_usermodehelper_setup(path, argv, envp); ++ if (sub_info == NULL) ++ return -ENOMEM; ++ ++ ret = call_usermodehelper_stdinpipe(sub_info, filp); ++ if (ret < 0) ++ goto out; ++ ++ return call_usermodehelper_exec(sub_info, 1); ++ ++ out: ++ call_usermodehelper_freeinfo(sub_info); ++ return ret; + } + EXPORT_SYMBOL(call_usermodehelper_pipe); + +diff -Nurb linux-2.6.22-570/kernel/module.c linux-2.6.22-591/kernel/module.c +--- linux-2.6.22-570/kernel/module.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/module.c 2007-12-21 15:36:12.000000000 -0500 +@@ -67,6 +67,7 @@ + /* List of modules, protected by module_mutex AND modlist_lock */ + static DEFINE_MUTEX(module_mutex); + static LIST_HEAD(modules); ++static DECLARE_MUTEX(notify_mutex); + + static BLOCKING_NOTIFIER_HEAD(module_notify_list); + +@@ -488,8 +489,7 @@ + mod->field = NULL; \ + } \ + static struct module_attribute modinfo_##field = { \ +- .attr = { .name = __stringify(field), .mode = 0444, \ +- .owner = THIS_MODULE }, \ ++ .attr = { .name = __stringify(field), .mode = 0444 }, \ + .show = show_modinfo_##field, \ + .setup = setup_modinfo_##field, \ + .test = modinfo_##field##_exists, \ +@@ -713,6 +713,12 @@ + if (ret != 0) + goto out; + ++ down(¬ify_mutex); ++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, ++ mod); ++ up(¬ify_mutex); ++ ++ + /* Never wait if forced. */ + if (!forced && module_refcount(mod) != 0) + wait_for_zero_refcount(mod); +@@ -725,6 +731,11 @@ + } + free_module(mod); + ++ down(¬ify_mutex); ++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GONE, ++ NULL); ++ up(¬ify_mutex); ++ + out: + mutex_unlock(&module_mutex); + return ret; +@@ -793,7 +804,7 @@ + } + + static struct module_attribute refcnt = { +- .attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE }, ++ .attr = { .name = "refcnt", .mode = 0444 }, + .show = show_refcnt, + }; + +@@ -846,12 +857,15 @@ + case MODULE_STATE_GOING: + state = "going"; + break; ++ case MODULE_STATE_GONE: ++ state = "gone"; ++ break; + } + return sprintf(buffer, "%s\n", state); + } + + static struct module_attribute initstate = { +- .attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE }, ++ .attr = { .name = "initstate", .mode = 0444 }, + .show = show_initstate, + }; + +@@ -1032,7 +1046,6 @@ + sattr->mattr.show = module_sect_show; + sattr->mattr.store = NULL; + sattr->mattr.attr.name = sattr->name; +- sattr->mattr.attr.owner = mod; + sattr->mattr.attr.mode = S_IRUGO; + *(gattr++) = &(sattr++)->mattr.attr; + } +@@ -1090,7 +1103,6 @@ + if (!attr->test || + (attr->test && attr->test(mod))) { + memcpy(temp_attr, attr, sizeof(*temp_attr)); +- temp_attr->attr.owner = mod; + error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr); + ++temp_attr; + } +@@ -1212,6 +1224,11 @@ + /* Arch-specific cleanup. 
*/ + module_arch_cleanup(mod); + ++#ifdef CONFIG_KGDB ++ /* kgdb info */ ++ vfree(mod->mod_sections); ++#endif ++ + /* Module unload stuff */ + module_unload_free(mod); + +@@ -1471,6 +1488,31 @@ + } + } + ++#ifdef CONFIG_KGDB ++int add_modsects (struct module *mod, Elf_Ehdr *hdr, Elf_Shdr *sechdrs, const ++ char *secstrings) ++{ ++ int i; ++ ++ mod->num_sections = hdr->e_shnum - 1; ++ mod->mod_sections = vmalloc((hdr->e_shnum - 1)* ++ sizeof (struct mod_section)); ++ ++ if (mod->mod_sections == NULL) { ++ return -ENOMEM; ++ } ++ ++ for (i = 1; i < hdr->e_shnum; i++) { ++ mod->mod_sections[i - 1].address = (void *)sechdrs[i].sh_addr; ++ strncpy(mod->mod_sections[i - 1].name, secstrings + ++ sechdrs[i].sh_name, MAX_SECTNAME); ++ mod->mod_sections[i - 1].name[MAX_SECTNAME] = '\0'; ++ } ++ ++ return 0; ++} ++#endif ++ + #ifdef CONFIG_KALLSYMS + static int is_exported(const char *name, const struct module *mod) + { +@@ -1886,6 +1928,12 @@ + + add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + ++#ifdef CONFIG_KGDB ++ if ((err = add_modsects(mod, hdr, sechdrs, secstrings)) < 0) { ++ goto nomodsectinfo; ++ } ++#endif ++ + err = module_finalize(hdr, sechdrs, mod); + if (err < 0) + goto cleanup; +@@ -1946,6 +1994,11 @@ + arch_cleanup: + module_arch_cleanup(mod); + cleanup: ++ ++#ifdef CONFIG_KGDB ++nomodsectinfo: ++ vfree(mod->mod_sections); ++#endif + module_unload_free(mod); + module_free(mod, mod->module_init); + free_core: +@@ -2017,6 +2070,10 @@ + /* Init routine failed: abort. Try to protect us from + buggy refcounters. */ + mod->state = MODULE_STATE_GOING; ++ down(¬ify_mutex); ++ blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, ++ mod); ++ up(¬ify_mutex); + synchronize_sched(); + if (mod->unsafe) + printk(KERN_ERR "%s: module is now stuck!\n", +diff -Nurb linux-2.6.22-570/kernel/ns_container.c linux-2.6.22-591/kernel/ns_container.c +--- linux-2.6.22-570/kernel/ns_container.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/ns_container.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,99 @@ ++/* ++ * ns_container.c - namespace container subsystem ++ * ++ * Copyright 2006, 2007 IBM Corp ++ */ ++ ++#include ++#include ++#include ++ ++struct ns_container { ++ struct container_subsys_state css; ++ spinlock_t lock; ++}; ++ ++struct container_subsys ns_subsys; ++ ++static inline struct ns_container *container_to_ns( ++ struct container *container) ++{ ++ return container_of(container_subsys_state(container, ns_subsys_id), ++ struct ns_container, css); ++} ++ ++int ns_container_clone(struct task_struct *task) ++{ ++ return container_clone(task, &ns_subsys); ++} ++ ++/* ++ * Rules: ++ * 1. you can only enter a container which is a child of your current ++ * container ++ * 2. you can only place another process into a container if ++ * a. you have CAP_SYS_ADMIN ++ * b. 
your container is an ancestor of task's destination container ++ * (hence either you are in the same container as task, or in an ++ * ancestor container thereof) ++ */ ++static int ns_can_attach(struct container_subsys *ss, ++ struct container *new_container, struct task_struct *task) ++{ ++ struct container *orig; ++ ++ if (current != task) { ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ ++ if (!container_is_descendant(new_container)) ++ return -EPERM; ++ } ++ ++ if (atomic_read(&new_container->count) != 0) ++ return -EPERM; ++ ++ orig = task_container(task, ns_subsys_id); ++ if (orig && orig != new_container->parent) ++ return -EPERM; ++ ++ return 0; ++} ++ ++/* ++ * Rules: you can only create a container if ++ * 1. you are capable(CAP_SYS_ADMIN) ++ * 2. the target container is a descendant of your own container ++ */ ++static int ns_create(struct container_subsys *ss, struct container *container) ++{ ++ struct ns_container *ns_container; ++ ++ if (!capable(CAP_SYS_ADMIN)) ++ return -EPERM; ++ if (!container_is_descendant(container)) ++ return -EPERM; ++ ++ ns_container = kzalloc(sizeof(*ns_container), GFP_KERNEL); ++ if (!ns_container) return -ENOMEM; ++ spin_lock_init(&ns_container->lock); ++ container->subsys[ns_subsys.subsys_id] = &ns_container->css; ++ return 0; ++} ++ ++static void ns_destroy(struct container_subsys *ss, ++ struct container *container) ++{ ++ struct ns_container *ns_container; ++ ++ ns_container = container_to_ns(container); ++ kfree(ns_container); ++} ++ ++struct container_subsys ns_subsys = { ++ .name = "ns", ++ .can_attach = ns_can_attach, ++ .create = ns_create, ++ .destroy = ns_destroy, ++ .subsys_id = ns_subsys_id, ++}; +diff -Nurb linux-2.6.22-570/kernel/nsproxy.c linux-2.6.22-591/kernel/nsproxy.c +--- linux-2.6.22-570/kernel/nsproxy.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/nsproxy.c 2007-12-21 15:36:15.000000000 -0500 +@@ -19,10 +19,13 @@ + #include + #include + #include ++#include + #include + #include + #include + ++static struct kmem_cache *nsproxy_cachep; ++ + struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy); + + void get_task_namespaces(struct task_struct *tsk) +@@ -58,6 +61,7 @@ + struct fs_struct *new_fs) + { + struct nsproxy *new_nsp; ++ int err = -ENOMEM; + + vxdprintk(VXD_CBIT(space, 4), + "unshare_namespaces(0x%08x,%p,%p)", +@@ -83,8 +87,24 @@ + if (IS_ERR(new_nsp->pid_ns)) + goto out_pid; + ++ new_nsp->user_ns = copy_user_ns(flags, orig->user_ns); ++ if (IS_ERR(new_nsp->user_ns)) ++ goto out_user; ++ ++ new_nsp->net_ns = copy_net_ns(flags, orig->net_ns); ++ if (IS_ERR(new_nsp->net_ns)) ++ goto out_net; ++ + return new_nsp; + ++out_net: ++ if (new_nsp->user_ns) ++ put_user_ns(new_nsp->user_ns); ++ if (new_nsp->net_ns) ++ put_net(new_nsp->net_ns); ++out_user: ++ if (new_nsp->pid_ns) ++ put_pid_ns(new_nsp->pid_ns); + out_pid: + if (new_nsp->ipc_ns) + put_ipc_ns(new_nsp->ipc_ns); +@@ -95,11 +115,11 @@ + if (new_nsp->mnt_ns) + put_mnt_ns(new_nsp->mnt_ns); + out_ns: +- kfree(new_nsp); +- return ERR_PTR(-ENOMEM); ++ kmem_cache_free(nsproxy_cachep, new_nsp); ++ return ERR_PTR(err); + } + +-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, ++static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk, + struct fs_struct *new_fs) + { + return unshare_namespaces(flags, tsk->nsproxy, new_fs); +@@ -130,7 +150,7 @@ + * called from clone. This now handles copy for nsproxy and all + * namespaces therein. 
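For reference, these are the clone()/unshare() flags that funnel into this code; a minimal user-space trigger (needs CAP_SYS_ADMIN) looks like:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
        /* CLONE_NEWUTS | CLONE_NEWIPC reaches unshare_nsproxy_namespaces();
         * this patch extends the same test to CLONE_NEWUSER and
         * CLONE_NEWNET. */
        if (unshare(CLONE_NEWUTS | CLONE_NEWIPC) != 0) {
                perror("unshare");
                return 1;
        }
        return 0;
}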
+ */ +-int copy_namespaces(int flags, struct task_struct *tsk) ++int copy_namespaces(unsigned long flags, struct task_struct *tsk) + { + struct nsproxy *old_ns = tsk->nsproxy; + struct nsproxy *new_ns = NULL; +@@ -143,10 +163,17 @@ + return 0; + + get_nsproxy(old_ns); ++ return 0; + +- if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) ++ if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET))) + return 0; + ++ #ifndef CONFIG_NET_NS ++ if (unshare_flags & CLONE_NEWNET) ++ return -EINVAL; ++ #endif ++ ++ + if (!capable(CAP_SYS_ADMIN)) { + err = -EPERM; + goto out; +@@ -158,7 +185,14 @@ + goto out; + } + ++ err = ns_container_clone(tsk); ++ if (err) { ++ put_nsproxy(new_ns); ++ goto out; ++ } ++ + tsk->nsproxy = new_ns; ++ + out: + put_nsproxy(old_ns); + vxdprintk(VXD_CBIT(space, 3), +@@ -194,25 +228,37 @@ + "unshare_nsproxy_namespaces(0x%08lx,[%p])", + unshare_flags, current->nsproxy); + +- if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) ++ if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | ++ CLONE_NEWUSER | CLONE_NEWNET))) + return 0; + +-#ifndef CONFIG_IPC_NS +- if (unshare_flags & CLONE_NEWIPC) ++#ifndef CONFIG_NET_NS ++ if (unshare_flags & CLONE_NEWNET) + return -EINVAL; + #endif +- +-#ifndef CONFIG_UTS_NS +- if (unshare_flags & CLONE_NEWUTS) +- return -EINVAL; +-#endif +- + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + *new_nsp = create_new_namespaces(unshare_flags, current, + new_fs ? new_fs : current->fs); +- if (IS_ERR(*new_nsp)) ++ if (IS_ERR(*new_nsp)) { + err = PTR_ERR(*new_nsp); ++ goto out; ++ } ++ ++ err = ns_container_clone(current); ++ if (err) ++ put_nsproxy(*new_nsp); ++ ++out: + return err; + } ++ ++static int __init nsproxy_cache_init(void) ++{ ++ nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy), ++ 0, SLAB_PANIC, NULL, NULL); ++ return 0; ++} ++ ++module_init(nsproxy_cache_init); +diff -Nurb linux-2.6.22-570/kernel/params.c linux-2.6.22-591/kernel/params.c +--- linux-2.6.22-570/kernel/params.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/kernel/params.c 2007-12-21 15:36:12.000000000 -0500 +@@ -491,7 +491,6 @@ + pattr->mattr.show = param_attr_show; + pattr->mattr.store = param_attr_store; + pattr->mattr.attr.name = (char *)&kp->name[name_skip]; +- pattr->mattr.attr.owner = mk->mod; + pattr->mattr.attr.mode = kp->perm; + *(gattr++) = &(pattr++)->mattr.attr; + } +diff -Nurb linux-2.6.22-570/kernel/pid.c linux-2.6.22-591/kernel/pid.c +--- linux-2.6.22-570/kernel/pid.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/pid.c 2007-12-21 15:36:12.000000000 -0500 +@@ -379,7 +379,7 @@ + } + EXPORT_SYMBOL_GPL(find_get_pid); + +-struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns) ++struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns) + { + BUG_ON(!old_ns); + get_pid_ns(old_ns); +diff -Nurb linux-2.6.22-570/kernel/ptrace.c linux-2.6.22-591/kernel/ptrace.c +--- linux-2.6.22-570/kernel/ptrace.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/ptrace.c 2007-12-21 15:36:12.000000000 -0500 +@@ -143,7 +143,7 @@ + return -EPERM; + smp_rmb(); + if (task->mm) +- dumpable = task->mm->dumpable; ++ dumpable = get_dumpable(task->mm); + if (!dumpable && !capable(CAP_SYS_PTRACE)) + return -EPERM; + if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT)) +diff -Nurb linux-2.6.22-570/kernel/rcutorture.c linux-2.6.22-591/kernel/rcutorture.c +--- linux-2.6.22-570/kernel/rcutorture.c 2007-07-08 
19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/rcutorture.c 2007-12-21 15:36:12.000000000 -0500 +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -518,7 +519,6 @@ + + VERBOSE_PRINTK_STRING("rcu_torture_writer task started"); + set_user_nice(current, 19); +- current->flags |= PF_NOFREEZE; + + do { + schedule_timeout_uninterruptible(1); +@@ -558,7 +558,6 @@ + + VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started"); + set_user_nice(current, 19); +- current->flags |= PF_NOFREEZE; + + do { + schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); +@@ -589,7 +588,6 @@ + + VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); + set_user_nice(current, 19); +- current->flags |= PF_NOFREEZE; + + do { + idx = cur_ops->readlock(); +diff -Nurb linux-2.6.22-570/kernel/rtmutex-tester.c linux-2.6.22-591/kernel/rtmutex-tester.c +--- linux-2.6.22-570/kernel/rtmutex-tester.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/rtmutex-tester.c 2007-12-21 15:36:12.000000000 -0500 +@@ -260,6 +260,7 @@ + int ret; + + current->flags |= PF_MUTEX_TESTER; ++ set_freezable(); + allow_signal(SIGHUP); + + for(;;) { +diff -Nurb linux-2.6.22-570/kernel/sched.c linux-2.6.22-591/kernel/sched.c +--- linux-2.6.22-570/kernel/sched.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/sched.c 2007-12-21 15:36:12.000000000 -0500 +@@ -51,8 +51,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + + #include + #include +@@ -3399,9 +3401,16 @@ + struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; + struct vx_info *vxi = p->vx_info; /* p is _always_ current */ + cputime64_t tmp; ++ struct rq *rq = this_rq(); + int nice = (TASK_NICE(p) > 0); + + p->utime = cputime_add(p->utime, cputime); ++ ++ ++ if (p != rq->idle) ++ cpuacct_charge(p, cputime); ++ ++ + vx_account_user(vxi, cputime, nice); + + /* Add user time to cpustat. */ +@@ -3435,9 +3444,10 @@ + cpustat->irq = cputime64_add(cpustat->irq, tmp); + else if (softirq_count()) + cpustat->softirq = cputime64_add(cpustat->softirq, tmp); +- else if (p != rq->idle) ++ else if (p != rq->idle) { + cpustat->system = cputime64_add(cpustat->system, tmp); +- else if (atomic_read(&rq->nr_iowait) > 0) ++ cpuacct_charge(p, cputime); ++ } else if (atomic_read(&rq->nr_iowait) > 0) + cpustat->iowait = cputime64_add(cpustat->iowait, tmp); + else + cpustat->idle = cputime64_add(cpustat->idle, tmp); +@@ -3462,8 +3472,10 @@ + cpustat->iowait = cputime64_add(cpustat->iowait, tmp); + else + cpustat->idle = cputime64_add(cpustat->idle, tmp); +- } else ++ } else { + cpustat->steal = cputime64_add(cpustat->steal, tmp); ++ cpuacct_charge(p, -tmp); ++ } + } + + static void task_running_tick(struct rq *rq, struct task_struct *p, int cpu) +@@ -5287,8 +5299,6 @@ + struct migration_req *req; + struct list_head *head; + +- try_to_freeze(); +- + spin_lock_irq(&rq->lock); + + if (cpu_is_offline(cpu)) { +@@ -5522,7 +5532,6 @@ + p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); + if (IS_ERR(p)) + return NOTIFY_BAD; +- p->flags |= PF_NOFREEZE; + kthread_bind(p, cpu); + /* Must be high prio: stop_machine expects to yield to it. */ + rq = task_rq_lock(p, &flags); +@@ -6926,33 +6935,6 @@ + arch_destroy_sched_domains(cpu_map); + } + +-/* +- * Partition sched domains as specified by the cpumasks below. 
+- * This attaches all cpus from the cpumasks to the NULL domain, +- * waits for a RCU quiescent period, recalculates sched +- * domain information and then attaches them back to the +- * correct sched domains +- * Call with hotplug lock held +- */ +-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2) +-{ +- cpumask_t change_map; +- int err = 0; +- +- cpus_and(*partition1, *partition1, cpu_online_map); +- cpus_and(*partition2, *partition2, cpu_online_map); +- cpus_or(change_map, *partition1, *partition2); +- +- /* Detach sched domains from all of the affected cpus */ +- detach_destroy_domains(&change_map); +- if (!cpus_empty(*partition1)) +- err = build_sched_domains(partition1); +- if (!err && !cpus_empty(*partition2)) +- err = build_sched_domains(partition2); +- +- return err; +-} +- + #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) + int arch_reinit_sched_domains(void) + { +@@ -7177,6 +7159,9 @@ + #ifdef in_atomic + static unsigned long prev_jiffy; /* ratelimiting */ + ++ if (atomic_read(&debugger_active)) ++ return; ++ + if ((in_atomic() || irqs_disabled()) && + system_state == SYSTEM_RUNNING && !oops_in_progress) { + if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) +diff -Nurb linux-2.6.22-570/kernel/seccomp.c linux-2.6.22-591/kernel/seccomp.c +--- linux-2.6.22-570/kernel/seccomp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/seccomp.c 2007-12-21 15:36:12.000000000 -0500 +@@ -10,6 +10,7 @@ + #include + + /* #define SECCOMP_DEBUG 1 */ ++#define NR_SECCOMP_MODES 1 + + /* + * Secure computing mode 1 allows only read/write/exit/sigreturn. +@@ -54,3 +55,28 @@ + #endif + do_exit(SIGKILL); + } ++ ++long prctl_get_seccomp(void) ++{ ++ return current->seccomp.mode; ++} ++ ++long prctl_set_seccomp(unsigned long seccomp_mode) ++{ ++ long ret; ++ ++ /* can set it only once to be even more secure */ ++ ret = -EPERM; ++ if (unlikely(current->seccomp.mode)) ++ goto out; ++ ++ ret = -EINVAL; ++ if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) { ++ current->seccomp.mode = seccomp_mode; ++ set_thread_flag(TIF_SECCOMP); ++ ret = 0; ++ } ++ ++ out: ++ return ret; ++} +diff -Nurb linux-2.6.22-570/kernel/signal.c linux-2.6.22-591/kernel/signal.c +--- linux-2.6.22-570/kernel/signal.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/signal.c 2007-12-21 15:36:12.000000000 -0500 +@@ -257,6 +257,16 @@ + } + } + ++int unhandled_signal(struct task_struct *tsk, int sig) ++{ ++ if (is_init(tsk)) ++ return 1; ++ if (tsk->ptrace & PT_PTRACED) ++ return 0; ++ return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) || ++ (tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL); ++} ++ + + /* Notify the system that a driver wants to block all signals for this + * process, and wants to be notified if any signals at all were to be +diff -Nurb linux-2.6.22-570/kernel/softirq.c linux-2.6.22-591/kernel/softirq.c +--- linux-2.6.22-570/kernel/softirq.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/softirq.c 2007-12-21 15:36:12.000000000 -0500 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -304,11 +305,6 @@ + if (!in_interrupt() && local_softirq_pending()) + invoke_softirq(); + +-#ifdef CONFIG_NO_HZ +- /* Make sure that timer wheel updates are propagated */ +- if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) +- tick_nohz_stop_sched_tick(); +-#endif + preempt_enable_no_resched(); + } + +@@ -490,7 +486,6 @@ + static int ksoftirqd(void * __bind_cpu) + { 
+ set_user_nice(current, 19); +- current->flags |= PF_NOFREEZE; + + set_current_state(TASK_INTERRUPTIBLE); + +diff -Nurb linux-2.6.22-570/kernel/softlockup.c linux-2.6.22-591/kernel/softlockup.c +--- linux-2.6.22-570/kernel/softlockup.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/softlockup.c 2007-12-21 15:36:12.000000000 -0500 +@@ -10,9 +10,11 @@ + #include + #include + #include ++#include + #include + #include + #include ++#include + + static DEFINE_SPINLOCK(print_lock); + +@@ -47,6 +49,9 @@ + void touch_softlockup_watchdog(void) + { + __raw_get_cpu_var(touch_timestamp) = get_timestamp(); ++#ifdef CONFIG_KGDB ++ atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 0); ++#endif + } + EXPORT_SYMBOL(touch_softlockup_watchdog); + +@@ -116,7 +121,6 @@ + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; + + sched_setscheduler(current, SCHED_FIFO, ¶m); +- current->flags |= PF_NOFREEZE; + + /* initialize timestamp */ + touch_softlockup_watchdog(); +diff -Nurb linux-2.6.22-570/kernel/sys.c linux-2.6.22-591/kernel/sys.c +--- linux-2.6.22-570/kernel/sys.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/sys.c 2007-12-21 15:36:12.000000000 -0500 +@@ -31,6 +31,7 @@ + #include + #include + #include ++#include + #include + + #include +@@ -1043,7 +1044,7 @@ + return -EPERM; + } + if (new_egid != old_egid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + if (rgid != (gid_t) -1 || +@@ -1073,13 +1074,13 @@ + + if (capable(CAP_SETGID)) { + if (old_egid != gid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->gid = current->egid = current->sgid = current->fsgid = gid; + } else if ((gid == current->gid) || (gid == current->sgid)) { + if (old_egid != gid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->egid = current->fsgid = gid; +@@ -1110,7 +1111,7 @@ + switch_uid(new_user); + + if (dumpclear) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->uid = new_ruid; +@@ -1166,7 +1167,7 @@ + return -EAGAIN; + + if (new_euid != old_euid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->fsuid = current->euid = new_euid; +@@ -1216,7 +1217,7 @@ + return -EPERM; + + if (old_euid != uid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->fsuid = current->euid = uid; +@@ -1261,7 +1262,7 @@ + } + if (euid != (uid_t) -1) { + if (euid != current->euid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->euid = euid; +@@ -1311,7 +1312,7 @@ + } + if (egid != (gid_t) -1) { + if (egid != current->egid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->egid = egid; +@@ -1357,7 +1358,7 @@ + uid == current->suid || uid == current->fsuid || + capable(CAP_SETUID)) { + if (uid != old_fsuid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->fsuid = uid; +@@ -1386,7 +1387,7 @@ + gid == current->sgid || gid == current->fsgid || + capable(CAP_SETGID)) { + if (gid != old_fsgid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + smp_wmb(); + } + current->fsgid = 
gid; +@@ -2185,14 +2186,14 @@ + error = put_user(current->pdeath_signal, (int __user *)arg2); + break; + case PR_GET_DUMPABLE: +- error = current->mm->dumpable; ++ error = get_dumpable(current->mm); + break; + case PR_SET_DUMPABLE: + if (arg2 < 0 || arg2 > 1) { + error = -EINVAL; + break; + } +- current->mm->dumpable = arg2; ++ set_dumpable(current->mm, arg2); + break; + + case PR_SET_UNALIGN: +@@ -2261,6 +2262,13 @@ + error = SET_ENDIAN(current, arg2); + break; + ++ case PR_GET_SECCOMP: ++ error = prctl_get_seccomp(); ++ break; ++ case PR_SET_SECCOMP: ++ error = prctl_set_seccomp(arg2); ++ break; ++ + default: + error = -EINVAL; + break; +@@ -2297,3 +2305,61 @@ + } + return err ? -EFAULT : 0; + } ++ ++char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff"; ++ ++static void argv_cleanup(char **argv, char **envp) ++{ ++ argv_free(argv); ++} ++ ++/** ++ * Trigger an orderly system poweroff ++ * @force: force poweroff if command execution fails ++ * ++ * This may be called from any context to trigger a system shutdown. ++ * If the orderly shutdown fails, it will force an immediate shutdown. ++ */ ++int orderly_poweroff(bool force) ++{ ++ int argc; ++ char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); ++ static char *envp[] = { ++ "HOME=/", ++ "PATH=/sbin:/bin:/usr/sbin:/usr/bin", ++ NULL ++ }; ++ int ret = -ENOMEM; ++ struct subprocess_info *info; ++ ++ if (argv == NULL) { ++ printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", ++ __func__, poweroff_cmd); ++ goto out; ++ } ++ ++ info = call_usermodehelper_setup(argv[0], argv, envp); ++ if (info == NULL) { ++ argv_free(argv); ++ goto out; ++ } ++ ++ call_usermodehelper_setcleanup(info, argv_cleanup); ++ ++ ret = call_usermodehelper_exec(info, UMH_NO_WAIT); ++ ++ out: ++ if (ret && force) { ++ printk(KERN_WARNING "Failed to start orderly shutdown: " ++ "forcing the issue\n"); ++ ++ /* I guess this should try to kick off some daemon to ++ sync and poweroff asap. Or not even bother syncing ++ if we're doing an emergency shutdown? 
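A sketch of the calling pattern this helper is meant for (the surrounding driver is invented):

/* E.g. a hardware-monitoring driver that must get the machine down:
 * try the configured user-space shutdown first; passing force lets
 * the helper fall back to an immediate kernel_power_off(). */
static void my_overtemp_shutdown(void)
{
        printk(KERN_EMERG "my_sensor: critical temperature reached\n");
        orderly_poweroff(true);
}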
*/ ++ emergency_sync(); ++ kernel_power_off(); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(orderly_poweroff); +diff -Nurb linux-2.6.22-570/kernel/sysctl.c linux-2.6.22-591/kernel/sysctl.c +--- linux-2.6.22-570/kernel/sysctl.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/sysctl.c 2007-12-21 15:36:15.000000000 -0500 +@@ -45,13 +45,13 @@ + #include + #include + #include ++#include ++#include ++#include + + #include + #include + +-extern int proc_nr_files(ctl_table *table, int write, struct file *filp, +- void __user *buffer, size_t *lenp, loff_t *ppos); +- + #ifdef CONFIG_X86 + #include + #include +@@ -140,6 +140,10 @@ + void __user *buffer, size_t *lenp, loff_t *ppos); + #endif + ++#ifdef CONFIG_NET ++static void sysctl_net_init(struct net *net); ++#endif ++ + static ctl_table root_table[]; + static struct ctl_table_header root_table_header = + { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) }; +@@ -203,7 +207,10 @@ + .mode = 0555, + .child = dev_table, + }, +- ++/* ++ * NOTE: do not add new entries to this table unless you have read ++ * Documentation/sysctl/ctl_unnumbered.txt ++ */ + { .ctl_name = 0 } + }; + +@@ -217,6 +224,15 @@ + .proc_handler = &proc_dointvec, + }, + { ++ .ctl_name = KERN_POWEROFF_CMD, ++ .procname = "poweroff_cmd", ++ .data = &poweroff_cmd, ++ .maxlen = POWEROFF_CMD_PATH_LEN, ++ .mode = 0644, ++ .proc_handler = &proc_dostring, ++ .strategy = &sysctl_string, ++ }, ++ { + .ctl_name = KERN_CORE_USES_PID, + .procname = "core_uses_pid", + .data = &core_uses_pid, +@@ -625,7 +641,20 @@ + .proc_handler = &proc_dointvec, + }, + #endif +- ++#ifdef CONFIG_SECURITY ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "mmap_min_addr", ++ .data = &mmap_min_addr, ++ .maxlen = sizeof(unsigned long), ++ .mode = 0644, ++ .proc_handler = &proc_doulongvec_minmax, ++ }, ++#endif ++/* ++ * NOTE: do not add new entries to this table unless you have read ++ * Documentation/sysctl/ctl_unnumbered.txt ++ */ + { .ctl_name = 0 } + }; + +@@ -744,6 +773,14 @@ + .mode = 0644, + .proc_handler = &proc_dointvec, + }, ++ { ++ .ctl_name = VM_HUGETLB_TREAT_MOVABLE, ++ .procname = "hugepages_treat_as_movable", ++ .data = &hugepages_treat_as_movable, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &hugetlb_treat_movable_handler, ++ }, + #endif + { + .ctl_name = VM_LOWMEM_RESERVE_RATIO, +@@ -892,6 +929,10 @@ + .extra1 = &zero, + }, + #endif ++/* ++ * NOTE: do not add new entries to this table unless you have read ++ * Documentation/sysctl/ctl_unnumbered.txt ++ */ + { .ctl_name = 0 } + }; + +@@ -1032,10 +1073,28 @@ + .child = binfmt_misc_table, + }, + #endif ++/* ++ * NOTE: do not add new entries to this table unless you have read ++ * Documentation/sysctl/ctl_unnumbered.txt ++ */ + { .ctl_name = 0 } + }; + + static ctl_table debug_table[] = { ++#ifdef CONFIG_X86 ++ { ++ .ctl_name = DEBUG_UNHANDLED_SIGNALS, ++ .procname = "show-unhandled-signals", ++ .data = &show_unhandled_signals, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = proc_dointvec ++ }, ++#endif ++/* ++ * NOTE: do not add new entries to this table unless you have read ++ * Documentation/sysctl/ctl_unnumbered.txt ++ */ + { .ctl_name = 0 } + }; + +@@ -1097,6 +1156,11 @@ + { + struct ctl_table_header *head; + struct list_head *tmp; ++ struct net *net = current->nsproxy->net_ns; ++ ++ if (!net->net_table_header.ctl_table) ++ sysctl_net_init(net); ++ + spin_lock(&sysctl_lock); + if (prev) { + tmp = &prev->ctl_entry; +@@ -1114,6 +1178,10 @@ + next: + tmp = tmp->next; + if (tmp == 
&root_table_header.ctl_entry) ++#ifdef CONFIG_NET ++ tmp = &net->net_table_header.ctl_entry; ++ else if (tmp == &net->net_table_header.ctl_entry) ++#endif + break; + } + spin_unlock(&sysctl_lock); +@@ -1229,7 +1297,6 @@ + void __user *newval, size_t newlen) + { + int op = 0, rc; +- size_t len; + + if (oldval) + op |= 004; +@@ -1250,25 +1317,10 @@ + /* If there is no strategy routine, or if the strategy returns + * zero, proceed with automatic r/w */ + if (table->data && table->maxlen) { +- if (oldval && oldlenp) { +- if (get_user(len, oldlenp)) +- return -EFAULT; +- if (len) { +- if (len > table->maxlen) +- len = table->maxlen; +- if(copy_to_user(oldval, table->data, len)) +- return -EFAULT; +- if(put_user(len, oldlenp)) +- return -EFAULT; +- } +- } +- if (newval && newlen) { +- len = newlen; +- if (len > table->maxlen) +- len = table->maxlen; +- if(copy_from_user(table->data, newval, len)) +- return -EFAULT; +- } ++ rc = sysctl_data(table, name, nlen, oldval, oldlenp, ++ newval, newlen); ++ if (rc < 0) ++ return rc; + } + return 0; + } +@@ -1359,7 +1411,8 @@ + * This routine returns %NULL on a failure to register, and a pointer + * to the table header on success. + */ +-struct ctl_table_header *register_sysctl_table(ctl_table * table) ++static struct ctl_table_header *__register_sysctl_table( ++ struct ctl_table_header *root, ctl_table * table) + { + struct ctl_table_header *tmp; + tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL); +@@ -1371,11 +1424,16 @@ + tmp->unregistering = NULL; + sysctl_set_parent(NULL, table); + spin_lock(&sysctl_lock); +- list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); ++ list_add_tail(&tmp->ctl_entry, &root->ctl_entry); + spin_unlock(&sysctl_lock); + return tmp; + } + ++struct ctl_table_header *register_sysctl_table(ctl_table *table) ++{ ++ return __register_sysctl_table(&root_table_header, table); ++} ++ + /** + * unregister_sysctl_table - unregister a sysctl table hierarchy + * @header: the header returned from register_sysctl_table +@@ -1392,6 +1450,92 @@ + kfree(header); + } + ++#ifdef CONFIG_NET ++ ++static void *fixup_table_addr(void *addr, ++ const char *start, size_t size, const char *new) ++{ ++ char *ptr = addr; ++ if ((ptr >= start) && (ptr < (start + size))) ++ ptr += new - start; ++ return ptr; ++} ++ ++static void table_fixup(struct ctl_table *table, ++ const void *start, size_t size, const void *new) ++{ ++ for (; table->ctl_name || table->procname; table++) { ++ table->data = fixup_table_addr(table->data, start, size, new); ++ table->extra1 = fixup_table_addr(table->extra1, start, size, new); ++ table->extra2 = fixup_table_addr(table->extra2, start, size, new); ++ ++ /* Whee recursive functions on the kernel stack */ ++ if (table->child) ++ table_fixup(table->child, start, size, new); ++ } ++} ++ ++static unsigned count_table_entries(struct ctl_table *table) ++{ ++ unsigned entries = 0; ++ for (; table->ctl_name || table->procname; table++) { ++ entries += 1; ++ ++ if (table->child) ++ entries += count_table_entries(table->child); ++ } ++ entries += 1; /* Null terminating entry */ ++ return entries; ++} ++ ++static struct ctl_table *copy_table_entries( ++ struct ctl_table *dest, struct ctl_table *src) ++{ ++ struct ctl_table *table = dest; ++ for (; src->ctl_name || src->procname; src++) { ++ *dest++ = *src; ++ } ++ dest++; /* Null terminating entry */ ++ for (; table->ctl_name || table->procname; table++) { ++ if (table->child) ++ dest = copy_table_entries(dest, table->child); ++ } ++ return dest; ++} ++ ++static void
sysctl_net_init(struct net *net) ++{ ++ unsigned entries; ++ struct ctl_table *table; ++ ++ entries = count_table_entries(net_root_table); ++ table = kzalloc(sizeof(*table) * entries, GFP_KERNEL); /* kzalloc takes (size, flags) */ ++ /* FIXME free table... */ ++ ++ copy_table_entries(table, net_root_table); ++ table_fixup(table, &init_net, sizeof(init_net), net); ++ ++ net->net_table_header.ctl_table = table; ++ INIT_LIST_HEAD(&net->net_table_header.ctl_entry); ++} ++ ++struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table) ++{ ++ if (!net->net_table_header.ctl_table) ++ sysctl_net_init(net); ++ table_fixup(table, &init_net, sizeof(init_net), net); ++ return __register_sysctl_table(&net->net_table_header, table); ++} ++EXPORT_SYMBOL_GPL(register_net_sysctl_table); ++ ++void unregister_net_sysctl_table(struct ctl_table_header *header) ++{ ++ return unregister_sysctl_table(header); ++} ++EXPORT_SYMBOL_GPL(unregister_net_sysctl_table); ++#endif ++ ++ + #else /* !CONFIG_SYSCTL */ + struct ctl_table_header *register_sysctl_table(ctl_table * table) + { +@@ -2167,6 +2311,40 @@ + * General sysctl support routines + */ + ++/* The generic sysctl data routine (used if no strategy routine supplied) */ ++int sysctl_data(ctl_table *table, int __user *name, int nlen, ++ void __user *oldval, size_t __user *oldlenp, ++ void __user *newval, size_t newlen) ++{ ++ size_t len; ++ ++ /* Bail out if there is no variable to act on */ ++ if (!table->data || !table->maxlen) ++ return -ENOTDIR; ++ ++ if (oldval && oldlenp) { ++ if (get_user(len, oldlenp)) ++ return -EFAULT; ++ if (len) { ++ if (len > table->maxlen) ++ len = table->maxlen; ++ if (copy_to_user(oldval, table->data, len)) ++ return -EFAULT; ++ if (put_user(len, oldlenp)) ++ return -EFAULT; ++ } ++ } ++ ++ if (newval && newlen) { ++ if (newlen > table->maxlen) ++ newlen = table->maxlen; ++ ++ if (copy_from_user(table->data, newval, newlen)) ++ return -EFAULT; ++ } ++ return 1; ++} ++ + /* The generic string strategy routine: */ + int sysctl_string(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, +@@ -2355,6 +2533,13 @@ + return -ENOSYS; + } + ++int sysctl_data(ctl_table *table, int __user *name, int nlen, ++ void __user *oldval, size_t __user *oldlenp, ++ void __user *newval, size_t newlen) ++{ ++ return -ENOSYS; ++} ++ + int sysctl_string(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) +@@ -2402,4 +2587,5 @@ + EXPORT_SYMBOL(sysctl_jiffies); + EXPORT_SYMBOL(sysctl_ms_jiffies); + EXPORT_SYMBOL(sysctl_string); ++EXPORT_SYMBOL(sysctl_data); + EXPORT_SYMBOL(unregister_sysctl_table); +diff -Nurb linux-2.6.22-570/kernel/taskstats.c linux-2.6.22-591/kernel/taskstats.c +--- linux-2.6.22-570/kernel/taskstats.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/taskstats.c 2007-12-21 15:36:12.000000000 -0500 +@@ -196,6 +196,8 @@ + + /* fill in basic acct fields */ + stats->version = TASKSTATS_VERSION; ++ stats->nvcsw = tsk->nvcsw; ++ stats->nivcsw = tsk->nivcsw; + bacct_add_tsk(stats, tsk); + + /* fill in extended acct fields */ +@@ -242,6 +244,8 @@ + */ + delayacct_add_tsk(stats, tsk); + ++ stats->nvcsw += tsk->nvcsw; ++ stats->nivcsw += tsk->nivcsw; + } while_each_thread(first, tsk); + + unlock_task_sighand(first, &flags); +diff -Nurb linux-2.6.22-570/kernel/time/tick-sched.c linux-2.6.22-591/kernel/time/tick-sched.c +--- linux-2.6.22-570/kernel/time/tick-sched.c 2007-07-08 19:32:17.000000000 -0400 ++++
linux-2.6.22-591/kernel/time/tick-sched.c 2007-12-21 15:36:12.000000000 -0500 +@@ -153,6 +153,7 @@ + unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags; + struct tick_sched *ts; + ktime_t last_update, expires, now, delta; ++ struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; + int cpu; + + local_irq_save(flags); +@@ -290,11 +291,34 @@ + out: + ts->next_jiffies = next_jiffies; + ts->last_jiffies = last_jiffies; ++ ts->sleep_length = ktime_sub(dev->next_event, now); + end: + local_irq_restore(flags); + } + + /** ++ * tick_nohz_get_sleep_length - return the length of the current sleep ++ * ++ * Called from power state control code with interrupts disabled ++ */ ++ktime_t tick_nohz_get_sleep_length(void) ++{ ++ struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ++ ++ return ts->sleep_length; ++} ++ ++/** ++ * tick_nohz_get_idle_jiffies - returns the current idle jiffie count ++ */ ++unsigned long tick_nohz_get_idle_jiffies(void) ++{ ++ struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); ++ ++ return ts->idle_jiffies; ++} ++ ++/** + * nohz_restart_sched_tick - restart the idle tick from the idle task + * + * Restart the idle tick when the CPU is woken up from idle +diff -Nurb linux-2.6.22-570/kernel/time/timekeeping.c linux-2.6.22-591/kernel/time/timekeeping.c +--- linux-2.6.22-570/kernel/time/timekeeping.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/time/timekeeping.c 2007-12-21 15:36:15.000000000 -0500 +@@ -39,7 +39,7 @@ + */ + struct timespec xtime __attribute__ ((aligned (16))); + struct timespec wall_to_monotonic __attribute__ ((aligned (16))); +- ++static unsigned long total_sleep_time; + EXPORT_SYMBOL(xtime); + + +@@ -251,6 +251,7 @@ + xtime.tv_nsec = 0; + set_normalized_timespec(&wall_to_monotonic, + -xtime.tv_sec, -xtime.tv_nsec); ++ total_sleep_time = 0; + + write_sequnlock_irqrestore(&xtime_lock, flags); + } +@@ -282,6 +283,7 @@ + + xtime.tv_sec += sleep_length; + wall_to_monotonic.tv_sec -= sleep_length; ++ total_sleep_time += sleep_length; + } + /* re-base the last cycle value */ + clock->cycle_last = clocksource_read(clock); +@@ -476,3 +478,34 @@ + change_clocksource(); + update_vsyscall(&xtime, clock); + } ++ ++/** ++ * getboottime - Return the real time of system boot. ++ * @ts: pointer to the timespec to be set ++ * ++ * Returns the time of day in a timespec. ++ * ++ * This is based on the wall_to_monotonic offset and the total suspend ++ * time. Calls to settimeofday will affect the value returned (which ++ * basically means that however wrong your real time clock is at boot time, ++ * you get the right time here). ++ */ ++void getboottime(struct timespec *ts) ++{ ++ set_normalized_timespec(ts, ++ - (wall_to_monotonic.tv_sec + total_sleep_time), ++ - wall_to_monotonic.tv_nsec); ++} ++ ++EXPORT_SYMBOL(getboottime); ++ ++/** ++ * monotonic_to_bootbased - Convert the monotonic time to boot based. 
++ * @ts: pointer to the timespec to be converted ++ */ ++void monotonic_to_bootbased(struct timespec *ts) ++{ ++ ts->tv_sec += total_sleep_time; ++} ++ ++EXPORT_SYMBOL(monotonic_to_bootbased); +diff -Nurb linux-2.6.22-570/kernel/timer.c linux-2.6.22-591/kernel/timer.c +--- linux-2.6.22-570/kernel/timer.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/timer.c 2007-12-21 15:36:15.000000000 -0500 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -886,7 +887,11 @@ + */ + void run_local_timers(void) + { ++ int this_cpu = smp_processor_id(); + raise_softirq(TIMER_SOFTIRQ); ++#ifdef CONFIG_KGDB ++ if(!atomic_read(&kgdb_sync_softlockup[this_cpu])) ++#endif + softlockup_tick(); + } + +@@ -1125,6 +1130,7 @@ + getnstimeofday(&tp); + tp.tv_sec += wall_to_monotonic.tv_sec; + tp.tv_nsec += wall_to_monotonic.tv_nsec; ++ monotonic_to_bootbased(&tp); + if (tp.tv_nsec - NSEC_PER_SEC >= 0) { + tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; + tp.tv_sec++; +diff -Nurb linux-2.6.22-570/kernel/unwind.c linux-2.6.22-591/kernel/unwind.c +--- linux-2.6.22-570/kernel/unwind.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/unwind.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,1288 @@ ++/* ++ * Copyright (C) 2002-2006 Novell, Inc. ++ * Jan Beulich ++ * This code is released under version 2 of the GNU GPL. ++ * ++ * A simple API for unwinding kernel stacks. This is used for ++ * debugging and error reporting purposes. The kernel doesn't need ++ * full-blown stack unwinding with all the bells and whistles, so there ++ * is not much point in implementing the full Dwarf2 unwind API. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++extern const char __start_unwind[], __end_unwind[]; ++extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; ++ ++#define MAX_STACK_DEPTH 8 ++ ++#define EXTRA_INFO(f) { \ ++ BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \ ++ % FIELD_SIZEOF(struct unwind_frame_info, f)) \ ++ + offsetof(struct unwind_frame_info, f) \ ++ / FIELD_SIZEOF(struct unwind_frame_info, f), \ ++ FIELD_SIZEOF(struct unwind_frame_info, f) \ ++ } ++#define PTREGS_INFO(f) EXTRA_INFO(regs.f) ++ ++static const struct { ++ unsigned offs:BITS_PER_LONG / 2; ++ unsigned width:BITS_PER_LONG / 2; ++} reg_info[] = { ++ UNW_REGISTER_INFO ++}; ++ ++#undef PTREGS_INFO ++#undef EXTRA_INFO ++ ++#ifndef REG_INVALID ++#define REG_INVALID(r) (reg_info[r].width == 0) ++#endif ++ ++#define DW_CFA_nop 0x00 ++#define DW_CFA_set_loc 0x01 ++#define DW_CFA_advance_loc1 0x02 ++#define DW_CFA_advance_loc2 0x03 ++#define DW_CFA_advance_loc4 0x04 ++#define DW_CFA_offset_extended 0x05 ++#define DW_CFA_restore_extended 0x06 ++#define DW_CFA_undefined 0x07 ++#define DW_CFA_same_value 0x08 ++#define DW_CFA_register 0x09 ++#define DW_CFA_remember_state 0x0a ++#define DW_CFA_restore_state 0x0b ++#define DW_CFA_def_cfa 0x0c ++#define DW_CFA_def_cfa_register 0x0d ++#define DW_CFA_def_cfa_offset 0x0e ++#define DW_CFA_def_cfa_expression 0x0f ++#define DW_CFA_expression 0x10 ++#define DW_CFA_offset_extended_sf 0x11 ++#define DW_CFA_def_cfa_sf 0x12 ++#define DW_CFA_def_cfa_offset_sf 0x13 ++#define DW_CFA_val_offset 0x14 ++#define DW_CFA_val_offset_sf 0x15 ++#define DW_CFA_val_expression 0x16 ++#define DW_CFA_lo_user 0x1c ++#define DW_CFA_GNU_window_save 0x2d ++#define DW_CFA_GNU_args_size 0x2e ++#define DW_CFA_GNU_negative_offset_extended 0x2f ++#define DW_CFA_hi_user 0x3f ++ ++#define DW_EH_PE_FORM 0x07 
++#define DW_EH_PE_native 0x00 ++#define DW_EH_PE_leb128 0x01 ++#define DW_EH_PE_data2 0x02 ++#define DW_EH_PE_data4 0x03 ++#define DW_EH_PE_data8 0x04 ++#define DW_EH_PE_signed 0x08 ++#define DW_EH_PE_ADJUST 0x70 ++#define DW_EH_PE_abs 0x00 ++#define DW_EH_PE_pcrel 0x10 ++#define DW_EH_PE_textrel 0x20 ++#define DW_EH_PE_datarel 0x30 ++#define DW_EH_PE_funcrel 0x40 ++#define DW_EH_PE_aligned 0x50 ++#define DW_EH_PE_indirect 0x80 ++#define DW_EH_PE_omit 0xff ++ ++typedef unsigned long uleb128_t; ++typedef signed long sleb128_t; ++#define sleb128abs __builtin_labs ++ ++static struct unwind_table { ++ struct { ++ unsigned long pc; ++ unsigned long range; ++ } core, init; ++ const void *address; ++ unsigned long size; ++ const unsigned char *header; ++ unsigned long hdrsz; ++ struct unwind_table *link; ++ const char *name; ++} root_table; ++ ++struct unwind_item { ++ enum item_location { ++ Nowhere, ++ Memory, ++ Register, ++ Value ++ } where; ++ uleb128_t value; ++}; ++ ++struct unwind_state { ++ uleb128_t loc, org; ++ const u8 *cieStart, *cieEnd; ++ uleb128_t codeAlign; ++ sleb128_t dataAlign; ++ struct cfa { ++ uleb128_t reg, offs; ++ } cfa; ++ struct unwind_item regs[ARRAY_SIZE(reg_info)]; ++ unsigned stackDepth:8; ++ unsigned version:8; ++ const u8 *label; ++ const u8 *stack[MAX_STACK_DEPTH]; ++}; ++ ++static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 }; ++ ++static unsigned unwind_debug; ++static int __init unwind_debug_setup(char *s) ++{ ++ unwind_debug = simple_strtoul(s, NULL, 0); ++ return 1; ++} ++__setup("unwind_debug=", unwind_debug_setup); ++#define dprintk(lvl, fmt, args...) \ ++ ((void)(lvl > unwind_debug \ ++ || printk(KERN_DEBUG "unwind: " fmt "\n", ##args))) ++ ++static struct unwind_table *find_table(unsigned long pc) ++{ ++ struct unwind_table *table; ++ ++ for (table = &root_table; table; table = table->link) ++ if ((pc >= table->core.pc ++ && pc < table->core.pc + table->core.range) ++ || (pc >= table->init.pc ++ && pc < table->init.pc + table->init.range)) ++ break; ++ ++ return table; ++} ++ ++static unsigned long read_pointer(const u8 **pLoc, ++ const void *end, ++ signed ptrType, ++ unsigned long text_base, ++ unsigned long data_base); ++ ++static void init_unwind_table(struct unwind_table *table, ++ const char *name, ++ const void *core_start, ++ unsigned long core_size, ++ const void *init_start, ++ unsigned long init_size, ++ const void *table_start, ++ unsigned long table_size, ++ const u8 *header_start, ++ unsigned long header_size) ++{ ++ const u8 *ptr = header_start + 4; ++ const u8 *end = header_start + header_size; ++ ++ table->core.pc = (unsigned long)core_start; ++ table->core.range = core_size; ++ table->init.pc = (unsigned long)init_start; ++ table->init.range = init_size; ++ table->address = table_start; ++ table->size = table_size; ++ /* See if the linker provided table looks valid. 
*/ ++ if (header_size <= 4 ++ || header_start[0] != 1 ++ || (void *)read_pointer(&ptr, end, header_start[1], 0, 0) ++ != table_start ++ || !read_pointer(&ptr, end, header_start[2], 0, 0) ++ || !read_pointer(&ptr, end, header_start[3], 0, ++ (unsigned long)header_start) ++ || !read_pointer(&ptr, end, header_start[3], 0, ++ (unsigned long)header_start)) ++ header_start = NULL; ++ table->hdrsz = header_size; ++ smp_wmb(); ++ table->header = header_start; ++ table->link = NULL; ++ table->name = name; ++} ++ ++void __init unwind_init(void) ++{ ++ init_unwind_table(&root_table, "kernel", ++ _text, _end - _text, ++ NULL, 0, ++ __start_unwind, __end_unwind - __start_unwind, ++ __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); ++} ++ ++static const u32 bad_cie, not_fde; ++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); ++static signed fde_pointer_type(const u32 *cie); ++ ++struct eh_frame_hdr_table_entry { ++ unsigned long start, fde; ++}; ++ ++static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) ++{ ++ const struct eh_frame_hdr_table_entry *e1 = p1; ++ const struct eh_frame_hdr_table_entry *e2 = p2; ++ ++ return (e1->start > e2->start) - (e1->start < e2->start); ++} ++ ++static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) ++{ ++ struct eh_frame_hdr_table_entry *e1 = p1; ++ struct eh_frame_hdr_table_entry *e2 = p2; ++ unsigned long v; ++ ++ v = e1->start; ++ e1->start = e2->start; ++ e2->start = v; ++ v = e1->fde; ++ e1->fde = e2->fde; ++ e2->fde = v; ++} ++ ++static void __init setup_unwind_table(struct unwind_table *table, ++ void *(*alloc)(unsigned long)) ++{ ++ const u8 *ptr; ++ unsigned long tableSize = table->size, hdrSize; ++ unsigned n; ++ const u32 *fde; ++ struct { ++ u8 version; ++ u8 eh_frame_ptr_enc; ++ u8 fde_count_enc; ++ u8 table_enc; ++ unsigned long eh_frame_ptr; ++ unsigned int fde_count; ++ struct eh_frame_hdr_table_entry table[]; ++ } __attribute__((__packed__)) *header; ++ ++ if (table->header) ++ return; ++ ++ if (table->hdrsz) ++ printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", ++ table->name); ++ ++ if (tableSize & (sizeof(*fde) - 1)) ++ return; ++ ++ for (fde = table->address, n = 0; ++ tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; ++ tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { ++ const u32 *cie = cie_for_fde(fde, table); ++ signed ptrType; ++ ++ if (cie == &not_fde) ++ continue; ++ if (cie == NULL ++ || cie == &bad_cie ++ || (ptrType = fde_pointer_type(cie)) < 0) ++ return; ++ ptr = (const u8 *)(fde + 2); ++ if (!read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ ptrType, 0, 0)) ++ return; ++ ++n; ++ } ++ ++ if (tableSize || !n) ++ return; ++ ++ hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) ++ + 2 * n * sizeof(unsigned long); ++ dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize); ++ header = alloc(hdrSize); ++ if (!header) ++ return; ++ header->version = 1; ++ header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; ++ header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; ++ header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; ++ put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); ++ BUILD_BUG_ON(offsetof(typeof(*header), fde_count) ++ % __alignof(typeof(header->fde_count))); ++ header->fde_count = n; ++ ++ BUILD_BUG_ON(offsetof(typeof(*header), table) ++ % __alignof(typeof(*header->table))); ++ for (fde = table->address, tableSize = table->size, n = 0; ++ tableSize; ++ tableSize
-= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { ++ const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); ++ ++ if (!fde[1]) ++ continue; /* this is a CIE */ ++ ptr = (const u8 *)(fde + 2); ++ header->table[n].start = read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ fde_pointer_type(cie), 0, 0); ++ header->table[n].fde = (unsigned long)fde; ++ ++n; ++ } ++ WARN_ON(n != header->fde_count); ++ ++ sort(header->table, ++ n, ++ sizeof(*header->table), ++ cmp_eh_frame_hdr_table_entries, ++ swap_eh_frame_hdr_table_entries); ++ ++ table->hdrsz = hdrSize; ++ smp_wmb(); ++ table->header = (const void *)header; ++} ++ ++static void *__init balloc(unsigned long sz) ++{ ++ return __alloc_bootmem_nopanic(sz, ++ sizeof(unsigned int), ++ __pa(MAX_DMA_ADDRESS)); ++} ++ ++void __init unwind_setup(void) ++{ ++ setup_unwind_table(&root_table, balloc); ++} ++ ++#ifdef CONFIG_MODULES ++ ++static struct unwind_table *last_table; ++ ++/* Must be called with module_mutex held. */ ++void *unwind_add_table(struct module *module, ++ const void *table_start, ++ unsigned long table_size) ++{ ++ struct unwind_table *table; ++ ++ if (table_size <= 0) ++ return NULL; ++ ++ table = kmalloc(sizeof(*table), GFP_KERNEL); ++ if (!table) ++ return NULL; ++ ++ init_unwind_table(table, module->name, ++ module->module_core, module->core_size, ++ module->module_init, module->init_size, ++ table_start, table_size, ++ NULL, 0); ++ ++ if (last_table) ++ last_table->link = table; ++ else ++ root_table.link = table; ++ last_table = table; ++ ++ return table; ++} ++ ++struct unlink_table_info ++{ ++ struct unwind_table *table; ++ int init_only; ++}; ++ ++static int unlink_table(void *arg) ++{ ++ struct unlink_table_info *info = arg; ++ struct unwind_table *table = info->table, *prev; ++ ++ for (prev = &root_table; prev->link && prev->link != table; prev = prev->link) ++ ; ++ ++ if (prev->link) { ++ if (info->init_only) { ++ table->init.pc = 0; ++ table->init.range = 0; ++ info->table = NULL; ++ } else { ++ prev->link = table->link; ++ if (!prev->link) ++ last_table = prev; ++ } ++ } else ++ info->table = NULL; ++ ++ return 0; ++} ++ ++/* Must be called with module_mutex held. 
*/ ++void unwind_remove_table(void *handle, int init_only) ++{ ++ struct unwind_table *table = handle; ++ struct unlink_table_info info; ++ ++ if (!table || table == &root_table) ++ return; ++ ++ if (init_only && table == last_table) { ++ table->init.pc = 0; ++ table->init.range = 0; ++ return; ++ } ++ ++ info.table = table; ++ info.init_only = init_only; ++ stop_machine_run(unlink_table, &info, NR_CPUS); ++ ++ if (info.table) ++ kfree(table); ++} ++ ++#endif /* CONFIG_MODULES */ ++ ++static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) ++{ ++ const u8 *cur = *pcur; ++ uleb128_t value; ++ unsigned shift; ++ ++ for (shift = 0, value = 0; cur < end; shift += 7) { ++ if (shift + 7 > 8 * sizeof(value) ++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { ++ cur = end + 1; ++ break; ++ } ++ value |= (uleb128_t)(*cur & 0x7f) << shift; ++ if (!(*cur++ & 0x80)) ++ break; ++ } ++ *pcur = cur; ++ ++ return value; ++} ++ ++static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) ++{ ++ const u8 *cur = *pcur; ++ sleb128_t value; ++ unsigned shift; ++ ++ for (shift = 0, value = 0; cur < end; shift += 7) { ++ if (shift + 7 > 8 * sizeof(value) ++ && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) { ++ cur = end + 1; ++ break; ++ } ++ value |= (sleb128_t)(*cur & 0x7f) << shift; ++ if (!(*cur & 0x80)) { ++ value |= -(*cur++ & 0x40) << shift; ++ break; ++ } ++ } ++ *pcur = cur; ++ ++ return value; ++} ++ ++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) ++{ ++ const u32 *cie; ++ ++ if (!*fde || (*fde & (sizeof(*fde) - 1))) ++ return &bad_cie; ++ if (!fde[1]) ++ return &not_fde; /* this is a CIE */ ++ if ((fde[1] & (sizeof(*fde) - 1)) ++ || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) ++ return NULL; /* this is not a valid FDE */ ++ cie = fde + 1 - fde[1] / sizeof(*fde); ++ if (*cie <= sizeof(*cie) + 4 ++ || *cie >= fde[1] - sizeof(*fde) ++ || (*cie & (sizeof(*cie) - 1)) ++ || cie[1]) ++ return NULL; /* this is not a (valid) CIE */ ++ return cie; ++} ++ ++static unsigned long read_pointer(const u8 **pLoc, ++ const void *end, ++ signed ptrType, ++ unsigned long text_base, ++ unsigned long data_base) ++{ ++ unsigned long value = 0; ++ union { ++ const u8 *p8; ++ const u16 *p16u; ++ const s16 *p16s; ++ const u32 *p32u; ++ const s32 *p32s; ++ const unsigned long *pul; ++ } ptr; ++ ++ if (ptrType < 0 || ptrType == DW_EH_PE_omit) { ++ dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end); ++ return 0; ++ } ++ ptr.p8 = *pLoc; ++ switch(ptrType & DW_EH_PE_FORM) { ++ case DW_EH_PE_data2: ++ if (end < (const void *)(ptr.p16u + 1)) { ++ dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ if(ptrType & DW_EH_PE_signed) ++ value = get_unaligned(ptr.p16s++); ++ else ++ value = get_unaligned(ptr.p16u++); ++ break; ++ case DW_EH_PE_data4: ++#ifdef CONFIG_64BIT ++ if (end < (const void *)(ptr.p32u + 1)) { ++ dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ if(ptrType & DW_EH_PE_signed) ++ value = get_unaligned(ptr.p32s++); ++ else ++ value = get_unaligned(ptr.p32u++); ++ break; ++ case DW_EH_PE_data8: ++ BUILD_BUG_ON(sizeof(u64) != sizeof(value)); ++#else ++ BUILD_BUG_ON(sizeof(u32) != sizeof(value)); ++#endif ++ case DW_EH_PE_native: ++ if (end < (const void *)(ptr.pul + 1)) { ++ dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ value = get_unaligned(ptr.pul++); ++ break; ++ case DW_EH_PE_leb128: ++ BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value)); ++ value =
ptrType & DW_EH_PE_signed ++ ? get_sleb128(&ptr.p8, end) ++ : get_uleb128(&ptr.p8, end); ++ if ((const void *)ptr.p8 > end) { ++ dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ break; ++ default: ++ dprintk(2, "Cannot decode pointer type %02X (%p,%p).", ++ ptrType, ptr.p8, end); ++ return 0; ++ } ++ switch(ptrType & DW_EH_PE_ADJUST) { ++ case DW_EH_PE_abs: ++ break; ++ case DW_EH_PE_pcrel: ++ value += (unsigned long)*pLoc; ++ break; ++ case DW_EH_PE_textrel: ++ if (likely(text_base)) { ++ value += text_base; ++ break; ++ } ++ dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.", ++ ptrType, *pLoc, end); ++ return 0; ++ case DW_EH_PE_datarel: ++ if (likely(data_base)) { ++ value += data_base; ++ break; ++ } ++ dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.", ++ ptrType, *pLoc, end); ++ return 0; ++ default: ++ dprintk(2, "Cannot adjust pointer type %02X (%p,%p).", ++ ptrType, *pLoc, end); ++ return 0; ++ } ++ if ((ptrType & DW_EH_PE_indirect) ++ && probe_kernel_address((unsigned long *)value, value)) { ++ dprintk(1, "Cannot read indirect value %lx (%p,%p).", ++ value, *pLoc, end); ++ return 0; ++ } ++ *pLoc = ptr.p8; ++ ++ return value; ++} ++ ++static signed fde_pointer_type(const u32 *cie) ++{ ++ const u8 *ptr = (const u8 *)(cie + 2); ++ unsigned version = *ptr; ++ ++ if (version != 1) ++ return -1; /* unsupported */ ++ if (*++ptr) { ++ const char *aug; ++ const u8 *end = (const u8 *)(cie + 1) + *cie; ++ uleb128_t len; ++ ++ /* check if augmentation size is first (and thus present) */ ++ if (*ptr != 'z') ++ return -1; ++ /* check if augmentation string is nul-terminated */ ++ if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL) ++ return -1; ++ ++ptr; /* skip terminator */ ++ get_uleb128(&ptr, end); /* skip code alignment */ ++ get_sleb128(&ptr, end); /* skip data alignment */ ++ /* skip return address column */ ++ version <= 1 ? 
(void)++ptr : (void)get_uleb128(&ptr, end); ++ len = get_uleb128(&ptr, end); /* augmentation length */ ++ if (ptr + len < ptr || ptr + len > end) ++ return -1; ++ end = ptr + len; ++ while (*++aug) { ++ if (ptr >= end) ++ return -1; ++ switch(*aug) { ++ case 'L': ++ ++ptr; ++ break; ++ case 'P': { ++ signed ptrType = *ptr++; ++ ++ if (!read_pointer(&ptr, end, ptrType, 0, 0) ++ || ptr > end) ++ return -1; ++ } ++ break; ++ case 'R': ++ return *ptr; ++ default: ++ return -1; ++ } ++ } ++ } ++ return DW_EH_PE_native|DW_EH_PE_abs; ++} ++ ++static int advance_loc(unsigned long delta, struct unwind_state *state) ++{ ++ state->loc += delta * state->codeAlign; ++ ++ return delta > 0; ++} ++ ++static void set_rule(uleb128_t reg, ++ enum item_location where, ++ uleb128_t value, ++ struct unwind_state *state) ++{ ++ if (reg < ARRAY_SIZE(state->regs)) { ++ state->regs[reg].where = where; ++ state->regs[reg].value = value; ++ } ++} ++ ++static int processCFI(const u8 *start, ++ const u8 *end, ++ unsigned long targetLoc, ++ signed ptrType, ++ struct unwind_state *state) ++{ ++ union { ++ const u8 *p8; ++ const u16 *p16; ++ const u32 *p32; ++ } ptr; ++ int result = 1; ++ ++ if (start != state->cieStart) { ++ state->loc = state->org; ++ result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state); ++ if (targetLoc == 0 && state->label == NULL) ++ return result; ++ } ++ for (ptr.p8 = start; result && ptr.p8 < end; ) { ++ switch(*ptr.p8 >> 6) { ++ uleb128_t value; ++ ++ case 0: ++ switch(*ptr.p8++) { ++ case DW_CFA_nop: ++ break; ++ case DW_CFA_set_loc: ++ state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0); ++ if (state->loc == 0) ++ result = 0; ++ break; ++ case DW_CFA_advance_loc1: ++ result = ptr.p8 < end && advance_loc(*ptr.p8++, state); ++ break; ++ case DW_CFA_advance_loc2: ++ result = ptr.p8 <= end + 2 ++ && advance_loc(*ptr.p16++, state); ++ break; ++ case DW_CFA_advance_loc4: ++ result = ptr.p8 <= end + 4 ++ && advance_loc(*ptr.p32++, state); ++ break; ++ case DW_CFA_offset_extended: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_val_offset: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, Value, get_uleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_offset_extended_sf: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, Memory, get_sleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_val_offset_sf: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, Value, get_sleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_restore_extended: ++ case DW_CFA_undefined: ++ case DW_CFA_same_value: ++ set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state); ++ break; ++ case DW_CFA_register: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, ++ Register, ++ get_uleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_remember_state: ++ if (ptr.p8 == state->label) { ++ state->label = NULL; ++ return 1; ++ } ++ if (state->stackDepth >= MAX_STACK_DEPTH) { ++ dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ state->stack[state->stackDepth++] = ptr.p8; ++ break; ++ case DW_CFA_restore_state: ++ if (state->stackDepth) { ++ const uleb128_t loc = state->loc; ++ const u8 *label = state->label; ++ ++ state->label = state->stack[state->stackDepth - 1]; ++ memcpy(&state->cfa, &badCFA, sizeof(state->cfa)); ++ memset(state->regs, 0, sizeof(state->regs)); ++ state->stackDepth = 0; ++ result = processCFI(start, end, 0, ptrType, state); ++ state->loc = loc; ++ state->label = 
label; ++ } else { ++ dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end); ++ return 0; ++ } ++ break; ++ case DW_CFA_def_cfa: ++ state->cfa.reg = get_uleb128(&ptr.p8, end); ++ /*nobreak*/ ++ case DW_CFA_def_cfa_offset: ++ state->cfa.offs = get_uleb128(&ptr.p8, end); ++ break; ++ case DW_CFA_def_cfa_sf: ++ state->cfa.reg = get_uleb128(&ptr.p8, end); ++ /*nobreak*/ ++ case DW_CFA_def_cfa_offset_sf: ++ state->cfa.offs = get_sleb128(&ptr.p8, end) ++ * state->dataAlign; ++ break; ++ case DW_CFA_def_cfa_register: ++ state->cfa.reg = get_uleb128(&ptr.p8, end); ++ break; ++ /*todo case DW_CFA_def_cfa_expression: */ ++ /*todo case DW_CFA_expression: */ ++ /*todo case DW_CFA_val_expression: */ ++ case DW_CFA_GNU_args_size: ++ get_uleb128(&ptr.p8, end); ++ break; ++ case DW_CFA_GNU_negative_offset_extended: ++ value = get_uleb128(&ptr.p8, end); ++ set_rule(value, ++ Memory, ++ (uleb128_t)0 - get_uleb128(&ptr.p8, end), state); ++ break; ++ case DW_CFA_GNU_window_save: ++ default: ++ dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end); ++ result = 0; ++ break; ++ } ++ break; ++ case 1: ++ result = advance_loc(*ptr.p8++ & 0x3f, state); ++ break; ++ case 2: ++ value = *ptr.p8++ & 0x3f; ++ set_rule(value, Memory, get_uleb128(&ptr.p8, end), state); ++ break; ++ case 3: ++ set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state); ++ break; ++ } ++ if (ptr.p8 > end) { ++ dprintk(1, "Data overrun (%p,%p).", ptr.p8, end); ++ result = 0; ++ } ++ if (result && targetLoc != 0 && targetLoc < state->loc) ++ return 1; ++ } ++ ++ if (result && ptr.p8 < end) ++ dprintk(1, "Data underrun (%p,%p).", ptr.p8, end); ++ ++ return result ++ && ptr.p8 == end ++ && (targetLoc == 0 ++ || (/*todo While in theory this should apply, gcc in practice omits ++ everything past the function prolog, and hence the location ++ never reaches the end of the function. ++ targetLoc < state->loc &&*/ state->label == NULL)); ++} ++ ++/* Unwind to the previous frame. Returns 0 if successful, negative ++ * number in case of an error.
*/ ++int unwind(struct unwind_frame_info *frame) ++{ ++#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs]) ++ const u32 *fde = NULL, *cie = NULL; ++ const u8 *ptr = NULL, *end = NULL; ++ unsigned long pc = UNW_PC(frame) - frame->call_frame, sp; ++ unsigned long startLoc = 0, endLoc = 0, cfa; ++ unsigned i; ++ signed ptrType = -1; ++ uleb128_t retAddrReg = 0; ++ const struct unwind_table *table; ++ struct unwind_state state; ++ ++ if (UNW_PC(frame) == 0) ++ return -EINVAL; ++ if ((table = find_table(pc)) != NULL ++ && !(table->size & (sizeof(*fde) - 1))) { ++ const u8 *hdr = table->header; ++ unsigned long tableSize; ++ ++ smp_rmb(); ++ if (hdr && hdr[0] == 1) { ++ switch(hdr[3] & DW_EH_PE_FORM) { ++ case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; ++ case DW_EH_PE_data2: tableSize = 2; break; ++ case DW_EH_PE_data4: tableSize = 4; break; ++ case DW_EH_PE_data8: tableSize = 8; break; ++ default: tableSize = 0; break; ++ } ++ ptr = hdr + 4; ++ end = hdr + table->hdrsz; ++ if (tableSize ++ && read_pointer(&ptr, end, hdr[1], 0, 0) ++ == (unsigned long)table->address ++ && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0 ++ && i == (end - ptr) / (2 * tableSize) ++ && !((end - ptr) % (2 * tableSize))) { ++ do { ++ const u8 *cur = ptr + (i / 2) * (2 * tableSize); ++ ++ startLoc = read_pointer(&cur, ++ cur + tableSize, ++ hdr[3], 0, ++ (unsigned long)hdr); ++ if (pc < startLoc) ++ i /= 2; ++ else { ++ ptr = cur - tableSize; ++ i = (i + 1) / 2; ++ } ++ } while (startLoc && i > 1); ++ if (i == 1 ++ && (startLoc = read_pointer(&ptr, ++ ptr + tableSize, ++ hdr[3], 0, ++ (unsigned long)hdr)) != 0 ++ && pc >= startLoc) ++ fde = (void *)read_pointer(&ptr, ++ ptr + tableSize, ++ hdr[3], 0, ++ (unsigned long)hdr); ++ } ++ } ++ if(hdr && !fde) ++ dprintk(3, "Binary lookup for %lx failed.", pc); ++ ++ if (fde != NULL) { ++ cie = cie_for_fde(fde, table); ++ ptr = (const u8 *)(fde + 2); ++ if(cie != NULL ++ && cie != &bad_cie ++ && cie != &not_fde ++ && (ptrType = fde_pointer_type(cie)) >= 0 ++ && read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ ptrType, 0, 0) == startLoc) { ++ if (!(ptrType & DW_EH_PE_indirect)) ++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; ++ endLoc = startLoc ++ + read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ ptrType, 0, 0); ++ if(pc >= endLoc) ++ fde = NULL; ++ } else ++ fde = NULL; ++ if(!fde) ++ dprintk(1, "Binary lookup result for %lx discarded.", pc); ++ } ++ if (fde == NULL) { ++ for (fde = table->address, tableSize = table->size; ++ cie = NULL, tableSize > sizeof(*fde) ++ && tableSize - sizeof(*fde) >= *fde; ++ tableSize -= sizeof(*fde) + *fde, ++ fde += 1 + *fde / sizeof(*fde)) { ++ cie = cie_for_fde(fde, table); ++ if (cie == &bad_cie) { ++ cie = NULL; ++ break; ++ } ++ if (cie == NULL ++ || cie == &not_fde ++ || (ptrType = fde_pointer_type(cie)) < 0) ++ continue; ++ ptr = (const u8 *)(fde + 2); ++ startLoc = read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ ptrType, 0, 0); ++ if (!startLoc) ++ continue; ++ if (!(ptrType & DW_EH_PE_indirect)) ++ ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; ++ endLoc = startLoc ++ + read_pointer(&ptr, ++ (const u8 *)(fde + 1) + *fde, ++ ptrType, 0, 0); ++ if (pc >= startLoc && pc < endLoc) ++ break; ++ } ++ if(!fde) ++ dprintk(3, "Linear lookup for %lx failed.", pc); ++ } ++ } ++ if (cie != NULL) { ++ memset(&state, 0, sizeof(state)); ++ state.cieEnd = ptr; /* keep here temporarily */ ++ ptr = (const u8 *)(cie + 2); ++ end = (const u8 *)(cie + 1) + *cie; ++ frame->call_frame = 1; ++ if ((state.version = *ptr) != 1) ++
cie = NULL; /* unsupported version */ ++ else if (*++ptr) { ++ /* check if augmentation size is first (and thus present) */ ++ if (*ptr == 'z') { ++ while (++ptr < end && *ptr) { ++ switch(*ptr) { ++ /* check for ignorable (or already handled) ++ * nul-terminated augmentation string */ ++ case 'L': ++ case 'P': ++ case 'R': ++ continue; ++ case 'S': ++ frame->call_frame = 0; ++ continue; ++ default: ++ break; ++ } ++ break; ++ } ++ } ++ if (ptr >= end || *ptr) ++ cie = NULL; ++ } ++ if(!cie) ++ dprintk(1, "CIE unusable (%p,%p).", ptr, end); ++ ++ptr; ++ } ++ if (cie != NULL) { ++ /* get code alignment factor */ ++ state.codeAlign = get_uleb128(&ptr, end); ++ /* get data alignment factor */ ++ state.dataAlign = get_sleb128(&ptr, end); ++ if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end) ++ cie = NULL; ++ else if (UNW_PC(frame) % state.codeAlign ++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) { ++ dprintk(1, "Input pointer(s) misaligned (%lx,%lx).", ++ UNW_PC(frame), UNW_SP(frame)); ++ return -EPERM; ++ } else { ++ retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end); ++ /* skip augmentation */ ++ if (((const char *)(cie + 2))[1] == 'z') { ++ uleb128_t augSize = get_uleb128(&ptr, end); ++ ++ ptr += augSize; ++ } ++ if (ptr > end ++ || retAddrReg >= ARRAY_SIZE(reg_info) ++ || REG_INVALID(retAddrReg) ++ || reg_info[retAddrReg].width != sizeof(unsigned long)) ++ cie = NULL; ++ } ++ if(!cie) ++ dprintk(1, "CIE validation failed (%p,%p).", ptr, end); ++ } ++ if (cie != NULL) { ++ state.cieStart = ptr; ++ ptr = state.cieEnd; ++ state.cieEnd = end; ++ end = (const u8 *)(fde + 1) + *fde; ++ /* skip augmentation */ ++ if (((const char *)(cie + 2))[1] == 'z') { ++ uleb128_t augSize = get_uleb128(&ptr, end); ++ ++ if ((ptr += augSize) > end) ++ fde = NULL; ++ } ++ if(!fde) ++ dprintk(1, "FDE validation failed (%p,%p).", ptr, end); ++ } ++ if (cie == NULL || fde == NULL) { ++#ifdef CONFIG_FRAME_POINTER ++ unsigned long top, bottom; ++ ++ if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long)) ++ return -EPERM; ++ top = STACK_TOP(frame->task); ++ bottom = STACK_BOTTOM(frame->task); ++# if FRAME_RETADDR_OFFSET < 0 ++ if (UNW_SP(frame) < top ++ && UNW_FP(frame) <= UNW_SP(frame) ++ && bottom < UNW_FP(frame) ++# else ++ if (UNW_SP(frame) > top ++ && UNW_FP(frame) >= UNW_SP(frame) ++ && bottom > UNW_FP(frame) ++# endif ++ && !((UNW_SP(frame) | UNW_FP(frame)) ++ & (sizeof(unsigned long) - 1))) { ++ unsigned long link; ++ ++ if (!probe_kernel_address( ++ (unsigned long *)(UNW_FP(frame) ++ + FRAME_LINK_OFFSET), ++ link) ++# if FRAME_RETADDR_OFFSET < 0 ++ && link > bottom && link < UNW_FP(frame) ++# else ++ && link > UNW_FP(frame) && link < bottom ++# endif ++ && !(link & (sizeof(link) - 1)) ++ && !probe_kernel_address( ++ (unsigned long *)(UNW_FP(frame) ++ + FRAME_RETADDR_OFFSET), UNW_PC(frame))) { ++ UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET ++# if FRAME_RETADDR_OFFSET < 0 ++ - ++# else ++ + ++# endif ++ sizeof(UNW_PC(frame)); ++ UNW_FP(frame) = link; ++ return 0; ++ } ++ } ++#endif ++ return -ENXIO; ++ } ++ state.org = startLoc; ++ memcpy(&state.cfa, &badCFA, sizeof(state.cfa)); ++ /* process instructions */ ++ if (!processCFI(ptr, end, pc, ptrType, &state) ++ || state.loc > endLoc ++ || state.regs[retAddrReg].where == Nowhere ++ || state.cfa.reg >= ARRAY_SIZE(reg_info) ++ || reg_info[state.cfa.reg].width != sizeof(unsigned long) ++ || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long) ++ || state.cfa.offs % sizeof(unsigned long)) { ++ dprintk(1,
"Unusable unwind info (%p,%p).", ptr, end); ++ return -EIO; ++ } ++ /* update frame */ ++#ifndef CONFIG_AS_CFI_SIGNAL_FRAME ++ if(frame->call_frame ++ && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign)) ++ frame->call_frame = 0; ++#endif ++ cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs; ++ startLoc = min((unsigned long)UNW_SP(frame), cfa); ++ endLoc = max((unsigned long)UNW_SP(frame), cfa); ++ if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) { ++ startLoc = min(STACK_LIMIT(cfa), cfa); ++ endLoc = max(STACK_LIMIT(cfa), cfa); ++ } ++#ifndef CONFIG_64BIT ++# define CASES CASE(8); CASE(16); CASE(32) ++#else ++# define CASES CASE(8); CASE(16); CASE(32); CASE(64) ++#endif ++ pc = UNW_PC(frame); ++ sp = UNW_SP(frame); ++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { ++ if (REG_INVALID(i)) { ++ if (state.regs[i].where == Nowhere) ++ continue; ++ dprintk(1, "Cannot restore register %u (%d).", ++ i, state.regs[i].where); ++ return -EIO; ++ } ++ switch(state.regs[i].where) { ++ default: ++ break; ++ case Register: ++ if (state.regs[i].value >= ARRAY_SIZE(reg_info) ++ || REG_INVALID(state.regs[i].value) ++ || reg_info[i].width > reg_info[state.regs[i].value].width) { ++ dprintk(1, "Cannot restore register %u from register %lu.", ++ i, state.regs[i].value); ++ return -EIO; ++ } ++ switch(reg_info[state.regs[i].value].width) { ++#define CASE(n) \ ++ case sizeof(u##n): \ ++ state.regs[i].value = FRAME_REG(state.regs[i].value, \ ++ const u##n); \ ++ break ++ CASES; ++#undef CASE ++ default: ++ dprintk(1, "Unsupported register size %u (%lu).", ++ reg_info[state.regs[i].value].width, ++ state.regs[i].value); ++ return -EIO; ++ } ++ break; ++ } ++ } ++ for (i = 0; i < ARRAY_SIZE(state.regs); ++i) { ++ if (REG_INVALID(i)) ++ continue; ++ switch(state.regs[i].where) { ++ case Nowhere: ++ if (reg_info[i].width != sizeof(UNW_SP(frame)) ++ || &FRAME_REG(i, __typeof__(UNW_SP(frame))) ++ != &UNW_SP(frame)) ++ continue; ++ UNW_SP(frame) = cfa; ++ break; ++ case Register: ++ switch(reg_info[i].width) { ++#define CASE(n) case sizeof(u##n): \ ++ FRAME_REG(i, u##n) = state.regs[i].value; \ ++ break ++ CASES; ++#undef CASE ++ default: ++ dprintk(1, "Unsupported register size %u (%u).", ++ reg_info[i].width, i); ++ return -EIO; ++ } ++ break; ++ case Value: ++ if (reg_info[i].width != sizeof(unsigned long)) { ++ dprintk(1, "Unsupported value size %u (%u).", ++ reg_info[i].width, i); ++ return -EIO; ++ } ++ FRAME_REG(i, unsigned long) = cfa + state.regs[i].value ++ * state.dataAlign; ++ break; ++ case Memory: { ++ unsigned long addr = cfa + state.regs[i].value ++ * state.dataAlign; ++ ++ if ((state.regs[i].value * state.dataAlign) ++ % sizeof(unsigned long) ++ || addr < startLoc ++ || addr + sizeof(unsigned long) < addr ++ || addr + sizeof(unsigned long) > endLoc) { ++ dprintk(1, "Bad memory location %lx (%lx).", ++ addr, state.regs[i].value); ++ return -EIO; ++ } ++ switch(reg_info[i].width) { ++#define CASE(n) case sizeof(u##n): \ ++ probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \ ++ break ++ CASES; ++#undef CASE ++ default: ++ dprintk(1, "Unsupported memory size %u (%u).", ++ reg_info[i].width, i); ++ return -EIO; ++ } ++ } ++ break; ++ } ++ } ++ ++ if (UNW_PC(frame) % state.codeAlign ++ || UNW_SP(frame) % sleb128abs(state.dataAlign)) { ++ dprintk(1, "Output pointer(s) misaligned (%lx,%lx).", ++ UNW_PC(frame), UNW_SP(frame)); ++ return -EIO; ++ } ++ if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) { ++ dprintk(1, "No progress (%lx,%lx).", pc, sp); ++ return -EIO; ++ } ++ ++ return 0; 
++#undef CASES ++#undef FRAME_REG ++} ++EXPORT_SYMBOL(unwind); ++ ++int unwind_init_frame_info(struct unwind_frame_info *info, ++ struct task_struct *tsk, ++ /*const*/ struct pt_regs *regs) ++{ ++ info->task = tsk; ++ info->call_frame = 0; ++ arch_unw_init_frame_info(info, regs); ++ ++ return 0; ++} ++EXPORT_SYMBOL(unwind_init_frame_info); ++ ++/* ++ * Prepare to unwind a blocked task. ++ */ ++int unwind_init_blocked(struct unwind_frame_info *info, ++ struct task_struct *tsk) ++{ ++ info->task = tsk; ++ info->call_frame = 0; ++ arch_unw_init_blocked(info); ++ ++ return 0; ++} ++EXPORT_SYMBOL(unwind_init_blocked); ++ ++/* ++ * Prepare to unwind the currently running thread. ++ */ ++int unwind_init_running(struct unwind_frame_info *info, ++ asmlinkage int (*callback)(struct unwind_frame_info *, ++ void *arg), ++ void *arg) ++{ ++ info->task = current; ++ info->call_frame = 0; ++ ++ return arch_unwind_init_running(info, callback, arg); ++} ++EXPORT_SYMBOL(unwind_init_running); ++ +diff -Nurb linux-2.6.22-570/kernel/user.c linux-2.6.22-591/kernel/user.c +--- linux-2.6.22-570/kernel/user.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/user.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,17 +14,17 @@ + #include + #include + #include ++#include ++#include + + /* + * UID task count cache, to get fast user lookup in "alloc_uid" + * when changing user ID's (ie setuid() and friends). + */ + +-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) +-#define UIDHASH_SZ (1 << UIDHASH_BITS) + #define UIDHASH_MASK (UIDHASH_SZ - 1) + #define __uidhashfn(xid,uid) ((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK) +-#define uidhashentry(xid,uid) (uidhash_table + __uidhashfn((xid),(uid))) ++#define uidhashentry(ns, xid, uid) ((ns)->uidhash_table + __uidhashfn(xid, uid)) + + static struct kmem_cache *uid_cachep; + static struct list_head uidhash_table[UIDHASH_SZ]; +@@ -94,9 +94,10 @@ + { + struct user_struct *ret; + unsigned long flags; ++ struct user_namespace *ns = current->nsproxy->user_ns; + + spin_lock_irqsave(&uidhash_lock, flags); +- ret = uid_hash_find(xid, uid, uidhashentry(xid, uid)); ++ ret = uid_hash_find(xid, uid, uidhashentry(ns, xid, uid)); + spin_unlock_irqrestore(&uidhash_lock, flags); + return ret; + } +@@ -122,7 +123,8 @@ + + struct user_struct * alloc_uid(xid_t xid, uid_t uid) + { +- struct list_head *hashent = uidhashentry(xid, uid); ++ struct user_namespace *ns = current->nsproxy->user_ns; ++ struct list_head *hashent = uidhashentry(ns,xid, uid); + struct user_struct *up; + + spin_lock_irq(&uidhash_lock); +@@ -212,11 +214,11 @@ + 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + + for(n = 0; n < UIDHASH_SZ; ++n) +- INIT_LIST_HEAD(uidhash_table + n); ++ INIT_LIST_HEAD(init_user_ns.uidhash_table + n); + + /* Insert the root user immediately (init already runs as root) */ + spin_lock_irq(&uidhash_lock); +- uid_hash_insert(&root_user, uidhashentry(0,0)); ++ uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0, 0)); + spin_unlock_irq(&uidhash_lock); + + return 0; +diff -Nurb linux-2.6.22-570/kernel/user_namespace.c linux-2.6.22-591/kernel/user_namespace.c +--- linux-2.6.22-570/kernel/user_namespace.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/kernel/user_namespace.c 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,87 @@ ++/* ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License as ++ * published by the Free Software Foundation, version 2 of the ++ * License. 
++ */ ++ ++#include ++#include ++#include ++#include ++ ++struct user_namespace init_user_ns = { ++ .kref = { ++ .refcount = ATOMIC_INIT(2), ++ }, ++ .root_user = &root_user, ++}; ++ ++EXPORT_SYMBOL_GPL(init_user_ns); ++ ++#ifdef CONFIG_USER_NS ++ ++/* ++ * Clone a new ns copying an original user ns, setting refcount to 1 ++ * @old_ns: namespace to clone ++ * Return NULL on error (failure to kmalloc), new ns otherwise ++ */ ++static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) ++{ ++ struct user_namespace *ns; ++ struct user_struct *new_user; ++ int n; ++ ++ ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL); ++ if (!ns) ++ return ERR_PTR(-ENOMEM); ++ ++ kref_init(&ns->kref); ++ ++ for (n = 0; n < UIDHASH_SZ; ++n) ++ INIT_LIST_HEAD(ns->uidhash_table + n); ++ ++ /* Insert new root user. */ ++ ns->root_user = alloc_uid(ns, 0); ++ if (!ns->root_user) { ++ kfree(ns); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ /* Reset current->user with a new one */ ++ new_user = alloc_uid(ns, current->uid); ++ if (!new_user) { ++ free_uid(ns->root_user); ++ kfree(ns); ++ return ERR_PTR(-ENOMEM); ++ } ++ ++ switch_uid(new_user); ++ return ns; ++} ++ ++struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) ++{ ++ struct user_namespace *new_ns; ++ ++ BUG_ON(!old_ns); ++ get_user_ns(old_ns); ++ ++ if (!(flags & CLONE_NEWUSER)) ++ return old_ns; ++ ++ new_ns = clone_user_ns(old_ns); ++ ++ put_user_ns(old_ns); ++ return new_ns; ++} ++ ++void free_user_ns(struct kref *kref) ++{ ++ struct user_namespace *ns; ++ ++ ns = container_of(kref, struct user_namespace, kref); ++ kfree(ns); ++} ++ ++#endif /* CONFIG_USER_NS */ +diff -Nurb linux-2.6.22-570/kernel/utsname.c linux-2.6.22-591/kernel/utsname.c +--- linux-2.6.22-570/kernel/utsname.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/kernel/utsname.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + /* + * Clone a new ns copying an original utsname, setting refcount to 1 +@@ -25,11 +26,12 @@ + struct uts_namespace *ns; + + ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL); +- if (ns) { ++ if (!ns) ++ return ERR_PTR(-ENOMEM); ++ + memcpy(&ns->name, &old_ns->name, sizeof(ns->name)); + kref_init(&ns->kref); +- atomic_inc(&vs_global_uts_ns); +- } ++ + return ns; + } + +@@ -39,7 +41,7 @@ + * utsname of this process won't be seen by parent, and vice + * versa. 
+ */ +-struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns) ++struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns) + { + struct uts_namespace *new_ns; + +diff -Nurb linux-2.6.22-570/kernel/utsname_sysctl.c linux-2.6.22-591/kernel/utsname_sysctl.c +--- linux-2.6.22-570/kernel/utsname_sysctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/kernel/utsname_sysctl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -18,10 +18,7 @@ + static void *get_uts(ctl_table *table, int write) + { + char *which = table->data; +-#ifdef CONFIG_UTS_NS +- struct uts_namespace *uts_ns = current->nsproxy->uts_ns; +- which = (which - (char *)&init_uts_ns) + (char *)uts_ns; +-#endif ++ + if (!write) + down_read(&uts_sem); + else +diff -Nurb linux-2.6.22-570/kernel/workqueue.c linux-2.6.22-591/kernel/workqueue.c +--- linux-2.6.22-570/kernel/workqueue.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/kernel/workqueue.c 2007-12-21 15:36:12.000000000 -0500 +@@ -282,8 +282,8 @@ + struct cpu_workqueue_struct *cwq = __cwq; + DEFINE_WAIT(wait); + +- if (!cwq->wq->freezeable) +- current->flags |= PF_NOFREEZE; ++ if (cwq->wq->freezeable) ++ set_freezable(); + + set_user_nice(current, -5); + +diff -Nurb linux-2.6.22-570/lib/Kconfig.debug linux-2.6.22-591/lib/Kconfig.debug +--- linux-2.6.22-570/lib/Kconfig.debug 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/lib/Kconfig.debug 2007-12-21 15:36:12.000000000 -0500 +@@ -364,6 +364,24 @@ + some architectures or if you use external debuggers. + If you don't debug the kernel, you can say N. + ++config UNWIND_INFO ++ bool "Compile the kernel with frame unwind information" ++ depends on !IA64 && !PARISC && !ARM ++ depends on !MODULES || !(MIPS || PPC || SUPERH || V850) ++ help ++ If you say Y here the resulting kernel image will be slightly larger ++ but not slower, and it will give very useful debugging information. ++ If you don't debug the kernel, you can say N, but we may not be able ++ to solve problems without frame unwind information or frame pointers. ++ ++config STACK_UNWIND ++ bool "Stack unwind support" ++ depends on UNWIND_INFO ++ depends on X86 ++ help ++ This enables more precise stack traces, omitting all unrelated ++ occurrences of pointers into kernel code from the dump. ++ + config FORCED_INLINING + bool "Force gcc to inline functions marked 'inline'" + depends on DEBUG_KERNEL +@@ -409,6 +427,9 @@ + config FAULT_INJECTION + bool "Fault-injection framework" + depends on DEBUG_KERNEL ++ # could support fp on X86_32 here too, but let's not ++ select UNWIND_INFO if X86 ++ select STACK_UNWIND if X86 + help + Provide fault-injection framework. + For more details, see Documentation/fault-injection/. 
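The STACK_UNWIND machinery above is driven through the small API that kernel/unwind.c exports earlier in this patch: initialize a frame-info object, then call unwind() once per frame until it fails. The following sketch shows the intended calling pattern; dump_one_frame, dump_current_stack, the 32-frame cap and the printk format are names made up for this note (the patch's real consumers are the arch show_trace/dump_stack paths), and UNW_PC is the arch-provided accessor used throughout kernel/unwind.c.

	/* Sketch: walk the currently running stack with the unwind API. */
	static asmlinkage int dump_one_frame(struct unwind_frame_info *info,
	                                     void *arg)
	{
		int depth = 0;

		/* unwind() returns 0 for each further frame it can reach. */
		while (depth < 32 && unwind(info) == 0 && UNW_PC(info))
			printk(KERN_DEBUG " #%d [<%08lx>]\n",
			       depth++, UNW_PC(info));
		return depth;
	}

	static void dump_current_stack(void)
	{
		struct unwind_frame_info info;

		/* Captures the running context, then invokes the callback. */
		unwind_init_running(&info, dump_one_frame, NULL);
	}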
+@@ -445,3 +466,5 @@ + select FRAME_POINTER + help + Provide stacktrace filter for fault-injection capabilities ++ ++source "lib/Kconfig.kgdb" +diff -Nurb linux-2.6.22-570/lib/Kconfig.kgdb linux-2.6.22-591/lib/Kconfig.kgdb +--- linux-2.6.22-570/lib/Kconfig.kgdb 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/lib/Kconfig.kgdb 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,255 @@ ++ ++config WANT_EXTRA_DEBUG_INFORMATION ++ bool ++ select DEBUG_INFO ++ select UNWIND_INFO ++ select FRAME_POINTER if X86 || SUPERH ++ default n ++ ++config UNWIND_INFO ++ bool ++ default n ++ ++config KGDB ++ bool "KGDB: kernel debugging with remote gdb" ++ select WANT_EXTRA_DEBUG_INFORMATION ++ select KGDB_ARCH_HAS_SHADOW_INFO if X86_64 ++ depends on DEBUG_KERNEL && (ARM || X86 || MIPS || (SUPERH && !SUPERH64) || IA64 || PPC) ++ help ++ If you say Y here, it will be possible to remotely debug the ++ kernel using gdb. Documentation of the kernel debugger is available ++ at http://kgdb.sourceforge.net as well as in DocBook form ++ in Documentation/DocBook/. If unsure, say N. ++ ++config KGDB_ARCH_HAS_SHADOW_INFO ++ bool ++ ++config KGDB_CONSOLE ++ bool "KGDB: Console messages through gdb" ++ depends on KGDB ++ help ++ If you say Y here, console messages will appear through gdb. ++ Other consoles such as tty or ttyS will continue to work as usual. ++ Note that if you use this in conjunction with KGDB_ETH and the ++ ethernet driver runs into an error condition during use with KGDB, ++ it is possible to hit an infinite recursion, causing the kernel ++ to crash, and typically reboot. For this reason, it is preferable ++ to use NETCONSOLE in conjunction with KGDB_ETH instead of ++ KGDB_CONSOLE. ++ ++choice ++ prompt "Method for KGDB communication" ++ depends on KGDB ++ default KGDB_MPSC if SERIAL_MPSC ++ default KGDB_CPM_UART if (CPM2 || 8xx) ++ default KGDB_SIBYTE if SIBYTE_SB1xxx_SOC ++ default KGDB_TXX9 if CPU_TX49XX ++ default KGDB_SH_SCI if SERIAL_SH_SCI ++ default KGDB_PXA_SERIAL if ARCH_PXA ++ default KGDB_AMBA_PL011 if ARM_AMBA ++ default KGDB_8250_NOMODULE ++ help ++ There are a number of different ways in which you can communicate ++ with KGDB. The most common is via serial, with the 8250 driver ++ (should your hardware have an 8250, or ns1655x style uart). ++ Another option is to use the NETPOLL framework and UDP, should ++ your ethernet card support this. Other options may exist. ++ You can elect to have one core I/O driver that is built into the ++ kernel for debugging as the kernel is booting, or to use only ++ kernel modules. ++ ++config KGDB_ONLY_MODULES ++ bool "KGDB: Use only kernel modules for I/O" ++ depends on MODULES ++ help ++ Use only kernel modules to configure KGDB I/O after the ++ kernel is booted. ++ ++config KGDB_8250_NOMODULE ++ bool "KGDB: On generic serial port (8250)" ++ select KGDB_8250 ++ help ++ Uses generic serial port (8250) to communicate with the host ++ GDB. This is independent of the normal (SERIAL_8250) driver ++ for this chipset. ++ ++config KGDBOE_NOMODULE ++ bool "KGDB: On ethernet - in kernel" ++ select KGDBOE ++ help ++ Uses the NETPOLL API to communicate with the host GDB via UDP. ++ In order for this to work, the ethernet interface specified must ++ support the NETPOLL API, and this must be initialized at boot. ++ See the documentation for syntax. ++ ++config KGDB_MPSC ++ bool "KGDB: On MV64x60 MPSC" ++ depends on SERIAL_MPSC ++ help ++ Uses a Marvell GT64260B or MV64x60 Multi-Purpose Serial ++ Controller (MPSC) channel. Note that the GT64260A is not ++ supported.
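For orientation, taking the default serial path above ends up as a .config fragment roughly like the following. This is a sketch only; the exact symbol set depends on the architecture and the menu choices (KGDB_8250 is selected automatically by KGDB_8250_NOMODULE, and the baud rate and port number are configured by the entries further below):

	CONFIG_KGDB=y
	CONFIG_KGDB_8250_NOMODULE=y
	CONFIG_KGDB_8250=y
	CONFIG_KGDB_SIMPLE_SERIAL=y
	CONFIG_KGDB_BAUDRATE=115200
	CONFIG_KGDB_PORT_NUM=1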
++ ++config KGDB_CPM_UART ++ bool "KGDB: On CPM UART" ++ depends on PPC && (CPM2 || 8xx) ++ help ++ Uses CPM UART to communicate with the host GDB. ++ ++config KGDB_SIBYTE ++ bool "KGDB: On Broadcom SB1xxx serial port" ++ depends on MIPS && SIBYTE_SB1xxx_SOC ++ ++config KGDB_TXX9 ++ bool "KGDB: On TX49xx serial port" ++ depends on MIPS && CPU_TX49XX ++ help ++ Uses TX49xx serial port to communicate with the host KGDB. ++ ++config KGDB_SH_SCI ++ bool "KGDB: On SH SCI(F) serial port" ++ depends on SUPERH && SERIAL_SH_SCI ++ help ++ Uses the SH SCI(F) serial port to communicate with the host GDB. ++ ++config KGDB_AMBA_PL011 ++ bool "KGDB: On ARM AMBA PL011 Serial Port" ++ depends on ARM && ARCH_VERSATILE ++ help ++ Enables the KGDB serial driver for the AMBA bus PL011 serial ++ devices from ARM. ++ ++config KGDB_PXA_SERIAL ++ bool "KGDB: On the PXA2xx serial port" ++ depends on ARCH_PXA ++ help ++ Enables the KGDB serial driver for Intel PXA SOC ++endchoice ++ ++choice ++ prompt "PXA UART to use for KGDB" ++ depends on KGDB_PXA_SERIAL ++ default KGDB_PXA_FFUART ++ ++config KGDB_PXA_FFUART ++ bool "FFUART" ++ ++config KGDB_PXA_BTUART ++ bool "BTUART" ++ ++config KGDB_PXA_STUART ++ bool "STUART" ++endchoice ++ ++choice ++ prompt "SCC/SMC to use for KGDB" ++ depends on KGDB_CPM_UART ++ default KGDB_CPM_UART_SCC4 if ADS8272 ++ ++config KGDB_CPM_UART_SCC1 ++ bool "SCC1" ++ depends on SERIAL_CPM_SCC1 ++ ++config KGDB_CPM_UART_SCC2 ++ bool "SCC2" ++ depends on SERIAL_CPM_SCC2 ++ ++config KGDB_CPM_UART_SCC3 ++ bool "SCC3" ++ depends on SERIAL_CPM_SCC3 ++ ++config KGDB_CPM_UART_SCC4 ++ bool "SCC4" ++ depends on SERIAL_CPM_SCC4 ++ ++config KGDB_CPM_UART_SMC1 ++ bool "SMC1" ++ depends on SERIAL_CPM_SMC1 ++ ++config KGDB_CPM_UART_SMC2 ++ bool "SMC2" ++ depends on SERIAL_CPM_SMC2 ++endchoice ++ ++config KGDBOE ++ tristate "KGDB: On ethernet" if !KGDBOE_NOMODULE ++ depends on m && KGDB ++ select NETPOLL ++ select NETPOLL_TRAP ++ help ++ Uses the NETPOLL API to communicate with the host GDB via UDP. ++ In order for this to work, the ethernet interface specified must ++ support the NETPOLL API, and this must be initialized at boot. ++ See the documentation for syntax. ++ ++config KGDB_8250 ++ tristate "KGDB: On generic serial port (8250)" if !KGDB_8250_NOMODULE ++ depends on m && KGDB_ONLY_MODULES ++ help ++ Uses generic serial port (8250) to communicate with the host ++ GDB. This is independent of the normal (SERIAL_8250) driver ++ for this chipset. ++ ++config KGDB_SIMPLE_SERIAL ++ bool "Simple selection of KGDB serial port" ++ depends on KGDB_8250_NOMODULE ++ default y ++ help ++ If you say Y here, you will only have to pick the baud rate ++ and port number that you wish to use for KGDB. Note that this ++ only works on architectures that register known serial ports ++ early on. If you say N, you will have to provide, either here ++ or on the command line, the type (I/O or MMIO), IRQ and ++ address to use. If in doubt, say Y. ++ ++config KGDB_BAUDRATE ++ int "Debug serial port baud rate" ++ depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || \ ++ KGDB_MPSC || KGDB_CPM_UART || \ ++ KGDB_TXX9 || KGDB_PXA_SERIAL || KGDB_AMBA_PL011 ++ default "115200" ++ help ++ gdb and the kernel stub need to agree on the baud rate to be ++ used. Standard rates from 9600 to 115200 are allowed, and this ++ may be overridden via the commandline. 
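Since gdb and the stub must agree on the baud rate, the host side has to be told it before connecting. A typical host session against the target's first serial port would look like this (illustrative; 'set remotebaud' is the gdb command of this vintage, and vmlinux is the unstripped image left by the kernel build):

	$ gdb ./vmlinux
	(gdb) set remotebaud 115200
	(gdb) target remote /dev/ttyS0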
++ ++config KGDB_PORT_NUM ++ int "Serial port number for KGDB" ++ range 0 1 if KGDB_MPSC ++ range 0 3 ++ depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || KGDB_MPSC || KGDB_TXX9 ++ default "1" ++ help ++ Pick the port number (0 based) for KGDB to use. ++ ++config KGDB_AMBA_BASE ++ hex "AMBA PL011 Serial Port Base Address" ++ default 0x101f2000 if ARCH_VERSATILE ++ depends on KGDB_AMBA_PL011 ++ help ++ Base address of the AMBA port that KGDB will use. ++ ++config KGDB_AMBA_UARTCLK ++ int "AMBAPL011 Serial UART Clock Frequency" ++ default 24000000 if ARCH_VERSATILE ++ depends on KGDB_AMBA_PL011 ++ help ++ Frequency (in HZ) of the ARM AMBA UART clock ++ ++config KGDB_AMBA_IRQ ++ int "AMBA PL011 Serial Port IRQ" ++ default 13 if ARCH_VERSATILE ++ depends on KGDB_AMBA_PL011 ++ help ++ Pick the IRQ of the AMBA port that KGDB will use. ++ ++config KGDB_8250_CONF_STRING ++ string "Configuration string for KGDB" ++ depends on KGDB_8250_NOMODULE && !KGDB_SIMPLE_SERIAL ++ default "io,2f8,115200,3" if X86 ++ help ++ The format of this string should be ,
++ <address>,<baud rate>,<irq>. For example, to use the
++ serial port on an i386 box located at 0x2f8 and 115200 baud
++ on IRQ 3, use:
++ io,2f8,115200,3
+diff -Nurb linux-2.6.22-570/lib/Makefile linux-2.6.22-591/lib/Makefile
+--- linux-2.6.22-570/lib/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -5,9 +5,10 @@
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ rbtree.o radix-tree.o dump_stack.o \
+ idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+- sha1.o irq_regs.o reciprocal_div.o
++ sha1.o irq_regs.o reciprocal_div.o argv_split.o \
++ check_signature.o
+
+-lib-$(CONFIG_MMU) += ioremap.o
++lib-$(CONFIG_MMU) += ioremap.o pagewalk.o
+ lib-$(CONFIG_SMP) += cpumask.o
+
+ lib-y += kobject.o kref.o kobject_uevent.o klist.o
+diff -Nurb linux-2.6.22-570/lib/argv_split.c linux-2.6.22-591/lib/argv_split.c
+--- linux-2.6.22-570/lib/argv_split.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/argv_split.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,159 @@
++/*
++ * Helper function for splitting a string into an argv-like array.
++ */
++
++#ifndef TEST
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/module.h>
++#endif
++
++static const char *skip_sep(const char *cp)
++{
++ while (*cp && isspace(*cp))
++ cp++;
++
++ return cp;
++}
++
++static const char *skip_arg(const char *cp)
++{
++ while (*cp && !isspace(*cp))
++ cp++;
++
++ return cp;
++}
++
++static int count_argc(const char *str)
++{
++ int count = 0;
++
++ while (*str) {
++ str = skip_sep(str);
++ if (*str) {
++ count++;
++ str = skip_arg(str);
++ }
++ }
++
++ return count;
++}
++
++/**
++ * argv_free - free an argv
++ *
++ * @argv - the argument vector to be freed
++ *
++ * Frees an argv and the strings it points to.
++ */
++void argv_free(char **argv)
++{
++ char **p;
++ for (p = argv; *p; p++)
++ kfree(*p);
++
++ kfree(argv);
++}
++EXPORT_SYMBOL(argv_free);
++
++/**
++ * argv_split - split a string at whitespace, returning an argv
++ * @gfp: the GFP mask used to allocate memory
++ * @str: the string to be split
++ * @argcp: returned argument count
++ *
++ * Returns an array of pointers to strings which are split out from
++ * @str. This is performed by strictly splitting on white-space; no
++ * quote processing is performed. Multiple whitespace characters are
++ * considered to be a single argument separator. The returned array
++ * is always NULL-terminated. Returns NULL on memory allocation
++ * failure.
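++ *
++ * A minimal usage sketch (not part of the original comment; "cmd" is
++ * a hypothetical NUL-terminated string and do_something() a
++ * hypothetical consumer):
++ *
++ *	int argc;
++ *	char **argv = argv_split(GFP_KERNEL, cmd, &argc);
++ *
++ *	if (argv) {
++ *		do_something(argc, argv);
++ *		argv_free(argv);
++ *	}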
++ */
++char **argv_split(gfp_t gfp, const char *str, int *argcp)
++{
++ int argc = count_argc(str);
++ char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp);
++ char **argvp;
++
++ if (argv == NULL)
++ goto out;
++
++ *argcp = argc;
++ argvp = argv;
++
++ while (*str) {
++ str = skip_sep(str);
++
++ if (*str) {
++ const char *p = str;
++ char *t;
++
++ str = skip_arg(str);
++
++ t = kstrndup(p, str-p, gfp);
++ if (t == NULL)
++ goto fail;
++ *argvp++ = t;
++ }
++ }
++ *argvp = NULL;
++
++ out:
++ return argv;
++
++ fail:
++ argv_free(argv);
++ return NULL;
++}
++EXPORT_SYMBOL(argv_split);
++
++#ifdef TEST
++#define _GNU_SOURCE
++#include <stdio.h>
++#include <string.h>
++#include <stdlib.h>
++#include <ctype.h>
++
++typedef enum {
++ GFP_KERNEL,
++} gfp_t;
++#define kzalloc(size, x) malloc(size)
++#define kfree(x) free(x)
++#define kstrndup(s, n, gfp) strndup(s, n)
++#define BUG() abort()
++
++int main() {
++ const char *testvec[] = {
++ "",
++ "x",
++ "\"",
++ "\\\0",
++ "\"",
++ "test one two three",
++ "arg\"foo\"bar biff",
++ "one two\\ three four",
++ "one \"two three\" four",
++ NULL,
++ };
++ const char **t;
++
++ for (t = testvec; *t; t++) {
++ char **argv;
++ int argc;
++ char **a;
++
++ printf("%d: test [%s]\n", t-testvec, *t);
++
++ argv = argv_split(GFP_KERNEL, *t, &argc);
++
++ printf("argc=%d vec=", argc);
++ for (a = argv; *a; a++)
++ printf("[%s] ", *a);
++ printf("\n");
++
++ argv_free(argv);
++ }
++
++ return 0;
++}
++#endif
+diff -Nurb linux-2.6.22-570/lib/check_signature.c linux-2.6.22-591/lib/check_signature.c
+--- linux-2.6.22-570/lib/check_signature.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/check_signature.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,26 @@
++#include <linux/io.h>
++#include <linux/module.h>
++
++/**
++ * check_signature - find BIOS signatures
++ * @io_addr: mmio address to check
++ * @signature: signature block
++ * @length: length of signature
++ *
++ * Perform a signature comparison with the mmio address io_addr. This
++ * address should have been obtained by ioremap.
++ * Returns 1 on a match.
++ */
++
++int check_signature(const volatile void __iomem *io_addr,
++ const unsigned char *signature, int length)
++{
++ while (length--) {
++ if (readb(io_addr) != *signature)
++ return 0;
++ io_addr++;
++ signature++;
++ }
++ return 1;
++}
++EXPORT_SYMBOL(check_signature);
+diff -Nurb linux-2.6.22-570/lib/idr.c linux-2.6.22-591/lib/idr.c
+--- linux-2.6.22-570/lib/idr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/idr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -70,6 +70,26 @@
+ spin_unlock_irqrestore(&idp->lock, flags);
+ }
+
++static void idr_mark_full(struct idr_layer **pa, int id)
++{
++ struct idr_layer *p = pa[0];
++ int l = 0;
++
++ __set_bit(id & IDR_MASK, &p->bitmap);
++ /*
++ * If this layer is full mark the bit in the layer above to
++ * show that this part of the radix tree is full. This may
++ * complete the layer above and require walking up the radix
++ * tree.
++ */
++ while (p->bitmap == IDR_FULL) {
++ if (!(p = pa[++l]))
++ break;
++ id = id >> IDR_BITS;
++ __set_bit((id & IDR_MASK), &p->bitmap);
++ }
++}
++
+ /**
+ * idr_pre_get - reserve resources for idr allocation
+ * @idp: idr handle
+@@ -95,11 +115,10 @@
+ }
+ EXPORT_SYMBOL(idr_pre_get);
+
+-static int sub_alloc(struct idr *idp, void *ptr, int *starting_id)
++static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa)
+ {
+ int n, m, sh;
+ struct idr_layer *p, *new;
+- struct idr_layer *pa[MAX_LEVEL];
+ int l, id;
+ long bm;
+
+@@ -144,30 +163,13 @@
+ pa[l--] = p;
+ p = p->ary[m];
+ }
+- /*
+- * We have reached the leaf node, plant the
+- * users pointer and return the raw id.
+- */
+- p->ary[m] = (struct idr_layer *)ptr;
+- __set_bit(m, &p->bitmap);
+- p->count++;
+- /*
+- * If this layer is full mark the bit in the layer above
+- * to show that this part of the radix tree is full.
+- * This may complete the layer above and require walking
+- * up the radix tree.
+- */
+- n = id;
+- while (p->bitmap == IDR_FULL) {
+- if (!(p = pa[++l]))
+- break;
+- n = n >> IDR_BITS;
+- __set_bit((n & IDR_MASK), &p->bitmap);
+- }
+- return(id);
++
++ pa[l] = p;
++ return id;
+ }
+
+-static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
++static int idr_get_empty_slot(struct idr *idp, int starting_id,
++ struct idr_layer **pa)
+ {
+ struct idr_layer *p, *new;
+ int layers, v, id;
+@@ -213,12 +215,31 @@
+ }
+ idp->top = p;
+ idp->layers = layers;
+- v = sub_alloc(idp, ptr, &id);
++ v = sub_alloc(idp, &id, pa);
+ if (v == -2)
+ goto build_up;
+ return(v);
+ }
+
++static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id)
++{
++ struct idr_layer *pa[MAX_LEVEL];
++ int id;
++
++ id = idr_get_empty_slot(idp, starting_id, pa);
++ if (id >= 0) {
++ /*
++ * Successfully found an empty slot. Install the user
++ * pointer and mark the slot full.
++ */
++ pa[0]->ary[id & IDR_MASK] = (struct idr_layer *)ptr;
++ pa[0]->count++;
++ idr_mark_full(pa, id);
++ }
++
++ return id;
++}
++
+ /**
+ * idr_get_new_above - allocate new idr entry above or equal to a start id
+ * @idp: idr handle
+@@ -473,3 +494,248 @@
+ spin_lock_init(&idp->lock);
+ }
+ EXPORT_SYMBOL(idr_init);
++
++
++/*
++ * IDA - IDR based ID allocator
++ *
++ * This is an id allocator without id -> pointer translation. Memory
++ * usage is much lower than full-blown idr because each id only
++ * occupies a bit. ida uses a custom leaf node which contains
++ * IDA_BITMAP_BITS slots.
++ *
++ * 2007-04-25 written by Tejun Heo <htejun@gmail.com>
++ */
++
++static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
++{
++ unsigned long flags;
++
++ if (!ida->free_bitmap) {
++ spin_lock_irqsave(&ida->idr.lock, flags);
++ if (!ida->free_bitmap) {
++ ida->free_bitmap = bitmap;
++ bitmap = NULL;
++ }
++ spin_unlock_irqrestore(&ida->idr.lock, flags);
++ }
++
++ kfree(bitmap);
++}
++
++/**
++ * ida_pre_get - reserve resources for ida allocation
++ * @ida: ida handle
++ * @gfp_mask: memory allocation flag
++ *
++ * This function should be called prior to locking and calling the
++ * following function. It preallocates enough memory to satisfy the
++ * worst possible allocation.
++ *
++ * If the system is REALLY out of memory this function returns 0,
++ * otherwise 1.
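++ *
++ * The intended calling pattern, as a sketch (my_ida and my_lock are
++ * caller-side names, not part of this patch):
++ *
++ *	int id, ret;
++ *
++ *	again:
++ *	if (!ida_pre_get(&my_ida, GFP_KERNEL))
++ *		return -ENOMEM;
++ *	spin_lock(&my_lock);
++ *	ret = ida_get_new(&my_ida, &id);
++ *	spin_unlock(&my_lock);
++ *	if (ret == -EAGAIN)
++ *		goto again;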
++ */
++int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
++{
++ /* allocate idr_layers */
++ if (!idr_pre_get(&ida->idr, gfp_mask))
++ return 0;
++
++ /* allocate free_bitmap */
++ if (!ida->free_bitmap) {
++ struct ida_bitmap *bitmap;
++
++ bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
++ if (!bitmap)
++ return 0;
++
++ free_bitmap(ida, bitmap);
++ }
++
++ return 1;
++}
++EXPORT_SYMBOL(ida_pre_get);
++
++/**
++ * ida_get_new_above - allocate new ID above or equal to a start id
++ * @ida: ida handle
++ * @starting_id: id to start search at
++ * @p_id: pointer to the allocated handle
++ *
++ * Allocate new ID above or equal to @starting_id. It should be
++ * called with any required locks.
++ *
++ * If memory is required, it will return -EAGAIN; you should unlock
++ * and go back to the ida_pre_get() call. If the ida is full, it will
++ * return -ENOSPC.
++ *
++ * @p_id returns a value in the range 0 ... 0x7fffffff.
++ */
++int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
++{
++ struct idr_layer *pa[MAX_LEVEL];
++ struct ida_bitmap *bitmap;
++ unsigned long flags;
++ int idr_id = starting_id / IDA_BITMAP_BITS;
++ int offset = starting_id % IDA_BITMAP_BITS;
++ int t, id;
++
++ restart:
++ /* get vacant slot */
++ t = idr_get_empty_slot(&ida->idr, idr_id, pa);
++ if (t < 0) {
++ if (t == -1)
++ return -EAGAIN;
++ else /* will be -3 */
++ return -ENOSPC;
++ }
++
++ if (t * IDA_BITMAP_BITS >= MAX_ID_BIT)
++ return -ENOSPC;
++
++ if (t != idr_id)
++ offset = 0;
++ idr_id = t;
++
++ /* if bitmap isn't there, create a new one */
++ bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
++ if (!bitmap) {
++ spin_lock_irqsave(&ida->idr.lock, flags);
++ bitmap = ida->free_bitmap;
++ ida->free_bitmap = NULL;
++ spin_unlock_irqrestore(&ida->idr.lock, flags);
++
++ if (!bitmap)
++ return -EAGAIN;
++
++ memset(bitmap, 0, sizeof(struct ida_bitmap));
++ pa[0]->ary[idr_id & IDR_MASK] = (void *)bitmap;
++ pa[0]->count++;
++ }
++
++ /* look for an empty slot */
++ t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
++ if (t == IDA_BITMAP_BITS) {
++ /* no empty slot after offset, continue to the next chunk */
++ idr_id++;
++ offset = 0;
++ goto restart;
++ }
++
++ id = idr_id * IDA_BITMAP_BITS + t;
++ if (id >= MAX_ID_BIT)
++ return -ENOSPC;
++
++ __set_bit(t, bitmap->bitmap);
++ if (++bitmap->nr_busy == IDA_BITMAP_BITS)
++ idr_mark_full(pa, idr_id);
++
++ *p_id = id;
++
++ /* Each leaf node can handle nearly a thousand slots and the
++ * whole idea of ida is to have small memory footprint.
++ * Throw away extra resources one by one after each successful
++ * allocation.
++ */
++ if (ida->idr.id_free_cnt || ida->free_bitmap) {
++ struct idr_layer *p = alloc_layer(&ida->idr);
++ if (p)
++ kmem_cache_free(idr_layer_cache, p);
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL(ida_get_new_above);
++
++/**
++ * ida_get_new - allocate new ID
++ * @ida: ida handle
++ * @p_id: pointer to the allocated handle
++ *
++ * Allocate new ID. It should be called with any required locks.
++ *
++ * If memory is required, it will return -EAGAIN; you should unlock
++ * and go back to the ida_pre_get() call. If the ida is full, it will
++ * return -ENOSPC.
++ *
++ * @p_id returns a value in the range 0 ... 0x7fffffff.
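++ *
++ * A paired allocate/release sketch (use_id() stands in for the
++ * caller's consumer and is not part of this patch; locking as
++ * described above is elided):
++ *
++ *	if (ida_get_new(&my_ida, &id) == 0) {
++ *		use_id(id);
++ *		ida_remove(&my_ida, id);
++ *	}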
++ */
++int ida_get_new(struct ida *ida, int *p_id)
++{
++ return ida_get_new_above(ida, 0, p_id);
++}
++EXPORT_SYMBOL(ida_get_new);
++
++/**
++ * ida_remove - remove the given ID
++ * @ida: ida handle
++ * @id: ID to free
++ */
++void ida_remove(struct ida *ida, int id)
++{
++ struct idr_layer *p = ida->idr.top;
++ int shift = (ida->idr.layers - 1) * IDR_BITS;
++ int idr_id = id / IDA_BITMAP_BITS;
++ int offset = id % IDA_BITMAP_BITS;
++ int n;
++ struct ida_bitmap *bitmap;
++
++ /* clear full bits while looking up the leaf idr_layer */
++ while ((shift > 0) && p) {
++ n = (idr_id >> shift) & IDR_MASK;
++ __clear_bit(n, &p->bitmap);
++ p = p->ary[n];
++ shift -= IDR_BITS;
++ }
++
++ if (p == NULL)
++ goto err;
++
++ n = idr_id & IDR_MASK;
++ __clear_bit(n, &p->bitmap);
++
++ bitmap = (void *)p->ary[n];
++ if (!test_bit(offset, bitmap->bitmap))
++ goto err;
++
++ /* update bitmap and remove it if empty */
++ __clear_bit(offset, bitmap->bitmap);
++ if (--bitmap->nr_busy == 0) {
++ __set_bit(n, &p->bitmap); /* to please idr_remove() */
++ idr_remove(&ida->idr, idr_id);
++ free_bitmap(ida, bitmap);
++ }
++
++ return;
++
++ err:
++ printk(KERN_WARNING
++ "ida_remove called for id=%d which is not allocated.\n", id);
++}
++EXPORT_SYMBOL(ida_remove);
++
++/**
++ * ida_destroy - release all cached layers within an ida tree
++ * @ida: ida handle
++ */
++void ida_destroy(struct ida *ida)
++{
++ idr_destroy(&ida->idr);
++ kfree(ida->free_bitmap);
++}
++EXPORT_SYMBOL(ida_destroy);
++
++/**
++ * ida_init - initialize ida handle
++ * @ida: ida handle
++ *
++ * This function is used to set up the handle (@ida) that you will pass
++ * to the rest of the functions.
++ */
++void ida_init(struct ida *ida)
++{
++ memset(ida, 0, sizeof(struct ida));
++ idr_init(&ida->idr);
++}
++EXPORT_SYMBOL(ida_init);
+diff -Nurb linux-2.6.22-570/lib/kobject.c linux-2.6.22-591/lib/kobject.c
+--- linux-2.6.22-570/lib/kobject.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/kobject.c 2007-12-21 15:36:15.000000000 -0500
+@@ -44,11 +44,11 @@
+ return error;
+ }
+
+-static int create_dir(struct kobject * kobj, struct dentry *shadow_parent)
++static int create_dir(struct kobject * kobj)
+ {
+ int error = 0;
+ if (kobject_name(kobj)) {
+- error = sysfs_create_dir(kobj, shadow_parent);
++ error = sysfs_create_dir(kobj);
+ if (!error) {
+ if ((error = populate_dir(kobj)))
+ sysfs_remove_dir(kobj);
+@@ -157,12 +157,11 @@
+ }
+
+ /**
+- * kobject_shadow_add - add an object to the hierarchy.
++ * kobject_add - add an object to the hierarchy.
+ * @kobj: object.
+- * @shadow_parent: sysfs directory to add to.
+ */
+
+-int kobject_shadow_add(struct kobject * kobj, struct dentry *shadow_parent)
++int kobject_add(struct kobject * kobj)
+ {
+ int error = 0;
+ struct kobject * parent;
+@@ -194,7 +193,7 @@
+ kobj->parent = parent;
+ }
+
+- error = create_dir(kobj, shadow_parent);
++ error = create_dir(kobj);
+ if (error) {
+ /* unlink does the kobject_put() for us */
+ unlink(kobj);
+@@ -216,16 +215,6 @@
+ }
+
+ /**
+- * kobject_add - add an object to the hierarchy.
+- * @kobj: object.
+- */
+-int kobject_add(struct kobject * kobj)
+-{
+- return kobject_shadow_add(kobj, NULL);
+-}
+-
+-
+-/**
+ * kobject_register - initialize and add an object.
+ * @kobj: object in question.
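++ *
++ * A typical caller does something like (a sketch; the name is
++ * illustrative and full error handling is elided):
++ *
++ *	error = kobject_set_name(kobj, "%s", "example");
++ *	if (!error)
++ *		error = kobject_register(kobj);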
+ */
+@@ -338,7 +327,7 @@
+ /* Note : if we want to send the new name alone, not the full path,
+ * we could probably use kobject_name(kobj); */
+
+- error = sysfs_rename_dir(kobj, kobj->parent->dentry, new_name);
++ error = sysfs_rename_dir(kobj, new_name);
+
+ /* This function is mostly/only used for network interface.
+ * Some hotplug package track interfaces by their name and
+@@ -355,27 +344,6 @@
+ }
+
+ /**
+- * kobject_rename - change the name of an object
+- * @kobj: object in question.
+- * @new_parent: object's new parent
+- * @new_name: object's new name
+- */
+-
+-int kobject_shadow_rename(struct kobject * kobj, struct dentry *new_parent,
+- const char *new_name)
+-{
+- int error = 0;
+-
+- kobj = kobject_get(kobj);
+- if (!kobj)
+- return -EINVAL;
+- error = sysfs_rename_dir(kobj, new_parent, new_name);
+- kobject_put(kobj);
+-
+- return error;
+-}
+-
+-/**
+ * kobject_move - move object to another parent
+ * @kobj: object in question.
+ * @new_parent: object's new parent (can be NULL)
+diff -Nurb linux-2.6.22-570/lib/kobject_uevent.c linux-2.6.22-591/lib/kobject_uevent.c
+--- linux-2.6.22-570/lib/kobject_uevent.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/lib/kobject_uevent.c 2007-12-21 15:36:15.000000000 -0500
+@@ -208,7 +208,7 @@
+ argv [0] = uevent_helper;
+ argv [1] = (char *)subsystem;
+ argv [2] = NULL;
+- call_usermodehelper (argv[0], argv, envp, 0);
++ call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC);
+ }
+
+ exit:
+@@ -290,9 +290,8 @@
+ #if defined(CONFIG_NET)
+ static int __init kobject_uevent_init(void)
+ {
+- uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
+- NULL, THIS_MODULE);
+-
++ uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
++ 1, NULL, NULL, THIS_MODULE);
+ if (!uevent_sock) {
+ printk(KERN_ERR
+ "kobject_uevent: unable to create netlink socket!\n");
+diff -Nurb linux-2.6.22-570/lib/pagewalk.c linux-2.6.22-591/lib/pagewalk.c
+--- linux-2.6.22-570/lib/pagewalk.c 1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/lib/pagewalk.c 2007-12-21 15:36:12.000000000 -0500
+@@ -0,0 +1,112 @@
++#include <linux/mm.h>
++#include <linux/highmem.h>
++
++static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pte_t *pte;
++ int err;
++
++ for (pte = pte_offset_map(pmd, addr); addr != end;
++ addr += PAGE_SIZE, pte++) {
++ if (pte_none(*pte))
++ continue;
++ err = walk->pte_entry(pte, addr, addr, private);
++ if (err) {
++ pte_unmap(pte);
++ return err;
++ }
++ }
++ pte_unmap(pte);
++ return 0;
++}
++
++static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pmd_t *pmd;
++ unsigned long next;
++ int err;
++
++ for (pmd = pmd_offset(pud, addr); addr != end;
++ pmd++, addr = next) {
++ next = pmd_addr_end(addr, end);
++ if (pmd_none_or_clear_bad(pmd))
++ continue;
++ if (walk->pmd_entry) {
++ err = walk->pmd_entry(pmd, addr, next, private);
++ if (err)
++ return err;
++ }
++ if (walk->pte_entry) {
++ err = walk_pte_range(pmd, addr, next, walk, private);
++ if (err)
++ return err;
++ }
++ }
++ return 0;
++}
++
++static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
++ struct mm_walk *walk, void *private)
++{
++ pud_t *pud;
++ unsigned long next;
++ int err;
++
++ for (pud = pud_offset(pgd, addr); addr != end;
++ pud++, addr = next) {
++ next = pud_addr_end(addr, end);
++ if (pud_none_or_clear_bad(pud))
++ continue;
++ if (walk->pud_entry) {
++ err = walk->pud_entry(pud, addr,
next, private); ++ if (err) ++ return err; ++ } ++ if (walk->pmd_entry || walk->pte_entry) { ++ err = walk_pmd_range(pud, addr, next, walk, private); ++ if (err) ++ return err; ++ } ++ } ++ return 0; ++} ++ ++/* ++ * walk_page_range - walk a memory map's page tables with a callback ++ * @mm - memory map to walk ++ * @addr - starting address ++ * @end - ending address ++ * @walk - set of callbacks to invoke for each level of the tree ++ * @private - private data passed to the callback function ++ * ++ * Recursively walk the page table for the memory area in a VMA, calling ++ * a callback for every bottom-level (PTE) page table. ++ */ ++int walk_page_range(struct mm_struct *mm, ++ unsigned long addr, unsigned long end, ++ struct mm_walk *walk, void *private) ++{ ++ pgd_t *pgd; ++ unsigned long next; ++ int err; ++ ++ for (pgd = pgd_offset(mm, addr); addr != end; ++ pgd++, addr = next) { ++ next = pgd_addr_end(addr, end); ++ if (pgd_none_or_clear_bad(pgd)) ++ continue; ++ if (walk->pgd_entry) { ++ err = walk->pgd_entry(pgd, addr, next, private); ++ if (err) ++ return err; ++ } ++ if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) { ++ err = walk_pud_range(pgd, addr, next, walk, private); ++ if (err) ++ return err; ++ } ++ } ++ return 0; ++} +diff -Nurb linux-2.6.22-570/lib/radix-tree.c linux-2.6.22-591/lib/radix-tree.c +--- linux-2.6.22-570/lib/radix-tree.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/lib/radix-tree.c 2007-12-21 15:36:12.000000000 -0500 +@@ -93,7 +93,8 @@ + struct radix_tree_node *ret; + gfp_t gfp_mask = root_gfp_mask(root); + +- ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); ++ ret = kmem_cache_alloc(radix_tree_node_cachep, ++ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); + if (ret == NULL && !(gfp_mask & __GFP_WAIT)) { + struct radix_tree_preload *rtp; + +@@ -137,7 +138,8 @@ + rtp = &__get_cpu_var(radix_tree_preloads); + while (rtp->nr < ARRAY_SIZE(rtp->nodes)) { + preempt_enable(); +- node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); ++ node = kmem_cache_alloc(radix_tree_node_cachep, ++ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE)); + if (node == NULL) + goto out; + preempt_disable(); +diff -Nurb linux-2.6.22-570/mm/filemap.c linux-2.6.22-591/mm/filemap.c +--- linux-2.6.22-570/mm/filemap.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/filemap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1334,39 +1334,38 @@ + #define MMAP_LOTSAMISS (100) + + /** +- * filemap_nopage - read in file data for page fault handling +- * @area: the applicable vm_area +- * @address: target address to read in +- * @type: returned with VM_FAULT_{MINOR,MAJOR} if not %NULL ++ * filemap_fault - read in file data for page fault handling ++ * @vma: user vma (not used) ++ * @fdata: the applicable fault_data + * +- * filemap_nopage() is invoked via the vma operations vector for a ++ * filemap_fault() is invoked via the vma operations vector for a + * mapped memory region to read in file data during a page fault. + * + * The goto's are kind of ugly, but this streamlines the normal case of having + * it in the page cache, and handles the special cases reasonably without + * having a lot of duplicated code. 
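++ *
++ * Filesystems wire this up through their vm_operations_struct, as
++ * generic_file_vm_ops does below. A sketch for a hypothetical "myfs"
++ * (the flag update mirrors generic_file_mmap in this patch):
++ *
++ *	static struct vm_operations_struct myfs_file_vm_ops = {
++ *		.fault = filemap_fault,
++ *	};
++ *
++ *	vma->vm_ops = &myfs_file_vm_ops;
++ *	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;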
+ */ +-struct page *filemap_nopage(struct vm_area_struct *area, +- unsigned long address, int *type) ++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) + { + int error; +- struct file *file = area->vm_file; ++ struct file *file = vma->vm_file; + struct address_space *mapping = file->f_mapping; + struct file_ra_state *ra = &file->f_ra; + struct inode *inode = mapping->host; + struct page *page; +- unsigned long size, pgoff; +- int did_readaround = 0, majmin = VM_FAULT_MINOR; ++ unsigned long size; ++ int did_readaround = 0; + +- pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; ++ fdata->type = VM_FAULT_MINOR; ++ ++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); + +-retry_all: + size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +- if (pgoff >= size) ++ if (fdata->pgoff >= size) + goto outside_data_content; + + /* If we don't want any read-ahead, don't bother */ +- if (VM_RandomReadHint(area)) ++ if (VM_RandomReadHint(vma)) + goto no_cached_page; + + /* +@@ -1375,19 +1374,19 @@ + * + * For sequential accesses, we use the generic readahead logic. + */ +- if (VM_SequentialReadHint(area)) +- page_cache_readahead(mapping, ra, file, pgoff, 1); ++ if (VM_SequentialReadHint(vma)) ++ page_cache_readahead(mapping, ra, file, fdata->pgoff, 1); + + /* + * Do we have something in the page cache already? + */ + retry_find: +- page = find_get_page(mapping, pgoff); ++ page = find_lock_page(mapping, fdata->pgoff); + if (!page) { + unsigned long ra_pages; + +- if (VM_SequentialReadHint(area)) { +- handle_ra_miss(mapping, ra, pgoff); ++ if (VM_SequentialReadHint(vma)) { ++ handle_ra_miss(mapping, ra, fdata->pgoff); + goto no_cached_page; + } + ra->mmap_miss++; +@@ -1404,7 +1403,7 @@ + * check did_readaround, as this is an inner loop. + */ + if (!did_readaround) { +- majmin = VM_FAULT_MAJOR; ++ fdata->type = VM_FAULT_MAJOR; + count_vm_event(PGMAJFAULT); + } + did_readaround = 1; +@@ -1412,11 +1411,11 @@ + if (ra_pages) { + pgoff_t start = 0; + +- if (pgoff > ra_pages / 2) +- start = pgoff - ra_pages / 2; ++ if (fdata->pgoff > ra_pages / 2) ++ start = fdata->pgoff - ra_pages / 2; + do_page_cache_readahead(mapping, file, start, ra_pages); + } +- page = find_get_page(mapping, pgoff); ++ page = find_lock_page(mapping, fdata->pgoff); + if (!page) + goto no_cached_page; + } +@@ -1425,19 +1424,23 @@ + ra->mmap_hit++; + + /* +- * Ok, found a page in the page cache, now we need to check +- * that it's up-to-date. ++ * We have a locked page in the page cache, now we need to check ++ * that it's up-to-date. If not, it is going to be due to an error. + */ +- if (!PageUptodate(page)) ++ if (unlikely(!PageUptodate(page))) + goto page_not_uptodate; + +-success: ++ /* Must recheck i_size under page lock */ ++ size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; ++ if (unlikely(fdata->pgoff >= size)) { ++ unlock_page(page); ++ goto outside_data_content; ++ } ++ + /* + * Found the page and have a reference on it. + */ + mark_page_accessed(page); +- if (type) +- *type = majmin; + return page; + + outside_data_content: +@@ -1445,15 +1448,17 @@ + * An external ptracer can access pages that normally aren't + * accessible.. + */ +- if (area->vm_mm == current->mm) +- return NOPAGE_SIGBUS; ++ if (vma->vm_mm == current->mm) { ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; ++ } + /* Fall through to the non-read-ahead case */ + no_cached_page: + /* + * We're only likely to ever get here if MADV_RANDOM is in + * effect. 
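++ *
++ * (Userspace typically lands here after madvise(addr, len,
++ * MADV_RANDOM) has set VM_RandomReadHint on the vma, which
++ * disables readahead above.)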
+ */ +- error = page_cache_read(file, pgoff); ++ error = page_cache_read(file, fdata->pgoff); + + /* + * The page we want has now been added to the page cache. +@@ -1469,12 +1474,15 @@ + * to schedule I/O. + */ + if (error == -ENOMEM) +- return NOPAGE_OOM; +- return NOPAGE_SIGBUS; ++ fdata->type = VM_FAULT_OOM; ++ else ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; + + page_not_uptodate: ++ /* IO error path */ + if (!did_readaround) { +- majmin = VM_FAULT_MAJOR; ++ fdata->type = VM_FAULT_MAJOR; + count_vm_event(PGMAJFAULT); + } + +@@ -1484,38 +1492,39 @@ + * because there really aren't any performance issues here + * and we need to check for errors. + */ +- lock_page(page); +- +- /* Somebody truncated the page on us? */ +- if (!page->mapping) { +- unlock_page(page); +- page_cache_release(page); +- goto retry_all; +- } +- +- /* Somebody else successfully read it in? */ +- if (PageUptodate(page)) { +- unlock_page(page); +- goto success; +- } + ClearPageError(page); + error = mapping->a_ops->readpage(file, page); +- if (!error) { +- wait_on_page_locked(page); +- if (PageUptodate(page)) +- goto success; +- } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); ++ ++ if (!error || error == AOP_TRUNCATED_PAGE) + goto retry_find; +- } + +- /* +- * Things didn't work out. Return zero to tell the +- * mm layer so, possibly freeing the page cache page first. +- */ ++ /* Things didn't work out. Return zero to tell the mm layer so. */ + shrink_readahead_size_eio(file, ra); +- page_cache_release(page); +- return NOPAGE_SIGBUS; ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; ++} ++EXPORT_SYMBOL(filemap_fault); ++ ++/* ++ * filemap_nopage and filemap_populate are legacy exports that are not used ++ * in tree. Scheduled for removal. ++ */ ++struct page *filemap_nopage(struct vm_area_struct *area, ++ unsigned long address, int *type) ++{ ++ struct page *page; ++ struct fault_data fdata; ++ fdata.address = address; ++ fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) ++ + area->vm_pgoff; ++ fdata.flags = 0; ++ ++ page = filemap_fault(area, &fdata); ++ if (type) ++ *type = fdata.type; ++ ++ return page; + } + EXPORT_SYMBOL(filemap_nopage); + +@@ -1693,8 +1702,7 @@ + EXPORT_SYMBOL(filemap_populate); + + struct vm_operations_struct generic_file_vm_ops = { +- .nopage = filemap_nopage, +- .populate = filemap_populate, ++ .fault = filemap_fault, + }; + + /* This is used for a general mmap of a disk file */ +@@ -1707,6 +1715,7 @@ + return -ENOEXEC; + file_accessed(file); + vma->vm_ops = &generic_file_vm_ops; ++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; + return 0; + } + +diff -Nurb linux-2.6.22-570/mm/filemap_xip.c linux-2.6.22-591/mm/filemap_xip.c +--- linux-2.6.22-570/mm/filemap_xip.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/filemap_xip.c 2007-12-21 15:36:12.000000000 -0500 +@@ -228,62 +228,67 @@ + } + + /* +- * xip_nopage() is invoked via the vma operations vector for a ++ * xip_fault() is invoked via the vma operations vector for a + * mapped memory region to read in file data during a page fault. 
+ * +- * This function is derived from filemap_nopage, but used for execute in place ++ * This function is derived from filemap_fault, but used for execute in place + */ +-static struct page * +-xip_file_nopage(struct vm_area_struct * area, +- unsigned long address, +- int *type) ++static struct page *xip_file_fault(struct vm_area_struct *area, ++ struct fault_data *fdata) + { + struct file *file = area->vm_file; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct page *page; +- unsigned long size, pgoff, endoff; ++ pgoff_t size; + +- pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +- + area->vm_pgoff; +- endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) +- + area->vm_pgoff; ++ /* XXX: are VM_FAULT_ codes OK? */ + + size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +- if (pgoff >= size) +- return NOPAGE_SIGBUS; ++ if (fdata->pgoff >= size) { ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; ++ } + +- page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0); ++ page = mapping->a_ops->get_xip_page(mapping, ++ fdata->pgoff*(PAGE_SIZE/512), 0); + if (!IS_ERR(page)) + goto out; +- if (PTR_ERR(page) != -ENODATA) +- return NOPAGE_SIGBUS; ++ if (PTR_ERR(page) != -ENODATA) { ++ fdata->type = VM_FAULT_OOM; ++ return NULL; ++ } + + /* sparse block */ + if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && + (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && + (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { + /* maybe shared writable, allocate new block */ +- page = mapping->a_ops->get_xip_page (mapping, +- pgoff*(PAGE_SIZE/512), 1); +- if (IS_ERR(page)) +- return NOPAGE_SIGBUS; ++ page = mapping->a_ops->get_xip_page(mapping, ++ fdata->pgoff*(PAGE_SIZE/512), 1); ++ if (IS_ERR(page)) { ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; ++ } + /* unmap page at pgoff from all other vmas */ +- __xip_unmap(mapping, pgoff); ++ __xip_unmap(mapping, fdata->pgoff); + } else { + /* not shared and writable, use xip_sparse_page() */ + page = xip_sparse_page(); +- if (!page) +- return NOPAGE_OOM; ++ if (!page) { ++ fdata->type = VM_FAULT_OOM; ++ return NULL; ++ } + } + + out: ++ fdata->type = VM_FAULT_MINOR; + page_cache_get(page); + return page; + } + + static struct vm_operations_struct xip_file_vm_ops = { +- .nopage = xip_file_nopage, ++ .fault = xip_file_fault, + }; + + int xip_file_mmap(struct file * file, struct vm_area_struct * vma) +@@ -292,6 +297,7 @@ + + file_accessed(file); + vma->vm_ops = &xip_file_vm_ops; ++ vma->vm_flags |= VM_CAN_NONLINEAR; + return 0; + } + EXPORT_SYMBOL_GPL(xip_file_mmap); +diff -Nurb linux-2.6.22-570/mm/fremap.c linux-2.6.22-591/mm/fremap.c +--- linux-2.6.22-570/mm/fremap.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/fremap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -129,6 +129,25 @@ + return err; + } + ++static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, ++ unsigned long addr, unsigned long size, pgoff_t pgoff) ++{ ++ int err; ++ ++ do { ++ err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot); ++ if (err) ++ return err; ++ ++ size -= PAGE_SIZE; ++ addr += PAGE_SIZE; ++ pgoff++; ++ } while (size); ++ ++ return 0; ++ ++} ++ + /*** + * sys_remap_file_pages - remap arbitrary pages of a shared backing store + * file within an existing vma. +@@ -186,15 +205,27 @@ + * the single existing vma. vm_private_data is used as a + * swapout cursor in a VM_NONLINEAR vma. 
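++ *
++ * A userspace sketch of the call this services (values are
++ * illustrative only; the prot argument must currently be 0):
++ *
++ *	addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
++ *	remap_file_pages(addr, len, 0, pgoff, 0);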
+ */ +- if (vma && (vma->vm_flags & VM_SHARED) && +- (!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) && +- vma->vm_ops && vma->vm_ops->populate && +- end > start && start >= vma->vm_start && +- end <= vma->vm_end) { ++ if (!vma || !(vma->vm_flags & VM_SHARED)) ++ goto out; ++ ++ if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR)) ++ goto out; ++ ++ if ((!vma->vm_ops || !vma->vm_ops->populate) && ++ !(vma->vm_flags & VM_CAN_NONLINEAR)) ++ goto out; ++ ++ if (end <= start || start < vma->vm_start || end > vma->vm_end) ++ goto out; + + /* Must set VM_NONLINEAR before any pages are populated. */ +- if (pgoff != linear_page_index(vma, start) && +- !(vma->vm_flags & VM_NONLINEAR)) { ++ if (!(vma->vm_flags & VM_NONLINEAR)) { ++ /* Don't need a nonlinear mapping, exit success */ ++ if (pgoff == linear_page_index(vma, start)) { ++ err = 0; ++ goto out; ++ } ++ + if (!has_write_lock) { + up_read(&mm->mmap_sem); + down_write(&mm->mmap_sem); +@@ -211,8 +242,17 @@ + spin_unlock(&mapping->i_mmap_lock); + } + +- err = vma->vm_ops->populate(vma, start, size, +- vma->vm_page_prot, ++ if (vma->vm_flags & VM_CAN_NONLINEAR) { ++ err = populate_range(mm, vma, start, size, pgoff); ++ if (!err && !(flags & MAP_NONBLOCK)) { ++ if (unlikely(has_write_lock)) { ++ downgrade_write(&mm->mmap_sem); ++ has_write_lock = 0; ++ } ++ make_pages_present(start, start+size); ++ } ++ } else ++ err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot, + pgoff, flags & MAP_NONBLOCK); + + /* +@@ -220,7 +260,8 @@ + * it after ->populate completes, and that would prevent + * downgrading the lock. (Locks can't be upgraded). + */ +- } ++ ++out: + if (likely(!has_write_lock)) + up_read(&mm->mmap_sem); + else +diff -Nurb linux-2.6.22-570/mm/hugetlb.c linux-2.6.22-591/mm/hugetlb.c +--- linux-2.6.22-570/mm/hugetlb.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/hugetlb.c 2007-12-21 15:36:12.000000000 -0500 +@@ -28,6 +28,9 @@ + static struct list_head hugepage_freelists[MAX_NUMNODES]; + static unsigned int nr_huge_pages_node[MAX_NUMNODES]; + static unsigned int free_huge_pages_node[MAX_NUMNODES]; ++gfp_t htlb_alloc_mask = GFP_HIGHUSER; ++unsigned long hugepages_treat_as_movable; ++ + /* + * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages + */ +@@ -67,14 +70,15 @@ + static struct page *dequeue_huge_page(struct vm_area_struct *vma, + unsigned long address) + { +- int nid = numa_node_id(); ++ int nid; + struct page *page = NULL; +- struct zonelist *zonelist = huge_zonelist(vma, address); ++ struct zonelist *zonelist = huge_zonelist(vma, address, ++ htlb_alloc_mask); + struct zone **z; + + for (z = zonelist->zones; *z; z++) { + nid = zone_to_nid(*z); +- if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) && ++ if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) && + !list_empty(&hugepage_freelists[nid])) + break; + } +@@ -114,7 +118,7 @@ + prev_nid = nid; + spin_unlock(&nid_lock); + +- page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN, ++ page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN, + HUGETLB_PAGE_ORDER); + if (page) { + set_compound_page_dtor(page, free_huge_page); +@@ -264,6 +268,19 @@ + max_huge_pages = set_max_huge_pages(max_huge_pages); + return 0; + } ++ ++int hugetlb_treat_movable_handler(struct ctl_table *table, int write, ++ struct file *file, void __user *buffer, ++ size_t *length, loff_t *ppos) ++{ ++ proc_dointvec(table, write, file, buffer, length, ppos); ++ if (hugepages_treat_as_movable) ++ htlb_alloc_mask = 
GFP_HIGH_MOVABLE; ++ else ++ htlb_alloc_mask = GFP_HIGHUSER; ++ return 0; ++} ++ + #endif /* CONFIG_SYSCTL */ + + int hugetlb_report_meminfo(char *buf) +diff -Nurb linux-2.6.22-570/mm/memory.c linux-2.6.22-591/mm/memory.c +--- linux-2.6.22-570/mm/memory.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/memory.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1052,7 +1052,8 @@ + if (pages) + foll_flags |= FOLL_GET; + if (!write && !(vma->vm_flags & VM_LOCKED) && +- (!vma->vm_ops || !vma->vm_ops->nopage)) ++ (!vma->vm_ops || (!vma->vm_ops->nopage && ++ !vma->vm_ops->fault))) + foll_flags |= FOLL_ANON; + + do { +@@ -1712,11 +1713,11 @@ + if (unlikely(anon_vma_prepare(vma))) + goto oom; + if (old_page == ZERO_PAGE(address)) { +- new_page = alloc_zeroed_user_highpage(vma, address); ++ new_page = alloc_zeroed_user_highpage_movable(vma, address); + if (!new_page) + goto oom; + } else { +- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); + if (!new_page) + goto oom; + cow_user_page(new_page, old_page, address, vma); +@@ -1828,6 +1829,13 @@ + unsigned long restart_addr; + int need_break; + ++ /* ++ * files that support invalidating or truncating portions of the ++ * file from under mmaped areas must set the VM_CAN_INVALIDATE flag, and ++ * have their .nopage function return the page locked. ++ */ ++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); ++ + again: + restart_addr = vma->vm_truncate_count; + if (is_restart_addr(restart_addr) && start_addr < restart_addr) { +@@ -1956,17 +1964,8 @@ + + spin_lock(&mapping->i_mmap_lock); + +- /* serialize i_size write against truncate_count write */ +- smp_wmb(); +- /* Protect against page faults, and endless unmapping loops */ ++ /* Protect against endless unmapping loops */ + mapping->truncate_count++; +- /* +- * For archs where spin_lock has inclusive semantics like ia64 +- * this smp_mb() will prevent to read pagetable contents +- * before the truncate_count increment is visible to +- * other cpus. +- */ +- smp_mb(); + if (unlikely(is_restart_addr(mapping->truncate_count))) { + if (mapping->truncate_count == 0) + reset_vma_truncate_counts(mapping); +@@ -2005,8 +2004,18 @@ + if (IS_SWAPFILE(inode)) + goto out_busy; + i_size_write(inode, offset); ++ ++ /* ++ * unmap_mapping_range is called twice, first simply for efficiency ++ * so that truncate_inode_pages does fewer single-page unmaps. However ++ * after this first call, and before truncate_inode_pages finishes, ++ * it is possible for private pages to be COWed, which remain after ++ * truncate_inode_pages finishes, hence the second unmap_mapping_range ++ * call must be made for correctness. 
++ */ + unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(mapping, offset); ++ unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); + goto out_truncate; + + do_expand: +@@ -2046,6 +2055,7 @@ + down_write(&inode->i_alloc_sem); + unmap_mapping_range(mapping, offset, (end - offset), 1); + truncate_inode_pages_range(mapping, offset, end); ++ unmap_mapping_range(mapping, offset, (end - offset), 1); + inode->i_op->truncate_range(inode, offset, end); + up_write(&inode->i_alloc_sem); + mutex_unlock(&inode->i_mutex); +@@ -2208,7 +2218,6 @@ + + /* No need to invalidate - it was non-present before */ + update_mmu_cache(vma, address, pte); +- lazy_mmu_prot_update(pte); + unlock: + pte_unmap_unlock(page_table, ptl); + out: +@@ -2241,7 +2250,7 @@ + goto oom; + if (unlikely(anon_vma_prepare(vma))) + goto oom; +- page = alloc_zeroed_user_highpage(vma, address); ++ page = alloc_zeroed_user_highpage_movable(vma, address); + if (!page) + goto oom; + +@@ -2284,10 +2293,10 @@ + } + + /* +- * do_no_page() tries to create a new page mapping. It aggressively ++ * __do_fault() tries to create a new page mapping. It aggressively + * tries to share with existing pages, but makes a separate copy if +- * the "write_access" parameter is true in order to avoid the next +- * page fault. ++ * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid ++ * the next page fault. + * + * As this is called only for pages that do not currently exist, we + * do not need to flush old virtual caches or the TLB. +@@ -2296,92 +2305,85 @@ + * but allow concurrent faults), and pte mapped but not yet locked. + * We return with mmap_sem still held, but pte unmapped and unlocked. + */ +-static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, ++static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address, pte_t *page_table, pmd_t *pmd, +- int write_access) +-{ ++ pgoff_t pgoff, unsigned int flags, pte_t orig_pte) ++ { + spinlock_t *ptl; +- struct page *new_page; +- struct address_space *mapping = NULL; ++ struct page *page, *faulted_page; + pte_t entry; +- unsigned int sequence = 0; +- int ret = VM_FAULT_MINOR; + int anon = 0; + struct page *dirty_page = NULL; ++ struct fault_data fdata; ++ ++ fdata.address = address & PAGE_MASK; ++ fdata.pgoff = pgoff; ++ fdata.flags = flags; + + pte_unmap(page_table); + BUG_ON(vma->vm_flags & VM_PFNMAP); + +- if (!vx_rss_avail(mm, 1)) ++ if (likely(vma->vm_ops->fault)) { ++ fdata.type = -1; ++ faulted_page = vma->vm_ops->fault(vma, &fdata); ++ WARN_ON(fdata.type == -1); ++ if (unlikely(!faulted_page)) ++ return fdata.type; ++ } else { ++ /* Legacy ->nopage path */ ++ fdata.type = VM_FAULT_MINOR; ++ faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, ++ &fdata.type); ++ /* no page was available -- either SIGBUS or OOM */ ++ if (unlikely(faulted_page == NOPAGE_SIGBUS)) ++ return VM_FAULT_SIGBUS; ++ else if (unlikely(faulted_page == NOPAGE_OOM)) + return VM_FAULT_OOM; ++ } + +- if (vma->vm_file) { +- mapping = vma->vm_file->f_mapping; +- sequence = mapping->truncate_count; +- smp_rmb(); /* serializes i_size against truncate_count */ +- } +-retry: +- new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); +- /* +- * No smp_rmb is needed here as long as there's a full +- * spin_lock/unlock sequence inside the ->nopage callback +- * (for the pagecache lookup) that acts as an implicit +- * smp_mb() and prevents the i_size read to happen +- * after the next truncate_count read. 
++ /* ++ * For consistency in subsequent calls, make the faulted_page always ++ * locked. + */ +- +- /* no page was available -- either SIGBUS, OOM or REFAULT */ +- if (unlikely(new_page == NOPAGE_SIGBUS)) +- return VM_FAULT_SIGBUS; +- else if (unlikely(new_page == NOPAGE_OOM)) +- return VM_FAULT_OOM; +- else if (unlikely(new_page == NOPAGE_REFAULT)) +- return VM_FAULT_MINOR; ++ if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE))) ++ lock_page(faulted_page); ++ else ++ BUG_ON(!PageLocked(faulted_page)); + + /* + * Should we do an early C-O-W break? + */ +- if (write_access) { ++ page = faulted_page; ++ if (flags & FAULT_FLAG_WRITE) { + if (!(vma->vm_flags & VM_SHARED)) { +- struct page *page; +- +- if (unlikely(anon_vma_prepare(vma))) +- goto oom; +- page = alloc_page_vma(GFP_HIGHUSER, vma, address); +- if (!page) +- goto oom; +- copy_user_highpage(page, new_page, address, vma); +- page_cache_release(new_page); +- new_page = page; + anon = 1; +- ++ if (unlikely(anon_vma_prepare(vma))) { ++ fdata.type = VM_FAULT_OOM; ++ goto out; ++ } ++ page = alloc_page_vma(GFP_HIGHUSER, vma, address); ++ if (!page) { ++ fdata.type = VM_FAULT_OOM; ++ goto out; ++ } ++ copy_user_highpage(page, faulted_page, address, vma); + } else { +- /* if the page will be shareable, see if the backing ++ /* ++ * If the page will be shareable, see if the backing + * address space wants to know that the page is about +- * to become writable */ ++ * to become writable ++ */ + if (vma->vm_ops->page_mkwrite && +- vma->vm_ops->page_mkwrite(vma, new_page) < 0 +- ) { +- page_cache_release(new_page); +- return VM_FAULT_SIGBUS; ++ vma->vm_ops->page_mkwrite(vma, page) < 0) { ++ fdata.type = VM_FAULT_SIGBUS; ++ anon = 1; /* no anon but release faulted_page */ ++ goto out; + } + } ++ + } + + page_table = pte_offset_map_lock(mm, pmd, address, &ptl); +- /* +- * For a file-backed vma, someone could have truncated or otherwise +- * invalidated this page. If unmap_mapping_range got called, +- * retry getting the page. +- */ +- if (mapping && unlikely(sequence != mapping->truncate_count)) { +- pte_unmap_unlock(page_table, ptl); +- page_cache_release(new_page); +- cond_resched(); +- sequence = mapping->truncate_count; +- smp_rmb(); +- goto retry; +- } + + /* + * This silly early PAGE_DIRTY setting removes a race +@@ -2394,43 +2396,68 @@ + * handle that later. + */ + /* Only go through if we didn't race with anybody else... */ +- if (pte_none(*page_table)) { +- flush_icache_page(vma, new_page); +- entry = mk_pte(new_page, vma->vm_page_prot); +- if (write_access) ++ if (likely(pte_same(*page_table, orig_pte))) { ++ flush_icache_page(vma, page); ++ entry = mk_pte(page, vma->vm_page_prot); ++ if (flags & FAULT_FLAG_WRITE) + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + set_pte_at(mm, address, page_table, entry); + if (anon) { + inc_mm_counter(mm, anon_rss); +- lru_cache_add_active(new_page); +- page_add_new_anon_rmap(new_page, vma, address); ++ lru_cache_add_active(page); ++ page_add_new_anon_rmap(page, vma, address); + } else { + inc_mm_counter(mm, file_rss); +- page_add_file_rmap(new_page); +- if (write_access) { +- dirty_page = new_page; ++ page_add_file_rmap(page); ++ if (flags & FAULT_FLAG_WRITE) { ++ dirty_page = page; + get_page(dirty_page); + } + } +- } else { +- /* One of our sibling threads was faster, back out. 
*/ +- page_cache_release(new_page); +- goto unlock; +- } + +- /* no need to invalidate: a not-present page shouldn't be cached */ ++ /* no need to invalidate: a not-present page won't be cached */ + update_mmu_cache(vma, address, entry); + lazy_mmu_prot_update(entry); +-unlock: ++ } else { ++ if (anon) ++ page_cache_release(page); ++ else ++ anon = 1; /* no anon but release faulted_page */ ++ } ++ + pte_unmap_unlock(page_table, ptl); +- if (dirty_page) { ++ ++out: ++ unlock_page(faulted_page); ++ if (anon) ++ page_cache_release(faulted_page); ++ else if (dirty_page) { + set_page_dirty_balance(dirty_page); + put_page(dirty_page); + } +- return ret; +-oom: +- page_cache_release(new_page); +- return VM_FAULT_OOM; ++ ++ return fdata.type; ++} ++ ++static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ++ unsigned long address, pte_t *page_table, pmd_t *pmd, ++ int write_access, pte_t orig_pte) ++{ ++ pgoff_t pgoff = (((address & PAGE_MASK) ++ - vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff; ++ unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0); ++ ++ return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); ++} ++ ++static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma, ++ unsigned long address, pte_t *page_table, pmd_t *pmd, ++ int write_access, pgoff_t pgoff, pte_t orig_pte) ++{ ++ unsigned int flags = FAULT_FLAG_NONLINEAR | ++ (write_access ? FAULT_FLAG_WRITE : 0); ++ ++ return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte); + } + + /* +@@ -2509,9 +2536,14 @@ + print_bad_pte(vma, orig_pte, address); + return VM_FAULT_OOM; + } +- /* We can then assume vm->vm_ops && vma->vm_ops->populate */ + + pgoff = pte_to_pgoff(orig_pte); ++ ++ if (vma->vm_ops && vma->vm_ops->fault) ++ return do_nonlinear_fault(mm, vma, address, page_table, pmd, ++ write_access, pgoff, orig_pte); ++ ++ /* We can then assume vm->vm_ops && vma->vm_ops->populate */ + err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, + vma->vm_page_prot, pgoff, 0); + if (err == -ENOMEM) +@@ -2546,10 +2578,9 @@ + if (!pte_present(entry)) { + if (pte_none(entry)) { + if (vma->vm_ops) { +- if (vma->vm_ops->nopage) +- return do_no_page(mm, vma, address, +- pte, pmd, +- write_access); ++ if (vma->vm_ops->fault || vma->vm_ops->nopage) ++ return do_linear_fault(mm, vma, address, ++ pte, pmd, write_access, entry); + if (unlikely(vma->vm_ops->nopfn)) + return do_no_pfn(mm, vma, address, pte, + pmd, write_access); +diff -Nurb linux-2.6.22-570/mm/mempolicy.c linux-2.6.22-591/mm/mempolicy.c +--- linux-2.6.22-570/mm/mempolicy.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/mempolicy.c 2007-12-21 15:36:12.000000000 -0500 +@@ -594,7 +594,7 @@ + + static struct page *new_node_page(struct page *page, unsigned long node, int **x) + { +- return alloc_pages_node(node, GFP_HIGHUSER, 0); ++ return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0); + } + + /* +@@ -710,7 +710,8 @@ + { + struct vm_area_struct *vma = (struct vm_area_struct *)private; + +- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma)); ++ return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, ++ page_address_in_vma(page, vma)); + } + #else + +@@ -1202,7 +1203,8 @@ + + #ifdef CONFIG_HUGETLBFS + /* Return a zonelist suitable for a huge page allocation. 
*/ +-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) ++struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, ++ gfp_t gfp_flags) + { + struct mempolicy *pol = get_vma_policy(current, vma, addr); + +@@ -1210,7 +1212,7 @@ + unsigned nid; + + nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); +- return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER); ++ return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags); + } + return zonelist_policy(GFP_HIGHUSER, pol); + } +@@ -1309,7 +1311,6 @@ + * keeps mempolicies cpuset relative after its cpuset moves. See + * further kernel/cpuset.c update_nodemask(). + */ +-void *cpuset_being_rebound; + + /* Slow path of a mempolicy copy */ + struct mempolicy *__mpol_copy(struct mempolicy *old) +@@ -1908,4 +1909,3 @@ + m->version = (vma != priv->tail_vma) ? vma->vm_start : 0; + return 0; + } +- +diff -Nurb linux-2.6.22-570/mm/migrate.c linux-2.6.22-591/mm/migrate.c +--- linux-2.6.22-570/mm/migrate.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/migrate.c 2007-12-21 15:36:12.000000000 -0500 +@@ -761,7 +761,8 @@ + + *result = &pm->status; + +- return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0); ++ return alloc_pages_node(pm->node, ++ GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0); + } + + /* +diff -Nurb linux-2.6.22-570/mm/mmap.c linux-2.6.22-591/mm/mmap.c +--- linux-2.6.22-570/mm/mmap.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/mmap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -202,6 +202,17 @@ + } + + /* ++ * Requires inode->i_mapping->i_mmap_lock ++ */ ++void __unlink_file_vma(struct vm_area_struct *vma) ++{ ++ struct file *file = vma->vm_file; ++ struct address_space *mapping = file->f_mapping; ++ ++ __remove_shared_vm_struct(vma, file, mapping); ++} ++ ++/* + * Unlink a file-based vm structure from its prio_tree, to hide + * vma from rmap and vmtruncate before freeing its page tables. 
+ */ +@@ -1023,7 +1034,7 @@ + } + } + +- error = security_file_mmap(file, reqprot, prot, flags); ++ error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + if (error) + return error; + +@@ -1150,12 +1161,8 @@ + vx_vmlocked_add(mm, len >> PAGE_SHIFT); + make_pages_present(addr, addr + len); + } +- if (flags & MAP_POPULATE) { +- up_write(&mm->mmap_sem); +- sys_remap_file_pages(addr, len, 0, +- pgoff, flags & MAP_NONBLOCK); +- down_write(&mm->mmap_sem); +- } ++ if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK)) ++ make_pages_present(addr, addr + len); + return addr; + + unmap_and_free_vma: +diff -Nurb linux-2.6.22-570/mm/mremap.c linux-2.6.22-591/mm/mremap.c +--- linux-2.6.22-570/mm/mremap.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/mremap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -292,6 +292,10 @@ + if ((addr <= new_addr) && (addr+old_len) > new_addr) + goto out; + ++ ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); ++ if (ret) ++ goto out; ++ + ret = do_munmap(mm, new_addr, new_len); + if (ret) + goto out; +@@ -394,8 +398,13 @@ + + new_addr = get_unmapped_area(vma->vm_file, 0, new_len, + vma->vm_pgoff, map_flags); ++ if (new_addr & ~PAGE_MASK) { + ret = new_addr; +- if (new_addr & ~PAGE_MASK) ++ goto out; ++ } ++ ++ ret = security_file_mmap(0, 0, 0, 0, new_addr, 1); ++ if (ret) + goto out; + } + ret = move_vma(vma, addr, old_len, new_len, new_addr); +diff -Nurb linux-2.6.22-570/mm/nommu.c linux-2.6.22-591/mm/nommu.c +--- linux-2.6.22-570/mm/nommu.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/nommu.c 2007-12-21 15:36:12.000000000 -0500 +@@ -639,7 +639,7 @@ + } + + /* allow the security API to have its say */ +- ret = security_file_mmap(file, reqprot, prot, flags); ++ ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); + if (ret < 0) + return ret; + +@@ -1336,8 +1336,7 @@ + return 0; + } + +-struct page *filemap_nopage(struct vm_area_struct *area, +- unsigned long address, int *type) ++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata) + { + BUG(); + return NULL; +diff -Nurb linux-2.6.22-570/mm/page_alloc.c linux-2.6.22-591/mm/page_alloc.c +--- linux-2.6.22-570/mm/page_alloc.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/page_alloc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -143,6 +143,42 @@ + EXPORT_SYMBOL(nr_node_ids); + #endif + ++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY ++static inline int get_pageblock_migratetype(struct page *page) ++{ ++ return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); ++} ++ ++static void set_pageblock_migratetype(struct page *page, int migratetype) ++{ ++ set_pageblock_flags_group(page, (unsigned long)migratetype, ++ PB_migrate, PB_migrate_end); ++} ++ ++static inline int gfpflags_to_migratetype(gfp_t gfp_flags) ++{ ++ WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); ++ ++ return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | ++ ((gfp_flags & __GFP_RECLAIMABLE) != 0); ++} ++ ++#else ++static inline int get_pageblock_migratetype(struct page *page) ++{ ++ return MIGRATE_UNMOVABLE; ++} ++ ++static void set_pageblock_migratetype(struct page *page, int migratetype) ++{ ++} ++ ++static inline int gfpflags_to_migratetype(gfp_t gfp_flags) ++{ ++ return MIGRATE_UNMOVABLE; ++} ++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ ++ + #ifdef CONFIG_DEBUG_VM + static int page_outside_zone_boundaries(struct zone *zone, struct page *page) + { +@@ -397,6 +433,7 @@ + { + unsigned long page_idx; + int order_size = 1 << order; ++ int migratetype = 
get_pageblock_migratetype(page); + + if (unlikely(PageCompound(page))) + destroy_compound_page(page, order); +@@ -409,7 +446,6 @@ + __mod_zone_page_state(zone, NR_FREE_PAGES, order_size); + while (order < MAX_ORDER-1) { + unsigned long combined_idx; +- struct free_area *area; + struct page *buddy; + + buddy = __page_find_buddy(page, page_idx, order); +@@ -417,8 +453,7 @@ + break; /* Move the buddy up one level. */ + + list_del(&buddy->lru); +- area = zone->free_area + order; +- area->nr_free--; ++ zone->free_area[order].nr_free--; + rmv_page_order(buddy); + combined_idx = __find_combined_index(page_idx, order); + page = page + (combined_idx - page_idx); +@@ -426,7 +461,8 @@ + order++; + } + set_page_order(page, order); +- list_add(&page->lru, &zone->free_area[order].free_list); ++ list_add(&page->lru, ++ &zone->free_area[order].free_list[migratetype]); + zone->free_area[order].nr_free++; + } + +@@ -566,7 +602,8 @@ + * -- wli + */ + static inline void expand(struct zone *zone, struct page *page, +- int low, int high, struct free_area *area) ++ int low, int high, struct free_area *area, ++ int migratetype) + { + unsigned long size = 1 << high; + +@@ -575,7 +612,7 @@ + high--; + size >>= 1; + VM_BUG_ON(bad_range(zone, &page[size])); +- list_add(&page[size].lru, &area->free_list); ++ list_add(&page[size].lru, &area->free_list[migratetype]); + area->nr_free++; + set_page_order(&page[size], high); + } +@@ -628,31 +665,172 @@ + return 0; + } + ++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY ++/* ++ * This array describes the order lists are fallen back to when ++ * the free lists for the desirable migrate type are depleted ++ */ ++static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { ++ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE }, ++ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE }, ++ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE }, ++}; ++ ++/* ++ * Move the free pages in a range to the free lists of the requested type. ++ * Note that start_page and end_pages are not aligned in a MAX_ORDER_NR_PAGES ++ * boundary. If alignment is required, use move_freepages_block() ++ */ ++int move_freepages(struct zone *zone, ++ struct page *start_page, struct page *end_page, ++ int migratetype) ++{ ++ struct page *page; ++ unsigned long order; ++ int blocks_moved = 0; ++ ++#ifndef CONFIG_HOLES_IN_ZONE ++ /* ++ * page_zone is not safe to call in this context when ++ * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant ++ * anyway as we check zone boundaries in move_freepages_block(). 
++ * Remove at a later date when no bug reports exist related to ++ * CONFIG_PAGE_GROUP_BY_MOBILITY ++ */ ++ BUG_ON(page_zone(start_page) != page_zone(end_page)); ++#endif ++ ++ for (page = start_page; page <= end_page;) { ++ if (!pfn_valid_within(page_to_pfn(page))) { ++ page++; ++ continue; ++ } ++ ++ if (!PageBuddy(page)) { ++ page++; ++ continue; ++ } ++ ++ order = page_order(page); ++ list_del(&page->lru); ++ list_add(&page->lru, ++ &zone->free_area[order].free_list[migratetype]); ++ page += 1 << order; ++ blocks_moved++; ++ } ++ ++ return blocks_moved; ++} ++ ++int move_freepages_block(struct zone *zone, struct page *page, int migratetype) ++{ ++ unsigned long start_pfn, end_pfn; ++ struct page *start_page, *end_page; ++ ++ start_pfn = page_to_pfn(page); ++ start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1); ++ start_page = pfn_to_page(start_pfn); ++ end_page = start_page + MAX_ORDER_NR_PAGES - 1; ++ end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1; ++ ++ /* Do not cross zone boundaries */ ++ if (start_pfn < zone->zone_start_pfn) ++ start_page = page; ++ if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages) ++ return 0; ++ ++ return move_freepages(zone, start_page, end_page, migratetype); ++} ++ ++/* Remove an element from the buddy allocator from the fallback list */ ++static struct page *__rmqueue_fallback(struct zone *zone, int order, ++ int start_migratetype) ++{ ++ struct free_area * area; ++ int current_order; ++ struct page *page; ++ int migratetype, i; ++ ++ /* Find the largest possible block of pages in the other list */ ++ for (current_order = MAX_ORDER-1; current_order >= order; ++ --current_order) { ++ for (i = 0; i < MIGRATE_TYPES - 1; i++) { ++ migratetype = fallbacks[start_migratetype][i]; ++ ++ area = &(zone->free_area[current_order]); ++ if (list_empty(&area->free_list[migratetype])) ++ continue; ++ ++ page = list_entry(area->free_list[migratetype].next, ++ struct page, lru); ++ area->nr_free--; ++ ++ /* ++ * If breaking a large block of pages, move all free ++ * pages to the preferred allocation list ++ */ ++ if (unlikely(current_order >= MAX_ORDER / 2)) { ++ migratetype = start_migratetype; ++ move_freepages_block(zone, page, migratetype); ++ } ++ ++ /* Remove the page from the freelists */ ++ list_del(&page->lru); ++ rmv_page_order(page); ++ __mod_zone_page_state(zone, NR_FREE_PAGES, ++ -(1UL << order)); ++ ++ if (current_order == MAX_ORDER - 1) ++ set_pageblock_migratetype(page, ++ start_migratetype); ++ ++ expand(zone, page, order, current_order, area, migratetype); ++ return page; ++ } ++ } ++ ++ return NULL; ++} ++#else ++static struct page *__rmqueue_fallback(struct zone *zone, int order, ++ int start_migratetype) ++{ ++ return NULL; ++} ++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ ++ + /* + * Do the hard work of removing an element from the buddy allocator. + * Call me with the zone->lock already held. 
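++ *
++ * With CONFIG_PAGE_GROUP_BY_MOBILITY, the preferred
++ * free_list[migratetype] is tried first; if it is empty,
++ * __rmqueue_fallback() walks the fallbacks[] table above, so a
++ * MIGRATE_UNMOVABLE request falls back to MIGRATE_RECLAIMABLE and
++ * then MIGRATE_MOVABLE.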
+ */ +-static struct page *__rmqueue(struct zone *zone, unsigned int order) ++static struct page *__rmqueue(struct zone *zone, unsigned int order, ++ int migratetype) + { + struct free_area * area; + unsigned int current_order; + struct page *page; + ++ /* Find a page of the appropriate size in the preferred list */ + for (current_order = order; current_order < MAX_ORDER; ++current_order) { +- area = zone->free_area + current_order; +- if (list_empty(&area->free_list)) ++ area = &(zone->free_area[current_order]); ++ if (list_empty(&area->free_list[migratetype])) + continue; + +- page = list_entry(area->free_list.next, struct page, lru); ++ page = list_entry(area->free_list[migratetype].next, ++ struct page, lru); + list_del(&page->lru); + rmv_page_order(page); + area->nr_free--; + __mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order)); +- expand(zone, page, order, current_order, area); +- return page; ++ expand(zone, page, order, current_order, area, migratetype); ++ goto got_page; + } + +- return NULL; ++ page = __rmqueue_fallback(zone, order, migratetype); ++ ++got_page: ++ ++ return page; + } + + /* +@@ -661,16 +839,18 @@ + * Returns the number of new pages which were placed at *list. + */ + static int rmqueue_bulk(struct zone *zone, unsigned int order, +- unsigned long count, struct list_head *list) ++ unsigned long count, struct list_head *list, ++ int migratetype) + { + int i; + + spin_lock(&zone->lock); + for (i = 0; i < count; ++i) { +- struct page *page = __rmqueue(zone, order); ++ struct page *page = __rmqueue(zone, order, migratetype); + if (unlikely(page == NULL)) + break; +- list_add_tail(&page->lru, list); ++ list_add(&page->lru, list); ++ set_page_private(page, migratetype); + } + spin_unlock(&zone->lock); + return i; +@@ -732,7 +912,7 @@ + { + unsigned long pfn, max_zone_pfn; + unsigned long flags; +- int order; ++ int order, t; + struct list_head *curr; + + if (!zone->spanned_pages) +@@ -749,15 +929,15 @@ + swsusp_unset_page_free(page); + } + +- for (order = MAX_ORDER - 1; order >= 0; --order) +- list_for_each(curr, &zone->free_area[order].free_list) { ++ for_each_migratetype_order(order, t) { ++ list_for_each(curr, &zone->free_area[order].free_list[t]) { + unsigned long i; + + pfn = page_to_pfn(list_entry(curr, struct page, lru)); + for (i = 0; i < (1UL << order); i++) + swsusp_set_page_free(pfn_to_page(pfn + i)); + } +- ++ } + spin_unlock_irqrestore(&zone->lock, flags); + } + +@@ -797,6 +977,7 @@ + local_irq_save(flags); + __count_vm_event(PGFREE); + list_add(&page->lru, &pcp->list); ++ set_page_private(page, get_pageblock_migratetype(page)); + pcp->count++; + if (pcp->count >= pcp->high) { + free_pages_bulk(zone, pcp->batch, &pcp->list, 0); +@@ -846,6 +1027,7 @@ + struct page *page; + int cold = !!(gfp_flags & __GFP_COLD); + int cpu; ++ int migratetype = gfpflags_to_migratetype(gfp_flags); + + again: + cpu = get_cpu(); +@@ -856,16 +1038,32 @@ + local_irq_save(flags); + if (!pcp->count) { + pcp->count = rmqueue_bulk(zone, 0, +- pcp->batch, &pcp->list); ++ pcp->batch, &pcp->list, migratetype); + if (unlikely(!pcp->count)) + goto failed; + } ++ ++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY ++ /* Find a page of the appropriate migrate type */ ++ list_for_each_entry(page, &pcp->list, lru) ++ if (page_private(page) == migratetype) ++ break; ++ ++ /* Allocate more to the pcp list if necessary */ ++ if (unlikely(&page->lru == &pcp->list)) { ++ pcp->count += rmqueue_bulk(zone, 0, ++ pcp->batch, &pcp->list, migratetype); ++ page = list_entry(pcp->list.next, struct page, lru); ++ } 
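An illustrative aside on the hunks above: __rmqueue() now takes a migratetype and only falls into __rmqueue_fallback() when the preferred free list is empty, and rmqueue_bulk() tags each page it hands to the per-cpu list with set_page_private(page, migratetype), which is what lets the buffered_rmqueue() search above pick a page of the right mobility without retaking zone->lock. The following is a minimal userspace sketch of the fallback ordering encoded by the fallbacks[] array earlier in this patch; it is not kernel code, and the MIGRATE_* enum values and names[] table are illustrative stand-ins:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's migrate types (values assumed). */
enum {
	MIGRATE_UNMOVABLE,
	MIGRATE_RECLAIMABLE,
	MIGRATE_MOVABLE,
	MIGRATE_TYPES
};

/* Same ordering as the fallbacks[] array added earlier in this patch. */
static const int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES - 1] = {
	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE },
	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
};

static const char *names[MIGRATE_TYPES] = {
	"UNMOVABLE", "RECLAIMABLE", "MOVABLE"
};

int main(void)
{
	int t, i;

	/* For each allocation type, show the order the free lists are tried. */
	for (t = 0; t < MIGRATE_TYPES; t++) {
		printf("%s:", names[t]);
		for (i = 0; i < MIGRATE_TYPES - 1; i++)
			printf(" -> %s", names[fallbacks[t][i]]);
		printf("\n");
	}
	return 0;
}

Note that __rmqueue_fallback() in the hunk above walks this table from the largest order downwards, so a fallback allocation steals the biggest block available, and for large blocks it reassigns the whole MAX_ORDER block to the preferred type via move_freepages_block(), which keeps mobility types from interleaving at fine granularity.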
++#else + page = list_entry(pcp->list.next, struct page, lru); ++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */ ++ + list_del(&page->lru); + pcp->count--; + } else { + spin_lock_irqsave(&zone->lock, flags); +- page = __rmqueue(zone, order); ++ page = __rmqueue(zone, order, migratetype); + spin_unlock(&zone->lock); + if (!page) + goto failed; +@@ -1952,6 +2150,16 @@ + init_page_count(page); + reset_page_mapcount(page); + SetPageReserved(page); ++ ++ /* ++ * Mark the block movable so that blocks are reserved for ++ * movable at startup. This will force kernel allocations ++ * to reserve their blocks rather than leaking throughout ++ * the address space during boot when many long-lived ++ * kernel allocations are made ++ */ ++ set_pageblock_migratetype(page, MIGRATE_MOVABLE); ++ + INIT_LIST_HEAD(&page->lru); + #ifdef WANT_PAGE_VIRTUAL + /* The shift won't overflow because ZONE_NORMAL is below 4G. */ +@@ -1964,9 +2172,9 @@ + void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, + unsigned long size) + { +- int order; +- for (order = 0; order < MAX_ORDER ; order++) { +- INIT_LIST_HEAD(&zone->free_area[order].free_list); ++ int order, t; ++ for_each_migratetype_order(order, t) { ++ INIT_LIST_HEAD(&zone->free_area[order].free_list[t]); + zone->free_area[order].nr_free = 0; + } + } +@@ -2584,6 +2792,41 @@ + realtotalpages); + } + ++#ifndef CONFIG_SPARSEMEM ++/* ++ * Calculate the size of the zone->blockflags rounded to an unsigned long ++ * Start by making sure zonesize is a multiple of MAX_ORDER-1 by rounding up ++ * Then figure 1 NR_PAGEBLOCK_BITS worth of bits per MAX_ORDER-1, finally ++ * round what is now in bits to nearest long in bits, then return it in ++ * bytes. ++ */ ++static unsigned long __init usemap_size(unsigned long zonesize) ++{ ++ unsigned long usemapsize; ++ ++ usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES); ++ usemapsize = usemapsize >> (MAX_ORDER-1); ++ usemapsize *= NR_PAGEBLOCK_BITS; ++ usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long)); ++ ++ return usemapsize / 8; ++} ++ ++static void __init setup_usemap(struct pglist_data *pgdat, ++ struct zone *zone, unsigned long zonesize) ++{ ++ unsigned long usemapsize = usemap_size(zonesize); ++ zone->pageblock_flags = NULL; ++ if (usemapsize) { ++ zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); ++ memset(zone->pageblock_flags, 0, usemapsize); ++ } ++} ++#else ++static void inline setup_usemap(struct pglist_data *pgdat, ++ struct zone *zone, unsigned long zonesize) {} ++#endif /* CONFIG_SPARSEMEM */ ++ + /* + * Set up the zone data structures: + * - mark all pages reserved +@@ -2664,6 +2907,7 @@ + if (!size) + continue; + ++ setup_usemap(pgdat, zone, size); + ret = init_currently_empty_zone(zone, zone_start_pfn, + size, MEMMAP_EARLY); + BUG_ON(ret); +@@ -3363,6 +3607,21 @@ + for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++) + ; + table = (void*) __get_free_pages(GFP_ATOMIC, order); ++ /* ++ * If bucketsize is not a power-of-two, we may free ++ * some pages at the end of hash table. 
++ */ ++ if (table) { ++ unsigned long alloc_end = (unsigned long)table + ++ (PAGE_SIZE << order); ++ unsigned long used = (unsigned long)table + ++ PAGE_ALIGN(size); ++ split_page(virt_to_page(table), order); ++ while (used < alloc_end) { ++ free_page(used); ++ used += PAGE_SIZE; ++ } ++ } + } + } while (!table && size > PAGE_SIZE && --log2qty); + +@@ -3396,4 +3655,79 @@ + EXPORT_SYMBOL(page_to_pfn); + #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */ + ++/* Return a pointer to the bitmap storing bits affecting a block of pages */ ++static inline unsigned long *get_pageblock_bitmap(struct zone *zone, ++ unsigned long pfn) ++{ ++#ifdef CONFIG_SPARSEMEM ++ return __pfn_to_section(pfn)->pageblock_flags; ++#else ++ return zone->pageblock_flags; ++#endif /* CONFIG_SPARSEMEM */ ++} + ++static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn) ++{ ++#ifdef CONFIG_SPARSEMEM ++ pfn &= (PAGES_PER_SECTION-1); ++ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS; ++#else ++ pfn = pfn - zone->zone_start_pfn; ++ return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS; ++#endif /* CONFIG_SPARSEMEM */ ++} ++ ++/** ++ * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages ++ * @page: The page within the block of interest ++ * @start_bitidx: The first bit of interest to retrieve ++ * @end_bitidx: The last bit of interest ++ * returns pageblock_bits flags ++ */ ++unsigned long get_pageblock_flags_group(struct page *page, ++ int start_bitidx, int end_bitidx) ++{ ++ struct zone *zone; ++ unsigned long *bitmap; ++ unsigned long pfn, bitidx; ++ unsigned long flags = 0; ++ unsigned long value = 1; ++ ++ zone = page_zone(page); ++ pfn = page_to_pfn(page); ++ bitmap = get_pageblock_bitmap(zone, pfn); ++ bitidx = pfn_to_bitidx(zone, pfn); ++ ++ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) ++ if (test_bit(bitidx + start_bitidx, bitmap)) ++ flags |= value; ++ ++ return flags; ++} ++ ++/** ++ * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages ++ * @page: The page within the block of interest ++ * @start_bitidx: The first bit of interest ++ * @end_bitidx: The last bit of interest ++ * @flags: The flags to set ++ */ ++void set_pageblock_flags_group(struct page *page, unsigned long flags, ++ int start_bitidx, int end_bitidx) ++{ ++ struct zone *zone; ++ unsigned long *bitmap; ++ unsigned long pfn, bitidx; ++ unsigned long value = 1; ++ ++ zone = page_zone(page); ++ pfn = page_to_pfn(page); ++ bitmap = get_pageblock_bitmap(zone, pfn); ++ bitidx = pfn_to_bitidx(zone, pfn); ++ ++ for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) ++ if (flags & value) ++ __set_bit(bitidx + start_bitidx, bitmap); ++ else ++ __clear_bit(bitidx + start_bitidx, bitmap); ++} +diff -Nurb linux-2.6.22-570/mm/pdflush.c linux-2.6.22-591/mm/pdflush.c +--- linux-2.6.22-570/mm/pdflush.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/pdflush.c 2007-12-21 15:36:12.000000000 -0500 +@@ -92,6 +92,7 @@ + static int __pdflush(struct pdflush_work *my_work) + { + current->flags |= PF_FLUSHER | PF_SWAPWRITE; ++ set_freezable(); + my_work->fn = NULL; + my_work->who = current; + INIT_LIST_HEAD(&my_work->list); +diff -Nurb linux-2.6.22-570/mm/rmap.c linux-2.6.22-591/mm/rmap.c +--- linux-2.6.22-570/mm/rmap.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/rmap.c 2007-12-21 15:36:12.000000000 -0500 +@@ -622,8 +622,10 @@ + printk (KERN_EMERG " page->count = %x\n", page_count(page)); + printk (KERN_EMERG " 
page->mapping = %p\n", page->mapping); + print_symbol (KERN_EMERG " vma->vm_ops = %s\n", (unsigned long)vma->vm_ops); +- if (vma->vm_ops) ++ if (vma->vm_ops) { + print_symbol (KERN_EMERG " vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage); ++ print_symbol (KERN_EMERG " vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault); ++ } + if (vma->vm_file && vma->vm_file->f_op) + print_symbol (KERN_EMERG " vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap); + BUG(); +diff -Nurb linux-2.6.22-570/mm/shmem.c linux-2.6.22-591/mm/shmem.c +--- linux-2.6.22-570/mm/shmem.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/shmem.c 2007-12-21 15:36:12.000000000 -0500 +@@ -81,6 +81,7 @@ + SGP_READ, /* don't exceed i_size, don't allocate page */ + SGP_CACHE, /* don't exceed i_size, may allocate page */ + SGP_WRITE, /* may exceed i_size, may allocate page */ ++ SGP_FAULT, /* same as SGP_CACHE, return with page locked */ + }; + + static int shmem_getpage(struct inode *inode, unsigned long idx, +@@ -92,8 +93,11 @@ + * The above definition of ENTRIES_PER_PAGE, and the use of + * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: + * might be reconsidered if it ever diverges from PAGE_SIZE. ++ * ++ * Mobility flags are masked out as swap vectors cannot move + */ +- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); ++ return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO, ++ PAGE_CACHE_SHIFT-PAGE_SHIFT); + } + + static inline void shmem_dir_free(struct page *page) +@@ -371,7 +375,7 @@ + } + + spin_unlock(&info->lock); +- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); ++ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping)); + if (page) + set_page_private(page, 0); + spin_lock(&info->lock); +@@ -1110,6 +1114,10 @@ + + if (idx >= SHMEM_MAX_INDEX) + return -EFBIG; ++ ++ if (type) ++ *type = VM_FAULT_MINOR; ++ + /* + * Normally, filepage is NULL on entry, and either found + * uptodate immediately, or allocated and zeroed, or read +@@ -1299,8 +1307,10 @@ + } + done: + if (*pagep != filepage) { +- unlock_page(filepage); + *pagep = filepage; ++ if (sgp != SGP_FAULT) ++ unlock_page(filepage); ++ + } + return 0; + +@@ -1312,72 +1322,29 @@ + return error; + } + +-static struct page *shmem_nopage(struct vm_area_struct *vma, +- unsigned long address, int *type) ++static struct page *shmem_fault(struct vm_area_struct *vma, ++ struct fault_data *fdata) + { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct page *page = NULL; +- unsigned long idx; + int error; + +- idx = (address - vma->vm_start) >> PAGE_SHIFT; +- idx += vma->vm_pgoff; +- idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; +- if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) +- return NOPAGE_SIGBUS; +- +- error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); +- if (error) +- return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; +- +- mark_page_accessed(page); +- return page; +-} +- +-static int shmem_populate(struct vm_area_struct *vma, +- unsigned long addr, unsigned long len, +- pgprot_t prot, unsigned long pgoff, int nonblock) +-{ +- struct inode *inode = vma->vm_file->f_path.dentry->d_inode; +- struct mm_struct *mm = vma->vm_mm; +- enum sgp_type sgp = nonblock? 
SGP_QUICK: SGP_CACHE; +- unsigned long size; +- +- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; +- if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) +- return -EINVAL; ++ BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE)); + +- while ((long) len > 0) { +- struct page *page = NULL; +- int err; +- /* +- * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE +- */ +- err = shmem_getpage(inode, pgoff, &page, sgp, NULL); +- if (err) +- return err; +- /* Page may still be null, but only if nonblock was set. */ +- if (page) { +- mark_page_accessed(page); +- err = install_page(mm, vma, addr, page, prot); +- if (err) { +- page_cache_release(page); +- return err; +- } +- } else if (vma->vm_flags & VM_NONLINEAR) { +- /* No page was found just because we can't read it in +- * now (being here implies nonblock != 0), but the page +- * may exist, so set the PTE to fault it in later. */ +- err = install_file_pte(mm, vma, addr, pgoff, prot); +- if (err) +- return err; ++ if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { ++ fdata->type = VM_FAULT_SIGBUS; ++ return NULL; + } + +- len -= PAGE_SIZE; +- addr += PAGE_SIZE; +- pgoff++; ++ error = shmem_getpage(inode, fdata->pgoff, &page, ++ SGP_FAULT, &fdata->type); ++ if (error) { ++ fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS); ++ return NULL; + } +- return 0; ++ ++ mark_page_accessed(page); ++ return page; + } + + #ifdef CONFIG_NUMA +@@ -1424,6 +1391,7 @@ + { + file_accessed(file); + vma->vm_ops = &shmem_vm_ops; ++ vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR; + return 0; + } + +@@ -2477,8 +2445,7 @@ + }; + + static struct vm_operations_struct shmem_vm_ops = { +- .nopage = shmem_nopage, +- .populate = shmem_populate, ++ .fault = shmem_fault, + #ifdef CONFIG_NUMA + .set_policy = shmem_set_policy, + .get_policy = shmem_get_policy, +@@ -2614,5 +2581,6 @@ + fput(vma->vm_file); + vma->vm_file = file; + vma->vm_ops = &shmem_vm_ops; ++ vma->vm_flags |= VM_CAN_INVALIDATE; + return 0; + } +diff -Nurb linux-2.6.22-570/mm/shmem.c.orig linux-2.6.22-591/mm/shmem.c.orig +--- linux-2.6.22-570/mm/shmem.c.orig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/mm/shmem.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,2619 +0,0 @@ +-/* +- * Resizable virtual memory filesystem for Linux. +- * +- * Copyright (C) 2000 Linus Torvalds. +- * 2000 Transmeta Corp. +- * 2000-2001 Christoph Rohland +- * 2000-2001 SAP AG +- * 2002 Red Hat Inc. +- * Copyright (C) 2002-2005 Hugh Dickins. +- * Copyright (C) 2002-2005 VERITAS Software Corporation. +- * Copyright (C) 2004 Andi Kleen, SuSE Labs +- * +- * Extended attribute support for tmpfs: +- * Copyright (c) 2004, Luke Kenneth Casson Leighton +- * Copyright (c) 2004 Red Hat, Inc., James Morris +- * +- * This file is released under the GPL. +- */ +- +-/* +- * This virtual memory filesystem is heavily based on the ramfs. It +- * extends ramfs by the ability to use swap and honor resource limits +- * which makes it a completely usable filesystem. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-/* This magic number is used in glibc for posix shared memory */ +-#define TMPFS_MAGIC 0x01021994 +- +-#define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) +-#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) +-#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) +- +-#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +-#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) +- +-#define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) +- +-/* info->flags needs VM_flags to handle pagein/truncate races efficiently */ +-#define SHMEM_PAGEIN VM_READ +-#define SHMEM_TRUNCATE VM_WRITE +- +-/* Definition to limit shmem_truncate's steps between cond_rescheds */ +-#define LATENCY_LIMIT 64 +- +-/* Pretend that each entry is of this size in directory's i_size */ +-#define BOGO_DIRENT_SIZE 20 +- +-/* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */ +-enum sgp_type { +- SGP_QUICK, /* don't try more than file page cache lookup */ +- SGP_READ, /* don't exceed i_size, don't allocate page */ +- SGP_CACHE, /* don't exceed i_size, may allocate page */ +- SGP_WRITE, /* may exceed i_size, may allocate page */ +-}; +- +-static int shmem_getpage(struct inode *inode, unsigned long idx, +- struct page **pagep, enum sgp_type sgp, int *type); +- +-static inline struct page *shmem_dir_alloc(gfp_t gfp_mask) +-{ +- /* +- * The above definition of ENTRIES_PER_PAGE, and the use of +- * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE: +- * might be reconsidered if it ever diverges from PAGE_SIZE. +- */ +- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT); +-} +- +-static inline void shmem_dir_free(struct page *page) +-{ +- __free_pages(page, PAGE_CACHE_SHIFT-PAGE_SHIFT); +-} +- +-static struct page **shmem_dir_map(struct page *page) +-{ +- return (struct page **)kmap_atomic(page, KM_USER0); +-} +- +-static inline void shmem_dir_unmap(struct page **dir) +-{ +- kunmap_atomic(dir, KM_USER0); +-} +- +-static swp_entry_t *shmem_swp_map(struct page *page) +-{ +- return (swp_entry_t *)kmap_atomic(page, KM_USER1); +-} +- +-static inline void shmem_swp_balance_unmap(void) +-{ +- /* +- * When passing a pointer to an i_direct entry, to code which +- * also handles indirect entries and so will shmem_swp_unmap, +- * we must arrange for the preempt count to remain in balance. +- * What kmap_atomic of a lowmem page does depends on config +- * and architecture, so pretend to kmap_atomic some lowmem page. +- */ +- (void) kmap_atomic(ZERO_PAGE(0), KM_USER1); +-} +- +-static inline void shmem_swp_unmap(swp_entry_t *entry) +-{ +- kunmap_atomic(entry, KM_USER1); +-} +- +-static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb) +-{ +- return sb->s_fs_info; +-} +- +-/* +- * shmem_file_setup pre-accounts the whole fixed size of a VM object, +- * for shared memory and for shared anonymous (/dev/zero) mappings +- * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), +- * consistent with the pre-accounting of private mappings ... +- */ +-static inline int shmem_acct_size(unsigned long flags, loff_t size) +-{ +- return (flags & VM_ACCOUNT)? 
+- security_vm_enough_memory(VM_ACCT(size)): 0; +-} +- +-static inline void shmem_unacct_size(unsigned long flags, loff_t size) +-{ +- if (flags & VM_ACCOUNT) +- vm_unacct_memory(VM_ACCT(size)); +-} +- +-/* +- * ... whereas tmpfs objects are accounted incrementally as +- * pages are allocated, in order to allow huge sparse files. +- * shmem_getpage reports shmem_acct_block failure as -ENOSPC not -ENOMEM, +- * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. +- */ +-static inline int shmem_acct_block(unsigned long flags) +-{ +- return (flags & VM_ACCOUNT)? +- 0: security_vm_enough_memory(VM_ACCT(PAGE_CACHE_SIZE)); +-} +- +-static inline void shmem_unacct_blocks(unsigned long flags, long pages) +-{ +- if (!(flags & VM_ACCOUNT)) +- vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE)); +-} +- +-static const struct super_operations shmem_ops; +-static const struct address_space_operations shmem_aops; +-static const struct file_operations shmem_file_operations; +-static const struct inode_operations shmem_inode_operations; +-static const struct inode_operations shmem_dir_inode_operations; +-static const struct inode_operations shmem_special_inode_operations; +-static struct vm_operations_struct shmem_vm_ops; +- +-static struct backing_dev_info shmem_backing_dev_info __read_mostly = { +- .ra_pages = 0, /* No readahead */ +- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +- .unplug_io_fn = default_unplug_io_fn, +-}; +- +-static LIST_HEAD(shmem_swaplist); +-static DEFINE_SPINLOCK(shmem_swaplist_lock); +- +-static void shmem_free_blocks(struct inode *inode, long pages) +-{ +- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); +- if (sbinfo->max_blocks) { +- spin_lock(&sbinfo->stat_lock); +- sbinfo->free_blocks += pages; +- inode->i_blocks -= pages*BLOCKS_PER_PAGE; +- spin_unlock(&sbinfo->stat_lock); +- } +-} +- +-/* +- * shmem_recalc_inode - recalculate the size of an inode +- * +- * @inode: inode to recalc +- * +- * We have to calculate the free blocks since the mm can drop +- * undirtied hole pages behind our back. +- * +- * But normally info->alloced == inode->i_mapping->nrpages + info->swapped +- * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped) +- * +- * It has to be called with the spinlock held. +- */ +-static void shmem_recalc_inode(struct inode *inode) +-{ +- struct shmem_inode_info *info = SHMEM_I(inode); +- long freed; +- +- freed = info->alloced - info->swapped - inode->i_mapping->nrpages; +- if (freed > 0) { +- info->alloced -= freed; +- shmem_unacct_blocks(info->flags, freed); +- shmem_free_blocks(inode, freed); +- } +-} +- +-/* +- * shmem_swp_entry - find the swap vector position in the info structure +- * +- * @info: info structure for the inode +- * @index: index of the page to find +- * @page: optional page to add to the structure. Has to be preset to +- * all zeros +- * +- * If there is no space allocated yet it will return NULL when +- * page is NULL, else it will use the page for the needed block, +- * setting it to NULL on return to indicate that it has been used. +- * +- * The swap vector is organized the following way: +- * +- * There are SHMEM_NR_DIRECT entries directly stored in the +- * shmem_inode_info structure. So small files do not need an addional +- * allocation. 
+- * +- * For pages with index > SHMEM_NR_DIRECT there is the pointer +- * i_indirect which points to a page which holds in the first half +- * doubly indirect blocks, in the second half triple indirect blocks: +- * +- * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the +- * following layout (for SHMEM_NR_DIRECT == 16): +- * +- * i_indirect -> dir --> 16-19 +- * | +-> 20-23 +- * | +- * +-->dir2 --> 24-27 +- * | +-> 28-31 +- * | +-> 32-35 +- * | +-> 36-39 +- * | +- * +-->dir3 --> 40-43 +- * +-> 44-47 +- * +-> 48-51 +- * +-> 52-55 +- */ +-static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, struct page **page) +-{ +- unsigned long offset; +- struct page **dir; +- struct page *subdir; +- +- if (index < SHMEM_NR_DIRECT) { +- shmem_swp_balance_unmap(); +- return info->i_direct+index; +- } +- if (!info->i_indirect) { +- if (page) { +- info->i_indirect = *page; +- *page = NULL; +- } +- return NULL; /* need another page */ +- } +- +- index -= SHMEM_NR_DIRECT; +- offset = index % ENTRIES_PER_PAGE; +- index /= ENTRIES_PER_PAGE; +- dir = shmem_dir_map(info->i_indirect); +- +- if (index >= ENTRIES_PER_PAGE/2) { +- index -= ENTRIES_PER_PAGE/2; +- dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE; +- index %= ENTRIES_PER_PAGE; +- subdir = *dir; +- if (!subdir) { +- if (page) { +- *dir = *page; +- *page = NULL; +- } +- shmem_dir_unmap(dir); +- return NULL; /* need another page */ +- } +- shmem_dir_unmap(dir); +- dir = shmem_dir_map(subdir); +- } +- +- dir += index; +- subdir = *dir; +- if (!subdir) { +- if (!page || !(subdir = *page)) { +- shmem_dir_unmap(dir); +- return NULL; /* need a page */ +- } +- *dir = subdir; +- *page = NULL; +- } +- shmem_dir_unmap(dir); +- return shmem_swp_map(subdir) + offset; +-} +- +-static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, unsigned long value) +-{ +- long incdec = value? 1: -1; +- +- entry->val = value; +- info->swapped += incdec; +- if ((unsigned long)(entry - info->i_direct) >= SHMEM_NR_DIRECT) { +- struct page *page = kmap_atomic_to_page(entry); +- set_page_private(page, page_private(page) + incdec); +- } +-} +- +-/* +- * shmem_swp_alloc - get the position of the swap entry for the page. +- * If it does not exist allocate the entry. +- * +- * @info: info structure for the inode +- * @index: index of the page to find +- * @sgp: check and recheck i_size? skip allocation? +- */ +-static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp) +-{ +- struct inode *inode = &info->vfs_inode; +- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); +- struct page *page = NULL; +- swp_entry_t *entry; +- +- if (sgp != SGP_WRITE && +- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) +- return ERR_PTR(-EINVAL); +- +- while (!(entry = shmem_swp_entry(info, index, &page))) { +- if (sgp == SGP_READ) +- return shmem_swp_map(ZERO_PAGE(0)); +- /* +- * Test free_blocks against 1 not 0, since we have 1 data +- * page (and perhaps indirect index pages) yet to allocate: +- * a waste to allocate index if we cannot allocate data. 
+- */ +- if (sbinfo->max_blocks) { +- spin_lock(&sbinfo->stat_lock); +- if (sbinfo->free_blocks <= 1) { +- spin_unlock(&sbinfo->stat_lock); +- return ERR_PTR(-ENOSPC); +- } +- sbinfo->free_blocks--; +- inode->i_blocks += BLOCKS_PER_PAGE; +- spin_unlock(&sbinfo->stat_lock); +- } +- +- spin_unlock(&info->lock); +- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO); +- if (page) +- set_page_private(page, 0); +- spin_lock(&info->lock); +- +- if (!page) { +- shmem_free_blocks(inode, 1); +- return ERR_PTR(-ENOMEM); +- } +- if (sgp != SGP_WRITE && +- ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) { +- entry = ERR_PTR(-EINVAL); +- break; +- } +- if (info->next_index <= index) +- info->next_index = index + 1; +- } +- if (page) { +- /* another task gave its page, or truncated the file */ +- shmem_free_blocks(inode, 1); +- shmem_dir_free(page); +- } +- if (info->next_index <= index && !IS_ERR(entry)) +- info->next_index = index + 1; +- return entry; +-} +- +-/* +- * shmem_free_swp - free some swap entries in a directory +- * +- * @dir: pointer to the directory +- * @edir: pointer after last entry of the directory +- * @punch_lock: pointer to spinlock when needed for the holepunch case +- */ +-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir, +- spinlock_t *punch_lock) +-{ +- spinlock_t *punch_unlock = NULL; +- swp_entry_t *ptr; +- int freed = 0; +- +- for (ptr = dir; ptr < edir; ptr++) { +- if (ptr->val) { +- if (unlikely(punch_lock)) { +- punch_unlock = punch_lock; +- punch_lock = NULL; +- spin_lock(punch_unlock); +- if (!ptr->val) +- continue; +- } +- free_swap_and_cache(*ptr); +- *ptr = (swp_entry_t){0}; +- freed++; +- } +- } +- if (punch_unlock) +- spin_unlock(punch_unlock); +- return freed; +-} +- +-static int shmem_map_and_free_swp(struct page *subdir, int offset, +- int limit, struct page ***dir, spinlock_t *punch_lock) +-{ +- swp_entry_t *ptr; +- int freed = 0; +- +- ptr = shmem_swp_map(subdir); +- for (; offset < limit; offset += LATENCY_LIMIT) { +- int size = limit - offset; +- if (size > LATENCY_LIMIT) +- size = LATENCY_LIMIT; +- freed += shmem_free_swp(ptr+offset, ptr+offset+size, +- punch_lock); +- if (need_resched()) { +- shmem_swp_unmap(ptr); +- if (*dir) { +- shmem_dir_unmap(*dir); +- *dir = NULL; +- } +- cond_resched(); +- ptr = shmem_swp_map(subdir); +- } +- } +- shmem_swp_unmap(ptr); +- return freed; +-} +- +-static void shmem_free_pages(struct list_head *next) +-{ +- struct page *page; +- int freed = 0; +- +- do { +- page = container_of(next, struct page, lru); +- next = next->next; +- shmem_dir_free(page); +- freed++; +- if (freed >= LATENCY_LIMIT) { +- cond_resched(); +- freed = 0; +- } +- } while (next); +-} +- +-static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end) +-{ +- struct shmem_inode_info *info = SHMEM_I(inode); +- unsigned long idx; +- unsigned long size; +- unsigned long limit; +- unsigned long stage; +- unsigned long diroff; +- struct page **dir; +- struct page *topdir; +- struct page *middir; +- struct page *subdir; +- swp_entry_t *ptr; +- LIST_HEAD(pages_to_free); +- long nr_pages_to_free = 0; +- long nr_swaps_freed = 0; +- int offset; +- int freed; +- int punch_hole; +- spinlock_t *needs_lock; +- spinlock_t *punch_lock; +- unsigned long upper_limit; +- +- inode->i_ctime = inode->i_mtime = CURRENT_TIME; +- idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +- if (idx >= info->next_index) +- return; +- +- spin_lock(&info->lock); +- info->flags |= SHMEM_TRUNCATE; +- if (likely(end == 
(loff_t) -1)) { +- limit = info->next_index; +- upper_limit = SHMEM_MAX_INDEX; +- info->next_index = idx; +- needs_lock = NULL; +- punch_hole = 0; +- } else { +- if (end + 1 >= inode->i_size) { /* we may free a little more */ +- limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >> +- PAGE_CACHE_SHIFT; +- upper_limit = SHMEM_MAX_INDEX; +- } else { +- limit = (end + 1) >> PAGE_CACHE_SHIFT; +- upper_limit = limit; +- } +- needs_lock = &info->lock; +- punch_hole = 1; +- } +- +- topdir = info->i_indirect; +- if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) { +- info->i_indirect = NULL; +- nr_pages_to_free++; +- list_add(&topdir->lru, &pages_to_free); +- } +- spin_unlock(&info->lock); +- +- if (info->swapped && idx < SHMEM_NR_DIRECT) { +- ptr = info->i_direct; +- size = limit; +- if (size > SHMEM_NR_DIRECT) +- size = SHMEM_NR_DIRECT; +- nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock); +- } +- +- /* +- * If there are no indirect blocks or we are punching a hole +- * below indirect blocks, nothing to be done. +- */ +- if (!topdir || limit <= SHMEM_NR_DIRECT) +- goto done2; +- +- /* +- * The truncation case has already dropped info->lock, and we're safe +- * because i_size and next_index have already been lowered, preventing +- * access beyond. But in the punch_hole case, we still need to take +- * the lock when updating the swap directory, because there might be +- * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or +- * shmem_writepage. However, whenever we find we can remove a whole +- * directory page (not at the misaligned start or end of the range), +- * we first NULLify its pointer in the level above, and then have no +- * need to take the lock when updating its contents: needs_lock and +- * punch_lock (either pointing to info->lock or NULL) manage this. +- */ +- +- upper_limit -= SHMEM_NR_DIRECT; +- limit -= SHMEM_NR_DIRECT; +- idx = (idx > SHMEM_NR_DIRECT)? 
(idx - SHMEM_NR_DIRECT): 0; +- offset = idx % ENTRIES_PER_PAGE; +- idx -= offset; +- +- dir = shmem_dir_map(topdir); +- stage = ENTRIES_PER_PAGEPAGE/2; +- if (idx < ENTRIES_PER_PAGEPAGE/2) { +- middir = topdir; +- diroff = idx/ENTRIES_PER_PAGE; +- } else { +- dir += ENTRIES_PER_PAGE/2; +- dir += (idx - ENTRIES_PER_PAGEPAGE/2)/ENTRIES_PER_PAGEPAGE; +- while (stage <= idx) +- stage += ENTRIES_PER_PAGEPAGE; +- middir = *dir; +- if (*dir) { +- diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) % +- ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE; +- if (!diroff && !offset && upper_limit >= stage) { +- if (needs_lock) { +- spin_lock(needs_lock); +- *dir = NULL; +- spin_unlock(needs_lock); +- needs_lock = NULL; +- } else +- *dir = NULL; +- nr_pages_to_free++; +- list_add(&middir->lru, &pages_to_free); +- } +- shmem_dir_unmap(dir); +- dir = shmem_dir_map(middir); +- } else { +- diroff = 0; +- offset = 0; +- idx = stage; +- } +- } +- +- for (; idx < limit; idx += ENTRIES_PER_PAGE, diroff++) { +- if (unlikely(idx == stage)) { +- shmem_dir_unmap(dir); +- dir = shmem_dir_map(topdir) + +- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; +- while (!*dir) { +- dir++; +- idx += ENTRIES_PER_PAGEPAGE; +- if (idx >= limit) +- goto done1; +- } +- stage = idx + ENTRIES_PER_PAGEPAGE; +- middir = *dir; +- if (punch_hole) +- needs_lock = &info->lock; +- if (upper_limit >= stage) { +- if (needs_lock) { +- spin_lock(needs_lock); +- *dir = NULL; +- spin_unlock(needs_lock); +- needs_lock = NULL; +- } else +- *dir = NULL; +- nr_pages_to_free++; +- list_add(&middir->lru, &pages_to_free); +- } +- shmem_dir_unmap(dir); +- cond_resched(); +- dir = shmem_dir_map(middir); +- diroff = 0; +- } +- punch_lock = needs_lock; +- subdir = dir[diroff]; +- if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) { +- if (needs_lock) { +- spin_lock(needs_lock); +- dir[diroff] = NULL; +- spin_unlock(needs_lock); +- punch_lock = NULL; +- } else +- dir[diroff] = NULL; +- nr_pages_to_free++; +- list_add(&subdir->lru, &pages_to_free); +- } +- if (subdir && page_private(subdir) /* has swap entries */) { +- size = limit - idx; +- if (size > ENTRIES_PER_PAGE) +- size = ENTRIES_PER_PAGE; +- freed = shmem_map_and_free_swp(subdir, +- offset, size, &dir, punch_lock); +- if (!dir) +- dir = shmem_dir_map(middir); +- nr_swaps_freed += freed; +- if (offset || punch_lock) { +- spin_lock(&info->lock); +- set_page_private(subdir, +- page_private(subdir) - freed); +- spin_unlock(&info->lock); +- } else +- BUG_ON(page_private(subdir) != freed); +- } +- offset = 0; +- } +-done1: +- shmem_dir_unmap(dir); +-done2: +- if (inode->i_mapping->nrpages && (info->flags & SHMEM_PAGEIN)) { +- /* +- * Call truncate_inode_pages again: racing shmem_unuse_inode +- * may have swizzled a page in from swap since vmtruncate or +- * generic_delete_inode did it, before we lowered next_index. +- * Also, though shmem_getpage checks i_size before adding to +- * cache, no recheck after: so fix the narrow window there too. +- * +- * Recalling truncate_inode_pages_range and unmap_mapping_range +- * every time for punch_hole (which never got a chance to clear +- * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive, +- * yet hardly ever necessary: try to optimize them out later. 
+- */ +- truncate_inode_pages_range(inode->i_mapping, start, end); +- if (punch_hole) +- unmap_mapping_range(inode->i_mapping, start, +- end - start, 1); +- } +- +- spin_lock(&info->lock); +- info->flags &= ~SHMEM_TRUNCATE; +- info->swapped -= nr_swaps_freed; +- if (nr_pages_to_free) +- shmem_free_blocks(inode, nr_pages_to_free); +- shmem_recalc_inode(inode); +- spin_unlock(&info->lock); +- +- /* +- * Empty swap vector directory pages to be freed? +- */ +- if (!list_empty(&pages_to_free)) { +- pages_to_free.prev->next = NULL; +- shmem_free_pages(pages_to_free.next); +- } +-} +- +-static void shmem_truncate(struct inode *inode) +-{ +- shmem_truncate_range(inode, inode->i_size, (loff_t)-1); +-} +- +-static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) +-{ +- struct inode *inode = dentry->d_inode; +- struct page *page = NULL; +- int error; +- +- if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { +- if (attr->ia_size < inode->i_size) { +- /* +- * If truncating down to a partial page, then +- * if that page is already allocated, hold it +- * in memory until the truncation is over, so +- * truncate_partial_page cannnot miss it were +- * it assigned to swap. +- */ +- if (attr->ia_size & (PAGE_CACHE_SIZE-1)) { +- (void) shmem_getpage(inode, +- attr->ia_size>>PAGE_CACHE_SHIFT, +- &page, SGP_READ, NULL); +- } +- /* +- * Reset SHMEM_PAGEIN flag so that shmem_truncate can +- * detect if any pages might have been added to cache +- * after truncate_inode_pages. But we needn't bother +- * if it's being fully truncated to zero-length: the +- * nrpages check is efficient enough in that case. +- */ +- if (attr->ia_size) { +- struct shmem_inode_info *info = SHMEM_I(inode); +- spin_lock(&info->lock); +- info->flags &= ~SHMEM_PAGEIN; +- spin_unlock(&info->lock); +- } +- } +- } +- +- error = inode_change_ok(inode, attr); +- if (!error) +- error = inode_setattr(inode, attr); +-#ifdef CONFIG_TMPFS_POSIX_ACL +- if (!error && (attr->ia_valid & ATTR_MODE)) +- error = generic_acl_chmod(inode, &shmem_acl_ops); +-#endif +- if (page) +- page_cache_release(page); +- return error; +-} +- +-static void shmem_delete_inode(struct inode *inode) +-{ +- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); +- struct shmem_inode_info *info = SHMEM_I(inode); +- +- if (inode->i_op->truncate == shmem_truncate) { +- truncate_inode_pages(inode->i_mapping, 0); +- shmem_unacct_size(info->flags, inode->i_size); +- inode->i_size = 0; +- shmem_truncate(inode); +- if (!list_empty(&info->swaplist)) { +- spin_lock(&shmem_swaplist_lock); +- list_del_init(&info->swaplist); +- spin_unlock(&shmem_swaplist_lock); +- } +- } +- BUG_ON(inode->i_blocks); +- if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); +- sbinfo->free_inodes++; +- spin_unlock(&sbinfo->stat_lock); +- } +- clear_inode(inode); +-} +- +-static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir) +-{ +- swp_entry_t *ptr; +- +- for (ptr = dir; ptr < edir; ptr++) { +- if (ptr->val == entry.val) +- return ptr - dir; +- } +- return -1; +-} +- +-static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page) +-{ +- struct inode *inode; +- unsigned long idx; +- unsigned long size; +- unsigned long limit; +- unsigned long stage; +- struct page **dir; +- struct page *subdir; +- swp_entry_t *ptr; +- int offset; +- +- idx = 0; +- ptr = info->i_direct; +- spin_lock(&info->lock); +- limit = info->next_index; +- size = limit; +- if (size > SHMEM_NR_DIRECT) +- size = SHMEM_NR_DIRECT; +- offset = 
shmem_find_swp(entry, ptr, ptr+size); +- if (offset >= 0) { +- shmem_swp_balance_unmap(); +- goto found; +- } +- if (!info->i_indirect) +- goto lost2; +- +- dir = shmem_dir_map(info->i_indirect); +- stage = SHMEM_NR_DIRECT + ENTRIES_PER_PAGEPAGE/2; +- +- for (idx = SHMEM_NR_DIRECT; idx < limit; idx += ENTRIES_PER_PAGE, dir++) { +- if (unlikely(idx == stage)) { +- shmem_dir_unmap(dir-1); +- dir = shmem_dir_map(info->i_indirect) + +- ENTRIES_PER_PAGE/2 + idx/ENTRIES_PER_PAGEPAGE; +- while (!*dir) { +- dir++; +- idx += ENTRIES_PER_PAGEPAGE; +- if (idx >= limit) +- goto lost1; +- } +- stage = idx + ENTRIES_PER_PAGEPAGE; +- subdir = *dir; +- shmem_dir_unmap(dir); +- dir = shmem_dir_map(subdir); +- } +- subdir = *dir; +- if (subdir && page_private(subdir)) { +- ptr = shmem_swp_map(subdir); +- size = limit - idx; +- if (size > ENTRIES_PER_PAGE) +- size = ENTRIES_PER_PAGE; +- offset = shmem_find_swp(entry, ptr, ptr+size); +- if (offset >= 0) { +- shmem_dir_unmap(dir); +- goto found; +- } +- shmem_swp_unmap(ptr); +- } +- } +-lost1: +- shmem_dir_unmap(dir-1); +-lost2: +- spin_unlock(&info->lock); +- return 0; +-found: +- idx += offset; +- inode = &info->vfs_inode; +- if (move_from_swap_cache(page, idx, inode->i_mapping) == 0) { +- info->flags |= SHMEM_PAGEIN; +- shmem_swp_set(info, ptr + offset, 0); +- } +- shmem_swp_unmap(ptr); +- spin_unlock(&info->lock); +- /* +- * Decrement swap count even when the entry is left behind: +- * try_to_unuse will skip over mms, then reincrement count. +- */ +- swap_free(entry); +- return 1; +-} +- +-/* +- * shmem_unuse() search for an eventually swapped out shmem page. +- */ +-int shmem_unuse(swp_entry_t entry, struct page *page) +-{ +- struct list_head *p, *next; +- struct shmem_inode_info *info; +- int found = 0; +- +- spin_lock(&shmem_swaplist_lock); +- list_for_each_safe(p, next, &shmem_swaplist) { +- info = list_entry(p, struct shmem_inode_info, swaplist); +- if (!info->swapped) +- list_del_init(&info->swaplist); +- else if (shmem_unuse_inode(info, entry, page)) { +- /* move head to start search for next from here */ +- list_move_tail(&shmem_swaplist, &info->swaplist); +- found = 1; +- break; +- } +- } +- spin_unlock(&shmem_swaplist_lock); +- return found; +-} +- +-/* +- * Move the page from the page cache to the swap cache. +- */ +-static int shmem_writepage(struct page *page, struct writeback_control *wbc) +-{ +- struct shmem_inode_info *info; +- swp_entry_t *entry, swap; +- struct address_space *mapping; +- unsigned long index; +- struct inode *inode; +- +- BUG_ON(!PageLocked(page)); +- /* +- * shmem_backing_dev_info's capabilities prevent regular writeback or +- * sync from ever calling shmem_writepage; but a stacking filesystem +- * may use the ->writepage of its underlying filesystem, in which case +- * we want to do nothing when that underlying filesystem is tmpfs +- * (writing out to swap is useful as a response to memory pressure, but +- * of no use to stabilize the data) - just redirty the page, unlock it +- * and claim success in this case. AOP_WRITEPAGE_ACTIVATE, and the +- * page_mapped check below, must be avoided unless we're in reclaim. 
+- */ +- if (!wbc->for_reclaim) { +- set_page_dirty(page); +- unlock_page(page); +- return 0; +- } +- BUG_ON(page_mapped(page)); +- +- mapping = page->mapping; +- index = page->index; +- inode = mapping->host; +- info = SHMEM_I(inode); +- if (info->flags & VM_LOCKED) +- goto redirty; +- swap = get_swap_page(); +- if (!swap.val) +- goto redirty; +- +- spin_lock(&info->lock); +- shmem_recalc_inode(inode); +- if (index >= info->next_index) { +- BUG_ON(!(info->flags & SHMEM_TRUNCATE)); +- goto unlock; +- } +- entry = shmem_swp_entry(info, index, NULL); +- BUG_ON(!entry); +- BUG_ON(entry->val); +- +- if (move_to_swap_cache(page, swap) == 0) { +- shmem_swp_set(info, entry, swap.val); +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- if (list_empty(&info->swaplist)) { +- spin_lock(&shmem_swaplist_lock); +- /* move instead of add in case we're racing */ +- list_move_tail(&info->swaplist, &shmem_swaplist); +- spin_unlock(&shmem_swaplist_lock); +- } +- unlock_page(page); +- return 0; +- } +- +- shmem_swp_unmap(entry); +-unlock: +- spin_unlock(&info->lock); +- swap_free(swap); +-redirty: +- set_page_dirty(page); +- return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ +-} +- +-#ifdef CONFIG_NUMA +-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) +-{ +- char *nodelist = strchr(value, ':'); +- int err = 1; +- +- if (nodelist) { +- /* NUL-terminate policy string */ +- *nodelist++ = '\0'; +- if (nodelist_parse(nodelist, *policy_nodes)) +- goto out; +- if (!nodes_subset(*policy_nodes, node_online_map)) +- goto out; +- } +- if (!strcmp(value, "default")) { +- *policy = MPOL_DEFAULT; +- /* Don't allow a nodelist */ +- if (!nodelist) +- err = 0; +- } else if (!strcmp(value, "prefer")) { +- *policy = MPOL_PREFERRED; +- /* Insist on a nodelist of one node only */ +- if (nodelist) { +- char *rest = nodelist; +- while (isdigit(*rest)) +- rest++; +- if (!*rest) +- err = 0; +- } +- } else if (!strcmp(value, "bind")) { +- *policy = MPOL_BIND; +- /* Insist on a nodelist */ +- if (nodelist) +- err = 0; +- } else if (!strcmp(value, "interleave")) { +- *policy = MPOL_INTERLEAVE; +- /* Default to nodes online if no nodelist */ +- if (!nodelist) +- *policy_nodes = node_online_map; +- err = 0; +- } +-out: +- /* Restore string for error message */ +- if (nodelist) +- *--nodelist = ':'; +- return err; +-} +- +-static struct page *shmem_swapin_async(struct shared_policy *p, +- swp_entry_t entry, unsigned long idx) +-{ +- struct page *page; +- struct vm_area_struct pvma; +- +- /* Create a pseudo vma that just contains the policy */ +- memset(&pvma, 0, sizeof(struct vm_area_struct)); +- pvma.vm_end = PAGE_SIZE; +- pvma.vm_pgoff = idx; +- pvma.vm_policy = mpol_shared_policy_lookup(p, idx); +- page = read_swap_cache_async(entry, &pvma, 0); +- mpol_free(pvma.vm_policy); +- return page; +-} +- +-struct page *shmem_swapin(struct shmem_inode_info *info, swp_entry_t entry, +- unsigned long idx) +-{ +- struct shared_policy *p = &info->policy; +- int i, num; +- struct page *page; +- unsigned long offset; +- +- num = valid_swaphandles(entry, &offset); +- for (i = 0; i < num; offset++, i++) { +- page = shmem_swapin_async(p, +- swp_entry(swp_type(entry), offset), idx); +- if (!page) +- break; +- page_cache_release(page); +- } +- lru_add_drain(); /* Push any new pages onto the LRU now */ +- return shmem_swapin_async(p, entry, idx); +-} +- +-static struct page * +-shmem_alloc_page(gfp_t gfp, struct shmem_inode_info *info, +- unsigned long idx) +-{ +- struct vm_area_struct pvma; +- 
struct page *page; +- +- memset(&pvma, 0, sizeof(struct vm_area_struct)); +- pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx); +- pvma.vm_pgoff = idx; +- pvma.vm_end = PAGE_SIZE; +- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0); +- mpol_free(pvma.vm_policy); +- return page; +-} +-#else +-static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes) +-{ +- return 1; +-} +- +-static inline struct page * +-shmem_swapin(struct shmem_inode_info *info,swp_entry_t entry,unsigned long idx) +-{ +- swapin_readahead(entry, 0, NULL); +- return read_swap_cache_async(entry, NULL, 0); +-} +- +-static inline struct page * +-shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx) +-{ +- return alloc_page(gfp | __GFP_ZERO); +-} +-#endif +- +-/* +- * shmem_getpage - either get the page from swap or allocate a new one +- * +- * If we allocate a new one we do not mark it dirty. That's up to the +- * vm. If we swap it in we mark it dirty since we also free the swap +- * entry since a page cannot live in both the swap and page cache +- */ +-static int shmem_getpage(struct inode *inode, unsigned long idx, +- struct page **pagep, enum sgp_type sgp, int *type) +-{ +- struct address_space *mapping = inode->i_mapping; +- struct shmem_inode_info *info = SHMEM_I(inode); +- struct shmem_sb_info *sbinfo; +- struct page *filepage = *pagep; +- struct page *swappage; +- swp_entry_t *entry; +- swp_entry_t swap; +- int error; +- +- if (idx >= SHMEM_MAX_INDEX) +- return -EFBIG; +- /* +- * Normally, filepage is NULL on entry, and either found +- * uptodate immediately, or allocated and zeroed, or read +- * in under swappage, which is then assigned to filepage. +- * But shmem_prepare_write passes in a locked filepage, +- * which may be found not uptodate by other callers too, +- * and may need to be copied from the swappage read in. +- */ +-repeat: +- if (!filepage) +- filepage = find_lock_page(mapping, idx); +- if (filepage && PageUptodate(filepage)) +- goto done; +- error = 0; +- if (sgp == SGP_QUICK) +- goto failed; +- +- spin_lock(&info->lock); +- shmem_recalc_inode(inode); +- entry = shmem_swp_alloc(info, idx, sgp); +- if (IS_ERR(entry)) { +- spin_unlock(&info->lock); +- error = PTR_ERR(entry); +- goto failed; +- } +- swap = *entry; +- +- if (swap.val) { +- /* Look it up and read it in.. 
*/ +- swappage = lookup_swap_cache(swap); +- if (!swappage) { +- shmem_swp_unmap(entry); +- /* here we actually do the io */ +- if (type && *type == VM_FAULT_MINOR) { +- __count_vm_event(PGMAJFAULT); +- *type = VM_FAULT_MAJOR; +- } +- spin_unlock(&info->lock); +- swappage = shmem_swapin(info, swap, idx); +- if (!swappage) { +- spin_lock(&info->lock); +- entry = shmem_swp_alloc(info, idx, sgp); +- if (IS_ERR(entry)) +- error = PTR_ERR(entry); +- else { +- if (entry->val == swap.val) +- error = -ENOMEM; +- shmem_swp_unmap(entry); +- } +- spin_unlock(&info->lock); +- if (error) +- goto failed; +- goto repeat; +- } +- wait_on_page_locked(swappage); +- page_cache_release(swappage); +- goto repeat; +- } +- +- /* We have to do this with page locked to prevent races */ +- if (TestSetPageLocked(swappage)) { +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- wait_on_page_locked(swappage); +- page_cache_release(swappage); +- goto repeat; +- } +- if (PageWriteback(swappage)) { +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- wait_on_page_writeback(swappage); +- unlock_page(swappage); +- page_cache_release(swappage); +- goto repeat; +- } +- if (!PageUptodate(swappage)) { +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- unlock_page(swappage); +- page_cache_release(swappage); +- error = -EIO; +- goto failed; +- } +- +- if (filepage) { +- shmem_swp_set(info, entry, 0); +- shmem_swp_unmap(entry); +- delete_from_swap_cache(swappage); +- spin_unlock(&info->lock); +- copy_highpage(filepage, swappage); +- unlock_page(swappage); +- page_cache_release(swappage); +- flush_dcache_page(filepage); +- SetPageUptodate(filepage); +- set_page_dirty(filepage); +- swap_free(swap); +- } else if (!(error = move_from_swap_cache( +- swappage, idx, mapping))) { +- info->flags |= SHMEM_PAGEIN; +- shmem_swp_set(info, entry, 0); +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- filepage = swappage; +- swap_free(swap); +- } else { +- shmem_swp_unmap(entry); +- spin_unlock(&info->lock); +- unlock_page(swappage); +- page_cache_release(swappage); +- if (error == -ENOMEM) { +- /* let kswapd refresh zone for GFP_ATOMICs */ +- congestion_wait(WRITE, HZ/50); +- } +- goto repeat; +- } +- } else if (sgp == SGP_READ && !filepage) { +- shmem_swp_unmap(entry); +- filepage = find_get_page(mapping, idx); +- if (filepage && +- (!PageUptodate(filepage) || TestSetPageLocked(filepage))) { +- spin_unlock(&info->lock); +- wait_on_page_locked(filepage); +- page_cache_release(filepage); +- filepage = NULL; +- goto repeat; +- } +- spin_unlock(&info->lock); +- } else { +- shmem_swp_unmap(entry); +- sbinfo = SHMEM_SB(inode->i_sb); +- if (sbinfo->max_blocks) { +- spin_lock(&sbinfo->stat_lock); +- if (sbinfo->free_blocks == 0 || +- shmem_acct_block(info->flags)) { +- spin_unlock(&sbinfo->stat_lock); +- spin_unlock(&info->lock); +- error = -ENOSPC; +- goto failed; +- } +- sbinfo->free_blocks--; +- inode->i_blocks += BLOCKS_PER_PAGE; +- spin_unlock(&sbinfo->stat_lock); +- } else if (shmem_acct_block(info->flags)) { +- spin_unlock(&info->lock); +- error = -ENOSPC; +- goto failed; +- } +- +- if (!filepage) { +- spin_unlock(&info->lock); +- filepage = shmem_alloc_page(mapping_gfp_mask(mapping), +- info, +- idx); +- if (!filepage) { +- shmem_unacct_blocks(info->flags, 1); +- shmem_free_blocks(inode, 1); +- error = -ENOMEM; +- goto failed; +- } +- +- spin_lock(&info->lock); +- entry = shmem_swp_alloc(info, idx, sgp); +- if (IS_ERR(entry)) +- error = PTR_ERR(entry); +- else { +- swap = *entry; +- shmem_swp_unmap(entry); +- } +- 
if (error || swap.val || 0 != add_to_page_cache_lru( +- filepage, mapping, idx, GFP_ATOMIC)) { +- spin_unlock(&info->lock); +- page_cache_release(filepage); +- shmem_unacct_blocks(info->flags, 1); +- shmem_free_blocks(inode, 1); +- filepage = NULL; +- if (error) +- goto failed; +- goto repeat; +- } +- info->flags |= SHMEM_PAGEIN; +- } +- +- info->alloced++; +- spin_unlock(&info->lock); +- flush_dcache_page(filepage); +- SetPageUptodate(filepage); +- } +-done: +- if (*pagep != filepage) { +- unlock_page(filepage); +- *pagep = filepage; +- } +- return 0; +- +-failed: +- if (*pagep != filepage) { +- unlock_page(filepage); +- page_cache_release(filepage); +- } +- return error; +-} +- +-static struct page *shmem_nopage(struct vm_area_struct *vma, +- unsigned long address, int *type) +-{ +- struct inode *inode = vma->vm_file->f_path.dentry->d_inode; +- struct page *page = NULL; +- unsigned long idx; +- int error; +- +- idx = (address - vma->vm_start) >> PAGE_SHIFT; +- idx += vma->vm_pgoff; +- idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT; +- if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode)) +- return NOPAGE_SIGBUS; +- +- error = shmem_getpage(inode, idx, &page, SGP_CACHE, type); +- if (error) +- return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS; +- +- mark_page_accessed(page); +- return page; +-} +- +-static int shmem_populate(struct vm_area_struct *vma, +- unsigned long addr, unsigned long len, +- pgprot_t prot, unsigned long pgoff, int nonblock) +-{ +- struct inode *inode = vma->vm_file->f_path.dentry->d_inode; +- struct mm_struct *mm = vma->vm_mm; +- enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE; +- unsigned long size; +- +- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; +- if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size) +- return -EINVAL; +- +- while ((long) len > 0) { +- struct page *page = NULL; +- int err; +- /* +- * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE +- */ +- err = shmem_getpage(inode, pgoff, &page, sgp, NULL); +- if (err) +- return err; +- /* Page may still be null, but only if nonblock was set. */ +- if (page) { +- mark_page_accessed(page); +- err = install_page(mm, vma, addr, page, prot); +- if (err) { +- page_cache_release(page); +- return err; +- } +- } else if (vma->vm_flags & VM_NONLINEAR) { +- /* No page was found just because we can't read it in +- * now (being here implies nonblock != 0), but the page +- * may exist, so set the PTE to fault it in later. 
*/ +- err = install_file_pte(mm, vma, addr, pgoff, prot); +- if (err) +- return err; +- } +- +- len -= PAGE_SIZE; +- addr += PAGE_SIZE; +- pgoff++; +- } +- return 0; +-} +- +-#ifdef CONFIG_NUMA +-int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new) +-{ +- struct inode *i = vma->vm_file->f_path.dentry->d_inode; +- return mpol_set_shared_policy(&SHMEM_I(i)->policy, vma, new); +-} +- +-struct mempolicy * +-shmem_get_policy(struct vm_area_struct *vma, unsigned long addr) +-{ +- struct inode *i = vma->vm_file->f_path.dentry->d_inode; +- unsigned long idx; +- +- idx = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; +- return mpol_shared_policy_lookup(&SHMEM_I(i)->policy, idx); +-} +-#endif +- +-int shmem_lock(struct file *file, int lock, struct user_struct *user) +-{ +- struct inode *inode = file->f_path.dentry->d_inode; +- struct shmem_inode_info *info = SHMEM_I(inode); +- int retval = -ENOMEM; +- +- spin_lock(&info->lock); +- if (lock && !(info->flags & VM_LOCKED)) { +- if (!user_shm_lock(inode->i_size, user)) +- goto out_nomem; +- info->flags |= VM_LOCKED; +- } +- if (!lock && (info->flags & VM_LOCKED) && user) { +- user_shm_unlock(inode->i_size, user); +- info->flags &= ~VM_LOCKED; +- } +- retval = 0; +-out_nomem: +- spin_unlock(&info->lock); +- return retval; +-} +- +-static int shmem_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- file_accessed(file); +- vma->vm_ops = &shmem_vm_ops; +- return 0; +-} +- +-static struct inode * +-shmem_get_inode(struct super_block *sb, int mode, dev_t dev) +-{ +- struct inode *inode; +- struct shmem_inode_info *info; +- struct shmem_sb_info *sbinfo = SHMEM_SB(sb); +- +- if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); +- if (!sbinfo->free_inodes) { +- spin_unlock(&sbinfo->stat_lock); +- return NULL; +- } +- sbinfo->free_inodes--; +- spin_unlock(&sbinfo->stat_lock); +- } +- +- inode = new_inode(sb); +- if (inode) { +- inode->i_mode = mode; +- inode->i_uid = current->fsuid; +- inode->i_gid = current->fsgid; +- inode->i_blocks = 0; +- inode->i_mapping->a_ops = &shmem_aops; +- inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; +- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; +- inode->i_generation = get_seconds(); +- info = SHMEM_I(inode); +- memset(info, 0, (char *)inode - (char *)info); +- spin_lock_init(&info->lock); +- INIT_LIST_HEAD(&info->swaplist); +- +- switch (mode & S_IFMT) { +- default: +- inode->i_op = &shmem_special_inode_operations; +- init_special_inode(inode, mode, dev); +- break; +- case S_IFREG: +- inode->i_op = &shmem_inode_operations; +- inode->i_fop = &shmem_file_operations; +- mpol_shared_policy_init(&info->policy, sbinfo->policy, +- &sbinfo->policy_nodes); +- break; +- case S_IFDIR: +- inc_nlink(inode); +- /* Some things misbehave if size == 0 on a directory */ +- inode->i_size = 2 * BOGO_DIRENT_SIZE; +- inode->i_op = &shmem_dir_inode_operations; +- inode->i_fop = &simple_dir_operations; +- break; +- case S_IFLNK: +- /* +- * Must not load anything in the rbtree, +- * mpol_free_shared_policy will not be called. 
+- */ +- mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, +- NULL); +- break; +- } +- } else if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); +- sbinfo->free_inodes++; +- spin_unlock(&sbinfo->stat_lock); +- } +- return inode; +-} +- +-#ifdef CONFIG_TMPFS +-static const struct inode_operations shmem_symlink_inode_operations; +-static const struct inode_operations shmem_symlink_inline_operations; +- +-/* +- * Normally tmpfs makes no use of shmem_prepare_write, but it +- * lets a tmpfs file be used read-write below the loop driver. +- */ +-static int +-shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to) +-{ +- struct inode *inode = page->mapping->host; +- return shmem_getpage(inode, page->index, &page, SGP_WRITE, NULL); +-} +- +-static ssize_t +-shmem_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) +-{ +- struct inode *inode = file->f_path.dentry->d_inode; +- loff_t pos; +- unsigned long written; +- ssize_t err; +- +- if ((ssize_t) count < 0) +- return -EINVAL; +- +- if (!access_ok(VERIFY_READ, buf, count)) +- return -EFAULT; +- +- mutex_lock(&inode->i_mutex); +- +- pos = *ppos; +- written = 0; +- +- err = generic_write_checks(file, &pos, &count, 0); +- if (err || !count) +- goto out; +- +- err = remove_suid(file->f_path.dentry); +- if (err) +- goto out; +- +- inode->i_ctime = inode->i_mtime = CURRENT_TIME; +- +- do { +- struct page *page = NULL; +- unsigned long bytes, index, offset; +- char *kaddr; +- int left; +- +- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ +- index = pos >> PAGE_CACHE_SHIFT; +- bytes = PAGE_CACHE_SIZE - offset; +- if (bytes > count) +- bytes = count; +- +- /* +- * We don't hold page lock across copy from user - +- * what would it guard against? - so no deadlock here. +- * But it still may be a good idea to prefault below. +- */ +- +- err = shmem_getpage(inode, index, &page, SGP_WRITE, NULL); +- if (err) +- break; +- +- left = bytes; +- if (PageHighMem(page)) { +- volatile unsigned char dummy; +- __get_user(dummy, buf); +- __get_user(dummy, buf + bytes - 1); +- +- kaddr = kmap_atomic(page, KM_USER0); +- left = __copy_from_user_inatomic(kaddr + offset, +- buf, bytes); +- kunmap_atomic(kaddr, KM_USER0); +- } +- if (left) { +- kaddr = kmap(page); +- left = __copy_from_user(kaddr + offset, buf, bytes); +- kunmap(page); +- } +- +- written += bytes; +- count -= bytes; +- pos += bytes; +- buf += bytes; +- if (pos > inode->i_size) +- i_size_write(inode, pos); +- +- flush_dcache_page(page); +- set_page_dirty(page); +- mark_page_accessed(page); +- page_cache_release(page); +- +- if (left) { +- pos -= left; +- written -= left; +- err = -EFAULT; +- break; +- } +- +- /* +- * Our dirty pages are not counted in nr_dirty, +- * and we do not attempt to balance dirty pages. 
+- */ +- +- cond_resched(); +- } while (count); +- +- *ppos = pos; +- if (written) +- err = written; +-out: +- mutex_unlock(&inode->i_mutex); +- return err; +-} +- +-static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc, read_actor_t actor) +-{ +- struct inode *inode = filp->f_path.dentry->d_inode; +- struct address_space *mapping = inode->i_mapping; +- unsigned long index, offset; +- +- index = *ppos >> PAGE_CACHE_SHIFT; +- offset = *ppos & ~PAGE_CACHE_MASK; +- +- for (;;) { +- struct page *page = NULL; +- unsigned long end_index, nr, ret; +- loff_t i_size = i_size_read(inode); +- +- end_index = i_size >> PAGE_CACHE_SHIFT; +- if (index > end_index) +- break; +- if (index == end_index) { +- nr = i_size & ~PAGE_CACHE_MASK; +- if (nr <= offset) +- break; +- } +- +- desc->error = shmem_getpage(inode, index, &page, SGP_READ, NULL); +- if (desc->error) { +- if (desc->error == -EINVAL) +- desc->error = 0; +- break; +- } +- +- /* +- * We must evaluate after, since reads (unlike writes) +- * are called without i_mutex protection against truncate +- */ +- nr = PAGE_CACHE_SIZE; +- i_size = i_size_read(inode); +- end_index = i_size >> PAGE_CACHE_SHIFT; +- if (index == end_index) { +- nr = i_size & ~PAGE_CACHE_MASK; +- if (nr <= offset) { +- if (page) +- page_cache_release(page); +- break; +- } +- } +- nr -= offset; +- +- if (page) { +- /* +- * If users can be writing to this page using arbitrary +- * virtual addresses, take care about potential aliasing +- * before reading the page on the kernel side. +- */ +- if (mapping_writably_mapped(mapping)) +- flush_dcache_page(page); +- /* +- * Mark the page accessed if we read the beginning. +- */ +- if (!offset) +- mark_page_accessed(page); +- } else { +- page = ZERO_PAGE(0); +- page_cache_get(page); +- } +- +- /* +- * Ok, we have the page, and it's up-to-date, so +- * now we can copy it to user space... +- * +- * The actor routine returns how many bytes were actually used.. +- * NOTE! This may not be the same as how much of a user buffer +- * we filled up (we may be padding etc), so we can only update +- * "pos" here (the actor routine has to update the user buffer +- * pointers and the remaining count). 
+- */ +- ret = actor(desc, page, offset, nr); +- offset += ret; +- index += offset >> PAGE_CACHE_SHIFT; +- offset &= ~PAGE_CACHE_MASK; +- +- page_cache_release(page); +- if (ret != nr || !desc->count) +- break; +- +- cond_resched(); +- } +- +- *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; +- file_accessed(filp); +-} +- +-static ssize_t shmem_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +-{ +- read_descriptor_t desc; +- +- if ((ssize_t) count < 0) +- return -EINVAL; +- if (!access_ok(VERIFY_WRITE, buf, count)) +- return -EFAULT; +- if (!count) +- return 0; +- +- desc.written = 0; +- desc.count = count; +- desc.arg.buf = buf; +- desc.error = 0; +- +- do_shmem_file_read(filp, ppos, &desc, file_read_actor); +- if (desc.written) +- return desc.written; +- return desc.error; +-} +- +-static ssize_t shmem_file_sendfile(struct file *in_file, loff_t *ppos, +- size_t count, read_actor_t actor, void *target) +-{ +- read_descriptor_t desc; +- +- if (!count) +- return 0; +- +- desc.written = 0; +- desc.count = count; +- desc.arg.data = target; +- desc.error = 0; +- +- do_shmem_file_read(in_file, ppos, &desc, actor); +- if (desc.written) +- return desc.written; +- return desc.error; +-} +- +-static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) +-{ +- struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); +- +- buf->f_type = TMPFS_MAGIC; +- buf->f_bsize = PAGE_CACHE_SIZE; +- buf->f_namelen = NAME_MAX; +- spin_lock(&sbinfo->stat_lock); +- if (sbinfo->max_blocks) { +- buf->f_blocks = sbinfo->max_blocks; +- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; +- } +- if (sbinfo->max_inodes) { +- buf->f_files = sbinfo->max_inodes; +- buf->f_ffree = sbinfo->free_inodes; +- } +- /* else leave those fields 0 like simple_statfs */ +- spin_unlock(&sbinfo->stat_lock); +- return 0; +-} +- +-/* +- * File creation. Allocate an inode, and we're done.. +- */ +-static int +-shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +-{ +- struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev); +- int error = -ENOSPC; +- +- if (inode) { +- error = security_inode_init_security(inode, dir, NULL, NULL, +- NULL); +- if (error) { +- if (error != -EOPNOTSUPP) { +- iput(inode); +- return error; +- } +- } +- error = shmem_acl_init(inode, dir); +- if (error) { +- iput(inode); +- return error; +- } +- if (dir->i_mode & S_ISGID) { +- inode->i_gid = dir->i_gid; +- if (S_ISDIR(mode)) +- inode->i_mode |= S_ISGID; +- } +- dir->i_size += BOGO_DIRENT_SIZE; +- dir->i_ctime = dir->i_mtime = CURRENT_TIME; +- d_instantiate(dentry, inode); +- dget(dentry); /* Extra count - pin the dentry in core */ +- } +- return error; +-} +- +-static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode) +-{ +- int error; +- +- if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) +- return error; +- inc_nlink(dir); +- return 0; +-} +- +-static int shmem_create(struct inode *dir, struct dentry *dentry, int mode, +- struct nameidata *nd) +-{ +- return shmem_mknod(dir, dentry, mode | S_IFREG, 0); +-} +- +-/* +- * Link a file.. +- */ +-static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) +-{ +- struct inode *inode = old_dentry->d_inode; +- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); +- +- /* +- * No ordinary (disk based) filesystem counts links as inodes; +- * but each new link needs a new dentry, pinning lowmem, and +- * tmpfs dentries cannot be pruned until they are unlinked. 
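shmem_file_read() and shmem_file_sendfile() above differ only in how the read_descriptor_t is seeded; both funnel into do_shmem_file_read(), which invokes the actor once per page and stops as soon as the actor consumes less than offered. To show the contract, a deliberately trivial, hypothetical actor (the real ones are file_read_actor() and whatever actor the sendfile caller supplies):

/* Consume up to @size bytes of @page at @offset; the return value is
 * how much was taken, and a short return ends the caller's loop. */
static int count_only_actor(read_descriptor_t *desc, struct page *page,
			    unsigned long offset, unsigned long size)
{
	unsigned long want = size;

	if (want > desc->count)
		want = desc->count;
	desc->count -= want;
	desc->written += want;
	return want;		/* nothing is actually copied */
}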
+- */ +- if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); +- if (!sbinfo->free_inodes) { +- spin_unlock(&sbinfo->stat_lock); +- return -ENOSPC; +- } +- sbinfo->free_inodes--; +- spin_unlock(&sbinfo->stat_lock); +- } +- +- dir->i_size += BOGO_DIRENT_SIZE; +- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; +- inc_nlink(inode); +- atomic_inc(&inode->i_count); /* New dentry reference */ +- dget(dentry); /* Extra pinning count for the created dentry */ +- d_instantiate(dentry, inode); +- return 0; +-} +- +-static int shmem_unlink(struct inode *dir, struct dentry *dentry) +-{ +- struct inode *inode = dentry->d_inode; +- +- if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) { +- struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); +- if (sbinfo->max_inodes) { +- spin_lock(&sbinfo->stat_lock); +- sbinfo->free_inodes++; +- spin_unlock(&sbinfo->stat_lock); +- } +- } +- +- dir->i_size -= BOGO_DIRENT_SIZE; +- inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; +- drop_nlink(inode); +- dput(dentry); /* Undo the count from "create" - this does all the work */ +- return 0; +-} +- +-static int shmem_rmdir(struct inode *dir, struct dentry *dentry) +-{ +- if (!simple_empty(dentry)) +- return -ENOTEMPTY; +- +- drop_nlink(dentry->d_inode); +- drop_nlink(dir); +- return shmem_unlink(dir, dentry); +-} +- +-/* +- * The VFS layer already does all the dentry stuff for rename, +- * we just have to decrement the usage count for the target if +- * it exists so that the VFS layer correctly free's it when it +- * gets overwritten. +- */ +-static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) +-{ +- struct inode *inode = old_dentry->d_inode; +- int they_are_dirs = S_ISDIR(inode->i_mode); +- +- if (!simple_empty(new_dentry)) +- return -ENOTEMPTY; +- +- if (new_dentry->d_inode) { +- (void) shmem_unlink(new_dir, new_dentry); +- if (they_are_dirs) +- drop_nlink(old_dir); +- } else if (they_are_dirs) { +- drop_nlink(old_dir); +- inc_nlink(new_dir); +- } +- +- old_dir->i_size -= BOGO_DIRENT_SIZE; +- new_dir->i_size += BOGO_DIRENT_SIZE; +- old_dir->i_ctime = old_dir->i_mtime = +- new_dir->i_ctime = new_dir->i_mtime = +- inode->i_ctime = CURRENT_TIME; +- return 0; +-} +- +-static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname) +-{ +- int error; +- int len; +- struct inode *inode; +- struct page *page = NULL; +- char *kaddr; +- struct shmem_inode_info *info; +- +- len = strlen(symname) + 1; +- if (len > PAGE_CACHE_SIZE) +- return -ENAMETOOLONG; +- +- inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0); +- if (!inode) +- return -ENOSPC; +- +- error = security_inode_init_security(inode, dir, NULL, NULL, +- NULL); +- if (error) { +- if (error != -EOPNOTSUPP) { +- iput(inode); +- return error; +- } +- error = 0; +- } +- +- info = SHMEM_I(inode); +- inode->i_size = len-1; +- if (len <= (char *)inode - (char *)info) { +- /* do it inline */ +- memcpy(info, symname, len); +- inode->i_op = &shmem_symlink_inline_operations; +- } else { +- error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL); +- if (error) { +- iput(inode); +- return error; +- } +- inode->i_op = &shmem_symlink_inode_operations; +- kaddr = kmap_atomic(page, KM_USER0); +- memcpy(kaddr, symname, len); +- kunmap_atomic(kaddr, KM_USER0); +- set_page_dirty(page); +- page_cache_release(page); +- } +- if (dir->i_mode & S_ISGID) +- inode->i_gid = dir->i_gid; +- dir->i_size += BOGO_DIRENT_SIZE; +- dir->i_ctime = dir->i_mtime = CURRENT_TIME; 
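/*
 * Aside (not part of the patch): the test above chooses between two
 * symlink representations.  A target no longer than the private head
 * of the inode allocation, i.e. len <= (char *)inode - (char *)info,
 * is stored "inline" in the shmem_inode_info itself and read back by
 * shmem_follow_link_inline() via nd_set_link((char *)SHMEM_I(inode));
 * anything longer gets page 0 of the mapping through shmem_getpage()
 * and the kmap-based follow_link/put_link pair defined below.
 */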
+- d_instantiate(dentry, inode); +- dget(dentry); +- return 0; +-} +- +-static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +-{ +- nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); +- return NULL; +-} +- +-static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) +-{ +- struct page *page = NULL; +- int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); +- nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); +- return page; +-} +- +-static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +-{ +- if (!IS_ERR(nd_get_link(nd))) { +- struct page *page = cookie; +- kunmap(page); +- mark_page_accessed(page); +- page_cache_release(page); +- } +-} +- +-static const struct inode_operations shmem_symlink_inline_operations = { +- .readlink = generic_readlink, +- .follow_link = shmem_follow_link_inline, +-}; +- +-static const struct inode_operations shmem_symlink_inode_operations = { +- .truncate = shmem_truncate, +- .readlink = generic_readlink, +- .follow_link = shmem_follow_link, +- .put_link = shmem_put_link, +-}; +- +-#ifdef CONFIG_TMPFS_POSIX_ACL +-/** +- * Superblocks without xattr inode operations will get security.* xattr +- * support from the VFS "for free". As soon as we have any other xattrs +- * like ACLs, we also need to implement the security.* handlers at +- * filesystem level, though. +- */ +- +-static size_t shmem_xattr_security_list(struct inode *inode, char *list, +- size_t list_len, const char *name, +- size_t name_len) +-{ +- return security_inode_listsecurity(inode, list, list_len); +-} +- +-static int shmem_xattr_security_get(struct inode *inode, const char *name, +- void *buffer, size_t size) +-{ +- if (strcmp(name, "") == 0) +- return -EINVAL; +- return security_inode_getsecurity(inode, name, buffer, size, +- -EOPNOTSUPP); +-} +- +-static int shmem_xattr_security_set(struct inode *inode, const char *name, +- const void *value, size_t size, int flags) +-{ +- if (strcmp(name, "") == 0) +- return -EINVAL; +- return security_inode_setsecurity(inode, name, value, size, flags); +-} +- +-static struct xattr_handler shmem_xattr_security_handler = { +- .prefix = XATTR_SECURITY_PREFIX, +- .list = shmem_xattr_security_list, +- .get = shmem_xattr_security_get, +- .set = shmem_xattr_security_set, +-}; +- +-static struct xattr_handler *shmem_xattr_handlers[] = { +- &shmem_xattr_acl_access_handler, +- &shmem_xattr_acl_default_handler, +- &shmem_xattr_security_handler, +- NULL +-}; +-#endif +- +-static struct dentry *shmem_get_parent(struct dentry *child) +-{ +- return ERR_PTR(-ESTALE); +-} +- +-static int shmem_match(struct inode *ino, void *vfh) +-{ +- __u32 *fh = vfh; +- __u64 inum = fh[2]; +- inum = (inum << 32) | fh[1]; +- return ino->i_ino == inum && fh[0] == ino->i_generation; +-} +- +-static struct dentry *shmem_get_dentry(struct super_block *sb, void *vfh) +-{ +- struct dentry *de = NULL; +- struct inode *inode; +- __u32 *fh = vfh; +- __u64 inum = fh[2]; +- inum = (inum << 32) | fh[1]; +- +- inode = ilookup5(sb, (unsigned long)(inum+fh[0]), shmem_match, vfh); +- if (inode) { +- de = d_find_alias(inode); +- iput(inode); +- } +- +- return de? 
de: ERR_PTR(-ESTALE); +-} +- +-static struct dentry *shmem_decode_fh(struct super_block *sb, __u32 *fh, +- int len, int type, +- int (*acceptable)(void *context, struct dentry *de), +- void *context) +-{ +- if (len < 3) +- return ERR_PTR(-ESTALE); +- +- return sb->s_export_op->find_exported_dentry(sb, fh, NULL, acceptable, +- context); +-} +- +-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, +- int connectable) +-{ +- struct inode *inode = dentry->d_inode; +- +- if (*len < 3) +- return 255; +- +- if (hlist_unhashed(&inode->i_hash)) { +- /* Unfortunately insert_inode_hash is not idempotent, +- * so as we hash inodes here rather than at creation +- * time, we need a lock to ensure we only try +- * to do it once +- */ +- static DEFINE_SPINLOCK(lock); +- spin_lock(&lock); +- if (hlist_unhashed(&inode->i_hash)) +- __insert_inode_hash(inode, +- inode->i_ino + inode->i_generation); +- spin_unlock(&lock); +- } +- +- fh[0] = inode->i_generation; +- fh[1] = inode->i_ino; +- fh[2] = ((__u64)inode->i_ino) >> 32; +- +- *len = 3; +- return 1; +-} +- +-static struct export_operations shmem_export_ops = { +- .get_parent = shmem_get_parent, +- .get_dentry = shmem_get_dentry, +- .encode_fh = shmem_encode_fh, +- .decode_fh = shmem_decode_fh, +-}; +- +-static int shmem_parse_options(char *options, int *mode, uid_t *uid, +- gid_t *gid, unsigned long *blocks, unsigned long *inodes, +- int *policy, nodemask_t *policy_nodes) +-{ +- char *this_char, *value, *rest; +- +- while (options != NULL) { +- this_char = options; +- for (;;) { +- /* +- * NUL-terminate this option: unfortunately, +- * mount options form a comma-separated list, +- * but mpol's nodelist may also contain commas. +- */ +- options = strchr(options, ','); +- if (options == NULL) +- break; +- options++; +- if (!isdigit(*options)) { +- options[-1] = '\0'; +- break; +- } +- } +- if (!*this_char) +- continue; +- if ((value = strchr(this_char,'=')) != NULL) { +- *value++ = 0; +- } else { +- printk(KERN_ERR +- "tmpfs: No value for mount option '%s'\n", +- this_char); +- return 1; +- } +- +- if (!strcmp(this_char,"size")) { +- unsigned long long size; +- size = memparse(value,&rest); +- if (*rest == '%') { +- size <<= PAGE_SHIFT; +- size *= totalram_pages; +- do_div(size, 100); +- rest++; +- } +- if (*rest) +- goto bad_val; +- *blocks = size >> PAGE_CACHE_SHIFT; +- } else if (!strcmp(this_char,"nr_blocks")) { +- *blocks = memparse(value,&rest); +- if (*rest) +- goto bad_val; +- } else if (!strcmp(this_char,"nr_inodes")) { +- *inodes = memparse(value,&rest); +- if (*rest) +- goto bad_val; +- } else if (!strcmp(this_char,"mode")) { +- if (!mode) +- continue; +- *mode = simple_strtoul(value,&rest,8); +- if (*rest) +- goto bad_val; +- } else if (!strcmp(this_char,"uid")) { +- if (!uid) +- continue; +- *uid = simple_strtoul(value,&rest,0); +- if (*rest) +- goto bad_val; +- } else if (!strcmp(this_char,"gid")) { +- if (!gid) +- continue; +- *gid = simple_strtoul(value,&rest,0); +- if (*rest) +- goto bad_val; +- } else if (!strcmp(this_char,"mpol")) { +- if (shmem_parse_mpol(value,policy,policy_nodes)) +- goto bad_val; +- } else { +- printk(KERN_ERR "tmpfs: Bad mount option %s\n", +- this_char); +- return 1; +- } +- } +- return 0; +- +-bad_val: +- printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n", +- value, this_char); +- return 1; +- +-} +- +-static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) +-{ +- struct shmem_sb_info *sbinfo = SHMEM_SB(sb); +- unsigned long max_blocks = sbinfo->max_blocks; +- 
unsigned long max_inodes = sbinfo->max_inodes; +- int policy = sbinfo->policy; +- nodemask_t policy_nodes = sbinfo->policy_nodes; +- unsigned long blocks; +- unsigned long inodes; +- int error = -EINVAL; +- +- if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, +- &max_inodes, &policy, &policy_nodes)) +- return error; +- +- spin_lock(&sbinfo->stat_lock); +- blocks = sbinfo->max_blocks - sbinfo->free_blocks; +- inodes = sbinfo->max_inodes - sbinfo->free_inodes; +- if (max_blocks < blocks) +- goto out; +- if (max_inodes < inodes) +- goto out; +- /* +- * Those tests also disallow limited->unlimited while any are in +- * use, so i_blocks will always be zero when max_blocks is zero; +- * but we must separately disallow unlimited->limited, because +- * in that case we have no record of how much is already in use. +- */ +- if (max_blocks && !sbinfo->max_blocks) +- goto out; +- if (max_inodes && !sbinfo->max_inodes) +- goto out; +- +- error = 0; +- sbinfo->max_blocks = max_blocks; +- sbinfo->free_blocks = max_blocks - blocks; +- sbinfo->max_inodes = max_inodes; +- sbinfo->free_inodes = max_inodes - inodes; +- sbinfo->policy = policy; +- sbinfo->policy_nodes = policy_nodes; +-out: +- spin_unlock(&sbinfo->stat_lock); +- return error; +-} +-#endif +- +-static void shmem_put_super(struct super_block *sb) +-{ +- kfree(sb->s_fs_info); +- sb->s_fs_info = NULL; +-} +- +-static int shmem_fill_super(struct super_block *sb, +- void *data, int silent) +-{ +- struct inode *inode; +- struct dentry *root; +- int mode = S_IRWXUGO | S_ISVTX; +- uid_t uid = current->fsuid; +- gid_t gid = current->fsgid; +- int err = -ENOMEM; +- struct shmem_sb_info *sbinfo; +- unsigned long blocks = 0; +- unsigned long inodes = 0; +- int policy = MPOL_DEFAULT; +- nodemask_t policy_nodes = node_online_map; +- +-#ifdef CONFIG_TMPFS +- /* +- * Per default we only allow half of the physical ram per +- * tmpfs instance, limiting inodes to one per page of lowmem; +- * but the internal instance is left unlimited. 
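The size= branch of shmem_parse_options() above accepts either a byte count or a percentage of RAM; a worked example with illustrative numbers only:

/*
 * "size=50%" on a machine with 4 KiB pages and 128 MiB of RAM
 * (totalram_pages == 32768):
 *
 *	size   = memparse("50%", &rest);	// 50, *rest == '%'
 *	size <<= PAGE_SHIFT;			// 50 * 4096  = 204800
 *	size  *= totalram_pages;		// * 32768    = 6710886400
 *	do_div(size, 100);			// / 100      = 67108864 bytes
 *	*blocks = size >> PAGE_CACHE_SHIFT;	// 16384 blocks = 64 MiB
 */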
+- */ +- if (!(sb->s_flags & MS_NOUSER)) { +- blocks = totalram_pages / 2; +- inodes = totalram_pages - totalhigh_pages; +- if (inodes > blocks) +- inodes = blocks; +- if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, +- &inodes, &policy, &policy_nodes)) +- return -EINVAL; +- } +- sb->s_export_op = &shmem_export_ops; +-#else +- sb->s_flags |= MS_NOUSER; +-#endif +- +- /* Round up to L1_CACHE_BYTES to resist false sharing */ +- sbinfo = kmalloc(max((int)sizeof(struct shmem_sb_info), +- L1_CACHE_BYTES), GFP_KERNEL); +- if (!sbinfo) +- return -ENOMEM; +- +- spin_lock_init(&sbinfo->stat_lock); +- sbinfo->max_blocks = blocks; +- sbinfo->free_blocks = blocks; +- sbinfo->max_inodes = inodes; +- sbinfo->free_inodes = inodes; +- sbinfo->policy = policy; +- sbinfo->policy_nodes = policy_nodes; +- +- sb->s_fs_info = sbinfo; +- sb->s_maxbytes = SHMEM_MAX_BYTES; +- sb->s_blocksize = PAGE_CACHE_SIZE; +- sb->s_blocksize_bits = PAGE_CACHE_SHIFT; +- sb->s_magic = TMPFS_MAGIC; +- sb->s_op = &shmem_ops; +- sb->s_time_gran = 1; +-#ifdef CONFIG_TMPFS_POSIX_ACL +- sb->s_xattr = shmem_xattr_handlers; +- sb->s_flags |= MS_POSIXACL; +-#endif +- +- inode = shmem_get_inode(sb, S_IFDIR | mode, 0); +- if (!inode) +- goto failed; +- inode->i_uid = uid; +- inode->i_gid = gid; +- root = d_alloc_root(inode); +- if (!root) +- goto failed_iput; +- sb->s_root = root; +- return 0; +- +-failed_iput: +- iput(inode); +-failed: +- shmem_put_super(sb); +- return err; +-} +- +-static struct kmem_cache *shmem_inode_cachep; +- +-static struct inode *shmem_alloc_inode(struct super_block *sb) +-{ +- struct shmem_inode_info *p; +- p = (struct shmem_inode_info *)kmem_cache_alloc(shmem_inode_cachep, GFP_KERNEL); +- if (!p) +- return NULL; +- return &p->vfs_inode; +-} +- +-static void shmem_destroy_inode(struct inode *inode) +-{ +- if ((inode->i_mode & S_IFMT) == S_IFREG) { +- /* only struct inode is valid if it's an inline symlink */ +- mpol_free_shared_policy(&SHMEM_I(inode)->policy); +- } +- shmem_acl_destroy_inode(inode); +- kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); +-} +- +-static void init_once(void *foo, struct kmem_cache *cachep, +- unsigned long flags) +-{ +- struct shmem_inode_info *p = (struct shmem_inode_info *) foo; +- +- inode_init_once(&p->vfs_inode); +-#ifdef CONFIG_TMPFS_POSIX_ACL +- p->i_acl = NULL; +- p->i_default_acl = NULL; +-#endif +-} +- +-static int init_inodecache(void) +-{ +- shmem_inode_cachep = kmem_cache_create("shmem_inode_cache", +- sizeof(struct shmem_inode_info), +- 0, 0, init_once, NULL); +- if (shmem_inode_cachep == NULL) +- return -ENOMEM; +- return 0; +-} +- +-static void destroy_inodecache(void) +-{ +- kmem_cache_destroy(shmem_inode_cachep); +-} +- +-static const struct address_space_operations shmem_aops = { +- .writepage = shmem_writepage, +- .set_page_dirty = __set_page_dirty_no_writeback, +-#ifdef CONFIG_TMPFS +- .prepare_write = shmem_prepare_write, +- .commit_write = simple_commit_write, +-#endif +- .migratepage = migrate_page, +-}; +- +-static const struct file_operations shmem_file_operations = { +- .mmap = shmem_mmap, +-#ifdef CONFIG_TMPFS +- .llseek = generic_file_llseek, +- .read = shmem_file_read, +- .write = shmem_file_write, +- .fsync = simple_sync_file, +- .sendfile = shmem_file_sendfile, +-#endif +-}; +- +-static const struct inode_operations shmem_inode_operations = { +- .truncate = shmem_truncate, +- .setattr = shmem_notify_change, +- .truncate_range = shmem_truncate_range, +-#ifdef CONFIG_TMPFS_POSIX_ACL +- .setxattr = generic_setxattr, +- .getxattr = 
generic_getxattr, +- .listxattr = generic_listxattr, +- .removexattr = generic_removexattr, +- .permission = shmem_permission, +-#endif +- +-}; +- +-static const struct inode_operations shmem_dir_inode_operations = { +-#ifdef CONFIG_TMPFS +- .create = shmem_create, +- .lookup = simple_lookup, +- .link = shmem_link, +- .unlink = shmem_unlink, +- .symlink = shmem_symlink, +- .mkdir = shmem_mkdir, +- .rmdir = shmem_rmdir, +- .mknod = shmem_mknod, +- .rename = shmem_rename, +-#endif +-#ifdef CONFIG_TMPFS_POSIX_ACL +- .setattr = shmem_notify_change, +- .setxattr = generic_setxattr, +- .getxattr = generic_getxattr, +- .listxattr = generic_listxattr, +- .removexattr = generic_removexattr, +- .permission = shmem_permission, +-#endif +-}; +- +-static const struct inode_operations shmem_special_inode_operations = { +-#ifdef CONFIG_TMPFS_POSIX_ACL +- .setattr = shmem_notify_change, +- .setxattr = generic_setxattr, +- .getxattr = generic_getxattr, +- .listxattr = generic_listxattr, +- .removexattr = generic_removexattr, +- .permission = shmem_permission, +-#endif +-}; +- +-static const struct super_operations shmem_ops = { +- .alloc_inode = shmem_alloc_inode, +- .destroy_inode = shmem_destroy_inode, +-#ifdef CONFIG_TMPFS +- .statfs = shmem_statfs, +- .remount_fs = shmem_remount_fs, +-#endif +- .delete_inode = shmem_delete_inode, +- .drop_inode = generic_delete_inode, +- .put_super = shmem_put_super, +-}; +- +-static struct vm_operations_struct shmem_vm_ops = { +- .nopage = shmem_nopage, +- .populate = shmem_populate, +-#ifdef CONFIG_NUMA +- .set_policy = shmem_set_policy, +- .get_policy = shmem_get_policy, +-#endif +-}; +- +- +-static int shmem_get_sb(struct file_system_type *fs_type, +- int flags, const char *dev_name, void *data, struct vfsmount *mnt) +-{ +- return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); +-} +- +-static struct file_system_type tmpfs_fs_type = { +- .owner = THIS_MODULE, +- .name = "tmpfs", +- .get_sb = shmem_get_sb, +- .kill_sb = kill_litter_super, +-}; +-static struct vfsmount *shm_mnt; +- +-static int __init init_tmpfs(void) +-{ +- int error; +- +- error = init_inodecache(); +- if (error) +- goto out3; +- +- error = register_filesystem(&tmpfs_fs_type); +- if (error) { +- printk(KERN_ERR "Could not register tmpfs\n"); +- goto out2; +- } +- +- shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, +- tmpfs_fs_type.name, NULL); +- if (IS_ERR(shm_mnt)) { +- error = PTR_ERR(shm_mnt); +- printk(KERN_ERR "Could not kern_mount tmpfs\n"); +- goto out1; +- } +- return 0; +- +-out1: +- unregister_filesystem(&tmpfs_fs_type); +-out2: +- destroy_inodecache(); +-out3: +- shm_mnt = ERR_PTR(error); +- return error; +-} +-module_init(init_tmpfs) +- +-/* +- * shmem_file_setup - get an unlinked file living in tmpfs +- * +- * @name: name for dentry (to be seen in /proc//maps +- * @size: size to be set for the file +- * +- */ +-struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) +-{ +- int error; +- struct file *file; +- struct inode *inode; +- struct dentry *dentry, *root; +- struct qstr this; +- +- if (IS_ERR(shm_mnt)) +- return (void *)shm_mnt; +- +- if (size < 0 || size > SHMEM_MAX_BYTES) +- return ERR_PTR(-EINVAL); +- +- if (shmem_acct_size(flags, size)) +- return ERR_PTR(-ENOMEM); +- +- error = -ENOMEM; +- this.name = name; +- this.len = strlen(name); +- this.hash = 0; /* will go */ +- root = shm_mnt->mnt_root; +- dentry = d_alloc(root, &this); +- if (!dentry) +- goto put_memory; +- +- error = -ENFILE; +- file = get_empty_filp(); +- if (!file) +- goto 
put_dentry; +- +- error = -ENOSPC; +- inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0); +- if (!inode) +- goto close_file; +- +- SHMEM_I(inode)->flags = flags & VM_ACCOUNT; +- d_instantiate(dentry, inode); +- inode->i_size = size; +- inode->i_nlink = 0; /* It is unlinked */ +- file->f_path.mnt = mntget(shm_mnt); +- file->f_path.dentry = dentry; +- file->f_mapping = inode->i_mapping; +- file->f_op = &shmem_file_operations; +- file->f_mode = FMODE_WRITE | FMODE_READ; +- return file; +- +-close_file: +- put_filp(file); +-put_dentry: +- dput(dentry); +-put_memory: +- shmem_unacct_size(flags, size); +- return ERR_PTR(error); +-} +- +-/* +- * shmem_zero_setup - setup a shared anonymous mapping +- * +- * @vma: the vma to be mmapped is prepared by do_mmap_pgoff +- */ +-int shmem_zero_setup(struct vm_area_struct *vma) +-{ +- struct file *file; +- loff_t size = vma->vm_end - vma->vm_start; +- +- file = shmem_file_setup("dev/zero", size, vma->vm_flags); +- if (IS_ERR(file)) +- return PTR_ERR(file); +- +- if (vma->vm_file) +- fput(vma->vm_file); +- vma->vm_file = file; +- vma->vm_ops = &shmem_vm_ops; +- return 0; +-} +diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-591/mm/slab.c +--- linux-2.6.22-570/mm/slab.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/mm/slab.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1639,6 +1639,8 @@ + #endif + + flags |= cachep->gfpflags; ++ if (cachep->flags & SLAB_RECLAIM_ACCOUNT) ++ flags |= __GFP_RECLAIMABLE; + + page = alloc_pages_node(nodeid, flags, cachep->gfporder); + if (!page) +diff -Nurb linux-2.6.22-570/mm/slub.c linux-2.6.22-591/mm/slub.c +--- linux-2.6.22-570/mm/slub.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/mm/slub.c 2007-12-21 15:36:12.000000000 -0500 +@@ -985,6 +985,9 @@ + if (s->flags & SLAB_CACHE_DMA) + flags |= SLUB_DMA; + ++ if (s->flags & SLAB_RECLAIM_ACCOUNT) ++ flags |= __GFP_RECLAIMABLE; ++ + if (node == -1) + page = alloc_pages(flags, s->order); + else +@@ -1989,6 +1992,7 @@ + #ifdef CONFIG_NUMA + s->defrag_ratio = 100; + #endif ++ raise_kswapd_order(s->order); + + if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) + return 1; +diff -Nurb linux-2.6.22-570/mm/swap_state.c linux-2.6.22-591/mm/swap_state.c +--- linux-2.6.22-570/mm/swap_state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/swap_state.c 2007-12-21 15:36:12.000000000 -0500 +@@ -334,7 +334,8 @@ + * Get a new page to read into from swap. + */ + if (!new_page) { +- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr); ++ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, ++ vma, addr); + if (!new_page) + break; /* Out of memory */ + } +diff -Nurb linux-2.6.22-570/mm/truncate.c linux-2.6.22-591/mm/truncate.c +--- linux-2.6.22-570/mm/truncate.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/truncate.c 2007-12-21 15:36:12.000000000 -0500 +@@ -82,7 +82,7 @@ + /* + * If truncate cannot remove the fs-private metadata from the page, the page + * becomes anonymous. It will be left on the LRU and may even be mapped into +- * user pagetables if we're racing with filemap_nopage(). ++ * user pagetables if we're racing with filemap_fault(). + * + * We need to bale out if page->mapping is no longer equal to the original + * mapping. 
This happens a) when the VM reclaimed the page while we waited on
+@@ -192,6 +192,11 @@
+ 			unlock_page(page);
+ 			continue;
+ 		}
++		if (page_mapped(page)) {
++			unmap_mapping_range(mapping,
++			  (loff_t)page_index<<PAGE_CACHE_SHIFT,
++			  PAGE_CACHE_SIZE, 0);
++		}
+ 		if (page->index > next)
+ 			next = page->index;
+ 		next++;
+@@ -397,7 +407,7 @@
+ 			break;
+ 		}
+ 		wait_on_page_writeback(page);
+-		while (page_mapped(page)) {
++		if (page_mapped(page)) {
+ 			if (!did_range_unmap) {
+ 				/*
+ 				 * Zap the rest of the file in one hit.
+@@ -417,6 +427,7 @@
+ 					PAGE_CACHE_SIZE, 0);
+ 			}
+ 		}
++		BUG_ON(page_mapped(page));
+ 		ret = do_launder_page(mapping, page);
+ 		if (ret == 0 && !invalidate_complete_page2(mapping, page))
+ 			ret = -EIO;
+diff -Nurb linux-2.6.22-570/mm/util.c linux-2.6.22-591/mm/util.c
+--- linux-2.6.22-570/mm/util.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/mm/util.c 2007-12-21 15:36:12.000000000 -0500
+@@ -18,9 +18,8 @@
+ }
+ EXPORT_SYMBOL(__kzalloc);
+
+-/*
++/**
+  * kstrdup - allocate space for and copy an existing string
+- *
+  * @s: the string to duplicate
+  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+  */
+@@ -41,6 +40,32 @@
+ EXPORT_SYMBOL(kstrdup);
+
+ /**
++ * kstrndup - allocate space for and copy an existing string
++ * @s: the string to duplicate
++ * @max: read at most @max chars from @s
++ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
++ */
++char *kstrndup(const char *s, size_t max, gfp_t gfp)
++{
++	size_t len;
++	char *buf;
++
++	if (!s)
++		return NULL;
++
++	len = strlen(s);
++	if (len > max)
++		len = max;
++	buf = kmalloc_track_caller(len+1, gfp);
++	if (buf) {
++		memcpy(buf, s, len);
++		buf[len] = '\0';
++	}
++	return buf;
++}
++EXPORT_SYMBOL(kstrndup);
++
++/**
+  * kmemdup - duplicate region of memory
+  *
+  * @src: memory region to duplicate
+@@ -60,7 +85,6 @@
+
+ /*
+  * strndup_user - duplicate an existing string from user space
+- *
+  * @s: The string to duplicate
+  * @n: Maximum number of bytes to copy, including the trailing NUL.
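A hypothetical caller of the kstrndup() added above (name and limit invented for illustration):

/* Duplicate at most 16 characters of an untrusted, arbitrarily long
 * string; the result is always NUL-terminated and kfree()d normally. */
static char *dup_label(const char *candidate_name)
{
	return kstrndup(candidate_name, 16, GFP_KERNEL);
}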
+ */ +diff -Nurb linux-2.6.22-570/mm/vmalloc.c linux-2.6.22-591/mm/vmalloc.c +--- linux-2.6.22-570/mm/vmalloc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/vmalloc.c 2007-12-21 15:36:12.000000000 -0500 +@@ -68,12 +68,12 @@ + } while (pud++, addr = next, addr != end); + } + +-void unmap_vm_area(struct vm_struct *area) ++void unmap_kernel_range(unsigned long addr, unsigned long size) + { + pgd_t *pgd; + unsigned long next; +- unsigned long addr = (unsigned long) area->addr; +- unsigned long end = addr + area->size; ++ unsigned long start = addr; ++ unsigned long end = addr + size; + + BUG_ON(addr >= end); + pgd = pgd_offset_k(addr); +@@ -84,7 +84,12 @@ + continue; + vunmap_pud_range(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +- flush_tlb_kernel_range((unsigned long) area->addr, end); ++ flush_tlb_kernel_range(start, end); ++} ++ ++static void unmap_vm_area(struct vm_struct *area) ++{ ++ unmap_kernel_range((unsigned long)area->addr, area->size); + } + + static int vmap_pte_range(pmd_t *pmd, unsigned long addr, +diff -Nurb linux-2.6.22-570/mm/vmscan.c linux-2.6.22-591/mm/vmscan.c +--- linux-2.6.22-570/mm/vmscan.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/mm/vmscan.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1272,6 +1272,34 @@ + return nr_reclaimed; + } + ++static unsigned int kswapd_min_order __read_mostly; ++ ++static inline int kswapd_order(unsigned int order) ++{ ++ return max(kswapd_min_order, order); ++} ++ ++/** ++ * raise_kswapd_order - Raise the minimum order that kswapd reclaims ++ * @order: The minimum order kswapd should reclaim at ++ * ++ * kswapd normally reclaims at order 0 unless there is a higher-order ++ * allocation being serviced. This function is used to set the minimum ++ * order that kswapd reclaims at when it is known there will be regular ++ * high-order allocations at a given order. ++ */ ++void raise_kswapd_order(unsigned int order) ++{ ++ if (order >= MAX_ORDER) ++ return; ++ ++ /* Update order if necessary and inform if changed */ ++ if (order > kswapd_min_order) { ++ kswapd_min_order = order; ++ printk(KERN_INFO "kswapd reclaim order set to %d\n", order); ++ } ++} ++ + /* + * The background pageout daemon, started as a kernel thread + * from the init process. +@@ -1314,13 +1342,14 @@ + * trying to free the first piece of memory in the first place). 
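The raise_kswapd_order()/kswapd_order() helpers added above turn kswapd's reclaim order into a ratchet: it can be raised for the lifetime of the system but never lowered. A standalone model of the clamp (illustrative; the real code also logs the change and bounds the order by MAX_ORDER):

static unsigned int min_order;			/* models kswapd_min_order */

static unsigned int clamped(unsigned int order)	/* models kswapd_order() */
{
	return order > min_order ? order : min_order;
}

/* After raise_kswapd_order(3), say on behalf of a SLUB cache using
 * order-3 slabs, clamped(0) == 3 while clamped(5) == 5: small wakeups
 * are promoted, larger requests still win. */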
*/
+ 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
++	set_freezable();
+
+-	order = 0;
++	order = kswapd_order(0);
+ 	for ( ; ; ) {
+ 		unsigned long new_order;
+
+ 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+-		new_order = pgdat->kswapd_max_order;
++		new_order = kswapd_order(pgdat->kswapd_max_order);
+ 		pgdat->kswapd_max_order = 0;
+ 		if (order < new_order) {
+ 			/*
+@@ -1332,7 +1361,7 @@
+ 			if (!freezing(current))
+ 				schedule();
+
+-			order = pgdat->kswapd_max_order;
++			order = kswapd_order(pgdat->kswapd_max_order);
+ 		}
+ 		finish_wait(&pgdat->kswapd_wait, &wait);
+
+diff -Nurb linux-2.6.22-570/net/802/tr.c linux-2.6.22-591/net/802/tr.c
+--- linux-2.6.22-570/net/802/tr.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/802/tr.c 2007-12-21 15:36:15.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include
+ #include
+ #include
++#include <net/net_namespace.h>
+
+ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
+ static void rif_check_expire(unsigned long dummy);
+@@ -532,7 +533,7 @@
+ 		seq_puts(seq,
+ 		     "if     TR address       TTL   rcf   routing segments\n");
+ 	else {
+-		struct net_device *dev = dev_get_by_index(entry->iface);
++		struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
+ 		long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
+ 				- (long) jiffies;
+
+@@ -639,7 +640,7 @@
+ 	rif_timer.function = rif_check_expire;
+ 	add_timer(&rif_timer);
+
+-	proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops);
++	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
+ 	return 0;
+ }
+
+diff -Nurb linux-2.6.22-570/net/8021q/Makefile linux-2.6.22-591/net/8021q/Makefile
+--- linux-2.6.22-570/net/8021q/Makefile 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/Makefile 2007-12-21 15:36:12.000000000 -0500
+@@ -4,7 +4,7 @@
+
+ obj-$(CONFIG_VLAN_8021Q) += 8021q.o
+
+-8021q-objs := vlan.o vlan_dev.o
++8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
+
+ ifeq ($(CONFIG_PROC_FS),y)
+ 8021q-objs += vlanproc.o
+diff -Nurb linux-2.6.22-570/net/8021q/vlan.c linux-2.6.22-591/net/8021q/vlan.c
+--- linux-2.6.22-570/net/8021q/vlan.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/8021q/vlan.c 2007-12-21 15:36:15.000000000 -0500
+@@ -31,6 +31,7 @@
+ #include
+ #include
+ #include
++#include <net/net_namespace.h>
+
+ #include
+ #include "vlan.h"
+@@ -50,7 +51,7 @@
+ static char vlan_buggyright[] = "David S.
Miller "; + + static int vlan_device_event(struct notifier_block *, unsigned long, void *); +-static int vlan_ioctl_handler(void __user *); ++static int vlan_ioctl_handler(struct net *net, void __user *); + static int unregister_vlan_dev(struct net_device *, unsigned short ); + + static struct notifier_block vlan_notifier_block = { +@@ -97,15 +98,22 @@ + + /* Register us to receive netdevice events */ + err = register_netdevice_notifier(&vlan_notifier_block); +- if (err < 0) { +- dev_remove_pack(&vlan_packet_type); +- vlan_proc_cleanup(); +- return err; +- } ++ if (err < 0) ++ goto err1; + +- vlan_ioctl_set(vlan_ioctl_handler); ++ err = vlan_netlink_init(); ++ if (err < 0) ++ goto err2; + ++ vlan_ioctl_set(vlan_ioctl_handler); + return 0; ++ ++err2: ++ unregister_netdevice_notifier(&vlan_notifier_block); ++err1: ++ vlan_proc_cleanup(); ++ dev_remove_pack(&vlan_packet_type); ++ return err; + } + + /* Cleanup all vlan devices +@@ -117,7 +125,7 @@ + struct net_device *dev, *nxt; + + rtnl_lock(); +- for_each_netdev_safe(dev, nxt) { ++ for_each_netdev_safe(&init_net, dev, nxt) { + if (dev->priv_flags & IFF_802_1Q_VLAN) { + unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, + VLAN_DEV_INFO(dev)->vlan_id); +@@ -136,6 +144,7 @@ + { + int i; + ++ vlan_netlink_fini(); + vlan_ioctl_set(NULL); + + /* Un-register us from receiving netdevice events */ +@@ -197,6 +206,34 @@ + kfree(grp); + } + ++static struct vlan_group *vlan_group_alloc(int ifindex) ++{ ++ struct vlan_group *grp; ++ unsigned int size; ++ unsigned int i; ++ ++ grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); ++ if (!grp) ++ return NULL; ++ ++ size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; ++ ++ for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { ++ grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL); ++ if (!grp->vlan_devices_arrays[i]) ++ goto err; ++ } ++ ++ grp->real_dev_ifindex = ifindex; ++ hlist_add_head_rcu(&grp->hlist, ++ &vlan_group_hash[vlan_grp_hashfn(ifindex)]); ++ return grp; ++ ++err: ++ vlan_group_free(grp); ++ return NULL; ++} ++ + static void vlan_rcu_free(struct rcu_head *rcu) + { + vlan_group_free(container_of(rcu, struct vlan_group, rcu)); +@@ -278,47 +315,62 @@ + return ret; + } + +-static int unregister_vlan_device(const char *vlan_IF_name) ++int unregister_vlan_device(struct net_device *dev) + { +- struct net_device *dev = NULL; + int ret; + +- +- dev = dev_get_by_name(vlan_IF_name); +- ret = -EINVAL; +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { +- rtnl_lock(); +- + ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev, + VLAN_DEV_INFO(dev)->vlan_id); +- +- dev_put(dev); + unregister_netdevice(dev); + +- rtnl_unlock(); +- + if (ret == 1) + ret = 0; ++ return ret; ++} ++ ++/* ++ * vlan network devices have devices nesting below it, and are a special ++ * "super class" of normal network devices; split their locks off into a ++ * separate class since they always nest. ++ */ ++static struct lock_class_key vlan_netdev_xmit_lock_key; ++ ++static int vlan_dev_init(struct net_device *dev) ++{ ++ struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev; ++ ++ /* IFF_BROADCAST|IFF_MULTICAST; ??? */ ++ dev->flags = real_dev->flags & ~IFF_UP; ++ dev->iflink = real_dev->ifindex; ++ dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | ++ (1<<__LINK_STATE_DORMANT))) | ++ (1<<__LINK_STATE_PRESENT); ++ ++ /* TODO: maybe just assign it to be ETHERNET? 
*/ ++ dev->type = real_dev->type; ++ ++ memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len); ++ memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); ++ dev->addr_len = real_dev->addr_len; ++ ++ if (real_dev->features & NETIF_F_HW_VLAN_TX) { ++ dev->hard_header = real_dev->hard_header; ++ dev->hard_header_len = real_dev->hard_header_len; ++ dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; ++ dev->rebuild_header = real_dev->rebuild_header; + } else { +- printk(VLAN_ERR +- "%s: ERROR: Tried to remove a non-vlan device " +- "with VLAN code, name: %s priv_flags: %hX\n", +- __FUNCTION__, dev->name, dev->priv_flags); +- dev_put(dev); +- ret = -EPERM; +- } +- } else { +-#ifdef VLAN_DEBUG +- printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__); +-#endif +- ret = -EINVAL; ++ dev->hard_header = vlan_dev_hard_header; ++ dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; ++ dev->hard_start_xmit = vlan_dev_hard_start_xmit; ++ dev->rebuild_header = vlan_dev_rebuild_header; + } ++ dev->hard_header_parse = real_dev->hard_header_parse; + +- return ret; ++ lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key); ++ return 0; + } + +-static void vlan_setup(struct net_device *new_dev) ++void vlan_setup(struct net_device *new_dev) + { + SET_MODULE_OWNER(new_dev); + +@@ -338,6 +390,7 @@ + + /* set up method calls */ + new_dev->change_mtu = vlan_dev_change_mtu; ++ new_dev->init = vlan_dev_init; + new_dev->open = vlan_dev_open; + new_dev->stop = vlan_dev_stop; + new_dev->set_mac_address = vlan_dev_set_mac_address; +@@ -366,77 +419,110 @@ + } + } + +-/* +- * vlan network devices have devices nesting below it, and are a special +- * "super class" of normal network devices; split their locks off into a +- * separate class since they always nest. +- */ +-static struct lock_class_key vlan_netdev_xmit_lock_key; +- +- +-/* Attach a VLAN device to a mac address (ie Ethernet Card). +- * Returns the device that was created, or NULL if there was +- * an error of some kind. +- */ +-static struct net_device *register_vlan_device(const char *eth_IF_name, +- unsigned short VLAN_ID) ++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id) + { +- struct vlan_group *grp; +- struct net_device *new_dev; +- struct net_device *real_dev; /* the ethernet device */ +- char name[IFNAMSIZ]; +- int i; +- +-#ifdef VLAN_DEBUG +- printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n", +- __FUNCTION__, eth_IF_name, VLAN_ID); +-#endif +- +- if (VLAN_ID >= VLAN_VID_MASK) +- goto out_ret_null; +- +- /* find the device relating to eth_IF_name. */ +- real_dev = dev_get_by_name(eth_IF_name); +- if (!real_dev) +- goto out_ret_null; +- + if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { + printk(VLAN_DBG "%s: VLANs not supported on %s.\n", + __FUNCTION__, real_dev->name); +- goto out_put_dev; ++ return -EOPNOTSUPP; + } + + if ((real_dev->features & NETIF_F_HW_VLAN_RX) && + !real_dev->vlan_rx_register) { + printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", + __FUNCTION__, real_dev->name); +- goto out_put_dev; ++ return -EOPNOTSUPP; + } + + if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) && + (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) { + printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n", + __FUNCTION__, real_dev->name); +- goto out_put_dev; ++ return -EOPNOTSUPP; + } + +- /* From this point on, all the data structures must remain +- * consistent. 
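vlan_group_alloc() above builds the two-level device table that register_vlan_dev() below fills via vlan_group_set_device(). Splitting the table across VLAN_GROUP_ARRAY_SPLIT_PARTS sub-arrays keeps each kzalloc() small; a sketch of the lookup side, using the constants roughly as defined in this era's include/linux/if_vlan.h (4096 ids, 8 parts of 512 entries):

static inline struct net_device *group_get(struct vlan_group *vg,
					   int vlan_id)
{
	struct net_device **array;

	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
}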
+- */
+-	rtnl_lock();
+-
+ 	/* The real device must be up and operating in order to
+ 	 * assosciate a VLAN device with it.
+ 	 */
+ 	if (!(real_dev->flags & IFF_UP))
+-		goto out_unlock;
++		return -ENETDOWN;
+
+-	if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
++	if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
+ 		/* was already registered. */
+ 		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+-		goto out_unlock;
++		return -EEXIST;
+ 	}
+
++	return 0;
++}
++
++int register_vlan_dev(struct net_device *dev)
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++	struct net_device *real_dev = vlan->real_dev;
++	unsigned short vlan_id = vlan->vlan_id;
++	struct vlan_group *grp, *ngrp = NULL;
++	int err;
++
++	grp = __vlan_find_group(real_dev->ifindex);
++	if (!grp) {
++		ngrp = grp = vlan_group_alloc(real_dev->ifindex);
++		if (!grp)
++			return -ENOBUFS;
++	}
++
++	err = register_netdevice(dev);
++	if (err < 0)
++		goto out_free_group;
++
++	/* Account for reference in struct vlan_dev_info */
++	dev_hold(real_dev);
++
++	vlan_transfer_operstate(real_dev, dev);
++	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
++
++	/* So, got the sucker initialized, now lets place
++	 * it into our local structure.
++	 */
++	vlan_group_set_device(grp, vlan_id, dev);
++	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
++		real_dev->vlan_rx_register(real_dev, ngrp);
++	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
++		real_dev->vlan_rx_add_vid(real_dev, vlan_id);
++
++	if (vlan_proc_add_dev(dev) < 0)
++		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
++		       dev->name);
++	return 0;
++
++out_free_group:
++	if (ngrp)
++		vlan_group_free(ngrp);
++	return err;
++}
++
++/* Attach a VLAN device to a mac address (ie Ethernet Card).
++ * Returns 0 if the device was created or a negative error code otherwise.
++ */
++static int register_vlan_device(struct net_device *real_dev,
++				unsigned short VLAN_ID)
++{
++	struct net_device *new_dev;
++	char name[IFNAMSIZ];
++	int err;
++
++#ifdef VLAN_DEBUG
++	printk(VLAN_DBG "%s: if_name -:%s:- vid: %i\n",
++		__FUNCTION__, real_dev->name, VLAN_ID);
++#endif
++
++	if (VLAN_ID >= VLAN_VID_MASK)
++		return -ERANGE;
++
++	err = vlan_check_real_dev(real_dev, VLAN_ID);
++	if (err < 0)
++		return err;
++
+ 	/* Gotta set up the fields for the device. */
+ #ifdef VLAN_DEBUG
+ 	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
+@@ -471,138 +557,40 @@
+ 			       vlan_setup);
+
+ 	if (new_dev == NULL)
+-		goto out_unlock;
+-
+-#ifdef VLAN_DEBUG
+-	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+-#endif
+-	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
+-	new_dev->flags = real_dev->flags;
+-	new_dev->flags &= ~IFF_UP;
+-
+-	new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+-					     (1<<__LINK_STATE_DORMANT))) |
+-			 (1<<__LINK_STATE_PRESENT);
++		return -ENOBUFS;
+
+ 	/* need 4 bytes for extra VLAN header info,
+ 	 * hope the underlying device can handle it.
+ 	 */
+ 	new_dev->mtu = real_dev->mtu;
+
+-	/* TODO: maybe just assign it to be ETHERNET? */
+-	new_dev->type = real_dev->type;
+-
+-	new_dev->hard_header_len = real_dev->hard_header_len;
+-	if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) {
+-		/* Regular ethernet + 4 bytes (18 total).
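Worth spelling out the arithmetic behind the branch above (and its replacement in vlan_dev_init()): with ETH_HLEN == 14 and VLAN_HLEN == 4, a device without NETIF_F_HW_VLAN_TX must reserve the 802.1Q tag itself, so hard_header_len becomes 14 + 4 = 18 bytes; a hardware-accelerated device keeps its native 14-byte header and the driver inserts the tag at transmit time.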
*/ +- new_dev->hard_header_len += VLAN_HLEN; +- } +- ++#ifdef VLAN_DEBUG ++ printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name); + VLAN_MEM_DBG("new_dev->priv malloc, addr: %p size: %i\n", + new_dev->priv, + sizeof(struct vlan_dev_info)); +- +- memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len); +- memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len); +- new_dev->addr_len = real_dev->addr_len; +- +- if (real_dev->features & NETIF_F_HW_VLAN_TX) { +- new_dev->hard_header = real_dev->hard_header; +- new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit; +- new_dev->rebuild_header = real_dev->rebuild_header; +- } else { +- new_dev->hard_header = vlan_dev_hard_header; +- new_dev->hard_start_xmit = vlan_dev_hard_start_xmit; +- new_dev->rebuild_header = vlan_dev_rebuild_header; +- } +- new_dev->hard_header_parse = real_dev->hard_header_parse; ++#endif + + VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ + VLAN_DEV_INFO(new_dev)->real_dev = real_dev; + VLAN_DEV_INFO(new_dev)->dent = NULL; +- VLAN_DEV_INFO(new_dev)->flags = 1; ++ VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR; + +-#ifdef VLAN_DEBUG +- printk(VLAN_DBG "About to go find the group for idx: %i\n", +- real_dev->ifindex); +-#endif +- +- if (register_netdevice(new_dev)) ++ new_dev->rtnl_link_ops = &vlan_link_ops; ++ err = register_vlan_dev(new_dev); ++ if (err < 0) + goto out_free_newdev; + +- lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key); +- +- new_dev->iflink = real_dev->ifindex; +- vlan_transfer_operstate(real_dev, new_dev); +- linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */ +- +- /* So, got the sucker initialized, now lets place +- * it into our local structure. +- */ +- grp = __vlan_find_group(real_dev->ifindex); +- +- /* Note, we are running under the RTNL semaphore +- * so it cannot "appear" on us. 
+- */ +- if (!grp) { /* need to add a new group */ +- grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL); +- if (!grp) +- goto out_free_unregister; +- +- for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) { +- grp->vlan_devices_arrays[i] = kzalloc( +- sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN, +- GFP_KERNEL); +- +- if (!grp->vlan_devices_arrays[i]) +- goto out_free_arrays; +- } +- +- /* printk(KERN_ALERT "VLAN REGISTER: Allocated new group.\n"); */ +- grp->real_dev_ifindex = real_dev->ifindex; +- +- hlist_add_head_rcu(&grp->hlist, +- &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]); +- +- if (real_dev->features & NETIF_F_HW_VLAN_RX) +- real_dev->vlan_rx_register(real_dev, grp); +- } +- +- vlan_group_set_device(grp, VLAN_ID, new_dev); +- +- if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */ +- printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n", +- new_dev->name); +- +- if (real_dev->features & NETIF_F_HW_VLAN_FILTER) +- real_dev->vlan_rx_add_vid(real_dev, VLAN_ID); +- +- rtnl_unlock(); +- +- ++ /* Account for reference in struct vlan_dev_info */ ++ dev_hold(real_dev); + #ifdef VLAN_DEBUG + printk(VLAN_DBG "Allocated new device successfully, returning.\n"); + #endif +- return new_dev; +- +-out_free_arrays: +- vlan_group_free(grp); +- +-out_free_unregister: +- unregister_netdev(new_dev); +- goto out_unlock; ++ return 0; + + out_free_newdev: + free_netdev(new_dev); +- +-out_unlock: +- rtnl_unlock(); +- +-out_put_dev: +- dev_put(real_dev); +- +-out_ret_null: +- return NULL; ++ return err; + } + + static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) +@@ -612,6 +600,9 @@ + int i, flgs; + struct net_device *vlandev; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (!grp) + goto out; + +@@ -691,11 +682,12 @@ + * o execute requested action or pass command to the device driver + * arg is really a struct vlan_ioctl_args __user *. 
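For orientation, this is roughly how userspace (vconfig and friends) reaches the ioctl handler below; a minimal sketch using the SIOCGIFVLAN multiplexer and struct vlan_ioctl_args from linux/if_vlan.h, error handling trimmed:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/if_vlan.h>
#include <linux/sockios.h>

/* @sock: any AF_INET datagram socket.  Returns 0 and stores the VLAN
 * id of @ifname in *@vid, or -1 with errno set by the kernel. */
int get_vlan_id(int sock, const char *ifname, unsigned short *vid)
{
	struct vlan_ioctl_args args;

	memset(&args, 0, sizeof(args));
	args.cmd = GET_VLAN_VID_CMD;
	strncpy(args.device1, ifname, sizeof(args.device1) - 1);
	if (ioctl(sock, SIOCGIFVLAN, &args) < 0)
		return -1;
	*vid = args.u.VID;
	return 0;
}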
+ */ +-static int vlan_ioctl_handler(void __user *arg) ++static int vlan_ioctl_handler(struct net *net, void __user *arg) + { +- int err = 0; ++ int err; + unsigned short vid = 0; + struct vlan_ioctl_args args; ++ struct net_device *dev = NULL; + + if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) + return -EFAULT; +@@ -708,35 +700,61 @@ + printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd); + #endif + ++ rtnl_lock(); ++ + switch (args.cmd) { + case SET_VLAN_INGRESS_PRIORITY_CMD: ++ case SET_VLAN_EGRESS_PRIORITY_CMD: ++ case SET_VLAN_FLAG_CMD: ++ case ADD_VLAN_CMD: ++ case DEL_VLAN_CMD: ++ case GET_VLAN_REALDEV_NAME_CMD: ++ case GET_VLAN_VID_CMD: ++ err = -ENODEV; ++ dev = __dev_get_by_name(&init_net, args.device1); ++ if (!dev) ++ goto out; ++ ++ err = -EINVAL; ++ if (args.cmd != ADD_VLAN_CMD && ++ !(dev->priv_flags & IFF_802_1Q_VLAN)) ++ goto out; ++ } ++ ++ switch (args.cmd) { ++ case SET_VLAN_INGRESS_PRIORITY_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- err = vlan_dev_set_ingress_priority(args.device1, ++ break; ++ vlan_dev_set_ingress_priority(dev, + args.u.skb_priority, + args.vlan_qos); + break; + + case SET_VLAN_EGRESS_PRIORITY_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- err = vlan_dev_set_egress_priority(args.device1, ++ break; ++ err = vlan_dev_set_egress_priority(dev, + args.u.skb_priority, + args.vlan_qos); + break; + + case SET_VLAN_FLAG_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- err = vlan_dev_set_vlan_flag(args.device1, ++ break; ++ err = vlan_dev_set_vlan_flag(dev, + args.u.flag, + args.vlan_qos); + break; + + case SET_VLAN_NAME_TYPE_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; +- if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { ++ if ((args.u.name_type >= 0) && ++ (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) { + vlan_name_type = args.u.name_type; + err = 0; + } else { +@@ -745,26 +763,17 @@ + break; + + case ADD_VLAN_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- /* we have been given the name of the Ethernet Device we want to +- * talk to: args.dev1 We also have the +- * VLAN ID: args.u.VID +- */ +- if (register_vlan_device(args.device1, args.u.VID)) { +- err = 0; +- } else { +- err = -EINVAL; +- } ++ break; ++ err = register_vlan_device(dev, args.u.VID); + break; + + case DEL_VLAN_CMD: ++ err = -EPERM; + if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- /* Here, the args.dev1 is the actual VLAN we want +- * to get rid of. +- */ +- err = unregister_vlan_device(args.device1); ++ break; ++ err = unregister_vlan_device(dev); + break; + + case GET_VLAN_INGRESS_PRIORITY_CMD: +@@ -788,9 +797,7 @@ + err = -EINVAL; + break; + case GET_VLAN_REALDEV_NAME_CMD: +- err = vlan_dev_get_realdev_name(args.device1, args.u.device2); +- if (err) +- goto out; ++ vlan_dev_get_realdev_name(dev, args.u.device2); + if (copy_to_user(arg, &args, + sizeof(struct vlan_ioctl_args))) { + err = -EFAULT; +@@ -798,9 +805,7 @@ + break; + + case GET_VLAN_VID_CMD: +- err = vlan_dev_get_vid(args.device1, &vid); +- if (err) +- goto out; ++ vlan_dev_get_vid(dev, &vid); + args.u.VID = vid; + if (copy_to_user(arg, &args, + sizeof(struct vlan_ioctl_args))) { +@@ -812,9 +817,11 @@ + /* pass on to underlying device instead?? 
*/ + printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n", + __FUNCTION__, args.cmd); +- return -EINVAL; ++ err = -EINVAL; ++ break; + } + out: ++ rtnl_unlock(); + return err; + } + +diff -Nurb linux-2.6.22-570/net/8021q/vlan.h linux-2.6.22-591/net/8021q/vlan.h +--- linux-2.6.22-570/net/8021q/vlan.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/8021q/vlan.h 2007-12-21 15:36:12.000000000 -0500 +@@ -62,11 +62,24 @@ + int vlan_dev_open(struct net_device* dev); + int vlan_dev_stop(struct net_device* dev); + int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd); +-int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); +-int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio); +-int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val); +-int vlan_dev_get_realdev_name(const char* dev_name, char* result); +-int vlan_dev_get_vid(const char* dev_name, unsigned short* result); ++void vlan_dev_set_ingress_priority(const struct net_device *dev, ++ u32 skb_prio, short vlan_prio); ++int vlan_dev_set_egress_priority(const struct net_device *dev, ++ u32 skb_prio, short vlan_prio); ++int vlan_dev_set_vlan_flag(const struct net_device *dev, ++ u32 flag, short flag_val); ++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); ++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result); + void vlan_dev_set_multicast_list(struct net_device *vlan_dev); + ++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id); ++void vlan_setup(struct net_device *dev); ++int register_vlan_dev(struct net_device *dev); ++int unregister_vlan_device(struct net_device *dev); ++ ++int vlan_netlink_init(void); ++void vlan_netlink_fini(void); ++ ++extern struct rtnl_link_ops vlan_link_ops; ++ + #endif /* !(__BEN_VLAN_802_1Q_INC__) */ +diff -Nurb linux-2.6.22-570/net/8021q/vlan_dev.c linux-2.6.22-591/net/8021q/vlan_dev.c +--- linux-2.6.22-570/net/8021q/vlan_dev.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/8021q/vlan_dev.c 2007-12-21 15:36:15.000000000 -0500 +@@ -73,7 +73,7 @@ + + static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) + { +- if (VLAN_DEV_INFO(skb->dev)->flags & 1) { ++ if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) { + if (skb_shared(skb) || skb_cloned(skb)) { + struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + kfree_skb(skb); +@@ -132,6 +132,11 @@ + + vhdr = (struct vlan_hdr *)(skb->data); + ++ if (dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ + vlan_TCI = ntohs(vhdr->h_vlan_TCI); + +@@ -360,7 +365,8 @@ + * header shuffling in the hard_start_xmit. Users can turn off this + * REORDER behaviour with the vconfig tool. + */ +- build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0); ++ if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR)) ++ build_vlan_header = 1; + + if (build_vlan_header) { + vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN); +@@ -544,136 +550,83 @@ + return 0; + } + +-int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) ++void vlan_dev_set_ingress_priority(const struct net_device *dev, ++ u32 skb_prio, short vlan_prio) + { +- struct net_device *dev = dev_get_by_name(dev_name); ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { +- /* see if a priority mapping exists.. 
*/ +- VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio; +- dev_put(dev); +- return 0; +- } ++ if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio) ++ vlan->nr_ingress_mappings--; ++ else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio) ++ vlan->nr_ingress_mappings++; + +- dev_put(dev); +- } +- return -EINVAL; ++ vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio; + } + +-int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio) ++int vlan_dev_set_egress_priority(const struct net_device *dev, ++ u32 skb_prio, short vlan_prio) + { +- struct net_device *dev = dev_get_by_name(dev_name); ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); + struct vlan_priority_tci_mapping *mp = NULL; + struct vlan_priority_tci_mapping *np; ++ u32 vlan_qos = (vlan_prio << 13) & 0xE000; + +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { + /* See if a priority mapping exists.. */ +- mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; ++ mp = vlan->egress_priority_map[skb_prio & 0xF]; + while (mp) { + if (mp->priority == skb_prio) { +- mp->vlan_qos = ((vlan_prio << 13) & 0xE000); +- dev_put(dev); ++ if (mp->vlan_qos && !vlan_qos) ++ vlan->nr_egress_mappings--; ++ else if (!mp->vlan_qos && vlan_qos) ++ vlan->nr_egress_mappings++; ++ mp->vlan_qos = vlan_qos; + return 0; + } + mp = mp->next; + } + + /* Create a new mapping then. */ +- mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF]; ++ mp = vlan->egress_priority_map[skb_prio & 0xF]; + np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL); +- if (np) { ++ if (!np) ++ return -ENOBUFS; ++ + np->next = mp; + np->priority = skb_prio; +- np->vlan_qos = ((vlan_prio << 13) & 0xE000); +- VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np; +- dev_put(dev); ++ np->vlan_qos = vlan_qos; ++ vlan->egress_priority_map[skb_prio & 0xF] = np; ++ if (vlan_qos) ++ vlan->nr_egress_mappings++; + return 0; +- } else { +- dev_put(dev); +- return -ENOBUFS; +- } +- } +- dev_put(dev); +- } +- return -EINVAL; + } + +-/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */ +-int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val) ++/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. 
*/ ++int vlan_dev_set_vlan_flag(const struct net_device *dev, ++ u32 flag, short flag_val) + { +- struct net_device *dev = dev_get_by_name(dev_name); +- +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { + /* verify flag is supported */ +- if (flag == 1) { ++ if (flag == VLAN_FLAG_REORDER_HDR) { + if (flag_val) { +- VLAN_DEV_INFO(dev)->flags |= 1; ++ VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR; + } else { +- VLAN_DEV_INFO(dev)->flags &= ~1; ++ VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR; + } +- dev_put(dev); + return 0; +- } else { +- printk(KERN_ERR "%s: flag %i is not valid.\n", +- __FUNCTION__, (int)(flag)); +- dev_put(dev); +- return -EINVAL; + } +- } else { +- printk(KERN_ERR +- "%s: %s is not a vlan device, priv_flags: %hX.\n", +- __FUNCTION__, dev->name, dev->priv_flags); +- dev_put(dev); +- } +- } else { +- printk(KERN_ERR "%s: Could not find device: %s\n", +- __FUNCTION__, dev_name); +- } +- ++ printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag); + return -EINVAL; + } + +- +-int vlan_dev_get_realdev_name(const char *dev_name, char* result) ++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result) + { +- struct net_device *dev = dev_get_by_name(dev_name); +- int rv = 0; +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { + strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23); +- rv = 0; +- } else { +- rv = -EINVAL; +- } +- dev_put(dev); +- } else { +- rv = -ENODEV; +- } +- return rv; + } + +-int vlan_dev_get_vid(const char *dev_name, unsigned short* result) ++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result) + { +- struct net_device *dev = dev_get_by_name(dev_name); +- int rv = 0; +- if (dev) { +- if (dev->priv_flags & IFF_802_1Q_VLAN) { + *result = VLAN_DEV_INFO(dev)->vlan_id; +- rv = 0; +- } else { +- rv = -EINVAL; +- } +- dev_put(dev); +- } else { +- rv = -ENODEV; +- } +- return rv; + } + +- + int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p) + { + struct sockaddr *addr = (struct sockaddr *)(addr_struct_p); +@@ -828,7 +781,7 @@ + break; + + case SIOCETHTOOL: +- err = dev_ethtool(&ifrr); ++ err = dev_ethtool(real_dev->nd_net, &ifrr); + } + + if (!err) +diff -Nurb linux-2.6.22-570/net/8021q/vlan_netlink.c linux-2.6.22-591/net/8021q/vlan_netlink.c +--- linux-2.6.22-570/net/8021q/vlan_netlink.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/net/8021q/vlan_netlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -0,0 +1,237 @@ ++/* ++ * VLAN netlink control interface ++ * ++ * Copyright (c) 2007 Patrick McHardy ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * version 2 as published by the Free Software Foundation. 
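/*
 * With the named flag bit, the open-coded "flags & 1" tests earlier in
 * vlan_dev.c become self-documenting; the set/clear above is the usual
 * bitmask idiom:
 *
 *	flags |=  VLAN_FLAG_REORDER_HDR;	// enable
 *	flags &= ~VLAN_FLAG_REORDER_HDR;	// disable
 */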
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "vlan.h" ++ ++ ++static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = { ++ [IFLA_VLAN_ID] = { .type = NLA_U16 }, ++ [IFLA_VLAN_FLAGS] = { .len = sizeof(struct ifla_vlan_flags) }, ++ [IFLA_VLAN_EGRESS_QOS] = { .type = NLA_NESTED }, ++ [IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED }, ++}; ++ ++static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = { ++ [IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) }, ++}; ++ ++ ++static inline int vlan_validate_qos_map(struct nlattr *attr) ++{ ++ if (!attr) ++ return 0; ++ return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy); ++} ++ ++static int vlan_validate(struct nlattr *tb[], struct nlattr *data[]) ++{ ++ struct ifla_vlan_flags *flags; ++ u16 id; ++ int err; ++ ++ if (!data) ++ return -EINVAL; ++ ++ if (data[IFLA_VLAN_ID]) { ++ id = nla_get_u16(data[IFLA_VLAN_ID]); ++ if (id >= VLAN_VID_MASK) ++ return -ERANGE; ++ } ++ if (data[IFLA_VLAN_FLAGS]) { ++ flags = nla_data(data[IFLA_VLAN_FLAGS]); ++ if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR) ++ return -EINVAL; ++ } ++ ++ err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]); ++ if (err < 0) ++ return err; ++ err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]); ++ if (err < 0) ++ return err; ++ return 0; ++} ++ ++static int vlan_changelink(struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[]) ++{ ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); ++ struct ifla_vlan_flags *flags; ++ struct ifla_vlan_qos_mapping *m; ++ struct nlattr *attr; ++ int rem; ++ ++ if (data[IFLA_VLAN_FLAGS]) { ++ flags = nla_data(data[IFLA_VLAN_FLAGS]); ++ vlan->flags = (vlan->flags & ~flags->mask) | ++ (flags->flags & flags->mask); ++ } ++ if (data[IFLA_VLAN_INGRESS_QOS]) { ++ nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { ++ m = nla_data(attr); ++ vlan_dev_set_ingress_priority(dev, m->to, m->from); ++ } ++ } ++ if (data[IFLA_VLAN_EGRESS_QOS]) { ++ nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { ++ m = nla_data(attr); ++ vlan_dev_set_egress_priority(dev, m->from, m->to); ++ } ++ } ++ return 0; ++} ++ ++static int vlan_newlink(struct net_device *dev, ++ struct nlattr *tb[], struct nlattr *data[]) ++{ ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); ++ struct net_device *real_dev; ++ int err; ++ ++ if (!data[IFLA_VLAN_ID]) ++ return -EINVAL; ++ ++ if (!tb[IFLA_LINK]) ++ return -EINVAL; ++ real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK])); ++ if (!real_dev) ++ return -ENODEV; ++ ++ vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); ++ vlan->real_dev = real_dev; ++ vlan->flags = VLAN_FLAG_REORDER_HDR; ++ ++ err = vlan_check_real_dev(real_dev, vlan->vlan_id); ++ if (err < 0) ++ return err; ++ ++ if (!tb[IFLA_MTU]) ++ dev->mtu = real_dev->mtu; ++ else if (dev->mtu > real_dev->mtu) ++ return -EINVAL; ++ ++ err = vlan_changelink(dev, tb, data); ++ if (err < 0) ++ return err; ++ ++ return register_vlan_dev(dev); ++} ++ ++static void vlan_dellink(struct net_device *dev) ++{ ++ unregister_vlan_device(dev); ++} ++ ++static inline size_t vlan_qos_map_size(unsigned int n) ++{ ++ if (n == 0) ++ return 0; ++ /* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */ ++ return nla_total_size(sizeof(struct nlattr)) + ++ nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n; ++} ++ ++static size_t vlan_get_size(const struct net_device *dev) ++{ ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); ++ ++ 
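	/*
	 * Sizing note: nla_total_size(n) is NLA_ALIGN(NLA_HDRLEN + n),
	 * so the u16 IFLA_VLAN_ID costs NLA_ALIGN(4 + 2) = 8 bytes, and
	 * each QoS nest costs one nlattr header plus one struct
	 * ifla_vlan_qos_mapping (two u32s) per active mapping -- which
	 * is why nr_ingress_mappings/nr_egress_mappings are maintained
	 * eagerly in vlan_dev.c rather than recomputed here.
	 */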
return nla_total_size(2) + /* IFLA_VLAN_ID */ ++ vlan_qos_map_size(vlan->nr_ingress_mappings) + ++ vlan_qos_map_size(vlan->nr_egress_mappings); ++} ++ ++static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev) ++{ ++ struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev); ++ struct vlan_priority_tci_mapping *pm; ++ struct ifla_vlan_flags f; ++ struct ifla_vlan_qos_mapping m; ++ struct nlattr *nest; ++ unsigned int i; ++ ++ NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id); ++ if (vlan->flags) { ++ f.flags = vlan->flags; ++ f.mask = ~0; ++ NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f); ++ } ++ if (vlan->nr_ingress_mappings) { ++ nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS); ++ if (nest == NULL) ++ goto nla_put_failure; ++ ++ for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) { ++ if (!vlan->ingress_priority_map[i]) ++ continue; ++ ++ m.from = i; ++ m.to = vlan->ingress_priority_map[i]; ++ NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, ++ sizeof(m), &m); ++ } ++ nla_nest_end(skb, nest); ++ } ++ ++ if (vlan->nr_egress_mappings) { ++ nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS); ++ if (nest == NULL) ++ goto nla_put_failure; ++ ++ for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) { ++ for (pm = vlan->egress_priority_map[i]; pm; ++ pm = pm->next) { ++ if (!pm->vlan_qos) ++ continue; ++ ++ m.from = pm->priority; ++ m.to = (pm->vlan_qos >> 13) & 0x7; ++ NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING, ++ sizeof(m), &m); ++ } ++ } ++ nla_nest_end(skb, nest); ++ } ++ return 0; ++ ++nla_put_failure: ++ return -EMSGSIZE; ++} ++ ++struct rtnl_link_ops vlan_link_ops __read_mostly = { ++ .kind = "vlan", ++ .maxtype = IFLA_VLAN_MAX, ++ .policy = vlan_policy, ++ .priv_size = sizeof(struct vlan_dev_info), ++ .setup = vlan_setup, ++ .validate = vlan_validate, ++ .newlink = vlan_newlink, ++ .changelink = vlan_changelink, ++ .dellink = vlan_dellink, ++ .get_size = vlan_get_size, ++ .fill_info = vlan_fill_info, ++}; ++ ++int __init vlan_netlink_init(void) ++{ ++ return rtnl_link_register(&vlan_link_ops); ++} ++ ++void __exit vlan_netlink_fini(void) ++{ ++ rtnl_link_unregister(&vlan_link_ops); ++} ++ ++MODULE_ALIAS_RTNL_LINK("vlan"); +diff -Nurb linux-2.6.22-570/net/8021q/vlanproc.c linux-2.6.22-591/net/8021q/vlanproc.c +--- linux-2.6.22-570/net/8021q/vlanproc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/8021q/vlanproc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include "vlanproc.h" + #include "vlan.h" + +@@ -143,7 +144,7 @@ + remove_proc_entry(name_conf, proc_vlan_dir); + + if (proc_vlan_dir) +- proc_net_remove(name_root); ++ proc_net_remove(&init_net, name_root); + + /* Dynamically added entries should be cleaned up as their vlan_device + * is removed, so we should not have to take care of it here... 
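/*
 * With vlan_link_ops registered above under kind "vlan", the
 * rtnetlink path replaces the old vconfig ioctls.  Usage sketch,
 * assuming an iproute2 new enough to speak the "vlan" link kind:
 *
 *	ip link add link eth0 name eth0.100 type vlan id 100
 *	ip link set dev eth0.100 type vlan egress-qos-map 0:5
 */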
+@@ -156,7 +157,7 @@ + + int __init vlan_proc_init(void) + { +- proc_vlan_dir = proc_mkdir(name_root, proc_net); ++ proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net); + if (proc_vlan_dir) { + proc_vlan_conf = create_proc_entry(name_conf, + S_IFREG|S_IRUSR|S_IWUSR, +@@ -253,7 +254,7 @@ + if (*pos == 0) + return SEQ_START_TOKEN; + +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (!is_vlan_dev(dev)) + continue; + +@@ -272,9 +273,9 @@ + + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) +- dev = net_device_entry(&dev_base_head); ++ dev = net_device_entry(&init_net.dev_base_head); + +- for_each_netdev_continue(dev) { ++ for_each_netdev_continue(&init_net, dev) { + if (!is_vlan_dev(dev)) + continue; + +@@ -342,7 +343,7 @@ + seq_printf(seq, "Device: %s", dev_info->real_dev->name); + /* now show all PRIORITY mappings relating to this VLAN */ + seq_printf(seq, +- "\nINGRESS priority mappings: 0:%lu 1:%lu 2:%lu 3:%lu 4:%lu 5:%lu 6:%lu 7:%lu\n", ++ "\nINGRESS priority mappings: 0:%u 1:%u 2:%u 3:%u 4:%u 5:%u 6:%u 7:%u\n", + dev_info->ingress_priority_map[0], + dev_info->ingress_priority_map[1], + dev_info->ingress_priority_map[2], +@@ -357,7 +358,7 @@ + const struct vlan_priority_tci_mapping *mp + = dev_info->egress_priority_map[i]; + while (mp) { +- seq_printf(seq, "%lu:%hu ", ++ seq_printf(seq, "%u:%hu ", + mp->priority, ((mp->vlan_qos >> 13) & 0x7)); + mp = mp->next; + } +diff -Nurb linux-2.6.22-570/net/Kconfig linux-2.6.22-591/net/Kconfig +--- linux-2.6.22-570/net/Kconfig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/Kconfig 2007-12-21 15:36:15.000000000 -0500 +@@ -27,6 +27,13 @@ + + menu "Networking options" + ++config NET_NS ++ bool "Network namespace support" ++ depends on EXPERIMENTAL ++ help ++ Support what appear to user space as multiple instances of the ++ network stack. ++ + source "net/packet/Kconfig" + source "net/unix/Kconfig" + source "net/xfrm/Kconfig" +diff -Nurb linux-2.6.22-570/net/Makefile linux-2.6.22-591/net/Makefile +--- linux-2.6.22-570/net/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/Makefile 2007-12-23 03:20:02.000000000 -0500 +@@ -14,7 +14,7 @@ + + # LLC has to be linked before the files in net/802/ + obj-$(CONFIG_LLC) += llc/ +-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ ++obj-$(CONFIG_NET) += ethernet/ sched/ netlink/ 802/ + obj-$(CONFIG_NETFILTER) += netfilter/ + obj-$(CONFIG_INET) += ipv4/ + obj-$(CONFIG_XFRM) += xfrm/ +diff -Nurb linux-2.6.22-570/net/Makefile.orig linux-2.6.22-591/net/Makefile.orig +--- linux-2.6.22-570/net/Makefile.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/net/Makefile.orig 2007-07-08 19:32:17.000000000 -0400 +@@ -0,0 +1,58 @@ ++# ++# Makefile for the linux networking. ++# ++# 2 Sep 2000, Christoph Hellwig ++# Rewritten to use lists instead of if-statements. 
++# ++ ++obj-y := nonet.o ++ ++obj-$(CONFIG_NET) := socket.o core/ ++ ++tmp-$(CONFIG_COMPAT) := compat.o ++obj-$(CONFIG_NET) += $(tmp-y) ++ ++# LLC has to be linked before the files in net/802/ ++obj-$(CONFIG_LLC) += llc/ ++obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ ++obj-$(CONFIG_NETFILTER) += netfilter/ ++obj-$(CONFIG_INET) += ipv4/ ++obj-$(CONFIG_XFRM) += xfrm/ ++obj-$(CONFIG_UNIX) += unix/ ++ifneq ($(CONFIG_IPV6),) ++obj-y += ipv6/ ++endif ++obj-$(CONFIG_PACKET) += packet/ ++obj-$(CONFIG_NET_KEY) += key/ ++obj-$(CONFIG_NET_SCHED) += sched/ ++obj-$(CONFIG_BRIDGE) += bridge/ ++obj-$(CONFIG_IPX) += ipx/ ++obj-$(CONFIG_ATALK) += appletalk/ ++obj-$(CONFIG_WAN_ROUTER) += wanrouter/ ++obj-$(CONFIG_X25) += x25/ ++obj-$(CONFIG_LAPB) += lapb/ ++obj-$(CONFIG_NETROM) += netrom/ ++obj-$(CONFIG_ROSE) += rose/ ++obj-$(CONFIG_AX25) += ax25/ ++obj-$(CONFIG_IRDA) += irda/ ++obj-$(CONFIG_BT) += bluetooth/ ++obj-$(CONFIG_SUNRPC) += sunrpc/ ++obj-$(CONFIG_RXRPC) += rxrpc/ ++obj-$(CONFIG_AF_RXRPC) += rxrpc/ ++obj-$(CONFIG_ATM) += atm/ ++obj-$(CONFIG_DECNET) += decnet/ ++obj-$(CONFIG_ECONET) += econet/ ++obj-$(CONFIG_VLAN_8021Q) += 8021q/ ++obj-$(CONFIG_IP_DCCP) += dccp/ ++obj-$(CONFIG_IP_SCTP) += sctp/ ++obj-y += wireless/ ++obj-$(CONFIG_MAC80211) += mac80211/ ++obj-$(CONFIG_IEEE80211) += ieee80211/ ++obj-$(CONFIG_TIPC) += tipc/ ++obj-$(CONFIG_NETLABEL) += netlabel/ ++obj-$(CONFIG_IUCV) += iucv/ ++obj-$(CONFIG_RFKILL) += rfkill/ ++ ++ifeq ($(CONFIG_NET),y) ++obj-$(CONFIG_SYSCTL) += sysctl_net.o ++endif +diff -Nurb linux-2.6.22-570/net/appletalk/aarp.c linux-2.6.22-591/net/appletalk/aarp.c +--- linux-2.6.22-570/net/appletalk/aarp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/appletalk/aarp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -330,15 +330,19 @@ + static int aarp_device_event(struct notifier_block *this, unsigned long event, + void *ptr) + { ++ struct net_device *dev = ptr; + int ct; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_DOWN) { + write_lock_bh(&aarp_lock); + + for (ct = 0; ct < AARP_HASH_SIZE; ct++) { +- __aarp_expire_device(&resolved[ct], ptr); +- __aarp_expire_device(&unresolved[ct], ptr); +- __aarp_expire_device(&proxies[ct], ptr); ++ __aarp_expire_device(&resolved[ct], dev); ++ __aarp_expire_device(&unresolved[ct], dev); ++ __aarp_expire_device(&proxies[ct], dev); + } + + write_unlock_bh(&aarp_lock); +@@ -712,6 +716,9 @@ + struct atalk_addr sa, *ma, da; + struct atalk_iface *ifa; + ++ if (dev->nd_net != &init_net) ++ goto out0; ++ + /* We only do Ethernet SNAP AARP. 
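/*
 * The guard added to aarp_device_event() above is the boilerplate this
 * patch sprinkles over every not-yet-converted protocol: device
 * notifiers and packet hooks bail out early for devices outside the
 * initial namespace.  The generic shape (illustrative only; the name
 * example_device_event is made up):
 */
static int example_device_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* ignore devices that live in another network namespace */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	/* ... protocol-specific NETDEV_* handling as before ... */
	return NOTIFY_DONE;
}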
*/ + if (dev->type != ARPHRD_ETHER) + goto out0; +diff -Nurb linux-2.6.22-570/net/appletalk/atalk_proc.c linux-2.6.22-591/net/appletalk/atalk_proc.c +--- linux-2.6.22-570/net/appletalk/atalk_proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/appletalk/atalk_proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -13,6 +13,7 @@ + #include + #include + #include ++#include + + + static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos) +@@ -271,7 +272,7 @@ + struct proc_dir_entry *p; + int rc = -ENOMEM; + +- atalk_proc_dir = proc_mkdir("atalk", proc_net); ++ atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); + if (!atalk_proc_dir) + goto out; + atalk_proc_dir->owner = THIS_MODULE; +@@ -306,7 +307,7 @@ + out_route: + remove_proc_entry("interface", atalk_proc_dir); + out_interface: +- remove_proc_entry("atalk", proc_net); ++ remove_proc_entry("atalk", init_net.proc_net); + goto out; + } + +@@ -316,5 +317,5 @@ + remove_proc_entry("route", atalk_proc_dir); + remove_proc_entry("socket", atalk_proc_dir); + remove_proc_entry("arp", atalk_proc_dir); +- remove_proc_entry("atalk", proc_net); ++ remove_proc_entry("atalk", init_net.proc_net); + } +diff -Nurb linux-2.6.22-570/net/appletalk/ddp.c linux-2.6.22-591/net/appletalk/ddp.c +--- linux-2.6.22-570/net/appletalk/ddp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/appletalk/ddp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -647,9 +647,14 @@ + static int ddp_device_event(struct notifier_block *this, unsigned long event, + void *ptr) + { ++ struct net_device *dev = ptr; ++ ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_DOWN) + /* Discard any use of this */ +- atalk_dev_down(ptr); ++ atalk_dev_down(dev); + + return NOTIFY_DONE; + } +@@ -672,7 +677,7 @@ + if (copy_from_user(&atreq, arg, sizeof(atreq))) + return -EFAULT; + +- dev = __dev_get_by_name(atreq.ifr_name); ++ dev = __dev_get_by_name(&init_net, atreq.ifr_name); + if (!dev) + return -ENODEV; + +@@ -896,7 +901,7 @@ + if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1)) + return -EFAULT; + name[IFNAMSIZ-1] = '\0'; +- dev = __dev_get_by_name(name); ++ dev = __dev_get_by_name(&init_net, name); + if (!dev) + return -ENODEV; + } +@@ -1024,11 +1029,14 @@ + * Create a socket. Initialise the socket, blank the addresses + * set the state. + */ +-static int atalk_create(struct socket *sock, int protocol) ++static int atalk_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + int rc = -ESOCKTNOSUPPORT; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + /* + * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do + * and gives you the full ELAP frame. 
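/*
 * atalk_create() above shows the second recurring conversion: every
 * af_*->create() gains a struct net argument, refuses foreign
 * namespaces until the protocol is converted, and threads the
 * namespace into sk_alloc().  Generic shape (PF_EXAMPLE and
 * example_proto are placeholders, not real symbols):
 */
static int example_create(struct net *net, struct socket *sock, int protocol)
{
	struct sock *sk;

	if (net != &init_net)
		return -EAFNOSUPPORT;

	sk = sk_alloc(net, PF_EXAMPLE, GFP_KERNEL, &example_proto, 1);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);
	return 0;
}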
Should be handy for CAP 8) +@@ -1036,7 +1044,7 @@ + if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) + goto out; + rc = -ENOMEM; +- sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); ++ sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1); + if (!sk) + goto out; + rc = 0; +@@ -1265,7 +1273,7 @@ + + static int handle_ip_over_ddp(struct sk_buff *skb) + { +- struct net_device *dev = __dev_get_by_name("ipddp0"); ++ struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0"); + struct net_device_stats *stats; + + /* This needs to be able to handle ipddp"N" devices */ +@@ -1398,6 +1406,9 @@ + int origlen; + __u16 len_hops; + ++ if (dev->nd_net != &init_net) ++ goto freeit; ++ + /* Don't mangle buffer if shared */ + if (!(skb = skb_share_check(skb, GFP_ATOMIC))) + goto out; +@@ -1483,6 +1494,9 @@ + static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) + { ++ if (dev->nd_net != &init_net) ++ goto freeit; ++ + /* Expand any short form frames */ + if (skb_mac_header(skb)[2] == 1) { + struct ddpehdr *ddp; +diff -Nurb linux-2.6.22-570/net/atm/clip.c linux-2.6.22-591/net/atm/clip.c +--- linux-2.6.22-570/net/atm/clip.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/clip.c 2007-12-21 15:36:15.000000000 -0500 +@@ -293,7 +293,7 @@ + struct neigh_parms *parms; + + DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry); +- neigh->type = inet_addr_type(entry->ip); ++ neigh->type = inet_addr_type(&init_net, entry->ip); + if (neigh->type != RTN_UNICAST) + return -EINVAL; + +@@ -525,7 +525,10 @@ + struct atmarp_entry *entry; + int error; + struct clip_vcc *clip_vcc; +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; ++ struct flowi fl = { ++ .fl_net = &init_net, ++ .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} ++ }; + struct rtable *rt; + + if (vcc->push != clip_push) { +@@ -620,6 +623,9 @@ + { + struct net_device *dev = arg; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_UNREGISTER) { + neigh_ifdown(&clip_tbl, dev); + return NOTIFY_DONE; +@@ -954,6 +960,7 @@ + + seq = file->private_data; + seq->private = state; ++ state->ns.net = get_net(PROC_NET(inode)); + out: + return rc; + +@@ -962,11 +969,19 @@ + goto out; + } + ++static int arp_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct clip_seq_state *state = seq->private; ++ put_net(state->ns.net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations arp_seq_fops = { + .open = arp_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = arp_seq_release, + .owner = THIS_MODULE + }; + #endif +diff -Nurb linux-2.6.22-570/net/atm/common.c linux-2.6.22-591/net/atm/common.c +--- linux-2.6.22-570/net/atm/common.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/common.c 2007-12-21 15:36:15.000000000 -0500 +@@ -132,7 +132,7 @@ + .obj_size = sizeof(struct atm_vcc), + }; + +-int vcc_create(struct socket *sock, int protocol, int family) ++int vcc_create(struct net *net, struct socket *sock, int protocol, int family) + { + struct sock *sk; + struct atm_vcc *vcc; +@@ -140,7 +140,7 @@ + sock->sk = NULL; + if (sock->type == SOCK_STREAM) + return -EINVAL; +- sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1); ++ sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1); + if (!sk) + return -ENOMEM; + sock_init_data(sock, sk); +diff -Nurb 
linux-2.6.22-570/net/atm/common.h linux-2.6.22-591/net/atm/common.h +--- linux-2.6.22-570/net/atm/common.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/common.h 2007-12-21 15:36:15.000000000 -0500 +@@ -10,7 +10,7 @@ + #include /* for poll_table */ + + +-int vcc_create(struct socket *sock, int protocol, int family); ++int vcc_create(struct net *net, struct socket *sock, int protocol, int family); + int vcc_release(struct socket *sock); + int vcc_connect(struct socket *sock, int itf, short vpi, int vci); + int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, +diff -Nurb linux-2.6.22-570/net/atm/mpc.c linux-2.6.22-591/net/atm/mpc.c +--- linux-2.6.22-570/net/atm/mpc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/mpc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -244,7 +244,7 @@ + char name[IFNAMSIZ]; + + sprintf(name, "lec%d", itf); +- dev = dev_get_by_name(name); ++ dev = dev_get_by_name(&init_net, name); + + return dev; + } +@@ -956,6 +956,10 @@ + struct lec_priv *priv; + + dev = (struct net_device *)dev_ptr; ++ ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (dev->name == NULL || strncmp(dev->name, "lec", 3)) + return NOTIFY_DONE; /* we are only interested in lec:s */ + +diff -Nurb linux-2.6.22-570/net/atm/proc.c linux-2.6.22-591/net/atm/proc.c +--- linux-2.6.22-570/net/atm/proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -22,6 +22,7 @@ + #include + #include + #include /* for __init */ ++#include + #include + #include + #include +@@ -475,7 +476,7 @@ + if (e->dirent) + remove_proc_entry(e->name, atm_proc_root); + } +- remove_proc_entry("net/atm", NULL); ++ remove_proc_entry("atm", init_net.proc_net); + } + + int __init atm_proc_init(void) +@@ -483,7 +484,7 @@ + static struct atm_proc_entry *e; + int ret; + +- atm_proc_root = proc_mkdir("net/atm",NULL); ++ atm_proc_root = proc_mkdir("atm", init_net.proc_net); + if (!atm_proc_root) + goto err_out; + for (e = atm_proc_ents; e->name; e++) { +diff -Nurb linux-2.6.22-570/net/atm/pvc.c linux-2.6.22-591/net/atm/pvc.c +--- linux-2.6.22-570/net/atm/pvc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/pvc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -124,10 +124,13 @@ + }; + + +-static int pvc_create(struct socket *sock,int protocol) ++static int pvc_create(struct net *net, struct socket *sock,int protocol) + { ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + sock->ops = &pvc_proto_ops; +- return vcc_create(sock, protocol, PF_ATMPVC); ++ return vcc_create(net, sock, protocol, PF_ATMPVC); + } + + +diff -Nurb linux-2.6.22-570/net/atm/svc.c linux-2.6.22-591/net/atm/svc.c +--- linux-2.6.22-570/net/atm/svc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/atm/svc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -33,7 +33,7 @@ + #endif + + +-static int svc_create(struct socket *sock,int protocol); ++static int svc_create(struct net *net, struct socket *sock,int protocol); + + + /* +@@ -335,7 +335,7 @@ + + lock_sock(sk); + +- error = svc_create(newsock,0); ++ error = svc_create(sk->sk_net, newsock,0); + if (error) + goto out; + +@@ -636,12 +636,15 @@ + }; + + +-static int svc_create(struct socket *sock,int protocol) ++static int svc_create(struct net *net, struct socket *sock,int protocol) + { + int error; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + sock->ops = &svc_proto_ops; +- error = vcc_create(sock, protocol, AF_ATMSVC); ++ error = vcc_create(net, sock, protocol, 
AF_ATMSVC); + if (error) return error; + ATM_SD(sock)->local.sas_family = AF_ATMSVC; + ATM_SD(sock)->remote.sas_family = AF_ATMSVC; +diff -Nurb linux-2.6.22-570/net/ax25/af_ax25.c linux-2.6.22-591/net/ax25/af_ax25.c +--- linux-2.6.22-570/net/ax25/af_ax25.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ax25/af_ax25.c 2007-12-21 15:36:15.000000000 -0500 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + + + +@@ -103,6 +104,9 @@ + { + struct net_device *dev = (struct net_device *)ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* Reject non AX.25 devices */ + if (dev->type != ARPHRD_AX25) + return NOTIFY_DONE; +@@ -627,7 +631,7 @@ + break; + } + +- dev = dev_get_by_name(devname); ++ dev = dev_get_by_name(&init_net, devname); + if (dev == NULL) { + res = -ENODEV; + break; +@@ -779,11 +783,14 @@ + .obj_size = sizeof(struct sock), + }; + +-static int ax25_create(struct socket *sock, int protocol) ++static int ax25_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + ax25_cb *ax25; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + switch (sock->type) { + case SOCK_DGRAM: + if (protocol == 0 || protocol == PF_AX25) +@@ -829,7 +836,7 @@ + return -ESOCKTNOSUPPORT; + } + +- if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) ++ if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL) + return -ENOMEM; + + ax25 = sk->sk_protinfo = ax25_create_cb(); +@@ -854,7 +861,7 @@ + struct sock *sk; + ax25_cb *ax25, *oax25; + +- if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) ++ if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + return NULL; + + if ((ax25 = ax25_create_cb()) == NULL) { +@@ -1998,9 +2005,9 @@ + register_netdevice_notifier(&ax25_dev_notifier); + ax25_register_sysctl(); + +- proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops); +- proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops); +- proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops); ++ proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops); ++ proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops); ++ proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops); + out: + return rc; + } +@@ -2014,9 +2021,9 @@ + + static void __exit ax25_exit(void) + { +- proc_net_remove("ax25_route"); +- proc_net_remove("ax25"); +- proc_net_remove("ax25_calls"); ++ proc_net_remove(&init_net, "ax25_route"); ++ proc_net_remove(&init_net, "ax25"); ++ proc_net_remove(&init_net, "ax25_calls"); + ax25_rt_free(); + ax25_uid_free(); + ax25_dev_free(); +diff -Nurb linux-2.6.22-570/net/ax25/ax25_in.c linux-2.6.22-591/net/ax25/ax25_in.c +--- linux-2.6.22-570/net/ax25/ax25_in.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ax25/ax25_in.c 2007-12-21 15:36:15.000000000 -0500 +@@ -451,6 +451,11 @@ + skb->sk = NULL; /* Initially we don't know who it's for */ + skb->destructor = NULL; /* Who initializes this, dammit?! 
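/*
 * The /proc/net conversions here follow the same two-step as atalk and
 * atm above: entries are created against a per-namespace proc_net
 * (init_net.proc_net for now), and removal names the namespace too:
 *
 *	proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops);
 *	proc_net_remove(&init_net, "ax25");
 */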
*/ + ++ if (dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + if ((*skb->data & 0x0F) != 0) { + kfree_skb(skb); /* Not a KISS data frame */ + return 0; +diff -Nurb linux-2.6.22-570/net/bluetooth/af_bluetooth.c linux-2.6.22-591/net/bluetooth/af_bluetooth.c +--- linux-2.6.22-570/net/bluetooth/af_bluetooth.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/af_bluetooth.c 2007-12-21 15:36:15.000000000 -0500 +@@ -95,10 +95,13 @@ + } + EXPORT_SYMBOL(bt_sock_unregister); + +-static int bt_sock_create(struct socket *sock, int proto) ++static int bt_sock_create(struct net *net, struct socket *sock, int proto) + { + int err; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (proto < 0 || proto >= BT_MAX_PROTO) + return -EINVAL; + +@@ -113,7 +116,7 @@ + read_lock(&bt_proto_lock); + + if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { +- err = bt_proto[proto]->create(sock, proto); ++ err = bt_proto[proto]->create(net, sock, proto); + module_put(bt_proto[proto]->owner); + } + +diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/core.c linux-2.6.22-591/net/bluetooth/bnep/core.c +--- linux-2.6.22-570/net/bluetooth/bnep/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/bnep/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -474,7 +475,6 @@ + + daemonize("kbnepd %s", dev->name); + set_user_nice(current, -15); +- current->flags |= PF_NOFREEZE; + + init_waitqueue_entry(&wait, current); + add_wait_queue(sk->sk_sleep, &wait); +diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/sock.c linux-2.6.22-591/net/bluetooth/bnep/sock.c +--- linux-2.6.22-570/net/bluetooth/bnep/sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/bnep/sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -204,7 +204,7 @@ + .obj_size = sizeof(struct bt_sock) + }; + +-static int bnep_sock_create(struct socket *sock, int protocol) ++static int bnep_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -213,7 +213,7 @@ + if (sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + +- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1); + if (!sk) + return -ENOMEM; + +diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/core.c linux-2.6.22-591/net/bluetooth/cmtp/core.c +--- linux-2.6.22-570/net/bluetooth/cmtp/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/cmtp/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -287,7 +288,6 @@ + + daemonize("kcmtpd_ctr_%d", session->num); + set_user_nice(current, -15); +- current->flags |= PF_NOFREEZE; + + init_waitqueue_entry(&wait, current); + add_wait_queue(sk->sk_sleep, &wait); +diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/sock.c linux-2.6.22-591/net/bluetooth/cmtp/sock.c +--- linux-2.6.22-570/net/bluetooth/cmtp/sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/cmtp/sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -195,7 +195,7 @@ + .obj_size = sizeof(struct bt_sock) + }; + +-static int cmtp_sock_create(struct socket *sock, int protocol) ++static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -204,7 +204,7 @@ + if (sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + +- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, 
&cmtp_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1); + if (!sk) + return -ENOMEM; + +diff -Nurb linux-2.6.22-570/net/bluetooth/hci_sock.c linux-2.6.22-591/net/bluetooth/hci_sock.c +--- linux-2.6.22-570/net/bluetooth/hci_sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/hci_sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -618,7 +618,7 @@ + .obj_size = sizeof(struct hci_pinfo) + }; + +-static int hci_sock_create(struct socket *sock, int protocol) ++static int hci_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -629,7 +629,7 @@ + + sock->ops = &hci_sock_ops; + +- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1); + if (!sk) + return -ENOMEM; + +diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/core.c linux-2.6.22-591/net/bluetooth/hidp/core.c +--- linux-2.6.22-570/net/bluetooth/hidp/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/hidp/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -547,7 +548,6 @@ + + daemonize("khidpd_%04x%04x", vendor, product); + set_user_nice(current, -15); +- current->flags |= PF_NOFREEZE; + + init_waitqueue_entry(&ctrl_wait, current); + init_waitqueue_entry(&intr_wait, current); +diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/sock.c linux-2.6.22-591/net/bluetooth/hidp/sock.c +--- linux-2.6.22-570/net/bluetooth/hidp/sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/hidp/sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -246,7 +246,7 @@ + .obj_size = sizeof(struct bt_sock) + }; + +-static int hidp_sock_create(struct socket *sock, int protocol) ++static int hidp_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -255,7 +255,7 @@ + if (sock->type != SOCK_RAW) + return -ESOCKTNOSUPPORT; + +- sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1); + if (!sk) + return -ENOMEM; + +diff -Nurb linux-2.6.22-570/net/bluetooth/l2cap.c linux-2.6.22-591/net/bluetooth/l2cap.c +--- linux-2.6.22-570/net/bluetooth/l2cap.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/l2cap.c 2007-12-21 15:36:15.000000000 -0500 +@@ -518,11 +518,11 @@ + .obj_size = sizeof(struct l2cap_pinfo) + }; + +-static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio) ++static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) + { + struct sock *sk; + +- sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1); + if (!sk) + return NULL; + +@@ -543,7 +543,7 @@ + return sk; + } + +-static int l2cap_sock_create(struct socket *sock, int protocol) ++static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -560,7 +560,7 @@ + + sock->ops = &l2cap_sock_ops; + +- sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC); ++ sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + +@@ -1425,7 +1425,7 @@ + goto response; + } + +- sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC); ++ sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC); + if (!sk) + goto response; + +diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/core.c linux-2.6.22-591/net/bluetooth/rfcomm/core.c 
+--- linux-2.6.22-570/net/bluetooth/rfcomm/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/rfcomm/core.c 2007-12-21 15:36:12.000000000 -0500 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1940,7 +1941,6 @@ + + daemonize("krfcommd"); + set_user_nice(current, -10); +- current->flags |= PF_NOFREEZE; + + BT_DBG(""); + +diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/sock.c linux-2.6.22-591/net/bluetooth/rfcomm/sock.c +--- linux-2.6.22-570/net/bluetooth/rfcomm/sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/rfcomm/sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -282,12 +282,12 @@ + .obj_size = sizeof(struct rfcomm_pinfo) + }; + +-static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio) ++static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) + { + struct rfcomm_dlc *d; + struct sock *sk; + +- sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1); + if (!sk) + return NULL; + +@@ -323,7 +323,7 @@ + return sk; + } + +-static int rfcomm_sock_create(struct socket *sock, int protocol) ++static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -336,7 +336,7 @@ + + sock->ops = &rfcomm_sock_ops; + +- sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC); ++ sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + +@@ -868,7 +868,7 @@ + goto done; + } + +- sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC); ++ sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC); + if (!sk) + goto done; + +diff -Nurb linux-2.6.22-570/net/bluetooth/sco.c linux-2.6.22-591/net/bluetooth/sco.c +--- linux-2.6.22-570/net/bluetooth/sco.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bluetooth/sco.c 2007-12-21 15:36:15.000000000 -0500 +@@ -414,11 +414,11 @@ + .obj_size = sizeof(struct sco_pinfo) + }; + +-static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio) ++static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio) + { + struct sock *sk; + +- sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1); ++ sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1); + if (!sk) + return NULL; + +@@ -439,7 +439,7 @@ + return sk; + } + +-static int sco_sock_create(struct socket *sock, int protocol) ++static int sco_sock_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + +@@ -452,7 +452,7 @@ + + sock->ops = &sco_sock_ops; + +- sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC); ++ sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC); + if (!sk) + return -ENOMEM; + +@@ -807,7 +807,7 @@ + + bh_lock_sock(parent); + +- sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC); ++ sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC); + if (!sk) { + bh_unlock_sock(parent); + goto done; +diff -Nurb linux-2.6.22-570/net/bridge/br_if.c linux-2.6.22-591/net/bridge/br_if.c +--- linux-2.6.22-570/net/bridge/br_if.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/bridge/br_if.c 2007-12-21 15:36:15.000000000 -0500 +@@ -45,7 +45,7 @@ + + old_fs = get_fs(); + set_fs(KERNEL_DS); +- err = dev_ethtool(&ifr); ++ err = dev_ethtool(dev->nd_net, &ifr); + set_fs(old_fs); + + if (!err) { +@@ -314,7 +314,7 @@ + int ret = 0; + + rtnl_lock(); +- dev = __dev_get_by_name(name); ++ dev = 
__dev_get_by_name(&init_net, name); + if (dev == NULL) + ret = -ENXIO; /* Could not find device */ + +@@ -455,7 +455,7 @@ + struct net_device *dev, *nxt; + + rtnl_lock(); +- for_each_netdev_safe(dev, nxt) ++ for_each_netdev_safe(&init_net, dev, nxt) + if (dev->priv_flags & IFF_EBRIDGE) + del_br(dev->priv); + rtnl_unlock(); +diff -Nurb linux-2.6.22-570/net/bridge/br_ioctl.c linux-2.6.22-591/net/bridge/br_ioctl.c +--- linux-2.6.22-570/net/bridge/br_ioctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_ioctl.c 2007-12-21 15:36:15.000000000 -0500 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include "br_private.h" + +@@ -27,7 +28,7 @@ + struct net_device *dev; + int i = 0; + +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (i >= num) + break; + if (dev->priv_flags & IFF_EBRIDGE) +@@ -90,7 +91,7 @@ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(&init_net, ifindex); + if (dev == NULL) + return -EINVAL; + +@@ -364,7 +365,7 @@ + return -EOPNOTSUPP; + } + +-int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg) ++int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg) + { + switch (cmd) { + case SIOCGIFBR: +diff -Nurb linux-2.6.22-570/net/bridge/br_netfilter.c linux-2.6.22-591/net/bridge/br_netfilter.c +--- linux-2.6.22-570/net/bridge/br_netfilter.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/bridge/br_netfilter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -310,6 +310,7 @@ + if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { + struct rtable *rt; + struct flowi fl = { ++ .fl_net = &init_net, + .nl_u = { + .ip4_u = { + .daddr = iph->daddr, +@@ -518,6 +519,10 @@ + if (unlikely(!pskb_may_pull(skb, len))) + goto out; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) { + #ifdef CONFIG_SYSCTL +@@ -591,6 +596,10 @@ + { + struct sk_buff *skb = *pskb; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (skb->dst == (struct dst_entry *)&__fake_rtable) { + dst_release(skb->dst); + skb->dst = NULL; +@@ -635,6 +644,10 @@ + struct net_device *parent; + int pf; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (!skb->nf_bridge) + return NF_ACCEPT; + +@@ -674,6 +687,10 @@ + struct sk_buff *skb = *pskb; + struct net_device **d = (struct net_device **)(skb->cb); + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + #ifdef CONFIG_SYSCTL + if (!brnf_call_arptables) + return NF_ACCEPT; +@@ -718,6 +735,10 @@ + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (!skb->nf_bridge) + return NF_ACCEPT; + +@@ -762,6 +783,10 @@ + struct net_device *realoutdev = bridge_parent(skb->dev); + int pf; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + #ifdef CONFIG_NETFILTER_DEBUG + /* Be very paranoid. This probably won't happen anymore, but let's + * keep the check just to be sure... 
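/*
 * In the br_netfilter hooks above, either 'in' or 'out' can be NULL
 * depending on the hook point, hence the (in ? in : out) dance to find
 * some device that identifies the packet's namespace.  Shape of each
 * hook (illustrative; example_hook is made up, the signature mirrors
 * the 2.6.22-era hooks in this file):
 */
static unsigned int example_hook(unsigned int hook, struct sk_buff **pskb,
				 const struct net_device *in,
				 const struct net_device *out,
				 int (*okfn)(struct sk_buff *))
{
	/* only filter packets in the initial network namespace */
	if ((in ? in : out)->nd_net != &init_net)
		return NF_ACCEPT;

	/* ... the real filtering ... */
	return NF_ACCEPT;
}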
*/ +@@ -833,6 +858,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if ((*pskb)->nf_bridge && + !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + return NF_STOP; +diff -Nurb linux-2.6.22-570/net/bridge/br_netlink.c linux-2.6.22-591/net/bridge/br_netlink.c +--- linux-2.6.22-570/net/bridge/br_netlink.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_netlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -12,6 +12,8 @@ + + #include + #include ++#include ++#include + #include "br_private.h" + + static inline size_t br_nlmsg_size(void) +@@ -95,10 +97,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); ++ err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_LINK, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); + } + + /* +@@ -106,11 +108,15 @@ + */ + static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + struct net_device *dev; + int idx; + ++ if (net != &init_net) ++ return 0; ++ + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + /* not a bridge port */ + if (dev->br_port == NULL || idx < cb->args[0]) + goto skip; +@@ -134,12 +140,16 @@ + */ + static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifinfomsg *ifm; + struct nlattr *protinfo; + struct net_device *dev; + struct net_bridge_port *p; + u8 new_state; + ++ if (net != &init_net) ++ return -EINVAL; ++ + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + +@@ -155,7 +165,7 @@ + if (new_state > BR_STATE_BLOCKING) + return -EINVAL; + +- dev = __dev_get_by_index(ifm->ifi_index); ++ dev = __dev_get_by_index(&init_net, ifm->ifi_index); + if (!dev) + return -ENODEV; + +diff -Nurb linux-2.6.22-570/net/bridge/br_notify.c linux-2.6.22-591/net/bridge/br_notify.c +--- linux-2.6.22-570/net/bridge/br_notify.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_notify.c 2007-12-21 15:36:15.000000000 -0500 +@@ -15,6 +15,7 @@ + + #include + #include ++#include + + #include "br_private.h" + +@@ -36,6 +37,9 @@ + struct net_bridge_port *p = dev->br_port; + struct net_bridge *br; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* not a port of a bridge */ + if (p == NULL) + return NOTIFY_DONE; +diff -Nurb linux-2.6.22-570/net/bridge/br_private.h linux-2.6.22-591/net/bridge/br_private.h +--- linux-2.6.22-570/net/bridge/br_private.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_private.h 2007-12-21 15:36:15.000000000 -0500 +@@ -196,7 +196,7 @@ + + /* br_ioctl.c */ + extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +-extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg); ++extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg); + + /* br_netfilter.c */ + #ifdef CONFIG_BRIDGE_NETFILTER +diff -Nurb linux-2.6.22-570/net/bridge/br_stp_bpdu.c linux-2.6.22-591/net/bridge/br_stp_bpdu.c +--- linux-2.6.22-570/net/bridge/br_stp_bpdu.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_stp_bpdu.c 2007-12-21 15:36:15.000000000 -0500 +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -141,6 
+142,9 @@ + struct net_bridge *br; + const unsigned char *buf; + ++ if (dev->nd_net != &init_net) ++ goto err; ++ + if (!p) + goto err; + +diff -Nurb linux-2.6.22-570/net/bridge/br_stp_if.c linux-2.6.22-591/net/bridge/br_stp_if.c +--- linux-2.6.22-570/net/bridge/br_stp_if.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_stp_if.c 2007-12-21 15:36:12.000000000 -0500 +@@ -125,7 +125,7 @@ + char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; + char *envp[] = { NULL }; + +- r = call_usermodehelper(BR_STP_PROG, argv, envp, 1); ++ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (r == 0) { + br->stp_enabled = BR_USER_STP; + printk(KERN_INFO "%s: userspace STP started\n", br->dev->name); +diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_br.c linux-2.6.22-591/net/bridge/br_sysfs_br.c +--- linux-2.6.22-570/net/bridge/br_sysfs_br.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_sysfs_br.c 2007-12-21 15:36:12.000000000 -0500 +@@ -360,8 +360,9 @@ + * + * Returns the number of bytes read. + */ +-static ssize_t brforward_read(struct kobject *kobj, char *buf, +- loff_t off, size_t count) ++static ssize_t brforward_read(struct kobject *kobj, ++ struct bin_attribute *bin_attr, ++ char *buf, loff_t off, size_t count) + { + struct device *dev = to_dev(kobj); + struct net_bridge *br = to_bridge(dev); +@@ -383,8 +384,7 @@ + + static struct bin_attribute bridge_forward = { + .attr = { .name = SYSFS_BRIDGE_FDB, +- .mode = S_IRUGO, +- .owner = THIS_MODULE, }, ++ .mode = S_IRUGO, }, + .read = brforward_read, + }; + +diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_if.c linux-2.6.22-591/net/bridge/br_sysfs_if.c +--- linux-2.6.22-570/net/bridge/br_sysfs_if.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/br_sysfs_if.c 2007-12-21 15:36:12.000000000 -0500 +@@ -29,8 +29,7 @@ + #define BRPORT_ATTR(_name,_mode,_show,_store) \ + struct brport_attribute brport_attr_##_name = { \ + .attr = {.name = __stringify(_name), \ +- .mode = _mode, \ +- .owner = THIS_MODULE, }, \ ++ .mode = _mode }, \ + .show = _show, \ + .store = _store, \ + }; +diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c +--- linux-2.6.22-570/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/bridge/netfilter/ebt_ulog.c 2007-12-21 15:36:15.000000000 -0500 +@@ -301,8 +301,9 @@ + spin_lock_init(&ulog_buffers[i].lock); + } + +- ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, +- NULL, NULL, THIS_MODULE); ++ ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, ++ EBT_ULOG_MAXNLGROUPS, NULL, NULL, ++ THIS_MODULE); + if (!ebtulognl) + ret = -ENOMEM; + else if ((ret = ebt_register_watcher(&ulog))) +diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c +--- linux-2.6.22-570/net/bridge/netfilter/ebtable_filter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/netfilter/ebtable_filter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -64,6 +64,10 @@ + ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ebt_do_table(hook, pskb, in, out, &frame_filter); + } + +diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c 
linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c +--- linux-2.6.22-570/net/bridge/netfilter/ebtable_nat.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/netfilter/ebtable_nat.c 2007-12-21 15:36:15.000000000 -0500 +@@ -64,6 +64,10 @@ + ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in + , const struct net_device *out, int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ebt_do_table(hook, pskb, in, out, &frame_nat); + } + +@@ -71,6 +75,10 @@ + ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in + , const struct net_device *out, int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ebt_do_table(hook, pskb, in, out, &frame_nat); + } + +diff -Nurb linux-2.6.22-570/net/bridge/netfilter/ebtables.c linux-2.6.22-591/net/bridge/netfilter/ebtables.c +--- linux-2.6.22-570/net/bridge/netfilter/ebtables.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/bridge/netfilter/ebtables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + /* needed for logical [in,out]-dev filtering */ + #include "../br_private.h" + +@@ -1438,6 +1439,9 @@ + { + int ret; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + switch(cmd) { + case EBT_SO_SET_ENTRIES: + ret = do_replace(user, len); +@@ -1457,6 +1461,9 @@ + struct ebt_replace tmp; + struct ebt_table *t; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (copy_from_user(&tmp, user, sizeof(tmp))) + return -EFAULT; + +diff -Nurb linux-2.6.22-570/net/core/Makefile linux-2.6.22-591/net/core/Makefile +--- linux-2.6.22-570/net/core/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/Makefile 2007-12-21 15:36:15.000000000 -0500 +@@ -3,7 +3,7 @@ + # + + obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +- gen_stats.o gen_estimator.o ++ gen_stats.o gen_estimator.o net_namespace.o + + obj-$(CONFIG_SYSCTL) += sysctl_net_core.o + +diff -Nurb linux-2.6.22-570/net/core/dev.c linux-2.6.22-591/net/core/dev.c +--- linux-2.6.22-570/net/core/dev.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/core/dev.c 2007-12-21 15:36:15.000000000 -0500 +@@ -116,6 +116,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -152,9 +153,22 @@ + static struct list_head ptype_all __read_mostly; /* Taps */ + + #ifdef CONFIG_NET_DMA +-static struct dma_client *net_dma_client; +-static unsigned int net_dma_count; +-static spinlock_t net_dma_event_lock; ++struct net_dma { ++ struct dma_client client; ++ spinlock_t lock; ++ cpumask_t channel_mask; ++ struct dma_chan *channels[NR_CPUS]; ++}; ++ ++static enum dma_state_client ++netdev_dma_event(struct dma_client *client, struct dma_chan *chan, ++ enum dma_state state); ++ ++static struct net_dma net_dma = { ++ .client = { ++ .event_callback = netdev_dma_event, ++ }, ++}; + #endif + + /* +@@ -176,25 +190,50 @@ + * unregister_netdevice(), which must be called with the rtnl + * semaphore held. 
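/*
 * Once the device list is per-namespace (struct net gains
 * dev_base_head plus name/index hash tables), every global walk and
 * lookup in this patch grows a namespace argument:
 *
 *	for_each_netdev(dev)          becomes  for_each_netdev(&init_net, dev)
 *	__dev_get_by_name(name)       becomes  __dev_get_by_name(&init_net, name)
 *	dev_get_by_index(ifindex)     becomes  dev_get_by_index(&init_net, ifindex)
 */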
+ */ +-LIST_HEAD(dev_base_head); + DEFINE_RWLOCK(dev_base_lock); + +-EXPORT_SYMBOL(dev_base_head); + EXPORT_SYMBOL(dev_base_lock); + + #define NETDEV_HASHBITS 8 +-static struct hlist_head dev_name_head[1<dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)]; ++} ++ ++static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) ++{ ++ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; ++} ++ ++/* Device list insertion */ ++static int list_netdevice(struct net_device *dev) ++{ ++ struct net *net = dev->nd_net; ++ ++ ASSERT_RTNL(); ++ ++ write_lock_bh(&dev_base_lock); ++ list_add_tail(&dev->dev_list, &net->dev_base_head); ++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); ++ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex)); ++ write_unlock_bh(&dev_base_lock); ++ return 0; + } + +-static inline struct hlist_head *dev_index_hash(int ifindex) ++/* Device list removal */ ++static void unlist_netdevice(struct net_device *dev) + { +- return &dev_index_head[ifindex & ((1<dev_list); ++ hlist_del(&dev->name_hlist); ++ hlist_del(&dev->index_hlist); ++ write_unlock_bh(&dev_base_lock); + } + + /* +@@ -477,7 +516,7 @@ + * If device already registered then return base of 1 + * to indicate not to probe for this interface + */ +- if (__dev_get_by_name(name)) ++ if (__dev_get_by_name(&init_net, name)) + return 1; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) +@@ -532,11 +571,11 @@ + * careful with locks. + */ + +-struct net_device *__dev_get_by_name(const char *name) ++struct net_device *__dev_get_by_name(struct net *net, const char *name) + { + struct hlist_node *p; + +- hlist_for_each(p, dev_name_hash(name)) { ++ hlist_for_each(p, dev_name_hash(net, name)) { + struct net_device *dev + = hlist_entry(p, struct net_device, name_hlist); + if (!strncmp(dev->name, name, IFNAMSIZ)) +@@ -556,12 +595,12 @@ + * matching device is found. + */ + +-struct net_device *dev_get_by_name(const char *name) ++struct net_device *dev_get_by_name(struct net *net, const char *name) + { + struct net_device *dev; + + read_lock(&dev_base_lock); +- dev = __dev_get_by_name(name); ++ dev = __dev_get_by_name(net, name); + if (dev) + dev_hold(dev); + read_unlock(&dev_base_lock); +@@ -579,11 +618,11 @@ + * or @dev_base_lock. + */ + +-struct net_device *__dev_get_by_index(int ifindex) ++struct net_device *__dev_get_by_index(struct net *net, int ifindex) + { + struct hlist_node *p; + +- hlist_for_each(p, dev_index_hash(ifindex)) { ++ hlist_for_each(p, dev_index_hash(net, ifindex)) { + struct net_device *dev + = hlist_entry(p, struct net_device, index_hlist); + if (dev->ifindex == ifindex) +@@ -603,12 +642,12 @@ + * dev_put to indicate they have finished with it. 
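/*
 * The per-net lookup tables above are sized by NETDEV_HASHBITS (8), so
 * dev_name_hash()/dev_index_hash() mask the hash down to one of 256
 * chains: hash & ((1 << 8) - 1).  full_name_hash() supplies the name
 * hash; the ifindex is used directly as its own hash.
 */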
+ */ + +-struct net_device *dev_get_by_index(int ifindex) ++struct net_device *dev_get_by_index(struct net *net, int ifindex) + { + struct net_device *dev; + + read_lock(&dev_base_lock); +- dev = __dev_get_by_index(ifindex); ++ dev = __dev_get_by_index(net, ifindex); + if (dev) + dev_hold(dev); + read_unlock(&dev_base_lock); +@@ -629,13 +668,13 @@ + * If the API was consistent this would be __dev_get_by_hwaddr + */ + +-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) ++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha) + { + struct net_device *dev; + + ASSERT_RTNL(); + +- for_each_netdev(dev) ++ for_each_netdev(&init_net, dev) + if (dev->type == type && + !memcmp(dev->dev_addr, ha, dev->addr_len)) + return dev; +@@ -645,12 +684,12 @@ + + EXPORT_SYMBOL(dev_getbyhwaddr); + +-struct net_device *__dev_getfirstbyhwtype(unsigned short type) ++struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type) + { + struct net_device *dev; + + ASSERT_RTNL(); +- for_each_netdev(dev) ++ for_each_netdev(net, dev) + if (dev->type == type) + return dev; + +@@ -659,12 +698,12 @@ + + EXPORT_SYMBOL(__dev_getfirstbyhwtype); + +-struct net_device *dev_getfirstbyhwtype(unsigned short type) ++struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) + { + struct net_device *dev; + + rtnl_lock(); +- dev = __dev_getfirstbyhwtype(type); ++ dev = __dev_getfirstbyhwtype(net, type); + if (dev) + dev_hold(dev); + rtnl_unlock(); +@@ -684,13 +723,13 @@ + * dev_put to indicate they have finished with it. + */ + +-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) ++struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask) + { + struct net_device *dev, *ret; + + ret = NULL; + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if (((dev->flags ^ if_flags) & mask) == 0) { + dev_hold(dev); + ret = dev; +@@ -727,9 +766,10 @@ + } + + /** +- * dev_alloc_name - allocate a name for a device +- * @dev: device ++ * __dev_alloc_name - allocate a name for a device ++ * @net: network namespace to allocate the device name in + * @name: name format string ++ * @buf: scratch buffer and result name string + * + * Passed a format string - eg "lt%d" it will try and find a suitable + * id. It scans list of devices to build up a free map, then chooses +@@ -740,10 +780,9 @@ + * Returns the number of the unit assigned or a negative errno code. 
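/*
 * __dev_alloc_name() above reuses the format string as both printf and
 * scanf template: sscanf() extracts the unit number from each existing
 * name, a bitmap marks units in use, and the first clear bit wins.  A
 * self-contained user-space sketch of the same idea (alloc_unit is
 * made up; the kernel uses a page-sized bitmap and IFNAMSIZ buffers):
 */
#include <stdio.h>
#include <string.h>

static int alloc_unit(const char *fmt, const char **taken, int ntaken)
{
	char buf[32];
	unsigned char inuse[256] = { 0 };
	int i, unit;

	for (i = 0; i < ntaken; i++) {
		if (sscanf(taken[i], fmt, &unit) != 1)
			continue;
		if (unit < 0 || unit >= 256)
			continue;
		/* skip cases where sscanf is not an exact inverse of printf */
		snprintf(buf, sizeof(buf), fmt, unit);
		if (strncmp(buf, taken[i], sizeof(buf)))
			continue;
		inuse[unit] = 1;
	}
	for (unit = 0; unit < 256; unit++)
		if (!inuse[unit])
			return unit;
	return -1;
}

int main(void)
{
	const char *taken[] = { "eth0", "eth1", "lo" };

	printf("next unit: %d\n", alloc_unit("eth%d", taken, 3)); /* 2 */
	return 0;
}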
+ */ + +-int dev_alloc_name(struct net_device *dev, const char *name) ++static int __dev_alloc_name(struct net *net, const char *name, char *buf) + { + int i = 0; +- char buf[IFNAMSIZ]; + const char *p; + const int max_netdevices = 8*PAGE_SIZE; + long *inuse; +@@ -764,14 +803,14 @@ + if (!inuse) + return -ENOMEM; + +- for_each_netdev(d) { ++ for_each_netdev(net, d) { + if (!sscanf(d->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; + + /* avoid cases where sscanf is not exact inverse of printf */ +- snprintf(buf, sizeof(buf), name, i); ++ snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, d->name, IFNAMSIZ)) + set_bit(i, inuse); + } +@@ -780,11 +819,9 @@ + free_page((unsigned long) inuse); + } + +- snprintf(buf, sizeof(buf), name, i); +- if (!__dev_get_by_name(buf)) { +- strlcpy(dev->name, buf, IFNAMSIZ); ++ snprintf(buf, IFNAMSIZ, name, i); ++ if (!__dev_get_by_name(net, buf)) + return i; +- } + + /* It is possible to run out of possible slots + * when the name is long and there isn't enough space left +@@ -793,6 +830,34 @@ + return -ENFILE; + } + ++/** ++ * dev_alloc_name - allocate a name for a device ++ * @dev: device ++ * @name: name format string ++ * ++ * Passed a format string - eg "lt%d" it will try and find a suitable ++ * id. It scans list of devices to build up a free map, then chooses ++ * the first empty slot. The caller must hold the dev_base or rtnl lock ++ * while allocating the name and adding the device in order to avoid ++ * duplicates. ++ * Limited to bits_per_byte * page size devices (ie 32K on most platforms). ++ * Returns the number of the unit assigned or a negative errno code. ++ */ ++ ++int dev_alloc_name(struct net_device *dev, const char *name) ++{ ++ char buf[IFNAMSIZ]; ++ struct net *net; ++ int ret; ++ ++ BUG_ON(!dev->nd_net); ++ net = dev->nd_net; ++ ret = __dev_alloc_name(net, name, buf); ++ if (ret >= 0) ++ strlcpy(dev->name, buf, IFNAMSIZ); ++ return ret; ++} ++ + + /** + * dev_change_name - change name of a device +@@ -805,9 +870,12 @@ + int dev_change_name(struct net_device *dev, char *newname) + { + int err = 0; ++ struct net *net; + + ASSERT_RTNL(); ++ BUG_ON(!dev->nd_net); + ++ net = dev->nd_net; + if (dev->flags & IFF_UP) + return -EBUSY; + +@@ -820,14 +888,18 @@ + return err; + strcpy(newname, dev->name); + } +- else if (__dev_get_by_name(newname)) ++ else if (__dev_get_by_name(net, newname)) + return -EEXIST; +- else ++ else { ++ if (strncmp(newname, dev->name, IFNAMSIZ)) ++ printk(KERN_INFO "%s renamed to %s\n", ++ dev->name, newname); + strlcpy(dev->name, newname, IFNAMSIZ); ++ } + + device_rename(&dev->dev, dev->name); + hlist_del(&dev->name_hlist); +- hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); ++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name)); + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); + + return err; +@@ -871,12 +943,12 @@ + * available in this kernel then it becomes a nop. + */ + +-void dev_load(const char *name) ++void dev_load(struct net *net, const char *name) + { + struct net_device *dev; + + read_lock(&dev_base_lock); +- dev = __dev_get_by_name(name); ++ dev = __dev_get_by_name(net, name); + read_unlock(&dev_base_lock); + + if (!dev && capable(CAP_SYS_MODULE)) +@@ -1019,6 +1091,8 @@ + } + + ++static int dev_boot_phase = 1; ++ + /* + * Device change register/unregister. These are not inline or static + * as we export them to the world. 
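The free-map scan that the dev_alloc_name() comment above describes can be exercised outside the kernel. A standalone sketch, assuming a toy alloc_unit() in place of __dev_alloc_name() and a 64-bit word in place of the page-sized bitmap:

#include <stdio.h>
#include <string.h>

#define IFNAMSIZ  16
#define MAX_UNITS 64

/* Scan existing names against a template like "eth%d", mark the units
 * already taken, and return the first free one (or -1 if none). */
static int alloc_unit(const char *template, const char *names[], int n)
{
	unsigned long long inuse = 0;
	char buf[IFNAMSIZ];
	int i, unit;

	for (i = 0; i < n; i++) {
		if (sscanf(names[i], template, &unit) != 1)
			continue;
		if (unit < 0 || unit >= MAX_UNITS)
			continue;
		/* avoid cases where sscanf is not an exact inverse of printf */
		snprintf(buf, sizeof(buf), template, unit);
		if (!strncmp(buf, names[i], IFNAMSIZ))
			inuse |= 1ULL << unit;
	}
	for (unit = 0; unit < MAX_UNITS; unit++)
		if (!(inuse & (1ULL << unit)))
			return unit;
	return -1;
}

int main(void)
{
	const char *names[] = { "eth0", "eth1", "eth3", "eth01" };
	printf("eth%d\n", alloc_unit("eth%d", names, 4));	/* eth2 */
	return 0;
}

The snprintf()/strncmp() round trip is the detail worth copying: it rejects names such as "eth01" that sscanf() alone would count as occupying slot 1.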
+@@ -1045,14 +1119,17 @@ + + rtnl_lock(); + err = raw_notifier_chain_register(&netdev_chain, nb); +- if (!err) { +- for_each_netdev(dev) { ++ if (!err && !dev_boot_phase) { ++ struct net *net; ++ for_each_net(net) { ++ for_each_netdev(net, dev) { + nb->notifier_call(nb, NETDEV_REGISTER, dev); + + if (dev->flags & IFF_UP) + nb->notifier_call(nb, NETDEV_UP, dev); + } + } ++ } + rtnl_unlock(); + return err; + } +@@ -1086,9 +1163,9 @@ + * are as for raw_notifier_call_chain(). + */ + +-int call_netdevice_notifiers(unsigned long val, void *v) ++int call_netdevice_notifiers(unsigned long val, struct net_device *dev) + { +- return raw_notifier_call_chain(&netdev_chain, val, v); ++ return raw_notifier_call_chain(&netdev_chain, val, dev); + } + + /* When > 0 there are consumers of rx skb time stamps */ +@@ -1510,9 +1587,11 @@ + skb_set_transport_header(skb, skb->csum_start - + skb_headroom(skb)); + +- if (!(dev->features & NETIF_F_GEN_CSUM) && +- (!(dev->features & NETIF_F_IP_CSUM) || +- skb->protocol != htons(ETH_P_IP))) ++ if (!(dev->features & NETIF_F_GEN_CSUM) ++ || ((dev->features & NETIF_F_IP_CSUM) ++ && skb->protocol == htons(ETH_P_IP)) ++ || ((dev->features & NETIF_F_IPV6_CSUM) ++ && skb->protocol == htons(ETH_P_IPV6))) + if (skb_checksum_help(skb)) + goto out_kfree_skb; + } +@@ -2016,12 +2095,13 @@ + * There may not be any more sk_buffs coming right now, so push + * any pending DMA copies to hardware + */ +- if (net_dma_client) { +- struct dma_chan *chan; +- rcu_read_lock(); +- list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) ++ if (!cpus_empty(net_dma.channel_mask)) { ++ int chan_idx; ++ for_each_cpu_mask(chan_idx, net_dma.channel_mask) { ++ struct dma_chan *chan = net_dma.channels[chan_idx]; ++ if (chan) + dma_async_memcpy_issue_pending(chan); +- rcu_read_unlock(); ++ } + } + #endif + return; +@@ -2063,7 +2143,7 @@ + * match. --pb + */ + +-static int dev_ifname(struct ifreq __user *arg) ++static int dev_ifname(struct net *net, struct ifreq __user *arg) + { + struct net_device *dev; + struct ifreq ifr; +@@ -2076,7 +2156,7 @@ + return -EFAULT; + + read_lock(&dev_base_lock); +- dev = __dev_get_by_index(ifr.ifr_ifindex); ++ dev = __dev_get_by_index(net, ifr.ifr_ifindex); + if (!dev) { + read_unlock(&dev_base_lock); + return -ENODEV; +@@ -2096,7 +2176,7 @@ + * Thus we will need a 'compatibility mode'. + */ + +-static int dev_ifconf(char __user *arg) ++static int dev_ifconf(struct net *net, char __user *arg) + { + struct ifconf ifc; + struct net_device *dev; +@@ -2120,7 +2200,7 @@ + */ + + total = 0; +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if (!nx_dev_visible(current->nx_info, dev)) + continue; + for (i = 0; i < NPROTO; i++) { +@@ -2156,6 +2236,7 @@ + */ + void *dev_seq_start(struct seq_file *seq, loff_t *pos) + { ++ struct net *net = seq->private; + loff_t off; + struct net_device *dev; + +@@ -2164,7 +2245,7 @@ + return SEQ_START_TOKEN; + + off = 1; +- for_each_netdev(dev) ++ for_each_netdev(net, dev) + if (off++ == *pos) + return dev; + +@@ -2173,9 +2254,10 @@ + + void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ struct net *net = seq->private; + ++*pos; + return v == SEQ_START_TOKEN ? 
+- first_net_device() : next_net_device((struct net_device *)v); ++ first_net_device(net) : next_net_device((struct net_device *)v); + } + + void dev_seq_stop(struct seq_file *seq, void *v) +@@ -2274,7 +2356,22 @@ + + static int dev_seq_open(struct inode *inode, struct file *file) + { +- return seq_open(file, &dev_seq_ops); ++ struct seq_file *seq; ++ int res; ++ res = seq_open(file, &dev_seq_ops); ++ if (!res) { ++ seq = file->private_data; ++ seq->private = get_net(PROC_NET(inode)); ++ } ++ return res; ++} ++ ++static int dev_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct net *net = seq->private; ++ put_net(net); ++ return seq_release(inode, file); + } + + static const struct file_operations dev_seq_fops = { +@@ -2282,7 +2379,7 @@ + .open = dev_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release, ++ .release = dev_seq_release, + }; + + static const struct seq_operations softnet_seq_ops = { +@@ -2434,30 +2531,49 @@ + }; + + +-static int __init dev_proc_init(void) ++static int dev_proc_net_init(struct net *net) + { + int rc = -ENOMEM; + +- if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) ++ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops)) + goto out; +- if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) ++ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops)) + goto out_dev; +- if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops)) +- goto out_dev2; +- +- if (wext_proc_init()) ++ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops)) + goto out_softnet; ++ ++ if (wext_proc_init(net)) ++ goto out_ptype; + rc = 0; + out: + return rc; ++out_ptype: ++ proc_net_remove(net, "ptype"); + out_softnet: +- proc_net_remove("ptype"); +-out_dev2: +- proc_net_remove("softnet_stat"); ++ proc_net_remove(net, "softnet_stat"); + out_dev: +- proc_net_remove("dev"); ++ proc_net_remove(net, "dev"); + goto out; + } ++ ++static void dev_proc_net_exit(struct net *net) ++{ ++ wext_proc_exit(net); ++ ++ proc_net_remove(net, "ptype"); ++ proc_net_remove(net, "softnet_stat"); ++ proc_net_remove(net, "dev"); ++} ++ ++static struct pernet_operations dev_proc_ops = { ++ .init = dev_proc_net_init, ++ .exit = dev_proc_net_exit, ++}; ++ ++static int __init dev_proc_init(void) ++{ ++ return register_pernet_subsys(&dev_proc_ops); ++} + #else + #define dev_proc_init() 0 + #endif /* CONFIG_PROC_FS */ +@@ -2691,10 +2807,10 @@ + /* + * Perform the SIOCxIFxxx calls. + */ +-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ++static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) + { + int err; +- struct net_device *dev = __dev_get_by_name(ifr->ifr_name); ++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + + if (!dev) + return -ENODEV; +@@ -2847,7 +2963,7 @@ + * positive or a negative errno code on error. 
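dev_proc_net_init() above is a textbook goto ladder: each label undoes exactly the steps that succeeded before the failure, in reverse order, then jumps back to the common return. A standalone user-space sketch of the same shape (malloc()/free() standing in for proc_net_fops_create()/proc_net_remove()):

#include <stdio.h>
#include <stdlib.h>

static int setup(void)
{
	int rc = -1;
	char *dev = NULL, *softnet = NULL, *ptype = NULL;

	if (!(dev = malloc(16)))
		goto out;
	if (!(softnet = malloc(16)))
		goto out_dev;
	if (!(ptype = malloc(16)))
		goto out_softnet;
	rc = 0;		/* success: everything intentionally stays allocated */
out:
	return rc;
out_softnet:
	free(softnet);
out_dev:
	free(dev);
	goto out;
}

int main(void)
{
	printf("setup: %d\n", setup());
	return 0;
}

On success nothing is released, mirroring how the proc entries stay registered until dev_proc_net_exit() runs.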
+ */ + +-int dev_ioctl(unsigned int cmd, void __user *arg) ++int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) + { + struct ifreq ifr; + int ret; +@@ -2860,12 +2976,12 @@ + + if (cmd == SIOCGIFCONF) { + rtnl_lock(); +- ret = dev_ifconf((char __user *) arg); ++ ret = dev_ifconf(net, (char __user *) arg); + rtnl_unlock(); + return ret; + } + if (cmd == SIOCGIFNAME) +- return dev_ifname((struct ifreq __user *)arg); ++ return dev_ifname(net, (struct ifreq __user *)arg); + + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; +@@ -2895,9 +3011,9 @@ + case SIOCGIFMAP: + case SIOCGIFINDEX: + case SIOCGIFTXQLEN: +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + read_lock(&dev_base_lock); +- ret = dev_ifsioc(&ifr, cmd); ++ ret = dev_ifsioc(net, &ifr, cmd); + read_unlock(&dev_base_lock); + if (!ret) { + if (colon) +@@ -2909,9 +3025,9 @@ + return ret; + + case SIOCETHTOOL: +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + rtnl_lock(); +- ret = dev_ethtool(&ifr); ++ ret = dev_ethtool(net, &ifr); + rtnl_unlock(); + if (!ret) { + if (colon) +@@ -2933,9 +3049,9 @@ + case SIOCSIFNAME: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + rtnl_lock(); +- ret = dev_ifsioc(&ifr, cmd); ++ ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret) { + if (colon) +@@ -2974,9 +3090,9 @@ + /* fall through */ + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + rtnl_lock(); +- ret = dev_ifsioc(&ifr, cmd); ++ ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + return ret; + +@@ -2996,9 +3112,9 @@ + if (cmd == SIOCWANDEV || + (cmd >= SIOCDEVPRIVATE && + cmd <= SIOCDEVPRIVATE + 15)) { +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + rtnl_lock(); +- ret = dev_ifsioc(&ifr, cmd); ++ ret = dev_ifsioc(net, &ifr, cmd); + rtnl_unlock(); + if (!ret && copy_to_user(arg, &ifr, + sizeof(struct ifreq))) +@@ -3007,7 +3123,7 @@ + } + /* Take care of Wireless Extensions */ + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) +- return wext_handle_ioctl(&ifr, cmd, arg); ++ return wext_handle_ioctl(net, &ifr, cmd, arg); + return -EINVAL; + } + } +@@ -3020,19 +3136,17 @@ + * number. The caller must hold the rtnl semaphore or the + * dev_base_lock to be sure it remains unique. + */ +-static int dev_new_index(void) ++static int dev_new_index(struct net *net) + { + static int ifindex; + for (;;) { + if (++ifindex <= 0) + ifindex = 1; +- if (!__dev_get_by_index(ifindex)) ++ if (!__dev_get_by_index(net, ifindex)) + return ifindex; + } + } + +-static int dev_boot_phase = 1; +- + /* Delayed registration/unregisteration */ + static DEFINE_SPINLOCK(net_todo_list_lock); + static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); +@@ -3066,6 +3180,7 @@ + struct hlist_head *head; + struct hlist_node *p; + int ret; ++ struct net *net; + + BUG_ON(dev_boot_phase); + ASSERT_RTNL(); +@@ -3074,6 +3189,8 @@ + + /* When net_device's are persistent, this will be fatal. 
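dev_new_index() above hands out monotonically increasing ifindex values; when the counter wraps past INT_MAX (the kernel is built with wrapping signed-overflow semantics) the <= 0 check restarts it at 1, and the loop keeps probing until it finds an index with no registered device. A standalone sketch, with in_use() standing in for __dev_get_by_index():

#include <stdio.h>

static int in_use(int ifindex)
{
	return ifindex == 1 || ifindex == 2;	/* pretend 1 and 2 are taken */
}

static int new_index(void)
{
	static int ifindex;
	for (;;) {
		if (++ifindex <= 0)	/* skip 0 and negatives after a wrap */
			ifindex = 1;
		if (!in_use(ifindex))
			return ifindex;
	}
}

int main(void)
{
	printf("%d\n", new_index());	/* 3: indices 1 and 2 are taken */
	printf("%d\n", new_index());	/* 4 */
	return 0;
}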
*/ + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); ++ BUG_ON(!dev->nd_net); ++ net = dev->nd_net; + + spin_lock_init(&dev->queue_lock); + spin_lock_init(&dev->_xmit_lock); +@@ -3098,12 +3215,12 @@ + goto out; + } + +- dev->ifindex = dev_new_index(); ++ dev->ifindex = dev_new_index(net); + if (dev->iflink == -1) + dev->iflink = dev->ifindex; + + /* Check for existence of name */ +- head = dev_name_hash(dev->name); ++ head = dev_name_hash(net, dev->name); + hlist_for_each(p, head) { + struct net_device *d + = hlist_entry(p, struct net_device, name_hlist); +@@ -3113,6 +3230,22 @@ + } + } + ++ /* Fix illegal checksum combinations */ ++ if ((dev->features & NETIF_F_HW_CSUM) && ++ (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { ++ printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n", ++ dev->name); ++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); ++ } ++ ++ if ((dev->features & NETIF_F_NO_CSUM) && ++ (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) { ++ printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n", ++ dev->name); ++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); ++ } ++ ++ + /* Fix illegal SG+CSUM combinations. */ + if ((dev->features & NETIF_F_SG) && + !(dev->features & NETIF_F_ALL_CSUM)) { +@@ -3164,12 +3297,8 @@ + set_bit(__LINK_STATE_PRESENT, &dev->state); + + dev_init_scheduler(dev); +- write_lock_bh(&dev_base_lock); +- list_add_tail(&dev->dev_list, &dev_base_head); +- hlist_add_head(&dev->name_hlist, head); +- hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); + dev_hold(dev); +- write_unlock_bh(&dev_base_lock); ++ list_netdevice(dev); + + /* Notify protocols, that a new device appeared. */ + raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); +@@ -3379,6 +3508,7 @@ + dev = (struct net_device *) + (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); + dev->padded = (char *)dev - (char *)p; ++ dev->nd_net = &init_net; + + if (sizeof_priv) + dev->priv = netdev_priv(dev); +@@ -3457,11 +3587,7 @@ + dev_close(dev); + + /* And unlink it from device chain. */ +- write_lock_bh(&dev_base_lock); +- list_del(&dev->dev_list); +- hlist_del(&dev->name_hlist); +- hlist_del(&dev->index_hlist); +- write_unlock_bh(&dev_base_lock); ++ unlist_netdevice(dev); + + dev->reg_state = NETREG_UNREGISTERING; + +@@ -3519,6 +3645,122 @@ + + EXPORT_SYMBOL(unregister_netdev); + ++/** ++ * dev_change_net_namespace - move device to different nethost namespace ++ * @dev: device ++ * @net: network namespace ++ * @pat: If not NULL name pattern to try if the current device name ++ * is already taken in the destination network namespace. ++ * ++ * This function shuts down a device interface and moves it ++ * to a new network namespace. On success 0 is returned, on ++ * a failure a netagive errno code is returned. ++ * ++ * Callers must hold the rtnl semaphore. ++ */ ++ ++int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) ++{ ++ char buf[IFNAMSIZ]; ++ const char *destname; ++ int err; ++ ++ ASSERT_RTNL(); ++ ++ /* Don't allow namespace local devices to be moved. */ ++ err = -EINVAL; ++ if (dev->features & NETIF_F_NETNS_LOCAL) ++ goto out; ++ ++ /* Ensure the device has been registrered */ ++ err = -EINVAL; ++ if (dev->reg_state != NETREG_REGISTERED) ++ goto out; ++ ++ /* Get out if there is nothing todo */ ++ err = 0; ++ if (dev->nd_net == net) ++ goto out; ++ ++ /* Pick the destination device name, and ensure ++ * we can use it in the destination network namespace. 
++ */ ++ err = -EEXIST; ++ destname = dev->name; ++ if (__dev_get_by_name(net, destname)) { ++ /* We get here if we can't use the current device name */ ++ if (!pat) ++ goto out; ++ if (!dev_valid_name(pat)) ++ goto out; ++ if (strchr(pat, '%')) { ++ if (__dev_alloc_name(net, pat, buf) < 0) ++ goto out; ++ destname = buf; ++ } else ++ destname = pat; ++ if (__dev_get_by_name(net, destname)) ++ goto out; ++ } ++ ++ /* ++ * And now a mini version of register_netdevice unregister_netdevice. ++ */ ++ ++ /* If device is running close it first. */ ++ if (dev->flags & IFF_UP) ++ dev_close(dev); ++ ++ /* And unlink it from device chain */ ++ err = -ENODEV; ++ unlist_netdevice(dev); ++ ++ synchronize_net(); ++ ++ /* Shutdown queueing discipline. */ ++ dev_shutdown(dev); ++ ++ /* Notify protocols, that we are about to destroy ++ this device. They should clean all the things. ++ */ ++ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); ++ ++ /* ++ * Flush the multicast chain ++ */ ++ dev_mc_discard(dev); ++ ++ /* Actually switch the network namespace */ ++ dev->nd_net = net; ++ ++ /* Assign the new device name */ ++ if (destname != dev->name) ++ strcpy(dev->name, destname); ++ ++ /* If there is an ifindex conflict assign a new one */ ++ if (__dev_get_by_index(net, dev->ifindex)) { ++ int iflink = (dev->iflink == dev->ifindex); ++ dev->ifindex = dev_new_index(net); ++ if (iflink) ++ dev->iflink = dev->ifindex; ++ } ++ ++ /* Fixup sysfs */ ++ err = device_rename(&dev->dev, dev->name); ++ BUG_ON(err); ++ ++ /* Add the device back in the hashes */ ++ list_netdevice(dev); ++ ++ /* Notify protocols, that a new device appeared. */ ++ call_netdevice_notifiers(NETDEV_REGISTER, dev); ++ ++ synchronize_net(); ++ err = 0; ++out: ++ return err; ++} ++ + static int dev_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *ocpu) +@@ -3569,12 +3811,13 @@ + * This is called when the number of channels allocated to the net_dma_client + * changes. The net_dma_client tries to have one DMA channel per CPU. + */ +-static void net_dma_rebalance(void) ++ ++static void net_dma_rebalance(struct net_dma *net_dma) + { +- unsigned int cpu, i, n; ++ unsigned int cpu, i, n, chan_idx; + struct dma_chan *chan; + +- if (net_dma_count == 0) { ++ if (cpus_empty(net_dma->channel_mask)) { + for_each_online_cpu(cpu) + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); + return; +@@ -3583,10 +3826,12 @@ + i = 0; + cpu = first_cpu(cpu_online_map); + +- rcu_read_lock(); +- list_for_each_entry(chan, &net_dma_client->channels, client_node) { +- n = ((num_online_cpus() / net_dma_count) +- + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); ++ for_each_cpu_mask(chan_idx, net_dma->channel_mask) { ++ chan = net_dma->channels[chan_idx]; ++ ++ n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) ++ + (i < (num_online_cpus() % ++ cpus_weight(net_dma->channel_mask)) ? 
1 : 0)); + + while(n) { + per_cpu(softnet_data, cpu).net_dma = chan; +@@ -3595,7 +3840,6 @@ + } + i++; + } +- rcu_read_unlock(); + } + + /** +@@ -3604,23 +3848,53 @@ + * @chan: DMA channel for the event + * @event: event type + */ +-static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, +- enum dma_event event) +-{ +- spin_lock(&net_dma_event_lock); +- switch (event) { +- case DMA_RESOURCE_ADDED: +- net_dma_count++; +- net_dma_rebalance(); ++static enum dma_state_client ++netdev_dma_event(struct dma_client *client, struct dma_chan *chan, ++ enum dma_state state) ++{ ++ int i, found = 0, pos = -1; ++ struct net_dma *net_dma = ++ container_of(client, struct net_dma, client); ++ enum dma_state_client ack = DMA_DUP; /* default: take no action */ ++ ++ spin_lock(&net_dma->lock); ++ switch (state) { ++ case DMA_RESOURCE_AVAILABLE: ++ for (i = 0; i < NR_CPUS; i++) ++ if (net_dma->channels[i] == chan) { ++ found = 1; ++ break; ++ } else if (net_dma->channels[i] == NULL && pos < 0) ++ pos = i; ++ ++ if (!found && pos >= 0) { ++ ack = DMA_ACK; ++ net_dma->channels[pos] = chan; ++ cpu_set(pos, net_dma->channel_mask); ++ net_dma_rebalance(net_dma); ++ } + break; + case DMA_RESOURCE_REMOVED: +- net_dma_count--; +- net_dma_rebalance(); ++ for (i = 0; i < NR_CPUS; i++) ++ if (net_dma->channels[i] == chan) { ++ found = 1; ++ pos = i; ++ break; ++ } ++ ++ if (found) { ++ ack = DMA_ACK; ++ cpu_clear(pos, net_dma->channel_mask); ++ net_dma->channels[i] = NULL; ++ net_dma_rebalance(net_dma); ++ } + break; + default: + break; + } +- spin_unlock(&net_dma_event_lock); ++ spin_unlock(&net_dma->lock); ++ ++ return ack; + } + + /** +@@ -3628,12 +3902,10 @@ + */ + static int __init netdev_dma_register(void) + { +- spin_lock_init(&net_dma_event_lock); +- net_dma_client = dma_async_client_register(netdev_dma_event); +- if (net_dma_client == NULL) +- return -ENOMEM; +- +- dma_async_client_chan_request(net_dma_client, num_online_cpus()); ++ spin_lock_init(&net_dma.lock); ++ dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); ++ dma_async_client_register(&net_dma.client); ++ dma_async_client_chan_request(&net_dma.client); + return 0; + } + +@@ -3679,6 +3951,75 @@ + } + EXPORT_SYMBOL(netdev_compute_features); + ++/* Initialize per network namespace state */ ++static int netdev_init(struct net *net) ++{ ++ int i; ++ INIT_LIST_HEAD(&net->dev_base_head); ++ rwlock_init(&dev_base_lock); ++ ++ net->dev_name_head = kmalloc( ++ sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL); ++ if (!net->dev_name_head) ++ return -ENOMEM; ++ ++ net->dev_index_head = kmalloc( ++ sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL); ++ if (!net->dev_index_head) { ++ kfree(net->dev_name_head); ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < NETDEV_HASHENTRIES; i++) ++ INIT_HLIST_HEAD(&net->dev_name_head[i]); ++ ++ for (i = 0; i < NETDEV_HASHENTRIES; i++) ++ INIT_HLIST_HEAD(&net->dev_index_head[i]); ++ ++ return 0; ++} ++ ++static void netdev_exit(struct net *net) ++{ ++ kfree(net->dev_name_head); ++ kfree(net->dev_index_head); ++} ++ ++static struct pernet_operations netdev_net_ops = { ++ .init = netdev_init, ++ .exit = netdev_exit, ++}; ++ ++static void default_device_exit(struct net *net) ++{ ++ struct net_device *dev, *next; ++ /* ++ * Push all migratable of the network devices back to the ++ * initial network namespace ++ */ ++ rtnl_lock(); ++ for_each_netdev_safe(net, dev, next) { ++ int err; ++ ++ /* Ignore unmoveable devices (i.e. 
loopback) */ ++ if (dev->features & NETIF_F_NETNS_LOCAL) ++ continue; ++ ++ /* Push remaing network devices to init_net */ ++ err = dev_change_net_namespace(dev, &init_net, "dev%d"); ++ if (err) { ++ printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n", ++ __func__, dev->name, err); ++ unregister_netdevice(dev); ++ } ++ } ++ rtnl_unlock(); ++} ++ ++static struct pernet_operations default_device_ops = { ++ .exit = default_device_exit, ++}; ++ + /* + * Initialize the DEV module. At boot time this walks the device list and + * unhooks any devices that fail to initialise (normally hardware not +@@ -3706,11 +4047,11 @@ + for (i = 0; i < 16; i++) + INIT_LIST_HEAD(&ptype_base[i]); + +- for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) +- INIT_HLIST_HEAD(&dev_name_head[i]); ++ if (register_pernet_subsys(&netdev_net_ops)) ++ goto out; + +- for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) +- INIT_HLIST_HEAD(&dev_index_head[i]); ++ if (register_pernet_device(&default_device_ops)) ++ goto out; + + /* + * Initialise the packet receive queues. +diff -Nurb linux-2.6.22-570/net/core/dev_mcast.c linux-2.6.22-591/net/core/dev_mcast.c +--- linux-2.6.22-570/net/core/dev_mcast.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/dev_mcast.c 2007-12-21 15:36:15.000000000 -0500 +@@ -46,6 +46,7 @@ + #include + #include + #include ++#include + + + /* +@@ -219,11 +220,12 @@ + #ifdef CONFIG_PROC_FS + static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos) + { ++ struct net *net = seq->private; + struct net_device *dev; + loff_t off = 0; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if (off++ == *pos) + return dev; + } +@@ -272,7 +274,22 @@ + + static int dev_mc_seq_open(struct inode *inode, struct file *file) + { +- return seq_open(file, &dev_mc_seq_ops); ++ struct seq_file *seq; ++ int res; ++ res = seq_open(file, &dev_mc_seq_ops); ++ if (!res) { ++ seq = file->private_data; ++ seq->private = get_net(PROC_NET(inode)); ++ } ++ return res; ++} ++ ++static int dev_mc_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct net *net = seq->private; ++ put_net(net); ++ return seq_release(inode, file); + } + + static const struct file_operations dev_mc_seq_fops = { +@@ -280,14 +297,31 @@ + .open = dev_mc_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release, ++ .release = dev_mc_seq_release, + }; + + #endif + ++static int dev_mc_net_init(struct net *net) ++{ ++ if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops)) ++ return -ENOMEM; ++ return 0; ++} ++ ++static void dev_mc_net_exit(struct net *net) ++{ ++ proc_net_remove(net, "dev_mcast"); ++} ++ ++static struct pernet_operations dev_mc_net_ops = { ++ .init = dev_mc_net_init, ++ .exit = dev_mc_net_exit, ++}; ++ + void __init dev_mcast_init(void) + { +- proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops); ++ register_pernet_subsys(&dev_mc_net_ops); + } + + EXPORT_SYMBOL(dev_mc_add); +diff -Nurb linux-2.6.22-570/net/core/dst.c linux-2.6.22-591/net/core/dst.c +--- linux-2.6.22-570/net/core/dst.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/dst.c 2007-12-21 15:36:15.000000000 -0500 +@@ -15,7 +15,9 @@ + #include + #include + #include ++#include + ++#include + #include + + /* Locking strategy: +@@ -236,13 +238,14 @@ + if (!unregister) { + dst->input = dst->output = dst_discard; + } else { +- dst->dev = &loopback_dev; +- dev_hold(&loopback_dev); ++ struct net *net = dev->nd_net; ++ 
dst->dev = &net->loopback_dev; ++ dev_hold(dst->dev); + dev_put(dev); + if (dst->neighbour && dst->neighbour->dev == dev) { +- dst->neighbour->dev = &loopback_dev; ++ dst->neighbour->dev = &net->loopback_dev; + dev_put(dev); +- dev_hold(&loopback_dev); ++ dev_hold(dst->neighbour->dev); + } + } + } +@@ -252,6 +255,9 @@ + struct net_device *dev = ptr; + struct dst_entry *dst; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch (event) { + case NETDEV_UNREGISTER: + case NETDEV_DOWN: +diff -Nurb linux-2.6.22-570/net/core/ethtool.c linux-2.6.22-591/net/core/ethtool.c +--- linux-2.6.22-570/net/core/ethtool.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/ethtool.c 2007-12-21 15:36:15.000000000 -0500 +@@ -798,9 +798,9 @@ + + /* The main entry point in this file. Called from net/core/dev.c */ + +-int dev_ethtool(struct ifreq *ifr) ++int dev_ethtool(struct net *net, struct ifreq *ifr) + { +- struct net_device *dev = __dev_get_by_name(ifr->ifr_name); ++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + void __user *useraddr = ifr->ifr_data; + u32 ethcmd; + int rc; +diff -Nurb linux-2.6.22-570/net/core/fib_rules.c linux-2.6.22-591/net/core/fib_rules.c +--- linux-2.6.22-570/net/core/fib_rules.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/fib_rules.c 2007-12-21 15:36:15.000000000 -0500 +@@ -11,21 +11,20 @@ + #include + #include + #include ++#include ++#include + #include + +-static LIST_HEAD(rules_ops); +-static DEFINE_SPINLOCK(rules_mod_lock); +- +-static void notify_rule_change(int event, struct fib_rule *rule, ++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid); + +-static struct fib_rules_ops *lookup_rules_ops(int family) ++static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family) + { + struct fib_rules_ops *ops; + + rcu_read_lock(); +- list_for_each_entry_rcu(ops, &rules_ops, list) { ++ list_for_each_entry_rcu(ops, &net->rules_ops, list) { + if (ops->family == family) { + if (!try_module_get(ops->owner)) + ops = NULL; +@@ -47,10 +46,10 @@ + static void flush_route_cache(struct fib_rules_ops *ops) + { + if (ops->flush_cache) +- ops->flush_cache(); ++ ops->flush_cache(ops); + } + +-int fib_rules_register(struct fib_rules_ops *ops) ++int fib_rules_register(struct net *net, struct fib_rules_ops *ops) + { + int err = -EEXIST; + struct fib_rules_ops *o; +@@ -63,15 +62,16 @@ + ops->action == NULL) + return -EINVAL; + +- spin_lock(&rules_mod_lock); +- list_for_each_entry(o, &rules_ops, list) ++ spin_lock(&net->rules_mod_lock); ++ list_for_each_entry(o, &net->rules_ops, list) + if (ops->family == o->family) + goto errout; + +- list_add_tail_rcu(&ops->list, &rules_ops); ++ hold_net(net); ++ list_add_tail_rcu(&ops->list, &net->rules_ops); + err = 0; + errout: +- spin_unlock(&rules_mod_lock); ++ spin_unlock(&net->rules_mod_lock); + + return err; + } +@@ -88,13 +88,13 @@ + } + } + +-int fib_rules_unregister(struct fib_rules_ops *ops) ++int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops) + { + int err = 0; + struct fib_rules_ops *o; + +- spin_lock(&rules_mod_lock); +- list_for_each_entry(o, &rules_ops, list) { ++ spin_lock(&net->rules_mod_lock); ++ list_for_each_entry(o, &net->rules_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + cleanup_ops(ops); +@@ -104,9 +104,11 @@ + + err = -ENOENT; + out: +- spin_unlock(&rules_mod_lock); ++ spin_unlock(&net->rules_mod_lock); + + synchronize_rcu(); ++ if (!err) 
++ release_net(net); + + return err; + } +@@ -197,6 +199,7 @@ + + static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *r, *last = NULL; +@@ -206,7 +209,7 @@ + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + +- ops = lookup_rules_ops(frh->family); ++ ops = lookup_rules_ops(net, frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; +@@ -234,7 +237,7 @@ + + rule->ifindex = -1; + nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); +- dev = __dev_get_by_name(rule->ifname); ++ dev = __dev_get_by_name(net, rule->ifname); + if (dev) + rule->ifindex = dev->ifindex; + } +@@ -256,7 +259,7 @@ + rule->table = frh_get_table(frh, tb); + + if (!rule->pref && ops->default_pref) +- rule->pref = ops->default_pref(); ++ rule->pref = ops->default_pref(ops); + + err = -EINVAL; + if (tb[FRA_GOTO]) { +@@ -319,7 +322,7 @@ + else + list_add_rcu(&rule->list, ops->rules_list); + +- notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); ++ notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + flush_route_cache(ops); + rules_ops_put(ops); + return 0; +@@ -333,6 +336,7 @@ + + static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *tmp; +@@ -342,7 +346,7 @@ + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + +- ops = lookup_rules_ops(frh->family); ++ ops = lookup_rules_ops(net, frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; +@@ -408,7 +412,7 @@ + } + + synchronize_rcu(); +- notify_rule_change(RTM_DELRULE, rule, ops, nlh, ++ notify_rule_change(net, RTM_DELRULE, rule, ops, nlh, + NETLINK_CB(skb).pid); + fib_rule_put(rule); + flush_route_cache(ops); +@@ -514,13 +518,17 @@ + + static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + struct fib_rules_ops *ops; + int idx = 0, family; + ++ if (net != &init_net) ++ return -EINVAL; ++ + family = rtnl_msg_family(cb->nlh); + if (family != AF_UNSPEC) { + /* Protocol specific dump request */ +- ops = lookup_rules_ops(family); ++ ops = lookup_rules_ops(net, family); + if (ops == NULL) + return -EAFNOSUPPORT; + +@@ -528,7 +536,7 @@ + } + + rcu_read_lock(); +- list_for_each_entry_rcu(ops, &rules_ops, list) { ++ list_for_each_entry_rcu(ops, &net->rules_ops, list) { + if (idx < cb->args[0] || !try_module_get(ops->owner)) + goto skip; + +@@ -545,7 +553,7 @@ + return skb->len; + } + +-static void notify_rule_change(int event, struct fib_rule *rule, ++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule, + struct fib_rules_ops *ops, struct nlmsghdr *nlh, + u32 pid) + { +@@ -563,10 +571,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); ++ err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + errout: + if (err < 0) +- rtnl_set_sk_err(ops->nlgroup, err); ++ rtnl_set_sk_err(net, ops->nlgroup, err); + } + + static void attach_rules(struct list_head *rules, struct net_device *dev) +@@ -594,19 +602,23 @@ + void *ptr) + { + struct net_device *dev = ptr; ++ struct net *net = dev->nd_net; + struct fib_rules_ops *ops; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + 
ASSERT_RTNL(); + rcu_read_lock(); + + switch (event) { + case NETDEV_REGISTER: +- list_for_each_entry(ops, &rules_ops, list) ++ list_for_each_entry(ops, &net->rules_ops, list) + attach_rules(ops->rules_list, dev); + break; + + case NETDEV_UNREGISTER: +- list_for_each_entry(ops, &rules_ops, list) ++ list_for_each_entry(ops, &net->rules_ops, list) + detach_rules(ops->rules_list, dev); + break; + } +@@ -620,13 +632,28 @@ + .notifier_call = fib_rules_event, + }; + ++static int fib_rules_net_init(struct net *net) ++{ ++ INIT_LIST_HEAD(&net->rules_ops); ++ spin_lock_init(&net->rules_mod_lock); ++ return 0; ++} ++ ++static struct pernet_operations fib_rules_net_ops = { ++ .init = fib_rules_net_init, ++}; ++ + static int __init fib_rules_init(void) + { ++ int ret; + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule); + +- return register_netdevice_notifier(&fib_rules_notifier); ++ ret = register_pernet_subsys(&fib_rules_net_ops); ++ if (!ret) ++ ret = register_netdevice_notifier(&fib_rules_notifier); ++ return ret; + } + + subsys_initcall(fib_rules_init); +diff -Nurb linux-2.6.22-570/net/core/neighbour.c linux-2.6.22-591/net/core/neighbour.c +--- linux-2.6.22-570/net/core/neighbour.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/neighbour.c 2007-12-21 15:36:15.000000000 -0500 +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + + #define NEIGH_DEBUG 1 + +@@ -361,7 +362,7 @@ + return n; + } + +-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) ++struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net * net, const void *pkey) + { + struct neighbour *n; + int key_len = tbl->key_len; +@@ -371,7 +372,8 @@ + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { +- if (!memcmp(n->primary_key, pkey, key_len)) { ++ if (!memcmp(n->primary_key, pkey, key_len) && ++ (net == n->dev->nd_net)) { + neigh_hold(n); + NEIGH_CACHE_STAT_INC(tbl, hits); + break; +@@ -449,7 +451,8 @@ + goto out; + } + +-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, ++struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ++ struct net * net, const void *pkey, + struct net_device *dev, int creat) + { + struct pneigh_entry *n; +@@ -465,6 +468,7 @@ + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->key, pkey, key_len) && ++ (n->net == net) && + (n->dev == dev || !n->dev)) { + read_unlock_bh(&tbl->lock); + goto out; +@@ -479,6 +483,7 @@ + if (!n) + goto out; + ++ n->net = hold_net(net); + memcpy(n->key, pkey, key_len); + n->dev = dev; + if (dev) +@@ -501,7 +506,7 @@ + } + + +-int pneigh_delete(struct neigh_table *tbl, const void *pkey, ++int pneigh_delete(struct neigh_table *tbl, struct net * net, const void *pkey, + struct net_device *dev) + { + struct pneigh_entry *n, **np; +@@ -516,13 +521,15 @@ + write_lock_bh(&tbl->lock); + for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; + np = &n->next) { +- if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { ++ if (!memcmp(n->key, pkey, key_len) && n->dev == dev && ++ (n->net == net)) { + *np = n->next; + write_unlock_bh(&tbl->lock); + if (tbl->pdestructor) + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); ++ release_net(n->net); + kfree(n); + return 0; + } +@@ -545,6 +552,7 @@ + tbl->pdestructor(n); + if (n->dev) + dev_put(n->dev); ++ 
release_net(n->net); + kfree(n); + continue; + } +@@ -1266,12 +1274,37 @@ + spin_unlock(&tbl->proxy_queue.lock); + } + ++static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, ++ struct net * net, int ifindex) ++{ ++ struct neigh_parms *p; ++ ++ for (p = &tbl->parms; p; p = p->next) { ++ if (p->net != net) ++ continue; ++ if ((p->dev && p->dev->ifindex == ifindex) || ++ (!p->dev && !ifindex)) ++ return p; ++ } ++ ++ return NULL; ++} + + struct neigh_parms *neigh_parms_alloc(struct net_device *dev, + struct neigh_table *tbl) + { +- struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); ++ struct neigh_parms *p, *ref; ++ struct net * net; ++ ++ net = &init_net; ++ if (dev) ++ net = dev->nd_net; ++ ++ ref = lookup_neigh_params(tbl, net, 0); ++ if (!ref) ++ return NULL; + ++ p = kmemdup(ref, sizeof(*p), GFP_KERNEL); + if (p) { + p->tbl = tbl; + atomic_set(&p->refcnt, 1); +@@ -1287,6 +1320,7 @@ + dev_hold(dev); + p->dev = dev; + } ++ p->net = hold_net(net); + p->sysctl_table = NULL; + write_lock_bh(&tbl->lock); + p->next = tbl->parms.next; +@@ -1296,6 +1330,20 @@ + return p; + } + ++struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl, ++ struct net *net) ++{ ++ struct neigh_parms *parms; ++ if (net != &init_net) { ++ parms = neigh_parms_alloc(NULL, tbl); ++ release_net(parms->net); ++ parms->net = hold_net(net); ++ } ++ else ++ parms = neigh_parms_clone(&tbl->parms); ++ return parms; ++} ++ + static void neigh_rcu_free_parms(struct rcu_head *head) + { + struct neigh_parms *parms = +@@ -1328,6 +1376,7 @@ + + void neigh_parms_destroy(struct neigh_parms *parms) + { ++ release_net(parms->net); + kfree(parms); + } + +@@ -1338,6 +1387,7 @@ + unsigned long now = jiffies; + unsigned long phsize; + ++ tbl->parms.net = &init_net; + atomic_set(&tbl->parms.refcnt, 1); + INIT_RCU_HEAD(&tbl->parms.rcu_head); + tbl->parms.reachable_time = +@@ -1353,7 +1403,7 @@ + panic("cannot create neighbour cache statistics"); + + #ifdef CONFIG_PROC_FS +- tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat); ++ tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat); + if (!tbl->pde) + panic("cannot create neighbour proc dir entry"); + tbl->pde->proc_fops = &neigh_stat_seq_fops; +@@ -1443,6 +1493,7 @@ + + static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ndmsg *ndm; + struct nlattr *dst_attr; + struct neigh_table *tbl; +@@ -1458,7 +1509,7 @@ + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { +- dev = dev_get_by_index(ndm->ndm_ifindex); ++ dev = dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; +@@ -1477,7 +1528,7 @@ + goto out_dev_put; + + if (ndm->ndm_flags & NTF_PROXY) { +- err = pneigh_delete(tbl, nla_data(dst_attr), dev); ++ err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); + goto out_dev_put; + } + +@@ -1508,6 +1559,7 @@ + + static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ndmsg *ndm; + struct nlattr *tb[NDA_MAX+1]; + struct neigh_table *tbl; +@@ -1524,7 +1576,7 @@ + + ndm = nlmsg_data(nlh); + if (ndm->ndm_ifindex) { +- dev = dev_get_by_index(ndm->ndm_ifindex); ++ dev = dev_get_by_index(net, ndm->ndm_ifindex); + if (dev == NULL) { + err = -ENODEV; + goto out; +@@ -1553,7 +1605,7 @@ + struct pneigh_entry *pn; + + err = -ENOBUFS; +- pn = pneigh_lookup(tbl, dst, dev, 1); ++ pn = pneigh_lookup(tbl, net, dst, dev, 1); + if (pn) { + pn->flags = 
ndm->ndm_flags; + err = 0; +@@ -1748,19 +1800,6 @@ + return -EMSGSIZE; + } + +-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, +- int ifindex) +-{ +- struct neigh_parms *p; +- +- for (p = &tbl->parms; p; p = p->next) +- if ((p->dev && p->dev->ifindex == ifindex) || +- (!p->dev && !ifindex)) +- return p; +- +- return NULL; +-} +- + static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { + [NDTA_NAME] = { .type = NLA_STRING }, + [NDTA_THRESH1] = { .type = NLA_U32 }, +@@ -1788,6 +1827,7 @@ + + static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct neigh_table *tbl; + struct ndtmsg *ndtmsg; + struct nlattr *tb[NDTA_MAX+1]; +@@ -1837,7 +1877,7 @@ + if (tbp[NDTPA_IFINDEX]) + ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); + +- p = lookup_neigh_params(tbl, ifindex); ++ p = lookup_neigh_params(tbl, net, ifindex); + if (p == NULL) { + err = -ENOENT; + goto errout_tbl_lock; +@@ -1912,6 +1952,7 @@ + + static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int family, tidx, nidx = 0; + int tbl_skip = cb->args[0]; + int neigh_skip = cb->args[1]; +@@ -1931,8 +1972,11 @@ + NLM_F_MULTI) <= 0) + break; + +- for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { +- if (nidx < neigh_skip) ++ for (nidx = 0, p = tbl->parms.next; p; p = p->next) { ++ if (net != p->net) ++ continue; ++ ++ if (nidx++ < neigh_skip) + continue; + + if (neightbl_fill_param_info(skb, tbl, p, +@@ -2003,6 +2047,7 @@ + static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, + struct netlink_callback *cb) + { ++ struct net * net = skb->sk->sk_net; + struct neighbour *n; + int rc, h, s_h = cb->args[1]; + int idx, s_idx = idx = cb->args[2]; +@@ -2013,8 +2058,12 @@ + continue; + if (h > s_h) + s_idx = 0; +- for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) { +- if (idx < s_idx) ++ for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { ++ int lidx; ++ if (n->dev->nd_net != net) ++ continue; ++ lidx = idx++; ++ if (lidx < s_idx) + continue; + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, +@@ -2109,6 +2158,7 @@ + static struct neighbour *neigh_get_first(struct seq_file *seq) + { + struct neigh_seq_state *state = seq->private; ++ struct net * net = state->net; + struct neigh_table *tbl = state->tbl; + struct neighbour *n = NULL; + int bucket = state->bucket; +@@ -2118,6 +2168,8 @@ + n = tbl->hash_buckets[bucket]; + + while (n) { ++ if (n->dev->nd_net != net) ++ goto next; + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; +@@ -2147,6 +2199,7 @@ + loff_t *pos) + { + struct neigh_seq_state *state = seq->private; ++ struct net * net = state->net; + struct neigh_table *tbl = state->tbl; + + if (state->neigh_sub_iter) { +@@ -2158,6 +2211,8 @@ + + while (1) { + while (n) { ++ if (n->dev->nd_net != net) ++ goto next; + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) +@@ -2204,6 +2259,7 @@ + static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) + { + struct neigh_seq_state *state = seq->private; ++ struct net * net = state->net; + struct neigh_table *tbl = state->tbl; + struct pneigh_entry *pn = NULL; + int bucket = state->bucket; +@@ -2211,6 +2267,8 @@ + state->flags |= NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { + pn = tbl->phash_buckets[bucket]; ++ while (pn && (pn->net != net)) ++ pn = pn->next; + if (pn) + 
break; + } +@@ -2224,6 +2282,7 @@ + loff_t *pos) + { + struct neigh_seq_state *state = seq->private; ++ struct net * net = state->net; + struct neigh_table *tbl = state->tbl; + + pn = pn->next; +@@ -2231,6 +2290,8 @@ + if (++state->bucket > PNEIGH_HASHMASK) + break; + pn = tbl->phash_buckets[state->bucket]; ++ while (pn && (pn->net != net)) ++ pn = pn->next; + if (pn) + break; + } +@@ -2433,6 +2494,7 @@ + + static void __neigh_notify(struct neighbour *n, int type, int flags) + { ++ struct net * net = n->dev->nd_net; + struct sk_buff *skb; + int err = -ENOBUFS; + +@@ -2447,10 +2509,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); ++ err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_NEIGH, err); ++ rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); + } + + void neigh_app_ns(struct neighbour *n) +@@ -2648,6 +2710,7 @@ + + if (!t) + return -ENOBUFS; ++ + t->neigh_vars[0].data = &p->mcast_probes; + t->neigh_vars[1].data = &p->ucast_probes; + t->neigh_vars[2].data = &p->app_probes; +@@ -2716,7 +2779,7 @@ + t->neigh_proto_dir[0].child = t->neigh_neigh_dir; + t->neigh_root_dir[0].child = t->neigh_proto_dir; + +- t->sysctl_header = register_sysctl_table(t->neigh_root_dir); ++ t->sysctl_header = register_net_sysctl_table(p->net, t->neigh_root_dir); + if (!t->sysctl_header) { + err = -ENOBUFS; + goto free_procname; +@@ -2738,7 +2801,7 @@ + if (p->sysctl_table) { + struct neigh_sysctl_table *t = p->sysctl_table; + p->sysctl_table = NULL; +- unregister_sysctl_table(t->sysctl_header); ++ unregister_net_sysctl_table(t->sysctl_header); + kfree(t->neigh_dev[0].procname); + kfree(t); + } +@@ -2771,6 +2834,7 @@ + EXPORT_SYMBOL(neigh_lookup); + EXPORT_SYMBOL(neigh_lookup_nodev); + EXPORT_SYMBOL(neigh_parms_alloc); ++EXPORT_SYMBOL(neigh_parms_alloc_default); + EXPORT_SYMBOL(neigh_parms_release); + EXPORT_SYMBOL(neigh_rand_reach_time); + EXPORT_SYMBOL(neigh_resolve_output); +diff -Nurb linux-2.6.22-570/net/core/net-sysfs.c linux-2.6.22-591/net/core/net-sysfs.c +--- linux-2.6.22-570/net/core/net-sysfs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/net-sysfs.c 2007-12-21 15:36:15.000000000 -0500 +@@ -13,7 +13,9 @@ + #include + #include + #include ++#include + #include ++#include + #include + #include + #include +@@ -29,16 +31,16 @@ + } + + /* use same locking rules as GIF* ioctl's */ +-static ssize_t netdev_show(const struct device *dev, ++static ssize_t netdev_show(const struct device *device, + struct device_attribute *attr, char *buf, + ssize_t (*format)(const struct net_device *, char *)) + { +- struct net_device *net = to_net_dev(dev); ++ struct net_device *dev = to_net_dev(device); + ssize_t ret = -EINVAL; + + read_lock(&dev_base_lock); +- if (dev_isalive(net)) +- ret = (*format)(net, buf); ++ if (dev_isalive(dev)) ++ ret = (*format)(dev, buf); + read_unlock(&dev_base_lock); + + return ret; +@@ -46,9 +48,9 @@ + + /* generate a show function for simple field */ + #define NETDEVICE_SHOW(field, format_string) \ +-static ssize_t format_##field(const struct net_device *net, char *buf) \ ++static ssize_t format_##field(const struct net_device *dev, char *buf) \ + { \ +- return sprintf(buf, format_string, net->field); \ ++ return sprintf(buf, format_string, dev->field); \ + } \ + static ssize_t show_##field(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +@@ -58,11 +60,11 @@ + + + /* use same locking and permission rules as SIF* ioctl's */ +-static ssize_t 
netdev_store(struct device *dev, struct device_attribute *attr, ++static ssize_t netdev_store(struct device *device, struct device_attribute *attr, + const char *buf, size_t len, + int (*set)(struct net_device *, unsigned long)) + { +- struct net_device *net = to_net_dev(dev); ++ struct net_device *dev = to_net_dev(device); + char *endp; + unsigned long new; + int ret = -EINVAL; +@@ -75,8 +77,8 @@ + goto err; + + rtnl_lock(); +- if (dev_isalive(net)) { +- if ((ret = (*set)(net, new)) == 0) ++ if (dev_isalive(dev)) { ++ if ((ret = (*set)(dev, new)) == 0) + ret = len; + } + rtnl_unlock(); +@@ -103,45 +105,45 @@ + return cp - buf; + } + +-static ssize_t show_address(struct device *dev, struct device_attribute *attr, ++static ssize_t show_address(struct device *device, struct device_attribute *attr, + char *buf) + { +- struct net_device *net = to_net_dev(dev); ++ struct net_device *dev = to_net_dev(device); + ssize_t ret = -EINVAL; + + read_lock(&dev_base_lock); +- if (dev_isalive(net)) +- ret = format_addr(buf, net->dev_addr, net->addr_len); ++ if (dev_isalive(dev)) ++ ret = format_addr(buf, dev->dev_addr, dev->addr_len); + read_unlock(&dev_base_lock); + return ret; + } + +-static ssize_t show_broadcast(struct device *dev, ++static ssize_t show_broadcast(struct device *device, + struct device_attribute *attr, char *buf) + { +- struct net_device *net = to_net_dev(dev); +- if (dev_isalive(net)) +- return format_addr(buf, net->broadcast, net->addr_len); ++ struct net_device *dev = to_net_dev(device); ++ if (dev_isalive(dev)) ++ return format_addr(buf, dev->broadcast, dev->addr_len); + return -EINVAL; + } + +-static ssize_t show_carrier(struct device *dev, ++static ssize_t show_carrier(struct device *device, + struct device_attribute *attr, char *buf) + { +- struct net_device *netdev = to_net_dev(dev); +- if (netif_running(netdev)) { +- return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); ++ struct net_device *dev = to_net_dev(device); ++ if (netif_running(dev)) { ++ return sprintf(buf, fmt_dec, !!netif_carrier_ok(dev)); + } + return -EINVAL; + } + +-static ssize_t show_dormant(struct device *dev, ++static ssize_t show_dormant(struct device *device, + struct device_attribute *attr, char *buf) + { +- struct net_device *netdev = to_net_dev(dev); ++ struct net_device *dev = to_net_dev(device); + +- if (netif_running(netdev)) +- return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); ++ if (netif_running(dev)) ++ return sprintf(buf, fmt_dec, !!netif_dormant(dev)); + + return -EINVAL; + } +@@ -156,15 +158,15 @@ + "up" + }; + +-static ssize_t show_operstate(struct device *dev, ++static ssize_t show_operstate(struct device *device, + struct device_attribute *attr, char *buf) + { +- const struct net_device *netdev = to_net_dev(dev); ++ const struct net_device *dev = to_net_dev(device); + unsigned char operstate; + + read_lock(&dev_base_lock); +- operstate = netdev->operstate; +- if (!netif_running(netdev)) ++ operstate = dev->operstate; ++ if (!netif_running(dev)) + operstate = IF_OPER_DOWN; + read_unlock(&dev_base_lock); + +@@ -177,57 +179,57 @@ + /* read-write attributes */ + NETDEVICE_SHOW(mtu, fmt_dec); + +-static int change_mtu(struct net_device *net, unsigned long new_mtu) ++static int change_mtu(struct net_device *dev, unsigned long new_mtu) + { +- return dev_set_mtu(net, (int) new_mtu); ++ return dev_set_mtu(dev, (int) new_mtu); + } + +-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr, ++static ssize_t store_mtu(struct device *device, struct device_attribute *attr, + 
const char *buf, size_t len) + { +- return netdev_store(dev, attr, buf, len, change_mtu); ++ return netdev_store(device, attr, buf, len, change_mtu); + } + + NETDEVICE_SHOW(flags, fmt_hex); + +-static int change_flags(struct net_device *net, unsigned long new_flags) ++static int change_flags(struct net_device *dev, unsigned long new_flags) + { +- return dev_change_flags(net, (unsigned) new_flags); ++ return dev_change_flags(dev, (unsigned) new_flags); + } + +-static ssize_t store_flags(struct device *dev, struct device_attribute *attr, ++static ssize_t store_flags(struct device *device, struct device_attribute *attr, + const char *buf, size_t len) + { +- return netdev_store(dev, attr, buf, len, change_flags); ++ return netdev_store(device, attr, buf, len, change_flags); + } + + NETDEVICE_SHOW(tx_queue_len, fmt_ulong); + +-static int change_tx_queue_len(struct net_device *net, unsigned long new_len) ++static int change_tx_queue_len(struct net_device *dev, unsigned long new_len) + { +- net->tx_queue_len = new_len; ++ dev->tx_queue_len = new_len; + return 0; + } + +-static ssize_t store_tx_queue_len(struct device *dev, ++static ssize_t store_tx_queue_len(struct device *device, + struct device_attribute *attr, + const char *buf, size_t len) + { +- return netdev_store(dev, attr, buf, len, change_tx_queue_len); ++ return netdev_store(device, attr, buf, len, change_tx_queue_len); + } + + NETDEVICE_SHOW(weight, fmt_dec); + +-static int change_weight(struct net_device *net, unsigned long new_weight) ++static int change_weight(struct net_device *dev, unsigned long new_weight) + { +- net->weight = new_weight; ++ dev->weight = new_weight; + return 0; + } + +-static ssize_t store_weight(struct device *dev, struct device_attribute *attr, ++static ssize_t store_weight(struct device *device, struct device_attribute *attr, + const char *buf, size_t len) + { +- return netdev_store(dev, attr, buf, len, change_weight); ++ return netdev_store(device, attr, buf, len, change_weight); + } + + static struct device_attribute net_class_attributes[] = { +@@ -447,6 +449,23 @@ + kfree((char *)dev - dev->padded); + } + ++static const void *net_current_tag(void) ++{ ++ return current->nsproxy->net_ns; ++} ++ ++static const void *net_kobject_tag(struct kobject *kobj) ++{ ++ struct net_device *dev; ++ dev = container_of(kobj, struct net_device, dev.kobj); ++ return dev->nd_net; ++} ++ ++static const struct shadow_dir_operations net_shadow_dir_operations = { ++ .current_tag = net_current_tag, ++ .kobject_tag = net_kobject_tag, ++}; ++ + static struct class net_class = { + .name = "net", + .dev_release = netdev_release, +@@ -454,42 +473,43 @@ + #ifdef CONFIG_HOTPLUG + .dev_uevent = netdev_uevent, + #endif ++ .shadow_ops = &net_shadow_dir_operations, + }; + + /* Delete sysfs entries but hold kobject reference until after all + * netdev references are gone. + */ +-void netdev_unregister_sysfs(struct net_device * net) ++void netdev_unregister_sysfs(struct net_device * dev) + { +- struct device *dev = &(net->dev); ++ struct device *device = &(dev->dev); + +- kobject_get(&dev->kobj); +- device_del(dev); ++ kobject_get(&device->kobj); ++ device_del(device); + } + + /* Create sysfs entries for network device. 
*/ +-int netdev_register_sysfs(struct net_device *net) ++int netdev_register_sysfs(struct net_device *dev) + { +- struct device *dev = &(net->dev); +- struct attribute_group **groups = net->sysfs_groups; ++ struct device *device = &(dev->dev); ++ struct attribute_group **groups = dev->sysfs_groups; + +- device_initialize(dev); +- dev->class = &net_class; +- dev->platform_data = net; +- dev->groups = groups; ++ device_initialize(device); ++ device->class = &net_class; ++ device->platform_data = dev; ++ device->groups = groups; + + BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); +- strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); ++ strlcpy(device->bus_id, dev->name, BUS_ID_SIZE); + +- if (net->get_stats) ++ if (dev->get_stats) + *groups++ = &netstat_group; + + #ifdef CONFIG_WIRELESS_EXT +- if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) ++ if (dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats) + *groups++ = &wireless_group; + #endif + +- return device_add(dev); ++ return device_add(device); + } + + int netdev_sysfs_init(void) +diff -Nurb linux-2.6.22-570/net/core/net_namespace.c linux-2.6.22-591/net/core/net_namespace.c +--- linux-2.6.22-570/net/core/net_namespace.c 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/net/core/net_namespace.c 2007-12-21 15:36:15.000000000 -0500 +@@ -0,0 +1,332 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Our network namespace constructor/destructor lists ++ */ ++ ++static LIST_HEAD(pernet_list); ++static struct list_head *first_device = &pernet_list; ++static DEFINE_MUTEX(net_mutex); ++ ++static DEFINE_MUTEX(net_list_mutex); ++LIST_HEAD(net_namespace_list); ++ ++static struct kmem_cache *net_cachep; ++ ++struct net init_net; ++EXPORT_SYMBOL_GPL(init_net); ++ ++void net_lock(void) ++{ ++ mutex_lock(&net_list_mutex); ++} ++ ++void net_unlock(void) ++{ ++ mutex_unlock(&net_list_mutex); ++} ++ ++static struct net *net_alloc(void) ++{ ++ return kmem_cache_alloc(net_cachep, GFP_KERNEL); ++} ++ ++static void net_free(struct net *net) ++{ ++ if (!net) ++ return; ++ ++ if (unlikely(atomic_read(&net->use_count) != 0)) { ++ printk(KERN_EMERG "network namespace not free! Usage: %d\n", ++ atomic_read(&net->use_count)); ++ return; ++ } ++ ++ kmem_cache_free(net_cachep, net); ++} ++ ++static void cleanup_net(struct work_struct *work) ++{ ++ struct pernet_operations *ops; ++ struct list_head *ptr; ++ struct net *net; ++ ++ net = container_of(work, struct net, work); ++ ++ mutex_lock(&net_mutex); ++ ++ /* Don't let anyone else find us. */ ++ net_lock(); ++ list_del(&net->list); ++ net_unlock(); ++ ++ /* Run all of the network namespace exit methods */ ++ list_for_each_prev(ptr, &pernet_list) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->exit) ++ ops->exit(net); ++ } ++ ++ mutex_unlock(&net_mutex); ++ ++ /* Ensure there are no outstanding rcu callbacks using this ++ * network namespace. ++ */ ++ rcu_barrier(); ++ ++ /* Finally it is safe to free my network namespace structure */ ++ net_free(net); ++} ++ ++ ++void __put_net(struct net *net) ++{ ++ /* Cleanup the network namespace in process context */ ++ INIT_WORK(&net->work, cleanup_net); ++ schedule_work(&net->work); ++} ++EXPORT_SYMBOL_GPL(__put_net); ++ ++/* ++ * setup_net runs the initializers for the network namespace object. 
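cleanup_net() above walks pernet_list with list_for_each_prev() so that exit methods run in the reverse of registration order, the mirror image of the forward walk setup_net() does below. A standalone user-space sketch of that ordering contract (register_ops() and the fixed-size array are illustration only):

#include <stdio.h>

#define MAX_OPS 8

struct ops { const char *name; };

static struct ops *registered[MAX_OPS];
static int n_ops;

static void register_ops(struct ops *o) { registered[n_ops++] = o; }

static void setup_net_sketch(void)
{
	for (int i = 0; i < n_ops; i++)		/* forward, like setup_net() */
		printf("init %s\n", registered[i]->name);
}

static void cleanup_net_sketch(void)
{
	for (int i = n_ops - 1; i >= 0; i--)	/* list_for_each_prev() analogue */
		printf("exit %s\n", registered[i]->name);
}

int main(void)
{
	struct ops proc = { "proc" }, dev = { "dev" };
	register_ops(&proc);
	register_ops(&dev);
	setup_net_sketch();	/* init proc, init dev */
	cleanup_net_sketch();	/* exit dev, exit proc */
	return 0;
}

Later subsystems can therefore rely on earlier ones being alive both during their init and during their exit.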
++ */ ++static int setup_net(struct net *net) ++{ ++ /* Must be called with net_mutex held */ ++ struct pernet_operations *ops; ++ struct list_head *ptr; ++ int error; ++ ++ memset(net, 0, sizeof(struct net)); ++ atomic_set(&net->count, 1); ++ atomic_set(&net->use_count, 0); ++ ++ error = 0; ++ list_for_each(ptr, &pernet_list) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->init) { ++ error = ops->init(net); ++ if (error < 0) ++ goto out_undo; ++ } ++ } ++out: ++ return error; ++out_undo: ++ /* Walk through the list backwards calling the exit functions ++ * for the pernet modules whose init functions did not fail. ++ */ ++ for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) { ++ ops = list_entry(ptr, struct pernet_operations, list); ++ if (ops->exit) ++ ops->exit(net); ++ } ++ goto out; ++} ++ ++struct net *copy_net_ns(unsigned long flags, struct net *old_net) ++{ ++ struct net *new_net = NULL; ++ int err; ++ ++ get_net(old_net); ++ ++ if (!(flags & CLONE_NEWNET)) ++ return old_net; ++ ++ err = -EPERM; ++ if (!capable(CAP_SYS_ADMIN)) ++ goto out; ++ ++ err = -ENOMEM; ++ new_net = net_alloc(); ++ if (!new_net) ++ goto out; ++ ++ mutex_lock(&net_mutex); ++ err = setup_net(new_net); ++ if (err) ++ goto out_unlock; ++ ++ net_lock(); ++ list_add_tail(&new_net->list, &net_namespace_list); ++ net_unlock(); ++ ++ ++out_unlock: ++ mutex_unlock(&net_mutex); ++out: ++ put_net(old_net); ++ if (err) { ++ net_free(new_net); ++ new_net = ERR_PTR(err); ++ } ++ return new_net; ++} ++ ++static int __init net_ns_init(void) ++{ ++ int err; ++ ++ printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); ++ net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), ++ SMP_CACHE_BYTES, ++ SLAB_PANIC, NULL, NULL); ++ mutex_lock(&net_mutex); ++ err = setup_net(&init_net); ++ ++ net_lock(); ++ list_add_tail(&init_net.list, &net_namespace_list); ++ net_unlock(); ++ ++ mutex_unlock(&net_mutex); ++ if (err) ++ panic("Could not setup the initial network namespace"); ++ ++ return 0; ++} ++ ++pure_initcall(net_ns_init); ++ ++static int register_pernet_operations(struct list_head *list, ++ struct pernet_operations *ops) ++{ ++ struct net *net, *undo_net; ++ int error; ++ ++ error = 0; ++ list_add_tail(&ops->list, list); ++ for_each_net(net) { ++ if (ops->init) { ++ error = ops->init(net); ++ if (error) ++ goto out_undo; ++ } ++ } ++out: ++ return error; ++ ++out_undo: ++ /* If I have an error cleanup all namespaces I initialized */ ++ list_del(&ops->list); ++ for_each_net(undo_net) { ++ if (undo_net == net) ++ goto undone; ++ if (ops->exit) ++ ops->exit(undo_net); ++ } ++undone: ++ goto out; ++} ++ ++static void unregister_pernet_operations(struct pernet_operations *ops) ++{ ++ struct net *net; ++ ++ list_del(&ops->list); ++ for_each_net(net) ++ if (ops->exit) ++ ops->exit(net); ++} ++ ++/** ++ * register_pernet_subsys - register a network namespace subsystem ++ * @ops: pernet operations structure for the subsystem ++ * ++ * Register a subsystem which has init and exit functions ++ * that are called when network namespaces are created and ++ * destroyed respectively. ++ * ++ * When registered all network namespace init functions are ++ * called for every existing network namespace. Allowing kernel ++ * modules to have a race free view of the set of network namespaces. ++ * ++ * When a new network namespace is created all of the init ++ * methods are called in the order in which they were registered. 
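++ *
++ * For example, a subsystem would typically bundle its per-namespace
++ * state behind an init/exit pair and register it once (foo_net_* here
++ * is a hypothetical user, not something added by this patch):
++ *
++ *	static int foo_net_init(struct net *net)
++ *	{
++ *		return 0;	/* set up per-namespace state */
++ *	}
++ *
++ *	static void foo_net_exit(struct net *net)
++ *	{
++ *		/* tear down per-namespace state */
++ *	}
++ *
++ *	static struct pernet_operations foo_net_ops = {
++ *		.init = foo_net_init,
++ *		.exit = foo_net_exit,
++ *	};
++ *
++ *	register_pernet_subsys(&foo_net_ops);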
++ *
++ * When a network namespace is destroyed all of the exit methods
++ * are called in the reverse of the order with which they were
++ * registered.
++ */
++int register_pernet_subsys(struct pernet_operations *ops)
++{
++ int error;
++ mutex_lock(&net_mutex);
++ error = register_pernet_operations(first_device, ops);
++ mutex_unlock(&net_mutex);
++ return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_subsys);
++
++/**
++ * unregister_pernet_subsys - unregister a network namespace subsystem
++ * @ops: pernet operations structure to manipulate
++ *
++ * Remove the pernet operations structure from the list to be
++ * used when network namespaces are created or destroyed. In
++ * addition run the exit method for all existing network
++ * namespaces.
++ */
++void unregister_pernet_subsys(struct pernet_operations *module)
++{
++ mutex_lock(&net_mutex);
++ unregister_pernet_operations(module);
++ mutex_unlock(&net_mutex);
++}
++EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
++
++/**
++ * register_pernet_device - register a network namespace device
++ * @ops: pernet operations structure for the subsystem
++ *
++ * Register a device which has init and exit functions
++ * that are called when network namespaces are created and
++ * destroyed respectively.
++ *
++ * When registered all network namespace init functions are
++ * called for every existing network namespace. Allowing kernel
++ * modules to have a race free view of the set of network namespaces.
++ *
++ * When a new network namespace is created all of the init
++ * methods are called in the order in which they were registered.
++ *
++ * When a network namespace is destroyed all of the exit methods
++ * are called in the reverse of the order with which they were
++ * registered.
++ */
++int register_pernet_device(struct pernet_operations *ops)
++{
++ int error;
++ mutex_lock(&net_mutex);
++ error = register_pernet_operations(&pernet_list, ops);
++ if (!error && (first_device == &pernet_list))
++ first_device = &ops->list;
++ mutex_unlock(&net_mutex);
++ return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_device);
++
++/**
++ * unregister_pernet_device - unregister a network namespace netdevice
++ * @ops: pernet operations structure to manipulate
++ *
++ * Remove the pernet operations structure from the list to be
++ * used when network namespaces are created or destroyed. In
++ * addition run the exit method for all existing network
++ * namespaces.
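++ *
++ * Like unregister_pernet_subsys(), this takes net_mutex while the
++ * exit methods run, so it may sleep.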
++ */ ++void unregister_pernet_device(struct pernet_operations *ops) ++{ ++ mutex_lock(&net_mutex); ++ if (&ops->list == first_device) ++ first_device = first_device->next; ++ unregister_pernet_operations(ops); ++ mutex_unlock(&net_mutex); ++} ++EXPORT_SYMBOL_GPL(unregister_pernet_device); +diff -Nurb linux-2.6.22-570/net/core/netpoll.c linux-2.6.22-591/net/core/netpoll.c +--- linux-2.6.22-570/net/core/netpoll.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/core/netpoll.c 2007-12-21 15:36:15.000000000 -0500 +@@ -503,7 +503,8 @@ + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), +- ulen - sizeof(struct udphdr)); ++ ulen - sizeof(struct udphdr), ++ skb); + + kfree_skb(skb); + return 1; +@@ -633,7 +634,7 @@ + int err; + + if (np->dev_name) +- ndev = dev_get_by_name(np->dev_name); ++ ndev = dev_get_by_name(&init_net, np->dev_name); + if (!ndev) { + printk(KERN_ERR "%s: %s doesn't exist, aborting.\n", + np->name, np->dev_name); +diff -Nurb linux-2.6.22-570/net/core/pktgen.c linux-2.6.22-591/net/core/pktgen.c +--- linux-2.6.22-570/net/core/pktgen.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/core/pktgen.c 2007-12-21 15:36:15.000000000 -0500 +@@ -155,6 +155,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1903,6 +1904,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* It is OK that we do not hold the group lock right now, + * as we run under the RTNL lock. + */ +@@ -1933,7 +1937,7 @@ + pkt_dev->odev = NULL; + } + +- odev = dev_get_by_name(ifname); ++ odev = dev_get_by_name(&init_net, ifname); + if (!odev) { + printk("pktgen: no such netdevice: \"%s\"\n", ifname); + return -ENODEV; +@@ -3284,6 +3288,8 @@ + + set_current_state(TASK_INTERRUPTIBLE); + ++ set_freezable(); ++ + while (!kthread_should_stop()) { + pkt_dev = next_to_run(t); + +@@ -3568,7 +3574,7 @@ + + printk(version); + +- pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); ++ pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); + if (!pg_proc_dir) + return -ENODEV; + pg_proc_dir->owner = THIS_MODULE; +@@ -3577,7 +3583,7 @@ + if (pe == NULL) { + printk("pktgen: ERROR: cannot create %s procfs entry.\n", + PGCTRL); +- proc_net_remove(PG_PROC_DIR); ++ proc_net_remove(&init_net, PG_PROC_DIR); + return -EINVAL; + } + +@@ -3600,7 +3606,7 @@ + printk("pktgen: ERROR: Initialization failed for all threads\n"); + unregister_netdevice_notifier(&pktgen_notifier_block); + remove_proc_entry(PGCTRL, pg_proc_dir); +- proc_net_remove(PG_PROC_DIR); ++ proc_net_remove(&init_net, PG_PROC_DIR); + return -ENODEV; + } + +@@ -3627,7 +3633,7 @@ + + /* Clean up proc file system */ + remove_proc_entry(PGCTRL, pg_proc_dir); +- proc_net_remove(PG_PROC_DIR); ++ proc_net_remove(&init_net, PG_PROC_DIR); + } + + module_init(pg_init); +diff -Nurb linux-2.6.22-570/net/core/rtnetlink.c linux-2.6.22-591/net/core/rtnetlink.c +--- linux-2.6.22-570/net/core/rtnetlink.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/core/rtnetlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -59,7 +59,6 @@ + }; + + static DEFINE_MUTEX(rtnl_mutex); +-static struct sock *rtnl; + + void rtnl_lock(void) + { +@@ -73,9 +72,17 @@ + + void rtnl_unlock(void) + { ++ struct net *net; + mutex_unlock(&rtnl_mutex); ++ ++ net_lock(); ++ for_each_net(net) { ++ struct sock *rtnl = net->rtnl; + if (rtnl && rtnl->sk_receive_queue.qlen) + rtnl->sk_data_ready(rtnl, 0); ++ } ++ net_unlock(); ++ + netdev_run_todo(); + } + +@@ -97,6 +104,19 @@ + return 0; + } + ++int 
__rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr, ++ struct rtattr *rta, int len) ++{ ++ if (RTA_PAYLOAD(rta) < len) ++ return -1; ++ if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { ++ rta = RTA_DATA(rta) + RTA_ALIGN(len); ++ return rtattr_parse_nested(tb, maxattr, rta); ++ } ++ memset(tb, 0, sizeof(struct rtattr *) * maxattr); ++ return 0; ++} ++ + static struct rtnl_link *rtnl_msg_handlers[NPROTO]; + + static inline int rtm_msgindex(int msgtype) +@@ -243,6 +263,143 @@ + + EXPORT_SYMBOL_GPL(rtnl_unregister_all); + ++static LIST_HEAD(link_ops); ++ ++/** ++ * __rtnl_link_register - Register rtnl_link_ops with rtnetlink. ++ * @ops: struct rtnl_link_ops * to register ++ * ++ * The caller must hold the rtnl_mutex. This function should be used ++ * by drivers that create devices during module initialization. It ++ * must be called before registering the devices. ++ * ++ * Returns 0 on success or a negative error code. ++ */ ++int __rtnl_link_register(struct rtnl_link_ops *ops) ++{ ++ list_add_tail(&ops->list, &link_ops); ++ return 0; ++} ++ ++EXPORT_SYMBOL_GPL(__rtnl_link_register); ++ ++/** ++ * rtnl_link_register - Register rtnl_link_ops with rtnetlink. ++ * @ops: struct rtnl_link_ops * to register ++ * ++ * Returns 0 on success or a negative error code. ++ */ ++int rtnl_link_register(struct rtnl_link_ops *ops) ++{ ++ int err; ++ ++ rtnl_lock(); ++ err = __rtnl_link_register(ops); ++ rtnl_unlock(); ++ return err; ++} ++ ++EXPORT_SYMBOL_GPL(rtnl_link_register); ++ ++/** ++ * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. ++ * @ops: struct rtnl_link_ops * to unregister ++ * ++ * The caller must hold the rtnl_mutex. This function should be used ++ * by drivers that unregister devices during module unloading. It must ++ * be called after unregistering the devices. ++ */ ++void __rtnl_link_unregister(struct rtnl_link_ops *ops) ++{ ++ list_del(&ops->list); ++} ++ ++EXPORT_SYMBOL_GPL(__rtnl_link_unregister); ++ ++/** ++ * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 
++ * @ops: struct rtnl_link_ops * to unregister ++ */ ++void rtnl_link_unregister(struct rtnl_link_ops *ops) ++{ ++ rtnl_lock(); ++ __rtnl_link_unregister(ops); ++ rtnl_unlock(); ++} ++ ++EXPORT_SYMBOL_GPL(rtnl_link_unregister); ++ ++static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind) ++{ ++ const struct rtnl_link_ops *ops; ++ ++ list_for_each_entry(ops, &link_ops, list) { ++ if (!strcmp(ops->kind, kind)) ++ return ops; ++ } ++ return NULL; ++} ++ ++static size_t rtnl_link_get_size(const struct net_device *dev) ++{ ++ const struct rtnl_link_ops *ops = dev->rtnl_link_ops; ++ size_t size; ++ ++ if (!ops) ++ return 0; ++ ++ size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */ ++ nlmsg_total_size(strlen(ops->kind) + 1); /* IFLA_INFO_KIND */ ++ ++ if (ops->get_size) ++ /* IFLA_INFO_DATA + nested data */ ++ size += nlmsg_total_size(sizeof(struct nlattr)) + ++ ops->get_size(dev); ++ ++ if (ops->get_xstats_size) ++ size += ops->get_xstats_size(dev); /* IFLA_INFO_XSTATS */ ++ ++ return size; ++} ++ ++static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) ++{ ++ const struct rtnl_link_ops *ops = dev->rtnl_link_ops; ++ struct nlattr *linkinfo, *data; ++ int err = -EMSGSIZE; ++ ++ linkinfo = nla_nest_start(skb, IFLA_LINKINFO); ++ if (linkinfo == NULL) ++ goto out; ++ ++ if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0) ++ goto err_cancel_link; ++ if (ops->fill_xstats) { ++ err = ops->fill_xstats(skb, dev); ++ if (err < 0) ++ goto err_cancel_link; ++ } ++ if (ops->fill_info) { ++ data = nla_nest_start(skb, IFLA_INFO_DATA); ++ if (data == NULL) ++ goto err_cancel_link; ++ err = ops->fill_info(skb, dev); ++ if (err < 0) ++ goto err_cancel_data; ++ nla_nest_end(skb, data); ++ } ++ ++ nla_nest_end(skb, linkinfo); ++ return 0; ++ ++err_cancel_data: ++ nla_nest_cancel(skb, data); ++err_cancel_link: ++ nla_nest_cancel(skb, linkinfo); ++out: ++ return err; ++} ++ + static const int rtm_min[RTM_NR_FAMILIES] = + { + [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), +@@ -296,8 +453,9 @@ + return ret; + } + +-int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) ++int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo) + { ++ struct sock *rtnl = net->rtnl; + int err = 0; + + NETLINK_CB(skb).dst_group = group; +@@ -309,14 +467,17 @@ + return err; + } + +-int rtnl_unicast(struct sk_buff *skb, u32 pid) ++int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) + { ++ struct sock *rtnl = net->rtnl; ++ + return nlmsg_unicast(rtnl, skb, pid); + } + +-int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, ++int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) + { ++ struct sock *rtnl = net->rtnl; + int report = 0; + + if (nlh) +@@ -325,8 +486,10 @@ + return nlmsg_notify(rtnl, skb, pid, group, report, flags); + } + +-void rtnl_set_sk_err(u32 group, int error) ++void rtnl_set_sk_err(struct net *net, u32 group, int error) + { ++ struct sock *rtnl = net->rtnl; ++ + netlink_set_err(rtnl, 0, group, error); + } + +@@ -437,7 +600,7 @@ + a->tx_compressed = b->tx_compressed; + }; + +-static inline size_t if_nlmsg_size(void) ++static inline size_t if_nlmsg_size(const struct net_device *dev) + { + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ +@@ -452,7 +615,8 @@ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(1) /* IFLA_OPERSTATE */ +- + 
nla_total_size(1); /* IFLA_LINKMODE */ ++ + nla_total_size(1) /* IFLA_LINKMODE */ ++ + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + } + + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, +@@ -522,6 +686,11 @@ + } + } + ++ if (dev->rtnl_link_ops) { ++ if (rtnl_link_fill(skb, dev) < 0) ++ goto nla_put_failure; ++ } ++ + return nlmsg_end(skb, nlh); + + nla_put_failure: +@@ -531,12 +700,13 @@ + + static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx; + int s_idx = cb->args[0]; + struct net_device *dev; + + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if (!nx_dev_visible(skb->sk->sk_nx_info, dev)) + continue; + if (idx < s_idx) +@@ -555,6 +725,8 @@ + + static const struct nla_policy ifla_policy[IFLA_MAX+1] = { + [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, ++ [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, ++ [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, + [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, + [IFLA_MTU] = { .type = NLA_U32 }, + [IFLA_TXQLEN] = { .type = NLA_U32 }, +@@ -563,44 +735,16 @@ + [IFLA_LINKMODE] = { .type = NLA_U8 }, + }; + +-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +-{ +- struct ifinfomsg *ifm; +- struct net_device *dev; +- int err, send_addr_notify = 0, modified = 0; +- struct nlattr *tb[IFLA_MAX+1]; +- char ifname[IFNAMSIZ]; +- +- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); +- if (err < 0) +- goto errout; +- +- if (tb[IFLA_IFNAME]) +- nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); +- else +- ifname[0] = '\0'; +- +- err = -EINVAL; +- ifm = nlmsg_data(nlh); +- if (ifm->ifi_index > 0) +- dev = dev_get_by_index(ifm->ifi_index); +- else if (tb[IFLA_IFNAME]) +- dev = dev_get_by_name(ifname); +- else +- goto errout; +- +- if (dev == NULL) { +- err = -ENODEV; +- goto errout; +- } +- +- if (tb[IFLA_ADDRESS] && +- nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) +- goto errout_dev; ++static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { ++ [IFLA_INFO_KIND] = { .type = NLA_STRING }, ++ [IFLA_INFO_DATA] = { .type = NLA_NESTED }, ++}; + +- if (tb[IFLA_BROADCAST] && +- nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) +- goto errout_dev; ++static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, ++ struct nlattr **tb, char *ifname, int modified) ++{ ++ int send_addr_notify = 0; ++ int err; + + if (tb[IFLA_MAP]) { + struct rtnl_link_ifmap *u_map; +@@ -608,12 +752,12 @@ + + if (!dev->set_config) { + err = -EOPNOTSUPP; +- goto errout_dev; ++ goto errout; + } + + if (!netif_device_present(dev)) { + err = -ENODEV; +- goto errout_dev; ++ goto errout; + } + + u_map = nla_data(tb[IFLA_MAP]); +@@ -626,7 +770,7 @@ + + err = dev->set_config(dev, &k_map); + if (err < 0) +- goto errout_dev; ++ goto errout; + + modified = 1; + } +@@ -637,19 +781,19 @@ + + if (!dev->set_mac_address) { + err = -EOPNOTSUPP; +- goto errout_dev; ++ goto errout; + } + + if (!netif_device_present(dev)) { + err = -ENODEV; +- goto errout_dev; ++ goto errout; + } + + len = sizeof(sa_family_t) + dev->addr_len; + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; +- goto errout_dev; ++ goto errout; + } + sa->sa_family = dev->type; + memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), +@@ -657,7 +801,7 @@ + err = dev->set_mac_address(dev, sa); + kfree(sa); + if (err) +- goto errout_dev; ++ goto errout; + send_addr_notify = 1; + modified = 1; + } +@@ -665,7 +809,7 @@ + if 
(tb[IFLA_MTU]) {
+ err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+ if (err < 0)
+- goto errout_dev;
++ goto errout;
+ modified = 1;
+ }
+
+@@ -677,7 +821,7 @@
+ if (ifm->ifi_index > 0 && ifname[0]) {
+ err = dev_change_name(dev, ifname);
+ if (err < 0)
+- goto errout_dev;
++ goto errout;
+ modified = 1;
+ }
+
+@@ -686,7 +830,6 @@
+ send_addr_notify = 1;
+ }
+
+-
+ if (ifm->ifi_flags || ifm->ifi_change) {
+ unsigned int flags = ifm->ifi_flags;
+
+@@ -714,7 +857,7 @@
+
+ err = 0;
+
+-errout_dev:
++errout:
+ if (err < 0 && modified && net_ratelimit())
+ printk(KERN_WARNING "A link change request failed with "
+ "some changes committed already. Interface %s may "
+@@ -723,14 +866,237 @@
+
+ if (send_addr_notify)
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
++ return err;
++}
++
++static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ struct ifinfomsg *ifm;
++ struct net_device *dev;
++ int err;
++ struct nlattr *tb[IFLA_MAX+1];
++ char ifname[IFNAMSIZ];
+
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ goto errout;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++ else
++ ifname[0] = '\0';
++
++ err = -EINVAL;
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = dev_get_by_index(net, ifm->ifi_index);
++ else if (tb[IFLA_IFNAME])
++ dev = dev_get_by_name(net, ifname);
++ else
++ goto errout;
++
++ if (dev == NULL) {
++ err = -ENODEV;
++ goto errout;
++ }
++
++ if (tb[IFLA_ADDRESS] &&
++ nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
++ goto errout_dev;
++
++ if (tb[IFLA_BROADCAST] &&
++ nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
++ goto errout_dev;
++
++ err = do_setlink(dev, ifm, tb, ifname, 0);
++errout_dev:
+ dev_put(dev);
+ errout:
+ return err;
+ }
+
++static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ const struct rtnl_link_ops *ops;
++ struct net_device *dev;
++ struct ifinfomsg *ifm;
++ char ifname[IFNAMSIZ];
++ struct nlattr *tb[IFLA_MAX+1];
++ int err;
++
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ return err;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = __dev_get_by_index(net, ifm->ifi_index);
++ else if (tb[IFLA_IFNAME])
++ dev = __dev_get_by_name(net, ifname);
++ else
++ return -EINVAL;
++
++ if (!dev)
++ return -ENODEV;
++
++ ops = dev->rtnl_link_ops;
++ if (!ops)
++ return -EOPNOTSUPP;
++
++ ops->dellink(dev);
++ return 0;
++}
++
++static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++ struct net *net = skb->sk->sk_net;
++ const struct rtnl_link_ops *ops;
++ struct net_device *dev;
++ struct ifinfomsg *ifm;
++ char kind[MODULE_NAME_LEN];
++ char ifname[IFNAMSIZ];
++ struct nlattr *tb[IFLA_MAX+1];
++ struct nlattr *linkinfo[IFLA_INFO_MAX+1];
++ int err;
++
++replay:
++ err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++ if (err < 0)
++ return err;
++
++ if (tb[IFLA_IFNAME])
++ nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++ else
++ ifname[0] = '\0';
++
++ ifm = nlmsg_data(nlh);
++ if (ifm->ifi_index > 0)
++ dev = __dev_get_by_index(net, ifm->ifi_index);
++ else if (ifname[0])
++ dev = __dev_get_by_name(net, ifname);
++ else
++ dev = NULL;
++
++ if (tb[IFLA_LINKINFO]) {
++ err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
++ tb[IFLA_LINKINFO], ifla_info_policy);
++ if (err
< 0) ++ return err; ++ } else ++ memset(linkinfo, 0, sizeof(linkinfo)); ++ ++ if (linkinfo[IFLA_INFO_KIND]) { ++ nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind)); ++ ops = rtnl_link_ops_get(kind); ++ } else { ++ kind[0] = '\0'; ++ ops = NULL; ++ } ++ ++ if (1) { ++ struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL; ++ ++ if (ops) { ++ if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) { ++ err = nla_parse_nested(attr, ops->maxtype, ++ linkinfo[IFLA_INFO_DATA], ++ ops->policy); ++ if (err < 0) ++ return err; ++ data = attr; ++ } ++ if (ops->validate) { ++ err = ops->validate(tb, data); ++ if (err < 0) ++ return err; ++ } ++ } ++ ++ if (dev) { ++ int modified = 0; ++ ++ if (nlh->nlmsg_flags & NLM_F_EXCL) ++ return -EEXIST; ++ if (nlh->nlmsg_flags & NLM_F_REPLACE) ++ return -EOPNOTSUPP; ++ ++ if (linkinfo[IFLA_INFO_DATA]) { ++ if (!ops || ops != dev->rtnl_link_ops || ++ !ops->changelink) ++ return -EOPNOTSUPP; ++ ++ err = ops->changelink(dev, tb, data); ++ if (err < 0) ++ return err; ++ modified = 1; ++ } ++ ++ return do_setlink(dev, ifm, tb, ifname, modified); ++ } ++ ++ if (!(nlh->nlmsg_flags & NLM_F_CREATE)) ++ return -ENODEV; ++ ++ if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change) ++ return -EOPNOTSUPP; ++ if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] || ++ tb[IFLA_MASTER] || tb[IFLA_PROTINFO]) ++ return -EOPNOTSUPP; ++ ++ if (!ops) { ++#ifdef CONFIG_KMOD ++ if (kind[0]) { ++ __rtnl_unlock(); ++ request_module("rtnl-link-%s", kind); ++ rtnl_lock(); ++ ops = rtnl_link_ops_get(kind); ++ if (ops) ++ goto replay; ++ } ++#endif ++ return -EOPNOTSUPP; ++ } ++ ++ if (!ifname[0]) ++ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind); ++ dev = alloc_netdev(ops->priv_size, ifname, ops->setup); ++ if (!dev) ++ return -ENOMEM; ++ ++ if (strchr(dev->name, '%')) { ++ err = dev_alloc_name(dev, dev->name); ++ if (err < 0) ++ goto err_free; ++ } ++ dev->rtnl_link_ops = ops; ++ ++ if (tb[IFLA_MTU]) ++ dev->mtu = nla_get_u32(tb[IFLA_MTU]); ++ if (tb[IFLA_TXQLEN]) ++ dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); ++ if (tb[IFLA_WEIGHT]) ++ dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); ++ if (tb[IFLA_OPERSTATE]) ++ set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); ++ if (tb[IFLA_LINKMODE]) ++ dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); ++ ++ err = ops->newlink(dev, tb, data); ++err_free: ++ if (err < 0) ++ free_netdev(dev); ++ return err; ++ } ++} ++ + static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifinfomsg *ifm; + struct nlattr *tb[IFLA_MAX+1]; + struct net_device *dev = NULL; +@@ -743,13 +1109,13 @@ + + ifm = nlmsg_data(nlh); + if (ifm->ifi_index > 0) { +- dev = dev_get_by_index(ifm->ifi_index); ++ dev = dev_get_by_index(net, ifm->ifi_index); + if (dev == NULL) + return -ENODEV; + } else + return -EINVAL; + +- nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); ++ nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); + if (nskb == NULL) { + err = -ENOBUFS; + goto errout; +@@ -763,7 +1129,7 @@ + kfree_skb(nskb); + goto errout; + } +- err = rtnl_unicast(nskb, NETLINK_CB(skb).pid); ++ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + errout: + dev_put(dev); + +@@ -796,13 +1162,14 @@ + + void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) + { ++ struct net *net = dev->nd_net; + struct sk_buff *skb; + int err = -ENOBUFS; + + if (!nx_dev_visible(current->nx_info, dev)) + return; + +- skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL); ++ skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL); 
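++ /* if_nlmsg_size() is now sized per device, so any IFLA_LINKINFO
++ * payload reported via rtnl_link_get_size() fits in the skb.
++ */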
+ if (skb == NULL)
+ goto errout;
+
+@@ -813,10 +1180,10 @@
+ kfree_skb(skb);
+ goto errout;
+ }
+- err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
++ err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ errout:
+ if (err < 0)
+- rtnl_set_sk_err(RTNLGRP_LINK, err);
++ rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ }
+
+ /* Protected by RTNL semaphore. */
+@@ -827,6 +1194,7 @@
+
+ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ {
++ struct net *net = skb->sk->sk_net;
+ rtnl_doit_func doit;
+ int sz_idx, kind;
+ int min_len;
+@@ -855,6 +1223,7 @@
+ return -EPERM;
+
+ if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
++ struct sock *rtnl;
+ rtnl_dumpit_func dumpit;
+
+ dumpit = rtnl_get_dumpit(family, type);
+@@ -862,6 +1231,7 @@
+ return -EOPNOTSUPP;
+
+ __rtnl_unlock();
++ rtnl = net->rtnl;
+ err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ rtnl_lock();
+ return err;
+@@ -911,6 +1281,10 @@
+ static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+ struct net_device *dev = ptr;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+@@ -936,6 +1310,36 @@
+ .notifier_call = rtnetlink_event,
+ };
+
++
++static int rtnetlink_net_init(struct net *net)
++{
++ struct sock *sk;
++ sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
++ rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
++ if (!sk)
++ return -ENOMEM;
++
++ /* Don't hold an extra reference on the namespace */
++ put_net(sk->sk_net);
++ net->rtnl = sk;
++ return 0;
++}
++
++static void rtnetlink_net_exit(struct net *net)
++{
++ /* At the last minute lie and say this is a socket for the
++ * initial network namespace. So the socket will be safe to
++ * free.
++ */
++ net->rtnl->sk_net = get_net(&init_net);
++ sock_put(net->rtnl);
++}
++
++static struct pernet_operations rtnetlink_net_ops = {
++ .init = rtnetlink_net_init,
++ .exit = rtnetlink_net_exit,
++};
++
+ void __init rtnetlink_init(void)
+ {
+ int i;
+@@ -948,15 +1352,16 @@
+ if (!rta_buf)
+ panic("rtnetlink_init: cannot allocate rta_buf\n");
+
+- rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+- &rtnl_mutex, THIS_MODULE);
+- if (rtnl == NULL)
++ if (register_pernet_subsys(&rtnetlink_net_ops))
+ panic("rtnetlink_init: cannot initialize rtnetlink\n");
++
+ netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+ register_netdevice_notifier(&rtnetlink_dev_notifier);
+
+ rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+ rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
++ rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
++ rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+ rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+@@ -965,6 +1370,7 @@
+ EXPORT_SYMBOL(__rta_fill);
+ EXPORT_SYMBOL(rtattr_strlcpy);
+ EXPORT_SYMBOL(rtattr_parse);
++EXPORT_SYMBOL(__rtattr_parse_nested_compat);
+ EXPORT_SYMBOL(rtnetlink_put_metrics);
+ EXPORT_SYMBOL(rtnl_lock);
+ EXPORT_SYMBOL(rtnl_trylock);
+diff -Nurb linux-2.6.22-570/net/core/skbuff.c linux-2.6.22-591/net/core/skbuff.c
+--- linux-2.6.22-570/net/core/skbuff.c 2007-12-21 15:36:03.000000000 -0500
++++ linux-2.6.22-591/net/core/skbuff.c 2007-12-21 15:36:12.000000000 -0500
+@@ -417,6 +417,7 @@
+ C(csum);
+ C(local_df);
+ n->cloned = 1;
++ n->hdr_len = skb->nohdr ?
skb_headroom(skb) : skb->hdr_len; + n->nohdr = 0; + C(pkt_type); + C(ip_summed); +@@ -681,6 +682,7 @@ + skb->network_header += off; + skb->mac_header += off; + skb->cloned = 0; ++ skb->hdr_len = 0; + skb->nohdr = 0; + atomic_set(&skb_shinfo(skb)->dataref, 1); + return 0; +@@ -2012,13 +2014,13 @@ + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", + sizeof(struct sk_buff), + 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY, + NULL, NULL); + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + (2*sizeof(struct sk_buff)) + + sizeof(atomic_t), + 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY, + NULL, NULL); + } + +diff -Nurb linux-2.6.22-570/net/core/sock.c linux-2.6.22-591/net/core/sock.c +--- linux-2.6.22-570/net/core/sock.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/core/sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -123,6 +123,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -360,6 +361,7 @@ + char __user *optval, int optlen) + { + struct sock *sk=sock->sk; ++ struct net *net = sk->sk_net; + struct sk_filter *filter; + int val; + int valbool; +@@ -614,7 +616,7 @@ + if (devname[0] == '\0') { + sk->sk_bound_dev_if = 0; + } else { +- struct net_device *dev = dev_get_by_name(devname); ++ struct net_device *dev = dev_get_by_name(net, devname); + if (!dev) { + ret = -ENODEV; + break; +@@ -867,7 +869,7 @@ + * @prot: struct proto associated with this new sock instance + * @zero_it: if we should zero the newly allocated sock + */ +-struct sock *sk_alloc(int family, gfp_t priority, ++struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + struct proto *prot, int zero_it) + { + struct sock *sk = NULL; +@@ -888,6 +890,7 @@ + */ + sk->sk_prot = sk->sk_prot_creator = prot; + sock_lock_init(sk); ++ sk->sk_net = get_net(net); + } + sock_vx_init(sk); + sock_nx_init(sk); +@@ -929,6 +932,7 @@ + __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); + + security_sk_free(sk); ++ put_net(sk->sk_net); + vx_sock_dec(sk); + clr_vx_info(&sk->sk_vx_info); + sk->sk_xid = -1; +@@ -943,7 +947,7 @@ + + struct sock *sk_clone(const struct sock *sk, const gfp_t priority) + { +- struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0); ++ struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); + + if (newsk != NULL) { + struct sk_filter *filter; +@@ -2017,7 +2021,7 @@ + static int __init proto_init(void) + { + /* register /proc/net/protocols */ +- return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0; ++ return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? 
-ENOBUFS : 0; + } + + subsys_initcall(proto_init); +diff -Nurb linux-2.6.22-570/net/core/sysctl_net_core.c linux-2.6.22-591/net/core/sysctl_net_core.c +--- linux-2.6.22-570/net/core/sysctl_net_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/core/sysctl_net_core.c 2007-12-21 15:36:15.000000000 -0500 +@@ -9,25 +9,10 @@ + #include + #include + #include ++#include ++#include + #include + +-#ifdef CONFIG_SYSCTL +- +-extern int netdev_max_backlog; +-extern int weight_p; +- +-extern __u32 sysctl_wmem_max; +-extern __u32 sysctl_rmem_max; +- +-extern int sysctl_core_destroy_delay; +- +-#ifdef CONFIG_XFRM +-extern u32 sysctl_xfrm_aevent_etime; +-extern u32 sysctl_xfrm_aevent_rseqth; +-extern int sysctl_xfrm_larval_drop; +-extern u32 sysctl_xfrm_acq_expires; +-#endif +- + ctl_table core_table[] = { + #ifdef CONFIG_NET + { +@@ -103,11 +88,32 @@ + .mode = 0644, + .proc_handler = &proc_dointvec + }, ++#endif /* CONFIG_NET */ ++ { ++ .ctl_name = NET_CORE_BUDGET, ++ .procname = "netdev_budget", ++ .data = &netdev_budget, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { ++ .ctl_name = NET_CORE_WARNINGS, ++ .procname = "warnings", ++ .data = &net_msg_warn, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ { .ctl_name = 0 } ++}; ++ ++struct ctl_table multi_core_table[] = { + #ifdef CONFIG_XFRM + { + .ctl_name = NET_CORE_AEVENT_ETIME, + .procname = "xfrm_aevent_etime", +- .data = &sysctl_xfrm_aevent_etime, ++ .data = &init_net.sysctl_xfrm_aevent_etime, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = &proc_dointvec +@@ -115,7 +121,7 @@ + { + .ctl_name = NET_CORE_AEVENT_RSEQTH, + .procname = "xfrm_aevent_rseqth", +- .data = &sysctl_xfrm_aevent_rseqth, ++ .data = &init_net.sysctl_xfrm_aevent_rseqth, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = &proc_dointvec +@@ -123,7 +129,7 @@ + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_larval_drop", +- .data = &sysctl_xfrm_larval_drop, ++ .data = &init_net.sysctl_xfrm_larval_drop, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec +@@ -131,38 +137,19 @@ + { + .ctl_name = CTL_UNNUMBERED, + .procname = "xfrm_acq_expires", +- .data = &sysctl_xfrm_acq_expires, ++ .data = &init_net.sysctl_xfrm_acq_expires, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + #endif /* CONFIG_XFRM */ +-#endif /* CONFIG_NET */ + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", +- .data = &sysctl_somaxconn, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_CORE_BUDGET, +- .procname = "netdev_budget", +- .data = &netdev_budget, ++ .data = &init_net.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +- { +- .ctl_name = NET_CORE_WARNINGS, +- .procname = "warnings", +- .data = &net_msg_warn, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { .ctl_name = 0 } ++ {} + }; +- +-#endif +diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.c linux-2.6.22-591/net/dccp/ccids/ccid3.c +--- linux-2.6.22-570/net/dccp/ccids/ccid3.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ccids/ccid3.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/dccp/ccids/ccid3.c + * +- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. +- * Copyright (c) 2005-6 Ian McDonald ++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 
++ * Copyright (c) 2005-7 Ian McDonald + * + * An implementation of the DCCP protocol + * +@@ -49,7 +49,6 @@ + + static struct dccp_tx_hist *ccid3_tx_hist; + static struct dccp_rx_hist *ccid3_rx_hist; +-static struct dccp_li_hist *ccid3_li_hist; + + /* + * Transmitter Half-Connection Routines +@@ -194,25 +193,20 @@ + * The algorithm is not applicable if RTT < 4 microseconds. + */ + static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx, +- struct timeval *now) ++ ktime_t now) + { +- suseconds_t delta; + u32 quarter_rtts; + + if (unlikely(hctx->ccid3hctx_rtt < 4)) /* avoid divide-by-zero */ + return; + +- delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count); +- DCCP_BUG_ON(delta < 0); +- +- quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4); ++ quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count); ++ quarter_rtts /= hctx->ccid3hctx_rtt / 4; + + if (quarter_rtts > 0) { +- hctx->ccid3hctx_t_last_win_count = *now; ++ hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count += min_t(u32, quarter_rtts, 5); + hctx->ccid3hctx_last_win_count &= 0xF; /* mod 16 */ +- +- ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count); + } + } + +@@ -312,8 +306,8 @@ + { + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); +- struct timeval now; +- suseconds_t delay; ++ ktime_t now = ktime_get_real(); ++ s64 delay; + + BUG_ON(hctx == NULL); + +@@ -325,8 +319,6 @@ + if (unlikely(skb->len == 0)) + return -EBADMSG; + +- dccp_timestamp(sk, &now); +- + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, +@@ -349,7 +341,7 @@ + ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt); + hctx->ccid3hctx_rtt = dp->dccps_syn_rtt; + hctx->ccid3hctx_x = rfc3390_initial_rate(sk); +- hctx->ccid3hctx_t_ld = now; ++ hctx->ccid3hctx_t_ld = ktime_to_timeval(now); + } else { + /* Sender does not have RTT sample: X = MSS/second */ + hctx->ccid3hctx_x = dp->dccps_mss_cache; +@@ -361,7 +353,7 @@ + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: +- delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now); ++ delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now); + ccid3_pr_debug("delay=%ld\n", (long)delay); + /* + * Scheduling of packet transmissions [RFC 3448, 4.6] +@@ -371,10 +363,10 @@ + * else + * // send the packet in (t_nom - t_now) milliseconds. 
+ */ +- if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0) +- return delay / 1000L; ++ if (delay - (s64)hctx->ccid3hctx_delta >= 1000) ++ return (u32)delay / 1000L; + +- ccid3_hc_tx_update_win_count(hctx, &now); ++ ccid3_hc_tx_update_win_count(hctx, now); + break; + case TFRC_SSTATE_TERM: + DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); +@@ -387,8 +379,8 @@ + hctx->ccid3hctx_idle = 0; + + /* set the nominal send time for the next following packet */ +- timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi); +- ++ hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom, ++ hctx->ccid3hctx_t_ipi); + return 0; + } + +@@ -819,154 +811,6 @@ + return 0; + } + +-/* calculate first loss interval +- * +- * returns estimated loss interval in usecs */ +- +-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) +-{ +- struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); +- struct dccp_rx_hist_entry *entry, *next, *tail = NULL; +- u32 x_recv, p; +- suseconds_t rtt, delta; +- struct timeval tstamp = { 0, }; +- int interval = 0; +- int win_count = 0; +- int step = 0; +- u64 fval; +- +- list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, +- dccphrx_node) { +- if (dccp_rx_hist_entry_data_packet(entry)) { +- tail = entry; +- +- switch (step) { +- case 0: +- tstamp = entry->dccphrx_tstamp; +- win_count = entry->dccphrx_ccval; +- step = 1; +- break; +- case 1: +- interval = win_count - entry->dccphrx_ccval; +- if (interval < 0) +- interval += TFRC_WIN_COUNT_LIMIT; +- if (interval > 4) +- goto found; +- break; +- } +- } +- } +- +- if (unlikely(step == 0)) { +- DCCP_WARN("%s(%p), packet history has no data packets!\n", +- dccp_role(sk), sk); +- return ~0; +- } +- +- if (unlikely(interval == 0)) { +- DCCP_WARN("%s(%p), Could not find a win_count interval > 0." +- "Defaulting to 1\n", dccp_role(sk), sk); +- interval = 1; +- } +-found: +- if (!tail) { +- DCCP_CRIT("tail is null\n"); +- return ~0; +- } +- +- delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); +- DCCP_BUG_ON(delta < 0); +- +- rtt = delta * 4 / interval; +- ccid3_pr_debug("%s(%p), approximated RTT to %dus\n", +- dccp_role(sk), sk, (int)rtt); +- +- /* +- * Determine the length of the first loss interval via inverse lookup. +- * Assume that X_recv can be computed by the throughput equation +- * s +- * X_recv = -------- +- * R * fval +- * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. 
+- */ +- if (rtt == 0) { /* would result in divide-by-zero */ +- DCCP_WARN("RTT==0\n"); +- return ~0; +- } +- +- dccp_timestamp(sk, &tstamp); +- delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); +- DCCP_BUG_ON(delta <= 0); +- +- x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); +- if (x_recv == 0) { /* would also trigger divide-by-zero */ +- DCCP_WARN("X_recv==0\n"); +- if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) { +- DCCP_BUG("stored value of X_recv is zero"); +- return ~0; +- } +- } +- +- fval = scaled_div(hcrx->ccid3hcrx_s, rtt); +- fval = scaled_div32(fval, x_recv); +- p = tfrc_calc_x_reverse_lookup(fval); +- +- ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied " +- "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); +- +- if (p == 0) +- return ~0; +- else +- return 1000000 / p; +-} +- +-static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) +-{ +- struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); +- struct dccp_li_hist_entry *head; +- u64 seq_temp; +- +- if (list_empty(&hcrx->ccid3hcrx_li_hist)) { +- if (!dccp_li_hist_interval_new(ccid3_li_hist, +- &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) +- return; +- +- head = list_entry(hcrx->ccid3hcrx_li_hist.next, +- struct dccp_li_hist_entry, dccplih_node); +- head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); +- } else { +- struct dccp_li_hist_entry *entry; +- struct list_head *tail; +- +- head = list_entry(hcrx->ccid3hcrx_li_hist.next, +- struct dccp_li_hist_entry, dccplih_node); +- /* FIXME win count check removed as was wrong */ +- /* should make this check with receive history */ +- /* and compare there as per section 10.2 of RFC4342 */ +- +- /* new loss event detected */ +- /* calculate last interval length */ +- seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); +- entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC); +- +- if (entry == NULL) { +- DCCP_BUG("out of memory - can not allocate entry"); +- return; +- } +- +- list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); +- +- tail = hcrx->ccid3hcrx_li_hist.prev; +- list_del(tail); +- kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); +- +- /* Create the newest interval */ +- entry->dccplih_seqno = seq_loss; +- entry->dccplih_interval = seq_temp; +- entry->dccplih_win_count = win_loss; +- } +-} +- + static int ccid3_hc_rx_detect_loss(struct sock *sk, + struct dccp_rx_hist_entry *packet) + { +@@ -992,7 +836,14 @@ + while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) + > TFRC_RECV_NUM_LATE_LOSS) { + loss = 1; +- ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, ++ dccp_li_update_li(sk, ++ &hcrx->ccid3hcrx_li_hist, ++ &hcrx->ccid3hcrx_hist, ++ &hcrx->ccid3hcrx_tstamp_last_feedback, ++ hcrx->ccid3hcrx_s, ++ hcrx->ccid3hcrx_bytes_recv, ++ hcrx->ccid3hcrx_x_recv, ++ hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + dccp_inc_seqno(&tmp_seqno); +@@ -1152,7 +1003,7 @@ + dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); + + /* Empty loss interval history */ +- dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist); ++ dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist); + } + + static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) +@@ -1236,19 +1087,12 @@ + if (ccid3_tx_hist == NULL) + goto out_free_rx; + +- ccid3_li_hist = dccp_li_hist_new("ccid3"); +- if (ccid3_li_hist == NULL) +- goto out_free_tx; +- + rc = ccid_register(&ccid3); + if (rc != 0) +- goto out_free_loss_interval_history; ++ goto out_free_tx; 
+ out: + return rc; + +-out_free_loss_interval_history: +- dccp_li_hist_delete(ccid3_li_hist); +- ccid3_li_hist = NULL; + out_free_tx: + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; +@@ -1271,10 +1115,6 @@ + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; + } +- if (ccid3_li_hist != NULL) { +- dccp_li_hist_delete(ccid3_li_hist); +- ccid3_li_hist = NULL; +- } + } + module_exit(ccid3_module_exit); + +diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.h linux-2.6.22-591/net/dccp/ccids/ccid3.h +--- linux-2.6.22-570/net/dccp/ccids/ccid3.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ccids/ccid3.h 2007-12-21 15:36:12.000000000 -0500 +@@ -36,6 +36,7 @@ + #ifndef _DCCP_CCID3_H_ + #define _DCCP_CCID3_H_ + ++#include + #include + #include + #include +@@ -108,10 +109,10 @@ + enum ccid3_hc_tx_states ccid3hctx_state:8; + u8 ccid3hctx_last_win_count; + u8 ccid3hctx_idle; +- struct timeval ccid3hctx_t_last_win_count; ++ ktime_t ccid3hctx_t_last_win_count; + struct timer_list ccid3hctx_no_feedback_timer; + struct timeval ccid3hctx_t_ld; +- struct timeval ccid3hctx_t_nom; ++ ktime_t ccid3hctx_t_nom; + u32 ccid3hctx_delta; + struct list_head ccid3hctx_hist; + struct ccid3_options_received ccid3hctx_options_received; +diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c +--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/dccp/ccids/lib/loss_interval.c + * +- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. +- * Copyright (c) 2005-6 Ian McDonald ++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. 
++ * Copyright (c) 2005-7 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify +@@ -15,58 +15,38 @@ + #include + #include "../../dccp.h" + #include "loss_interval.h" ++#include "packet_history.h" ++#include "tfrc.h" + +-struct dccp_li_hist *dccp_li_hist_new(const char *name) +-{ +- struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); +- static const char dccp_li_hist_mask[] = "li_hist_%s"; +- char *slab_name; +- +- if (hist == NULL) +- goto out; +- +- slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1, +- GFP_ATOMIC); +- if (slab_name == NULL) +- goto out_free_hist; ++#define DCCP_LI_HIST_IVAL_F_LENGTH 8 + +- sprintf(slab_name, dccp_li_hist_mask, name); +- hist->dccplih_slab = kmem_cache_create(slab_name, +- sizeof(struct dccp_li_hist_entry), +- 0, SLAB_HWCACHE_ALIGN, +- NULL, NULL); +- if (hist->dccplih_slab == NULL) +- goto out_free_slab_name; +-out: +- return hist; +-out_free_slab_name: +- kfree(slab_name); +-out_free_hist: +- kfree(hist); +- hist = NULL; +- goto out; +-} ++struct dccp_li_hist_entry { ++ struct list_head dccplih_node; ++ u64 dccplih_seqno:48, ++ dccplih_win_count:4; ++ u32 dccplih_interval; ++}; + +-EXPORT_SYMBOL_GPL(dccp_li_hist_new); ++struct kmem_cache *dccp_li_cachep __read_mostly; + +-void dccp_li_hist_delete(struct dccp_li_hist *hist) ++static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio) + { +- const char* name = kmem_cache_name(hist->dccplih_slab); +- +- kmem_cache_destroy(hist->dccplih_slab); +- kfree(name); +- kfree(hist); ++ return kmem_cache_alloc(dccp_li_cachep, prio); + } + +-EXPORT_SYMBOL_GPL(dccp_li_hist_delete); ++static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry) ++{ ++ if (entry != NULL) ++ kmem_cache_free(dccp_li_cachep, entry); ++} + +-void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list) ++void dccp_li_hist_purge(struct list_head *list) + { + struct dccp_li_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccplih_node) { + list_del_init(&entry->dccplih_node); +- kmem_cache_free(hist->dccplih_slab, entry); ++ kmem_cache_free(dccp_li_cachep, entry); + } + } + +@@ -118,16 +98,16 @@ + + EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); + +-int dccp_li_hist_interval_new(struct dccp_li_hist *hist, +- struct list_head *list, const u64 seq_loss, const u8 win_loss) ++static int dccp_li_hist_interval_new(struct list_head *list, ++ const u64 seq_loss, const u8 win_loss) + { + struct dccp_li_hist_entry *entry; + int i; + + for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { +- entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC); ++ entry = dccp_li_hist_entry_new(GFP_ATOMIC); + if (entry == NULL) { +- dccp_li_hist_purge(hist, list); ++ dccp_li_hist_purge(list); + DCCP_BUG("loss interval list entry is NULL"); + return 0; + } +@@ -140,4 +120,176 @@ + return 1; + } + +-EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); ++/* calculate first loss interval ++ * ++ * returns estimated loss interval in usecs */ ++static u32 dccp_li_calc_first_li(struct sock *sk, ++ struct list_head *hist_list, ++ struct timeval *last_feedback, ++ u16 s, u32 bytes_recv, ++ u32 previous_x_recv) ++{ ++ struct dccp_rx_hist_entry *entry, *next, *tail = NULL; ++ u32 x_recv, p; ++ suseconds_t rtt, delta; ++ struct timeval tstamp = { 0, 0 }; ++ int interval = 0; ++ int win_count = 0; ++ int step = 0; ++ u64 fval; ++ ++ list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) { ++ if 
(dccp_rx_hist_entry_data_packet(entry)) { ++ tail = entry; ++ ++ switch (step) { ++ case 0: ++ tstamp = entry->dccphrx_tstamp; ++ win_count = entry->dccphrx_ccval; ++ step = 1; ++ break; ++ case 1: ++ interval = win_count - entry->dccphrx_ccval; ++ if (interval < 0) ++ interval += TFRC_WIN_COUNT_LIMIT; ++ if (interval > 4) ++ goto found; ++ break; ++ } ++ } ++ } ++ ++ if (unlikely(step == 0)) { ++ DCCP_WARN("%s(%p), packet history has no data packets!\n", ++ dccp_role(sk), sk); ++ return ~0; ++ } ++ ++ if (unlikely(interval == 0)) { ++ DCCP_WARN("%s(%p), Could not find a win_count interval > 0." ++ "Defaulting to 1\n", dccp_role(sk), sk); ++ interval = 1; ++ } ++found: ++ if (!tail) { ++ DCCP_CRIT("tail is null\n"); ++ return ~0; ++ } ++ ++ delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp); ++ DCCP_BUG_ON(delta < 0); ++ ++ rtt = delta * 4 / interval; ++ dccp_pr_debug("%s(%p), approximated RTT to %dus\n", ++ dccp_role(sk), sk, (int)rtt); ++ ++ /* ++ * Determine the length of the first loss interval via inverse lookup. ++ * Assume that X_recv can be computed by the throughput equation ++ * s ++ * X_recv = -------- ++ * R * fval ++ * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1]. ++ */ ++ if (rtt == 0) { /* would result in divide-by-zero */ ++ DCCP_WARN("RTT==0\n"); ++ return ~0; ++ } ++ ++ dccp_timestamp(sk, &tstamp); ++ delta = timeval_delta(&tstamp, last_feedback); ++ DCCP_BUG_ON(delta <= 0); ++ ++ x_recv = scaled_div32(bytes_recv, delta); ++ if (x_recv == 0) { /* would also trigger divide-by-zero */ ++ DCCP_WARN("X_recv==0\n"); ++ if (previous_x_recv == 0) { ++ DCCP_BUG("stored value of X_recv is zero"); ++ return ~0; ++ } ++ x_recv = previous_x_recv; ++ } ++ ++ fval = scaled_div(s, rtt); ++ fval = scaled_div32(fval, x_recv); ++ p = tfrc_calc_x_reverse_lookup(fval); ++ ++ dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " ++ "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); ++ ++ if (p == 0) ++ return ~0; ++ else ++ return 1000000 / p; ++} ++ ++void dccp_li_update_li(struct sock *sk, ++ struct list_head *li_hist_list, ++ struct list_head *hist_list, ++ struct timeval *last_feedback, u16 s, u32 bytes_recv, ++ u32 previous_x_recv, u64 seq_loss, u8 win_loss) ++{ ++ struct dccp_li_hist_entry *head; ++ u64 seq_temp; ++ ++ if (list_empty(li_hist_list)) { ++ if (!dccp_li_hist_interval_new(li_hist_list, seq_loss, ++ win_loss)) ++ return; ++ ++ head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, ++ dccplih_node); ++ head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list, ++ last_feedback, ++ s, bytes_recv, ++ previous_x_recv); ++ } else { ++ struct dccp_li_hist_entry *entry; ++ struct list_head *tail; ++ ++ head = list_entry(li_hist_list->next, struct dccp_li_hist_entry, ++ dccplih_node); ++ /* FIXME win count check removed as was wrong */ ++ /* should make this check with receive history */ ++ /* and compare there as per section 10.2 of RFC4342 */ ++ ++ /* new loss event detected */ ++ /* calculate last interval length */ ++ seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); ++ entry = dccp_li_hist_entry_new(GFP_ATOMIC); ++ ++ if (entry == NULL) { ++ DCCP_BUG("out of memory - can not allocate entry"); ++ return; ++ } ++ ++ list_add(&entry->dccplih_node, li_hist_list); ++ ++ tail = li_hist_list->prev; ++ list_del(tail); ++ kmem_cache_free(dccp_li_cachep, tail); ++ ++ /* Create the newest interval */ ++ entry->dccplih_seqno = seq_loss; ++ entry->dccplih_interval = seq_temp; ++ entry->dccplih_win_count = win_loss; ++ } ++} ++ 
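++/*
++ * Loss interval entries now come from the single shared dccp_li_cachep
++ * slab (set up in dccp_li_init() below) instead of a per-CCID
++ * dccp_li_hist cache.
++ */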
++EXPORT_SYMBOL_GPL(dccp_li_update_li); ++ ++static __init int dccp_li_init(void) ++{ ++ dccp_li_cachep = kmem_cache_create("dccp_li_hist", ++ sizeof(struct dccp_li_hist_entry), ++ 0, SLAB_HWCACHE_ALIGN, NULL, NULL); ++ return dccp_li_cachep == NULL ? -ENOBUFS : 0; ++} ++ ++static __exit void dccp_li_exit(void) ++{ ++ kmem_cache_destroy(dccp_li_cachep); ++} ++ ++module_init(dccp_li_init); ++module_exit(dccp_li_exit); +diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h +--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ccids/lib/loss_interval.h 2007-12-21 15:36:12.000000000 -0500 +@@ -3,8 +3,8 @@ + /* + * net/dccp/ccids/lib/loss_interval.h + * +- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. +- * Copyright (c) 2005 Ian McDonald ++ * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand. ++ * Copyright (c) 2005-7 Ian McDonald + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it +@@ -14,44 +14,16 @@ + */ + + #include +-#include + #include + +-#define DCCP_LI_HIST_IVAL_F_LENGTH 8 +- +-struct dccp_li_hist { +- struct kmem_cache *dccplih_slab; +-}; +- +-extern struct dccp_li_hist *dccp_li_hist_new(const char *name); +-extern void dccp_li_hist_delete(struct dccp_li_hist *hist); +- +-struct dccp_li_hist_entry { +- struct list_head dccplih_node; +- u64 dccplih_seqno:48, +- dccplih_win_count:4; +- u32 dccplih_interval; +-}; +- +-static inline struct dccp_li_hist_entry * +- dccp_li_hist_entry_new(struct dccp_li_hist *hist, +- const gfp_t prio) +-{ +- return kmem_cache_alloc(hist->dccplih_slab, prio); +-} +- +-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist, +- struct dccp_li_hist_entry *entry) +-{ +- if (entry != NULL) +- kmem_cache_free(hist->dccplih_slab, entry); +-} +- +-extern void dccp_li_hist_purge(struct dccp_li_hist *hist, +- struct list_head *list); ++extern void dccp_li_hist_purge(struct list_head *list); + + extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); + +-extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, +- struct list_head *list, const u64 seq_loss, const u8 win_loss); ++extern void dccp_li_update_li(struct sock *sk, ++ struct list_head *li_hist_list, ++ struct list_head *hist_list, ++ struct timeval *last_feedback, u16 s, ++ u32 bytes_recv, u32 previous_x_recv, ++ u64 seq_loss, u8 win_loss); + #endif /* _DCCP_LI_HIST_ */ +diff -Nurb linux-2.6.22-570/net/dccp/dccp.h linux-2.6.22-591/net/dccp/dccp.h +--- linux-2.6.22-570/net/dccp/dccp.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/dccp.h 2007-12-21 15:36:12.000000000 -0500 +@@ -184,7 +184,7 @@ + /* + * Checksumming routines + */ +-static inline int dccp_csum_coverage(const struct sk_buff *skb) ++static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb) + { + const struct dccp_hdr* dh = dccp_hdr(skb); + +@@ -195,7 +195,7 @@ + + static inline void dccp_csum_outgoing(struct sk_buff *skb) + { +- int cov = dccp_csum_coverage(skb); ++ unsigned int cov = dccp_csum_coverage(skb); + + if (cov >= skb->len) + dccp_hdr(skb)->dccph_cscov = 0; +diff -Nurb linux-2.6.22-570/net/dccp/ipv4.c linux-2.6.22-591/net/dccp/ipv4.c +--- linux-2.6.22-570/net/dccp/ipv4.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ipv4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -202,6 +202,7 @@ + */ + static void 
dccp_v4_err(struct sk_buff *skb, u32 info) + { ++ struct net *net = skb->dev->nd_net; + const struct iphdr *iph = (struct iphdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + + (iph->ihl << 2)); +@@ -213,13 +214,16 @@ + __u64 seq; + int err; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (skb->len < (iph->ihl << 2) + 8) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, +- iph->saddr, dh->dccph_sport, inet_iif(skb)); ++ iph->saddr, dh->dccph_sport, inet_iif(skb), net); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; +@@ -441,7 +445,7 @@ + nsk = inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, +- inet_iif(skb)); ++ inet_iif(skb), sk->sk_net); + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); +@@ -458,7 +462,8 @@ + struct sk_buff *skb) + { + struct rtable *rt; +- struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = ((struct rtable *)skb->dst)->rt_iif, + .nl_u = { .ip4_u = + { .daddr = ip_hdr(skb)->saddr, + .saddr = ip_hdr(skb)->daddr, +@@ -809,11 +814,16 @@ + /* this is called when real data arrives */ + static int dccp_v4_rcv(struct sk_buff *skb) + { ++ struct net *net = skb->dev->nd_net; + const struct dccp_hdr *dh; + const struct iphdr *iph; + struct sock *sk; + int min_cov; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + /* Step 1: Check header basics */ + + if (dccp_invalid_packet(skb)) +@@ -852,7 +862,7 @@ + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, +- iph->daddr, dh->dccph_dport, inet_iif(skb)); ++ iph->daddr, dh->dccph_dport, inet_iif(skb), net); + /* + * Step 2: + * If no socket ... +diff -Nurb linux-2.6.22-570/net/dccp/ipv6.c linux-2.6.22-591/net/dccp/ipv6.c +--- linux-2.6.22-570/net/dccp/ipv6.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/ipv6.c 2007-12-21 15:36:15.000000000 -0500 +@@ -94,6 +94,7 @@ + static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info) + { ++ struct net *net = skb->dev->nd_net; + struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); + struct ipv6_pinfo *np; +@@ -102,7 +103,7 @@ + __u64 seq; + + sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport, +- &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); ++ &hdr->saddr, dh->dccph_sport, inet6_iif(skb), net); + + if (sk == NULL) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); +@@ -142,6 +143,7 @@ + for now. 
+ */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); +@@ -242,6 +244,7 @@ + int err = -1; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net, + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); +@@ -358,6 +361,7 @@ + &rxip6h->daddr); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr); + ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr); + +@@ -407,7 +411,7 @@ + nsk = __inet6_lookup_established(&dccp_hashinfo, + &iph->saddr, dh->dccph_sport, + &iph->daddr, ntohs(dh->dccph_dport), +- inet6_iif(skb)); ++ inet6_iif(skb), sk->sk_net); + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); +@@ -584,6 +588,7 @@ + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_DCCP; + ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr); + if (opt != NULL && opt->srcrt != NULL) { +@@ -819,6 +824,7 @@ + { + const struct dccp_hdr *dh; + struct sk_buff *skb = *pskb; ++ struct net *net = skb->dev->nd_net; + struct sock *sk; + int min_cov; + +@@ -849,7 +855,7 @@ + sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr, + dh->dccph_sport, + &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), +- inet6_iif(skb)); ++ inet6_iif(skb), net); + /* + * Step 2: + * If no socket ... +@@ -937,6 +943,7 @@ + return -EAFNOSUPPORT; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; +diff -Nurb linux-2.6.22-570/net/dccp/probe.c linux-2.6.22-591/net/dccp/probe.c +--- linux-2.6.22-570/net/dccp/probe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/dccp/probe.c 2007-12-21 15:36:15.000000000 -0500 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include "dccp.h" + #include "ccid.h" +@@ -168,7 +169,7 @@ + if (IS_ERR(dccpw.fifo)) + return PTR_ERR(dccpw.fifo); + +- if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops)) ++ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops)) + goto err0; + + ret = register_jprobe(&dccp_send_probe); +@@ -178,7 +179,7 @@ + pr_info("DCCP watch registered (port=%d)\n", port); + return 0; + err1: +- proc_net_remove(procname); ++ proc_net_remove(&init_net, procname); + err0: + kfifo_free(dccpw.fifo); + return ret; +@@ -188,7 +189,7 @@ + static __exit void dccpprobe_exit(void) + { + kfifo_free(dccpw.fifo); +- proc_net_remove(procname); ++ proc_net_remove(&init_net, procname); + unregister_jprobe(&dccp_send_probe); + + } +diff -Nurb linux-2.6.22-570/net/decnet/af_decnet.c linux-2.6.22-591/net/decnet/af_decnet.c +--- linux-2.6.22-570/net/decnet/af_decnet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/af_decnet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -131,6 +131,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -470,10 +471,10 @@ + .obj_size = sizeof(struct dn_sock), + }; + +-static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp) ++static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp) + { + struct dn_scp *scp; +- struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1); ++ struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1); + + if (!sk) + goto out; +@@ -674,10 +675,13 @@ + + + +-static int dn_create(struct socket *sock, int protocol) ++static int 
dn_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + switch(sock->type) { + case SOCK_SEQPACKET: + if (protocol != DNPROTO_NSP) +@@ -690,7 +694,7 @@ + } + + +- if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL) ++ if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL) + return -ENOBUFS; + + sk->sk_protocol = protocol; +@@ -747,7 +751,7 @@ + if (dn_ntohs(saddr->sdn_nodeaddrl)) { + read_lock(&dev_base_lock); + ldev = NULL; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (!dev->dn_ptr) + continue; + if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) { +@@ -943,6 +947,7 @@ + + err = -EHOSTUNREACH; + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.oif = sk->sk_bound_dev_if; + fl.fld_dst = dn_saddr2dn(&scp->peer); + fl.fld_src = dn_saddr2dn(&scp->addr); +@@ -1090,7 +1095,7 @@ + + cb = DN_SKB_CB(skb); + sk->sk_ack_backlog--; +- newsk = dn_alloc_sock(newsock, sk->sk_allocation); ++ newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation); + if (newsk == NULL) { + release_sock(sk); + kfree_skb(skb); +@@ -2085,6 +2090,9 @@ + { + struct net_device *dev = (struct net_device *)ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch(event) { + case NETDEV_UP: + dn_dev_up(dev); +@@ -2399,7 +2407,7 @@ + dev_add_pack(&dn_dix_packet_type); + register_netdevice_notifier(&dn_dev_notifier); + +- proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops); ++ proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops); + dn_register_sysctl(); + out: + return rc; +@@ -2428,7 +2436,7 @@ + dn_neigh_cleanup(); + dn_fib_cleanup(); + +- proc_net_remove("decnet"); ++ proc_net_remove(&init_net, "decnet"); + + proto_unregister(&dn_proto); + } +diff -Nurb linux-2.6.22-570/net/decnet/dn_dev.c linux-2.6.22-591/net/decnet/dn_dev.c +--- linux-2.6.22-570/net/decnet/dn_dev.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/decnet/dn_dev.c 2007-12-21 15:36:15.000000000 -0500 +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -513,7 +514,7 @@ + ifr->ifr_name[IFNAMSIZ-1] = 0; + + #ifdef CONFIG_KMOD +- dev_load(ifr->ifr_name); ++ dev_load(&init_net, ifr->ifr_name); + #endif + + switch(cmd) { +@@ -531,7 +532,7 @@ + + rtnl_lock(); + +- if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) { ++ if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) { + ret = -ENODEV; + goto done; + } +@@ -629,7 +630,7 @@ + { + struct net_device *dev; + struct dn_dev *dn_dev = NULL; +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(&init_net, ifindex); + if (dev) { + dn_dev = dev->dn_ptr; + dev_put(dev); +@@ -647,12 +648,16 @@ + + static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct nlattr *tb[IFA_MAX+1]; + struct dn_dev *dn_db; + struct ifaddrmsg *ifm; + struct dn_ifaddr *ifa, **ifap; + int err = -EADDRNOTAVAIL; + ++ if (net != &init_net) ++ goto errout; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy); + if (err < 0) + goto errout; +@@ -679,6 +684,7 @@ + + static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct nlattr *tb[IFA_MAX+1]; + struct net_device *dev; + struct dn_dev *dn_db; +@@ -686,6 +692,9 @@ + struct dn_ifaddr *ifa; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, 
dn_ifa_policy); + if (err < 0) + return err; +@@ -694,7 +703,7 @@ + return -EINVAL; + + ifm = nlmsg_data(nlh); +- if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) ++ if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL) + return -ENODEV; + + if ((dn_db = dev->dn_ptr) == NULL) { +@@ -783,24 +792,28 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); + } + + static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx, dn_idx = 0, skip_ndevs, skip_naddr; + struct net_device *dev; + struct dn_dev *dn_db; + struct dn_ifaddr *ifa; + ++ if (net != &init_net) ++ return 0; ++ + skip_ndevs = cb->args[0]; + skip_naddr = cb->args[1]; + + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (idx < skip_ndevs) + goto cont; + else if (idx > skip_ndevs) { +@@ -869,10 +882,10 @@ + rv = dn_dev_get_first(dev, addr); + read_unlock(&dev_base_lock); + dev_put(dev); +- if (rv == 0 || dev == &loopback_dev) ++ if (rv == 0 || dev == &init_net.loopback_dev) + return rv; + } +- dev = &loopback_dev; ++ dev = &init_net.loopback_dev; + dev_hold(dev); + goto last_chance; + } +@@ -1299,7 +1312,7 @@ + struct net_device *dev; + + rtnl_lock(); +- for_each_netdev(dev) ++ for_each_netdev(&init_net, dev) + dn_dev_down(dev); + rtnl_unlock(); + +@@ -1310,7 +1323,7 @@ + struct net_device *dev; + + rtnl_lock(); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (dev->flags & IFF_UP) + dn_dev_up(dev); + } +@@ -1344,7 +1357,7 @@ + return SEQ_START_TOKEN; + + i = 1; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (!is_dn_dev(dev)) + continue; + +@@ -1363,9 +1376,9 @@ + + dev = (struct net_device *)v; + if (v == SEQ_START_TOKEN) +- dev = net_device_entry(&dev_base_head); ++ dev = net_device_entry(&init_net.dev_base_head); + +- for_each_netdev_continue(dev) { ++ for_each_netdev_continue(&init_net, dev) { + if (!is_dn_dev(dev)) + continue; + +@@ -1465,7 +1478,7 @@ + rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL); + rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr); + +- proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops); ++ proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops); + + #ifdef CONFIG_SYSCTL + { +@@ -1486,7 +1499,7 @@ + } + #endif /* CONFIG_SYSCTL */ + +- proc_net_remove("decnet_dev"); ++ proc_net_remove(&init_net, "decnet_dev"); + + dn_dev_devices_off(); + } +diff -Nurb linux-2.6.22-570/net/decnet/dn_fib.c linux-2.6.22-591/net/decnet/dn_fib.c +--- linux-2.6.22-570/net/decnet/dn_fib.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_fib.c 2007-12-21 15:36:15.000000000 -0500 +@@ -203,8 +203,6 @@ + struct flowi fl; + struct dn_fib_res res; + +- memset(&fl, 0, sizeof(fl)); +- + if (nh->nh_flags&RTNH_F_ONLINK) { + struct net_device *dev; + +@@ -212,7 +210,7 @@ + return -EINVAL; + if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST) + return -EINVAL; +- if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) ++ if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; +@@ -223,6 +221,7 @@ + } + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.fld_dst = nh->nh_gw; + 
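
The same conversion recurs across every DECnet entry point in this patch: each flowi is stamped with fl_net = &init_net, and each netlink or ioctl handler resolves its namespace from the request socket and refuses anything but the initial namespace, since DECnet itself is not namespace-aware. A minimal sketch of that guard, using dn_nl_example as a hypothetical handler name (the real handlers return -EINVAL or -EADDRNOTAVAIL as appropriate):

	static int dn_nl_example(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
	{
		struct net *net = skb->sk->sk_net;

		/* DECnet state lives only in the initial namespace,
		 * so bail out early for any other net. */
		if (net != &init_net)
			return -EINVAL;

		/* ... handle the request against init_net ... */
		return 0;
	}
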
fl.oif = nh->nh_oif; + fl.fld_scope = r->rtm_scope + 1; +@@ -255,7 +254,7 @@ + if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + return -EINVAL; + +- dev = __dev_get_by_index(nh->nh_oif); ++ dev = __dev_get_by_index(&init_net, nh->nh_oif); + if (dev == NULL || dev->dn_ptr == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) +@@ -355,7 +354,7 @@ + if (nhs != 1 || nh->nh_gw) + goto err_inval; + nh->nh_scope = RT_SCOPE_NOWHERE; +- nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); ++ nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); + err = -ENODEV; + if (nh->nh_dev == NULL) + goto failure; +@@ -506,10 +505,14 @@ + + static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct dn_fib_table *tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + ++ if (net != &init_net) ++ return -EINVAL; ++ + if (dn_fib_check_attr(r, rta)) + return -EINVAL; + +@@ -522,10 +525,14 @@ + + static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct dn_fib_table *tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); + ++ if (net != &init_net) ++ return -EINVAL; ++ + if (dn_fib_check_attr(r, rta)) + return -EINVAL; + +@@ -602,7 +609,7 @@ + + /* Scan device list */ + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + dn_db = dev->dn_ptr; + if (dn_db == NULL) + continue; +diff -Nurb linux-2.6.22-570/net/decnet/dn_neigh.c linux-2.6.22-591/net/decnet/dn_neigh.c +--- linux-2.6.22-570/net/decnet/dn_neigh.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_neigh.c 2007-12-21 15:36:15.000000000 -0500 +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -591,6 +592,7 @@ + + seq = file->private_data; + seq->private = s; ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -598,12 +600,20 @@ + goto out; + } + ++static int dn_neigh_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct neigh_seq_state *state = seq->private; ++ put_net(state->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations dn_neigh_seq_fops = { + .owner = THIS_MODULE, + .open = dn_neigh_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = dn_neigh_seq_release, + }; + + #endif +@@ -611,11 +621,11 @@ + void __init dn_neigh_init(void) + { + neigh_table_init(&dn_neigh_table); +- proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); ++ proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops); + } + + void __exit dn_neigh_cleanup(void) + { +- proc_net_remove("decnet_neigh"); ++ proc_net_remove(&init_net, "decnet_neigh"); + neigh_table_clear(&dn_neigh_table); + } +diff -Nurb linux-2.6.22-570/net/decnet/dn_nsp_out.c linux-2.6.22-591/net/decnet/dn_nsp_out.c +--- linux-2.6.22-570/net/decnet/dn_nsp_out.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_nsp_out.c 2007-12-21 15:36:15.000000000 -0500 +@@ -91,6 +91,7 @@ + } + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.oif = sk->sk_bound_dev_if; + fl.fld_src = dn_saddr2dn(&scp->addr); + fl.fld_dst = dn_saddr2dn(&scp->peer); +diff -Nurb linux-2.6.22-570/net/decnet/dn_route.c linux-2.6.22-591/net/decnet/dn_route.c +--- linux-2.6.22-570/net/decnet/dn_route.c 2007-07-08 
19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -82,6 +82,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -583,6 +584,9 @@ + struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; + unsigned char padlen = 0; + ++ if (dev->nd_net != &init_net) ++ goto dump_it; ++ + if (dn == NULL) + goto dump_it; + +@@ -877,13 +881,14 @@ + + static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) + { +- struct flowi fl = { .nl_u = { .dn_u = ++ struct flowi fl = { .fl_net = &init_net, ++ .nl_u = { .dn_u = + { .daddr = oldflp->fld_dst, + .saddr = oldflp->fld_src, + .scope = RT_SCOPE_UNIVERSE, + } }, + .mark = oldflp->mark, +- .iif = loopback_dev.ifindex, ++ .iif = init_net.loopback_dev.ifindex, + .oif = oldflp->oif }; + struct dn_route *rt = NULL; + struct net_device *dev_out = NULL, *dev; +@@ -900,11 +905,11 @@ + "dn_route_output_slow: dst=%04x src=%04x mark=%d" + " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), + dn_ntohs(oldflp->fld_src), +- oldflp->mark, loopback_dev.ifindex, oldflp->oif); ++ oldflp->mark, init_net.loopback_dev.ifindex, oldflp->oif); + + /* If we have an output interface, verify its a DECnet device */ + if (oldflp->oif) { +- dev_out = dev_get_by_index(oldflp->oif); ++ dev_out = dev_get_by_index(&init_net, oldflp->oif); + err = -ENODEV; + if (dev_out && dev_out->dn_ptr == NULL) { + dev_put(dev_out); +@@ -925,7 +930,7 @@ + goto out; + } + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (!dev->dn_ptr) + continue; + if (!dn_dev_islocal(dev, oldflp->fld_src)) +@@ -953,7 +958,7 @@ + err = -EADDRNOTAVAIL; + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &init_net.loopback_dev; + dev_hold(dev_out); + if (!fl.fld_dst) { + fl.fld_dst = +@@ -962,7 +967,7 @@ + if (!fl.fld_dst) + goto out; + } +- fl.oif = loopback_dev.ifindex; ++ fl.oif = init_net.loopback_dev.ifindex; + res.type = RTN_LOCAL; + goto make_route; + } +@@ -995,7 +1000,7 @@ + * here + */ + if (!try_hard) { +- neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); ++ neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst); + if (neigh) { + if ((oldflp->oif && + (neigh->dev->ifindex != oldflp->oif)) || +@@ -1008,7 +1013,7 @@ + if (dev_out) + dev_put(dev_out); + if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { +- dev_out = &loopback_dev; ++ dev_out = &init_net.loopback_dev; + res.type = RTN_LOCAL; + } else { + dev_out = neigh->dev; +@@ -1029,7 +1034,7 @@ + /* Possible improvement - check all devices for local addr */ + if (dn_dev_islocal(dev_out, fl.fld_dst)) { + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &init_net.loopback_dev; + dev_hold(dev_out); + res.type = RTN_LOCAL; + goto select_source; +@@ -1065,7 +1070,7 @@ + fl.fld_src = fl.fld_dst; + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &init_net.loopback_dev; + dev_hold(dev_out); + fl.oif = dev_out->ifindex; + if (res.fi) +@@ -1103,6 +1108,7 @@ + atomic_set(&rt->u.dst.__refcnt, 1); + rt->u.dst.flags = DST_HOST; + ++ rt->fl.fl_net = &init_net; + rt->fl.fld_src = oldflp->fld_src; + rt->fl.fld_dst = oldflp->fld_dst; + rt->fl.oif = oldflp->oif; +@@ -1226,7 +1232,8 @@ + int flags = 0; + __le16 gateway = 0; + __le16 local_src = 0; +- struct flowi fl = { .nl_u = { .dn_u = ++ struct flowi fl = { .fl_net = &init_net, ++ .nl_u = { .dn_u = + { .daddr = cb->dst, + .saddr = cb->src, + .scope = RT_SCOPE_UNIVERSE, +@@ -1374,6 +1381,7 @@ + 
rt->rt_dst_map = fl.fld_dst; + rt->rt_src_map = fl.fld_src; + ++ rt->fl.fl_net = &init_net; + rt->fl.fld_src = cb->src; + rt->fl.fld_dst = cb->dst; + rt->fl.oif = 0; +@@ -1526,6 +1534,7 @@ + */ + static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = in_skb->sk->sk_net; + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct dn_route *rt = NULL; +@@ -1534,7 +1543,11 @@ + struct sk_buff *skb; + struct flowi fl; + ++ if (net != &init_net) ++ return -EINVAL; ++ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = DNPROTO_NSP; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); +@@ -1552,7 +1565,7 @@ + + if (fl.iif) { + struct net_device *dev; +- if ((dev = dev_get_by_index(fl.iif)) == NULL) { ++ if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) { + kfree_skb(skb); + return -ENODEV; + } +@@ -1598,7 +1611,7 @@ + goto out_free; + } + +- return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); ++ return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + + out_free: + kfree_skb(skb); +@@ -1611,10 +1624,14 @@ + */ + int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + struct dn_route *rt; + int h, s_h; + int idx, s_idx; + ++ if (net != &init_net) ++ return 0; ++ + if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) + return -EINVAL; + if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) +@@ -1814,7 +1831,7 @@ + + dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); + +- proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); ++ proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); + + #ifdef CONFIG_DECNET_ROUTER + rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump); +@@ -1829,6 +1846,6 @@ + del_timer(&dn_route_timer); + dn_run_flush(0); + +- proc_net_remove("decnet_cache"); ++ proc_net_remove(&init_net, "decnet_cache"); + } + +diff -Nurb linux-2.6.22-570/net/decnet/dn_rules.c linux-2.6.22-591/net/decnet/dn_rules.c +--- linux-2.6.22-570/net/decnet/dn_rules.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_rules.c 2007-12-21 15:36:15.000000000 -0500 +@@ -186,7 +186,10 @@ + + unsigned dnet_addr_type(__le16 addr) + { +- struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; ++ struct flowi fl = { ++ .fl_net = &init_net, ++ .nl_u = { .dn_u = { .daddr = addr } } ++ }; + struct dn_fib_res res; + unsigned ret = RTN_UNICAST; + struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); +@@ -223,7 +226,7 @@ + return -ENOBUFS; + } + +-static u32 dn_fib_rule_default_pref(void) ++static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops) + { + struct list_head *pos; + struct fib_rule *rule; +@@ -240,7 +243,7 @@ + return 0; + } + +-static void dn_fib_rule_flush_cache(void) ++static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops) + { + dn_rt_cache_flush(-1); + } +@@ -265,12 +268,12 @@ + void __init dn_fib_rules_init(void) + { + list_add_tail(&default_rule.common.list, &dn_fib_rules); +- fib_rules_register(&dn_fib_rules_ops); ++ fib_rules_register(&init_net, &dn_fib_rules_ops); + } + + void __exit dn_fib_rules_cleanup(void) + { +- fib_rules_unregister(&dn_fib_rules_ops); ++ fib_rules_unregister(&init_net, &dn_fib_rules_ops); + } + + +diff -Nurb linux-2.6.22-570/net/decnet/dn_table.c linux-2.6.22-591/net/decnet/dn_table.c +--- linux-2.6.22-570/net/decnet/dn_table.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/dn_table.c 
2007-12-21 15:36:15.000000000 -0500 +@@ -375,10 +375,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); ++ err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); + } + + static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, +@@ -463,12 +463,16 @@ + + int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct dn_fib_table *tb; + struct hlist_node *node; + int dumped = 0; + ++ if (net != &init_net) ++ return 0; ++ + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return dn_cache_dump(skb, cb); +diff -Nurb linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c +--- linux-2.6.22-570/net/decnet/netfilter/dn_rtmsg.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/netfilter/dn_rtmsg.c 2007-12-21 15:36:15.000000000 -0500 +@@ -93,6 +93,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + dnrmg_send_peer(*pskb); + return NF_ACCEPT; + } +@@ -137,7 +141,8 @@ + { + int rv = 0; + +- dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, ++ dnrmg = netlink_kernel_create(&init_net, ++ NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_sk, NULL, THIS_MODULE); + if (dnrmg == NULL) { + printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); +diff -Nurb linux-2.6.22-570/net/decnet/sysctl_net_decnet.c linux-2.6.22-591/net/decnet/sysctl_net_decnet.c +--- linux-2.6.22-570/net/decnet/sysctl_net_decnet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/decnet/sysctl_net_decnet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -259,7 +259,7 @@ + + devname[newlen] = 0; + +- dev = dev_get_by_name(devname); ++ dev = dev_get_by_name(&init_net, devname); + if (dev == NULL) + return -ENODEV; + +@@ -299,7 +299,7 @@ + devname[*lenp] = 0; + strip_it(devname); + +- dev = dev_get_by_name(devname); ++ dev = dev_get_by_name(&init_net, devname); + if (dev == NULL) + return -ENODEV; + +diff -Nurb linux-2.6.22-570/net/econet/af_econet.c linux-2.6.22-591/net/econet/af_econet.c +--- linux-2.6.22-570/net/econet/af_econet.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/econet/af_econet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -608,12 +608,15 @@ + * Create an Econet socket + */ + +-static int econet_create(struct socket *sock, int protocol) ++static int econet_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct econet_sock *eo; + int err; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + /* Econet only provides datagram services. 
*/ + if (sock->type != SOCK_DGRAM) + return -ESOCKTNOSUPPORT; +@@ -621,7 +624,7 @@ + sock->state = SS_UNCONNECTED; + + err = -ENOBUFS; +- sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1); ++ sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1); + if (sk == NULL) + goto out; + +@@ -659,7 +662,7 @@ + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) + return -EFAULT; + +- if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL) ++ if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL) + return -ENODEV; + + sec = (struct sockaddr_ec *)&ifr.ifr_addr; +@@ -1062,6 +1065,9 @@ + struct sock *sk; + struct ec_device *edev = dev->ec_ptr; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop; + +@@ -1116,6 +1122,9 @@ + struct net_device *dev = (struct net_device *)data; + struct ec_device *edev; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch (msg) { + case NETDEV_UNREGISTER: + /* A device has gone down - kill any data we hold for it. */ +diff -Nurb linux-2.6.22-570/net/ieee80211/ieee80211_module.c linux-2.6.22-591/net/ieee80211/ieee80211_module.c +--- linux-2.6.22-570/net/ieee80211/ieee80211_module.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ieee80211/ieee80211_module.c 2007-12-21 15:36:15.000000000 -0500 +@@ -264,7 +264,7 @@ + struct proc_dir_entry *e; + + ieee80211_debug_level = debug; +- ieee80211_proc = proc_mkdir(DRV_NAME, proc_net); ++ ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); + if (ieee80211_proc == NULL) { + IEEE80211_ERROR("Unable to create " DRV_NAME + " proc directory\n"); +@@ -273,7 +273,7 @@ + e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, + ieee80211_proc); + if (!e) { +- remove_proc_entry(DRV_NAME, proc_net); ++ remove_proc_entry(DRV_NAME, init_net.proc_net); + ieee80211_proc = NULL; + return -EIO; + } +@@ -293,7 +293,7 @@ + #ifdef CONFIG_IEEE80211_DEBUG + if (ieee80211_proc) { + remove_proc_entry("debug_level", ieee80211_proc); +- remove_proc_entry(DRV_NAME, proc_net); ++ remove_proc_entry(DRV_NAME, init_net.proc_net); + ieee80211_proc = NULL; + } + #endif /* CONFIG_IEEE80211_DEBUG */ +diff -Nurb linux-2.6.22-570/net/ipv4/Kconfig linux-2.6.22-591/net/ipv4/Kconfig +--- linux-2.6.22-570/net/ipv4/Kconfig 2007-12-21 15:36:02.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -116,48 +116,6 @@ + equal "cost" and chooses one of them in a non-deterministic fashion + if a matching packet arrives. + +-config IP_ROUTE_MULTIPATH_CACHED +- bool "IP: equal cost multipath with caching support (EXPERIMENTAL)" +- depends on IP_ROUTE_MULTIPATH +- help +- Normally, equal cost multipath routing is not supported by the +- routing cache. If you say Y here, alternative routes are cached +- and on cache lookup a route is chosen in a configurable fashion. +- +- If unsure, say N. +- +-config IP_ROUTE_MULTIPATH_RR +- tristate "MULTIPATH: round robin algorithm" +- depends on IP_ROUTE_MULTIPATH_CACHED +- help +- Multipath routes are chosen according to Round Robin +- +-config IP_ROUTE_MULTIPATH_RANDOM +- tristate "MULTIPATH: random algorithm" +- depends on IP_ROUTE_MULTIPATH_CACHED +- help +- Multipath routes are chosen in a random fashion. Actually, +- there is no weight for a route. The advantage of this policy +- is that it is implemented stateless and therefore introduces only +- a very small delay. 
+- +-config IP_ROUTE_MULTIPATH_WRANDOM +- tristate "MULTIPATH: weighted random algorithm" +- depends on IP_ROUTE_MULTIPATH_CACHED +- help +- Multipath routes are chosen in a weighted random fashion. +- The per route weights are the weights visible via ip route 2. As the +- corresponding state management introduces some overhead routing delay +- is increased. +- +-config IP_ROUTE_MULTIPATH_DRR +- tristate "MULTIPATH: interface round robin algorithm" +- depends on IP_ROUTE_MULTIPATH_CACHED +- help +- Connections are distributed in a round robin fashion over the +- available interfaces. This policy makes sense if the connections +- should be primarily distributed on interfaces and not on routes. +- + config IP_ROUTE_VERBOSE + bool "IP: verbose route monitoring" + depends on IP_ADVANCED_ROUTER +diff -Nurb linux-2.6.22-570/net/ipv4/Makefile linux-2.6.22-591/net/ipv4/Makefile +--- linux-2.6.22-570/net/ipv4/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -29,14 +29,9 @@ + obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o + obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o + obj-$(CONFIG_IP_PNP) += ipconfig.o +-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o +-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o +-obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o +-obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o + obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ + obj-$(CONFIG_IP_VS) += ipvs/ + obj-$(CONFIG_INET_DIAG) += inet_diag.o +-obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o + obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o + obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o + obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o +diff -Nurb linux-2.6.22-570/net/ipv4/af_inet.c linux-2.6.22-591/net/ipv4/af_inet.c +--- linux-2.6.22-570/net/ipv4/af_inet.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/af_inet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -244,7 +244,7 @@ + * Create an inet socket. + */ + +-static int inet_create(struct socket *sock, int protocol) ++static int inet_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct list_head *p; +@@ -310,6 +310,10 @@ + goto out_rcu_unlock; + } + ++ err = -EPROTONOSUPPORT; ++ if (!(answer->flags & INET_PROTOSW_NETNS) && (net != &init_net)) ++ goto out_rcu_unlock; ++ + err = -EPERM; + if ((protocol == IPPROTO_ICMP) && + nx_capable(answer->capability, NXC_RAW_ICMP)) +@@ -326,7 +330,7 @@ + BUG_TRAP(answer_prot->slab != NULL); + + err = -ENOBUFS; +- sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1); ++ sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1); + if (sk == NULL) + goto out; + +@@ -344,7 +348,7 @@ + inet->hdrincl = 1; + } + +- if (ipv4_config.no_pmtu_disc) ++ if (net->sysctl_ipv4_no_pmtu_disc) + inet->pmtudisc = IP_PMTUDISC_DONT; + else + inet->pmtudisc = IP_PMTUDISC_WANT; +@@ -423,12 +427,12 @@ + } + + /* It is off by default, see below. 
*/ +-int sysctl_ip_nonlocal_bind __read_mostly; + + int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + { + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct inet_sock *inet = inet_sk(sk); + struct nx_v4_sock_addr nsa; + unsigned short snum; +@@ -448,7 +452,7 @@ + if (err) + goto out; + +- chk_addr_ret = inet_addr_type(nsa.saddr); ++ chk_addr_ret = inet_addr_type(net, nsa.saddr); + + /* Not specified by any standard per-se, however it breaks too + * many applications when removed. It is unfortunate since +@@ -458,7 +462,7 @@ + * is temporarily down) + */ + err = -EADDRNOTAVAIL; +- if (!sysctl_ip_nonlocal_bind && ++ if (!net->sysctl_ip_nonlocal_bind && + !inet->freebind && + nsa.saddr != INADDR_ANY && + chk_addr_ret != RTN_LOCAL && +@@ -787,6 +791,7 @@ + int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + int err = 0; + + switch (cmd) { +@@ -799,12 +804,12 @@ + case SIOCADDRT: + case SIOCDELRT: + case SIOCRTMSG: +- err = ip_rt_ioctl(cmd, (void __user *)arg); ++ err = ip_rt_ioctl(net, cmd, (void __user *)arg); + break; + case SIOCDARP: + case SIOCGARP: + case SIOCSARP: +- err = arp_ioctl(cmd, (void __user *)arg); ++ err = arp_ioctl(net, cmd, (void __user *)arg); + break; + case SIOCGIFADDR: + case SIOCSIFADDR: +@@ -817,7 +822,7 @@ + case SIOCSIFPFLAGS: + case SIOCGIFPFLAGS: + case SIOCSIFFLAGS: +- err = devinet_ioctl(cmd, (void __user *)arg); ++ err = devinet_ioctl(net, cmd, (void __user *)arg); + break; + default: + if (sk->sk_prot->ioctl) +@@ -927,7 +932,8 @@ + .capability = -1, + .no_check = 0, + .flags = INET_PROTOSW_PERMANENT | +- INET_PROTOSW_ICSK, ++ INET_PROTOSW_ICSK | ++ INET_PROTOSW_NETNS, + }, + + { +@@ -937,7 +943,8 @@ + .ops = &inet_dgram_ops, + .capability = -1, + .no_check = UDP_CSUM_DEFAULT, +- .flags = INET_PROTOSW_PERMANENT, ++ .flags = INET_PROTOSW_PERMANENT | ++ INET_PROTOSW_NETNS, + }, + + +@@ -948,7 +955,8 @@ + .ops = &inet_sockraw_ops, + .capability = CAP_NET_RAW, + .no_check = UDP_CSUM_DEFAULT, +- .flags = INET_PROTOSW_REUSE, ++ .flags = INET_PROTOSW_REUSE | ++ INET_PROTOSW_NETNS, + } + }; + +@@ -1029,8 +1037,6 @@ + * Shall we try to damage output packets if routing dev changes? + */ + +-int sysctl_ip_dynaddr __read_mostly; +- + static int inet_sk_reselect_saddr(struct sock *sk) + { + struct inet_sock *inet = inet_sk(sk); +@@ -1059,7 +1065,7 @@ + if (new_saddr == old_saddr) + return 0; + +- if (sysctl_ip_dynaddr > 1) { ++ if (sk->sk_net->sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "%s(): shifting inet->" + "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", + __FUNCTION__, +@@ -1098,6 +1104,7 @@ + daddr = inet->opt->faddr; + { + struct flowi fl = { ++ .fl_net = sk->sk_net, + .oif = sk->sk_bound_dev_if, + .nl_u = { + .ip4_u = { +@@ -1127,7 +1134,7 @@ + * Other protocols have to map its equivalent state to TCP_SYN_SENT. + * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. 
-acme + */ +- if (!sysctl_ip_dynaddr || ++ if (!sk->sk_net->sysctl_ip_dynaddr || + sk->sk_state != TCP_SYN_SENT || + (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || + (err = inet_sk_reselect_saddr(sk)) != 0) +@@ -1183,6 +1190,9 @@ + int ihl; + int id; + ++ if (!(features & NETIF_F_V4_CSUM)) ++ features &= ~NETIF_F_SG; ++ + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_UDP | +@@ -1353,6 +1363,24 @@ + .gso_segment = inet_gso_segment, + }; + ++ ++static int inet_net_init(struct net *net) ++{ ++ net->sysctl_ip_default_ttl = IPDEFTTL; ++ net->sysctl_ip_dynaddr = 0; ++ ++ return 0; ++} ++ ++static void inet_net_exit(struct net *net) ++{ ++} ++ ++static struct pernet_operations inet_net_ops = { ++ .init = inet_net_init, ++ .exit = inet_net_exit, ++}; ++ + static int __init inet_init(void) + { + struct sk_buff *dummy_skb; +@@ -1374,6 +1402,10 @@ + if (rc) + goto out_unregister_udp_proto; + ++ rc = register_pernet_subsys(&inet_net_ops); ++ if (rc) ++ goto out_unregister_raw_proto; ++ + /* + * Tell SOCKET that we are alive... + */ +@@ -1450,6 +1482,8 @@ + rc = 0; + out: + return rc; ++out_unregister_raw_proto: ++ proto_unregister(&raw_prot); + out_unregister_udp_proto: + proto_unregister(&udp_prot); + out_unregister_tcp_proto: +@@ -1472,15 +1506,11 @@ + goto out_tcp; + if (udp4_proc_init()) + goto out_udp; +- if (fib_proc_init()) +- goto out_fib; + if (ip_misc_proc_init()) + goto out_misc; + out: + return rc; + out_misc: +- fib_proc_exit(); +-out_fib: + udp4_proc_exit(); + out_udp: + tcp4_proc_exit(); +@@ -1516,4 +1546,3 @@ + EXPORT_SYMBOL(inet_stream_ops); + EXPORT_SYMBOL(inet_unregister_protosw); + EXPORT_SYMBOL(net_statistics); +-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); +diff -Nurb linux-2.6.22-570/net/ipv4/ah4.c linux-2.6.22-591/net/ipv4/ah4.c +--- linux-2.6.22-570/net/ipv4/ah4.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/ah4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -198,6 +198,9 @@ + struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2)); + struct xfrm_state *x; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || + icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; +@@ -339,3 +342,4 @@ + module_init(ah4_init); + module_exit(ah4_fini); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); +diff -Nurb linux-2.6.22-570/net/ipv4/arp.c linux-2.6.22-591/net/ipv4/arp.c +--- linux-2.6.22-570/net/ipv4/arp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/arp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -109,6 +109,7 @@ + #include + #include + #include ++#include + #include + #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) + #include +@@ -235,10 +236,11 @@ + { + __be32 addr = *(__be32*)neigh->primary_key; + struct net_device *dev = neigh->dev; ++ struct net *net = dev->nd_net; + struct in_device *in_dev; + struct neigh_parms *parms; + +- neigh->type = inet_addr_type(addr); ++ neigh->type = inet_addr_type(net, addr); + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); +@@ -332,6 +334,7 @@ + __be32 saddr = 0; + u8 *dst_ha = NULL; + struct net_device *dev = neigh->dev; ++ struct net *net = dev->nd_net; + __be32 target = *(__be32*)neigh->primary_key; + int probes = atomic_read(&neigh->probes); + struct in_device *in_dev = in_dev_get(dev); +@@ -342,14 +345,14 @@ + switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { + default: + case 0: /* By default announce any local IP */ +- if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL) ++ if (skb && 
inet_addr_type(net, ip_hdr(skb)->saddr) == RTN_LOCAL) + saddr = ip_hdr(skb)->saddr; + break; + case 1: /* Restrict announcements of saddr in same subnet */ + if (!skb) + break; + saddr = ip_hdr(skb)->saddr; +- if (inet_addr_type(saddr) == RTN_LOCAL) { ++ if (inet_addr_type(net, saddr) == RTN_LOCAL) { + /* saddr should be known to target */ + if (inet_addr_onlink(in_dev, target, saddr)) + break; +@@ -386,6 +389,7 @@ + static int arp_ignore(struct in_device *in_dev, struct net_device *dev, + __be32 sip, __be32 tip) + { ++ struct net *net = dev->nd_net; + int scope; + + switch (IN_DEV_ARP_IGNORE(in_dev)) { +@@ -416,13 +420,15 @@ + default: + return 0; + } +- return !inet_confirm_addr(dev, sip, tip, scope); ++ return !inet_confirm_addr(net, dev, sip, tip, scope); + } + + static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) + { +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, +- .saddr = tip } } }; ++ struct flowi fl = { ++ .fl_net = dev->nd_net, ++ .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } ++ }; + struct rtable *rt; + int flag = 0; + /*unsigned long now; */ +@@ -469,6 +475,7 @@ + int arp_find(unsigned char *haddr, struct sk_buff *skb) + { + struct net_device *dev = skb->dev; ++ struct net *net = dev->nd_net; + __be32 paddr; + struct neighbour *n; + +@@ -480,7 +487,7 @@ + + paddr = ((struct rtable*)skb->dst)->rt_gateway; + +- if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev)) ++ if (arp_set_predefined(inet_addr_type(net, paddr), haddr, paddr, dev)) + return 0; + + n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); +@@ -704,6 +711,7 @@ + static int arp_process(struct sk_buff *skb) + { + struct net_device *dev = skb->dev; ++ struct net *net = dev->nd_net; + struct in_device *in_dev = in_dev_get(dev); + struct arphdr *arp; + unsigned char *arp_ptr; +@@ -824,7 +832,7 @@ + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { + if (arp->ar_op == htons(ARPOP_REQUEST) && +- inet_addr_type(tip) == RTN_LOCAL && ++ inet_addr_type(net, tip) == RTN_LOCAL && + !arp_ignore(in_dev,dev,sip,tip)) + arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr); + goto out; +@@ -854,7 +862,7 @@ + } else if (IN_DEV_FORWARD(in_dev)) { + if ((rt->rt_flags&RTCF_DNAT) || + (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && +- (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) { ++ (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) { + n = neigh_event_ns(&arp_tbl, sha, &sip, dev); + if (n) + neigh_release(n); +@@ -877,14 +885,14 @@ + + n = __neigh_lookup(&arp_tbl, &sip, dev, 0); + +- if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) { ++ if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) { + /* Unsolicited ARP is not accepted by default. + It is possible, that this option should be enabled for some + devices (strip is candidate) + */ + if (n == NULL && + arp->ar_op == htons(ARPOP_REPLY) && +- inet_addr_type(sip) == RTN_UNICAST) ++ inet_addr_type(net, sip) == RTN_UNICAST) + n = __neigh_lookup(&arp_tbl, &sip, dev, -1); + } + +@@ -966,7 +974,7 @@ + * Set (create) an ARP cache entry. 
+ */ + +-static int arp_req_set(struct arpreq *r, struct net_device * dev) ++static int arp_req_set(struct net *net, struct arpreq *r, struct net_device * dev) + { + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + struct neighbour *neigh; +@@ -977,17 +985,17 @@ + if (mask && mask != htonl(0xFFFFFFFF)) + return -EINVAL; + if (!dev && (r->arp_flags & ATF_COM)) { +- dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); ++ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); + if (!dev) + return -ENODEV; + } + if (mask) { +- if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL) ++ if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL) + return -ENOBUFS; + return 0; + } + if (dev == NULL) { +- IPV4_DEVCONF_ALL(PROXY_ARP) = 1; ++ IPV4_DEVCONF_ALL(net, PROXY_ARP) = 1; + return 0; + } + if (__in_dev_get_rtnl(dev)) { +@@ -1000,8 +1008,10 @@ + if (r->arp_flags & ATF_PERM) + r->arp_flags |= ATF_COM; + if (dev == NULL) { +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, +- .tos = RTO_ONLINK } } }; ++ struct flowi fl = { ++ .fl_net = net, ++ .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } ++ }; + struct rtable * rt; + if ((err = ip_route_output_key(&rt, &fl)) != 0) + return err; +@@ -1080,7 +1090,7 @@ + return err; + } + +-static int arp_req_delete(struct arpreq *r, struct net_device * dev) ++static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev) + { + int err; + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; +@@ -1090,10 +1100,10 @@ + __be32 mask = + ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; + if (mask == htonl(0xFFFFFFFF)) +- return pneigh_delete(&arp_tbl, &ip, dev); ++ return pneigh_delete(&arp_tbl, net, &ip, dev); + if (mask == 0) { + if (dev == NULL) { +- IPV4_DEVCONF_ALL(PROXY_ARP) = 0; ++ IPV4_DEVCONF_ALL(net, PROXY_ARP) = 0; + return 0; + } + if (__in_dev_get_rtnl(dev)) { +@@ -1107,8 +1117,10 @@ + } + + if (dev == NULL) { +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, +- .tos = RTO_ONLINK } } }; ++ struct flowi fl = { ++ .fl_net = net, ++ .nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } } ++ }; + struct rtable * rt; + if ((err = ip_route_output_key(&rt, &fl)) != 0) + return err; +@@ -1133,7 +1145,7 @@ + * Handle an ARP layer I/O control request. + */ + +-int arp_ioctl(unsigned int cmd, void __user *arg) ++int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) + { + int err; + struct arpreq r; +@@ -1165,7 +1177,7 @@ + rtnl_lock(); + if (r.arp_dev[0]) { + err = -ENODEV; +- if ((dev = __dev_get_by_name(r.arp_dev)) == NULL) ++ if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) + goto out; + + /* Mmmm... It is wrong... 
ARPHRD_NETROM==0 */ +@@ -1181,10 +1193,10 @@ + + switch (cmd) { + case SIOCDARP: +- err = arp_req_delete(&r, dev); ++ err = arp_req_delete(net, &r, dev); + break; + case SIOCSARP: +- err = arp_req_set(&r, dev); ++ err = arp_req_set(net, &r, dev); + break; + case SIOCGARP: + err = arp_req_get(&r, dev); +@@ -1201,6 +1213,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch (event) { + case NETDEV_CHANGEADDR: + neigh_changeaddr(&arp_tbl, dev); +@@ -1227,6 +1242,54 @@ + } + + ++static int arp_proc_init(struct net *net); ++static void arp_proc_exit(struct net *net); ++ ++ ++static int arp_net_init(struct net *net) ++{ ++ int error; ++ if ((error = arp_proc_init(net))) ++ goto out_proc; ++ ++ error = -ENOMEM; ++ net->arp_neigh_parms_default = neigh_parms_alloc_default(&arp_tbl, net); ++ if (!net->arp_neigh_parms_default) ++ goto out_parm; ++ ++#ifdef CONFIG_SYSCTL ++ if ((error = neigh_sysctl_register( ++ NULL, net->arp_neigh_parms_default, ++ NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL))) ++ goto out_sysctl; ++#endif ++ ++out: ++ return error; ++ ++#ifdef CONFIG_SYSCTL ++out_sysctl: ++ neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); ++#endif ++out_parm: ++ arp_proc_exit(net); ++out_proc: ++ goto out; ++} ++ ++static void arp_net_exit(struct net *net) ++{ ++#ifdef CONFIG_SYSCTL ++ neigh_sysctl_unregister(net->arp_neigh_parms_default); ++#endif ++ neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default); ++ arp_proc_exit(net); ++} ++ ++static struct pernet_operations arp_net_ops = { ++ .init = arp_net_init, ++ .exit = arp_net_exit, ++}; + /* + * Called once on startup. + */ +@@ -1236,18 +1299,12 @@ + .func = arp_rcv, + }; + +-static int arp_proc_init(void); +- + void __init arp_init(void) + { + neigh_table_init(&arp_tbl); + + dev_add_pack(&arp_packet_type); +- arp_proc_init(); +-#ifdef CONFIG_SYSCTL +- neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, +- NET_IPV4_NEIGH, "ipv4", NULL, NULL); +-#endif ++ register_pernet_subsys(&arp_net_ops); + register_netdevice_notifier(&arp_netdev_notifier); + } + +@@ -1383,6 +1440,8 @@ + + seq = file->private_data; + seq->private = s; ++ s->net = get_net(PROC_NET(inode)); ++ + out: + return rc; + out_kfree: +@@ -1390,28 +1449,46 @@ + goto out; + } + ++static int arp_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct neigh_seq_state *state = seq->private; ++ put_net(state->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations arp_seq_fops = { + .owner = THIS_MODULE, + .open = arp_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = arp_seq_release, + }; + +-static int __init arp_proc_init(void) ++static int arp_proc_init(struct net *net) + { +- if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops)) ++ if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops)) + return -ENOMEM; + return 0; + } + ++static void arp_proc_exit(struct net *net) ++{ ++ proc_net_remove(net, "arp"); ++} ++ + #else /* CONFIG_PROC_FS */ + +-static int __init arp_proc_init(void) ++static int arp_proc_init(struct net *net) + { + return 0; + } + ++static void arp_proc_exit(struct net *net) ++{ ++ return; ++} ++ + #endif /* CONFIG_PROC_FS */ + + EXPORT_SYMBOL(arp_broken_ops); +diff -Nurb linux-2.6.22-570/net/ipv4/devinet.c linux-2.6.22-591/net/ipv4/devinet.c +--- linux-2.6.22-570/net/ipv4/devinet.c 2007-12-21 15:35:59.000000000 -0500 ++++ 
linux-2.6.22-591/net/ipv4/devinet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -63,7 +63,7 @@ + #include + #include + +-struct ipv4_devconf ipv4_devconf = { ++static struct ipv4_devconf ipv4_devconf_template = { + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, +@@ -72,7 +72,7 @@ + }, + }; + +-static struct ipv4_devconf ipv4_devconf_dflt = { ++static struct ipv4_devconf ipv4_devconf_dflt_template = { + .data = { + [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1, + [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1, +@@ -82,7 +82,7 @@ + }, + }; + +-#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr) ++#define IPV4_DEVCONF_DFLT(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf_dflt), attr) + + static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { + [IFA_LOCAL] = { .type = NLA_U32 }, +@@ -98,7 +98,7 @@ + static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy); + #ifdef CONFIG_SYSCTL +-static void devinet_sysctl_register(struct in_device *in_dev, ++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, + struct ipv4_devconf *p); + static void devinet_sysctl_unregister(struct ipv4_devconf *p); + #endif +@@ -149,6 +149,7 @@ + + static struct in_device *inetdev_init(struct net_device *dev) + { ++ struct net *net = dev->nd_net; + struct in_device *in_dev; + + ASSERT_RTNL(); +@@ -157,7 +158,7 @@ + if (!in_dev) + goto out; + INIT_RCU_HEAD(&in_dev->rcu_head); +- memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf)); ++ memcpy(&in_dev->cnf, &net->ipv4_devconf_dflt, sizeof(in_dev->cnf)); + in_dev->cnf.sysctl = NULL; + in_dev->dev = dev; + if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL) +@@ -173,7 +174,7 @@ + in_dev_hold(in_dev); + + #ifdef CONFIG_SYSCTL +- devinet_sysctl_register(in_dev, &in_dev->cnf); ++ devinet_sysctl_register(net, in_dev, &in_dev->cnf); + #endif + ip_mc_init_dev(in_dev); + if (dev->flags & IFF_UP) +@@ -203,8 +204,6 @@ + ASSERT_RTNL(); + + dev = in_dev->dev; +- if (dev == &loopback_dev) +- return; + + in_dev->dead = 1; + +@@ -415,12 +414,12 @@ + return inet_insert_ifa(ifa); + } + +-struct in_device *inetdev_by_index(int ifindex) ++struct in_device *inetdev_by_index(struct net *net, int ifindex) + { + struct net_device *dev; + struct in_device *in_dev = NULL; + read_lock(&dev_base_lock); +- dev = __dev_get_by_index(ifindex); ++ dev = __dev_get_by_index(net, ifindex); + if (dev) + in_dev = in_dev_get(dev); + read_unlock(&dev_base_lock); +@@ -444,6 +443,7 @@ + + static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct nlattr *tb[IFA_MAX+1]; + struct in_device *in_dev; + struct ifaddrmsg *ifm; +@@ -457,7 +457,7 @@ + goto errout; + + ifm = nlmsg_data(nlh); +- in_dev = inetdev_by_index(ifm->ifa_index); ++ in_dev = inetdev_by_index(net, ifm->ifa_index); + if (in_dev == NULL) { + err = -ENODEV; + goto errout; +@@ -488,7 +488,7 @@ + return err; + } + +-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) ++static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) + { + struct nlattr *tb[IFA_MAX+1]; + struct in_ifaddr *ifa; +@@ -507,7 +507,7 @@ + goto errout; + } + +- dev = __dev_get_by_index(ifm->ifa_index); ++ dev = __dev_get_by_index(net, ifm->ifa_index); + if (dev == NULL) { + err = -ENODEV; + goto errout; +@@ -564,11 +564,12 @@ + + static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; 
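
With ipv4_devconf and ipv4_devconf_dflt turned into per-namespace kmemdup copies of the static templates, every reader of a devconf knob has to name the namespace explicitly through the two-argument accessor macros. A sketch of the resulting call style, with example_uses() as a hypothetical caller; both macro uses are taken from hunks in this patch:

	/* IPV4_DEVCONF_ALL(net, ATTR) resolves ATTR against
	 * net->ipv4_devconf instead of the old global table. */
	static void example_uses(struct net *net)
	{
		if (IPV4_DEVCONF_ALL(net, FORWARDING))
			inet_forward_change(net);

		if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) {
			/* accept unsolicited ARP, as in arp_process() above */
		}
	}
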
+ struct in_ifaddr *ifa; + + ASSERT_RTNL(); + +- ifa = rtm_to_ifaddr(nlh); ++ ifa = rtm_to_ifaddr(net, nlh); + if (IS_ERR(ifa)) + return PTR_ERR(ifa); + +@@ -600,7 +601,7 @@ + } + + +-int devinet_ioctl(unsigned int cmd, void __user *arg) ++int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) + { + struct ifreq ifr; + struct sockaddr_in sin_orig; +@@ -629,7 +630,7 @@ + *colon = 0; + + #ifdef CONFIG_KMOD +- dev_load(ifr.ifr_name); ++ dev_load(net, ifr.ifr_name); + #endif + + switch (cmd) { +@@ -670,7 +671,7 @@ + rtnl_lock(); + + ret = -ENODEV; +- if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL) ++ if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL) + goto done; + + if (colon) +@@ -889,6 +890,7 @@ + + __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) + { ++ struct net *net = dev->nd_net; + __be32 addr = 0; + struct in_device *in_dev; + +@@ -919,7 +921,7 @@ + */ + read_lock(&dev_base_lock); + rcu_read_lock(); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if ((in_dev = __in_dev_get_rcu(dev)) == NULL) + continue; + +@@ -982,7 +984,7 @@ + * - local: address, 0=autoselect the local address + * - scope: maximum allowed scope value for the local address + */ +-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope) ++__be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope) + { + __be32 addr = 0; + struct in_device *in_dev; +@@ -998,7 +1000,7 @@ + + read_lock(&dev_base_lock); + rcu_read_lock(); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if ((in_dev = __in_dev_get_rcu(dev))) { + addr = confirm_addr_indev(in_dev, dst, local, scope); + if (addr) +@@ -1059,6 +1061,7 @@ + void *ptr) + { + struct net_device *dev = ptr; ++ struct net *net = dev->nd_net; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + + ASSERT_RTNL(); +@@ -1066,7 +1069,7 @@ + if (!in_dev) { + if (event == NETDEV_REGISTER) { + in_dev = inetdev_init(dev); +- if (dev == &loopback_dev) { ++ if (dev == &net->loopback_dev) { + if (!in_dev) + panic("devinet: " + "Failed to create loopback\n"); +@@ -1085,7 +1088,7 @@ + case NETDEV_UP: + if (dev->mtu < 68) + break; +- if (dev == &loopback_dev) { ++ if (dev == &net->loopback_dev) { + struct in_ifaddr *ifa; + if ((ifa = inet_alloc_ifa()) != NULL) { + ifa->ifa_local = +@@ -1122,7 +1125,7 @@ + neigh_sysctl_unregister(in_dev->arp_parms); + neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4, + NET_IPV4_NEIGH, "ipv4", NULL, NULL); +- devinet_sysctl_register(in_dev, &in_dev->cnf); ++ devinet_sysctl_register(net, in_dev, &in_dev->cnf); + #endif + break; + } +@@ -1185,6 +1188,7 @@ + + static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx, ip_idx; + struct net_device *dev; + struct in_device *in_dev; +@@ -1194,7 +1198,7 @@ + + s_ip_idx = ip_idx = cb->args[1]; + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) +@@ -1228,6 +1232,7 @@ + u32 pid) + { + struct sk_buff *skb; ++ struct net *net = ifa->ifa_dev->dev->nd_net; + u32 seq = nlh ? 
nlh->nlmsg_seq : 0; + int err = -ENOBUFS; + +@@ -1242,25 +1247,25 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); ++ err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); ++ rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); + } + + #ifdef CONFIG_SYSCTL + +-static void devinet_copy_dflt_conf(int i) ++static void devinet_copy_dflt_conf(struct net *net, int i) + { + struct net_device *dev; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (in_dev && !test_bit(i, in_dev->cnf.state)) +- in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i]; ++ in_dev->cnf.data[i] = net->ipv4_devconf_dflt->data[i]; + rcu_read_unlock(); + } + read_unlock(&dev_base_lock); +@@ -1274,12 +1279,13 @@ + + if (write) { + struct ipv4_devconf *cnf = ctl->extra1; ++ struct net *net = ctl->extra2; + int i = (int *)ctl->data - cnf->data; + + set_bit(i, cnf->state); + +- if (cnf == &ipv4_devconf_dflt) +- devinet_copy_dflt_conf(i); ++ if (cnf == net->ipv4_devconf_dflt) ++ devinet_copy_dflt_conf(net, i); + } + + return ret; +@@ -1291,6 +1297,7 @@ + { + struct ipv4_devconf *cnf; + int *valp = table->data; ++ struct net *net; + int new; + int i; + +@@ -1325,26 +1332,27 @@ + *valp = new; + + cnf = table->extra1; ++ net = table->extra2; + i = (int *)table->data - cnf->data; + + set_bit(i, cnf->state); + +- if (cnf == &ipv4_devconf_dflt) +- devinet_copy_dflt_conf(i); ++ if (cnf == net->ipv4_devconf_dflt) ++ devinet_copy_dflt_conf(net, i); + + return 1; + } + +-void inet_forward_change(void) ++void inet_forward_change(struct net *net) + { + struct net_device *dev; +- int on = IPV4_DEVCONF_ALL(FORWARDING); ++ int on = IPV4_DEVCONF_ALL(net, FORWARDING); + +- IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on; +- IPV4_DEVCONF_DFLT(FORWARDING) = on; ++ IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; ++ IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(net, dev) { + struct in_device *in_dev; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); +@@ -1364,11 +1372,12 @@ + int *valp = ctl->data; + int val = *valp; + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); ++ struct net *net = ctl->extra2; + + if (write && *valp != val) { +- if (valp == &IPV4_DEVCONF_ALL(FORWARDING)) +- inet_forward_change(); +- else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING)) ++ if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) ++ inet_forward_change(net); ++ else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) + rt_cache_flush(0); + } + +@@ -1407,13 +1416,14 @@ + { \ + .ctl_name = NET_IPV4_CONF_ ## attr, \ + .procname = name, \ +- .data = ipv4_devconf.data + \ ++ .data = ipv4_devconf_template.data + \ + NET_IPV4_CONF_ ## attr - 1, \ + .maxlen = sizeof(int), \ + .mode = mval, \ + .proc_handler = proc, \ + .strategy = sysctl, \ +- .extra1 = &ipv4_devconf, \ ++ .extra1 = &ipv4_devconf_template, \ ++ .extra2 = &init_net, \ + } + + #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \ +@@ -1503,25 +1513,29 @@ + }, + }; + +-static void devinet_sysctl_register(struct in_device *in_dev, ++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev, + struct ipv4_devconf *p) + { + int i; + struct net_device *dev = in_dev ? 
in_dev->dev : NULL; +- struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t), +- GFP_KERNEL); ++ struct devinet_sysctl_table *t; + char *dev_name = NULL; + ++ t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL); + if (!t) + return; + for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) { +- t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf; ++ t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf_template; + t->devinet_vars[i].extra1 = p; ++ t->devinet_vars[i].extra2 = net; + } + + if (dev) { + dev_name = dev->name; + t->devinet_dev[0].ctl_name = dev->ifindex; ++ } else if (p == net->ipv4_devconf) { ++ dev_name = "all"; ++ t->devinet_dev[0].ctl_name = NET_PROTO_CONF_ALL; + } else { + dev_name = "default"; + t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; +@@ -1542,7 +1556,7 @@ + t->devinet_proto_dir[0].child = t->devinet_conf_dir; + t->devinet_root_dir[0].child = t->devinet_proto_dir; + +- t->sysctl_header = register_sysctl_table(t->devinet_root_dir); ++ t->sysctl_header = register_net_sysctl_table(net, t->devinet_root_dir); + if (!t->sysctl_header) + goto free_procname; + +@@ -1562,26 +1576,59 @@ + if (p->sysctl) { + struct devinet_sysctl_table *t = p->sysctl; + p->sysctl = NULL; +- unregister_sysctl_table(t->sysctl_header); ++ unregister_net_sysctl_table(t->sysctl_header); + kfree(t->devinet_dev[0].procname); + kfree(t); + } + } + #endif + ++static int devinet_net_init(struct net *net) ++{ ++#ifdef CONFIG_SYSCTL ++ net->ipv4_devconf = kmemdup(&ipv4_devconf_template, ++ sizeof(ipv4_devconf_template), GFP_KERNEL); ++ if (!net->ipv4_devconf) ++ return -ENOMEM; ++ ++ net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template, ++ sizeof(ipv4_devconf_template), ++ GFP_KERNEL); ++ if (!net->ipv4_devconf_dflt) { ++ kfree(net->ipv4_devconf); ++ return -ENOMEM; ++ } ++ ++ devinet_sysctl_register(net, NULL, net->ipv4_devconf); ++ devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt); ++ ++ multi_ipv4_table[0].data = &IPV4_DEVCONF_ALL(net, FORWARDING); ++#endif ++ return 0; ++} ++ ++static void devinet_net_exit(struct net *net) ++{ ++#ifdef CONFIG_SYSCTL ++ devinet_sysctl_unregister(net->ipv4_devconf_dflt); ++ devinet_sysctl_unregister(net->ipv4_devconf); ++#endif ++} ++ ++static struct pernet_operations devinet_net_ops = { ++ .init = devinet_net_init, ++ .exit = devinet_net_exit, ++}; ++ + void __init devinet_init(void) + { ++ register_pernet_subsys(&devinet_net_ops); + register_gifconf(PF_INET, inet_gifconf); + register_netdevice_notifier(&ip_netdev_notifier); + + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr); +-#ifdef CONFIG_SYSCTL +- devinet_sysctl.sysctl_header = +- register_sysctl_table(devinet_sysctl.devinet_root_dir); +- devinet_sysctl_register(NULL, &ipv4_devconf_dflt); +-#endif + } + + EXPORT_SYMBOL(in_dev_finish_destroy); +diff -Nurb linux-2.6.22-570/net/ipv4/esp4.c linux-2.6.22-591/net/ipv4/esp4.c +--- linux-2.6.22-570/net/ipv4/esp4.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/esp4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -307,6 +307,9 @@ + struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2)); + struct xfrm_state *x; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || + icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; +@@ -481,3 +484,4 @@ + module_init(esp4_init); + module_exit(esp4_fini); + 
MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP); +diff -Nurb linux-2.6.22-570/net/ipv4/fib_frontend.c linux-2.6.22-591/net/ipv4/fib_frontend.c +--- linux-2.6.22-570/net/ipv4/fib_frontend.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/fib_frontend.c 2007-12-21 15:36:15.000000000 -0500 +@@ -51,38 +51,34 @@ + + #ifndef CONFIG_IP_MULTIPLE_TABLES + +-struct fib_table *ip_fib_local_table; +-struct fib_table *ip_fib_main_table; +- + #define FIB_TABLE_HASHSZ 1 +-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + + #else + + #define FIB_TABLE_HASHSZ 256 +-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + +-struct fib_table *fib_new_table(u32 id) ++struct fib_table *fib_new_table(struct net *net, u32 id) + { + struct fib_table *tb; + unsigned int h; + + if (id == 0) + id = RT_TABLE_MAIN; +- tb = fib_get_table(id); ++ tb = fib_get_table(net, id); + if (tb) + return tb; + tb = fib_hash_init(id); + if (!tb) + return NULL; + h = id & (FIB_TABLE_HASHSZ - 1); +- hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); ++ hlist_add_head_rcu(&tb->tb_hlist, &net->ip_fib_table_hash[h]); + return tb; + } + +-struct fib_table *fib_get_table(u32 id) ++struct fib_table *fib_get_table(struct net *net, u32 id) + { + struct fib_table *tb; ++ struct hlist_head *head; + struct hlist_node *node; + unsigned int h; + +@@ -90,7 +86,8 @@ + id = RT_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); +- hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { ++ head = &net->ip_fib_table_hash[h]; ++ hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { + if (tb->tb_id == id) { + rcu_read_unlock(); + return tb; +@@ -99,9 +96,10 @@ + rcu_read_unlock(); + return NULL; + } ++ + #endif /* CONFIG_IP_MULTIPLE_TABLES */ + +-static void fib_flush(void) ++static void fib_flush(struct net *net) + { + int flushed = 0; + struct fib_table *tb; +@@ -109,7 +107,8 @@ + unsigned int h; + + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { +- hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) ++ struct hlist_head *head = &net->ip_fib_table_hash[h]; ++ hlist_for_each_entry(tb, node, head, tb_hlist) + flushed += tb->tb_flush(tb); + } + +@@ -121,18 +120,23 @@ + * Find the first device with a given source address. 
+ */ + +-struct net_device * ip_dev_find(__be32 addr) ++struct net_device * ip_dev_find(struct net *net, __be32 addr) + { +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; ++ struct flowi fl = { ++ .fl_net = net, ++ .nl_u = { .ip4_u = { .daddr = addr } } ++ }; + struct fib_result res; + struct net_device *dev = NULL; ++ struct fib_table *local_table; + + #ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; + #endif + +- if (!ip_fib_local_table || +- ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res)) ++ local_table = fib_get_table(net, RT_TABLE_LOCAL); ++ if (!local_table || ++ local_table->tb_lookup(local_table, &fl, &res)) + return NULL; + if (res.type != RTN_LOCAL) + goto out; +@@ -145,11 +149,15 @@ + return dev; + } + +-unsigned inet_addr_type(__be32 addr) ++unsigned inet_addr_type(struct net *net, __be32 addr) + { +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; ++ struct flowi fl = { ++ .fl_net = net, ++ .nl_u = { .ip4_u = { .daddr = addr } } ++ }; + struct fib_result res; + unsigned ret = RTN_BROADCAST; ++ struct fib_table *local_table; + + if (ZERONET(addr) || BADCLASS(addr)) + return RTN_BROADCAST; +@@ -160,10 +168,10 @@ + res.r = NULL; + #endif + +- if (ip_fib_local_table) { ++ local_table = fib_get_table(net, RT_TABLE_LOCAL); ++ if (local_table) { + ret = RTN_UNICAST; +- if (!ip_fib_local_table->tb_lookup(ip_fib_local_table, +- &fl, &res)) { ++ if (!local_table->tb_lookup(local_table, &fl, &res)) { + ret = res.type; + fib_res_put(&res); + } +@@ -183,7 +191,8 @@ + struct net_device *dev, __be32 *spec_dst, u32 *itag) + { + struct in_device *in_dev; +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = dev->nd_net, ++ .nl_u = { .ip4_u = + { .daddr = src, + .saddr = dst, + .tos = tos } }, +@@ -267,13 +276,16 @@ + return len + nla_total_size(4); + } + +-static int rtentry_to_fib_config(int cmd, struct rtentry *rt, ++static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, + struct fib_config *cfg) + { + __be32 addr; + int plen; + + memset(cfg, 0, sizeof(*cfg)); ++ cfg->fc_nlinfo.pid = 0; ++ cfg->fc_nlinfo.nlh = NULL; ++ cfg->fc_nlinfo.net = net; + + if (rt->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; +@@ -334,7 +346,7 @@ + colon = strchr(devname, ':'); + if (colon) + *colon = 0; +- dev = __dev_get_by_name(devname); ++ dev = __dev_get_by_name(net, devname); + if (!dev) + return -ENODEV; + cfg->fc_oif = dev->ifindex; +@@ -357,7 +369,7 @@ + if (rt->rt_gateway.sa_family == AF_INET && addr) { + cfg->fc_gw = addr; + if (rt->rt_flags & RTF_GATEWAY && +- inet_addr_type(addr) == RTN_UNICAST) ++ inet_addr_type(net, addr) == RTN_UNICAST) + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + +@@ -398,7 +410,7 @@ + * Handle IP routing ioctl calls. 
These are used to manipulate the routing tables + */ + +-int ip_rt_ioctl(unsigned int cmd, void __user *arg) ++int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) + { + struct fib_config cfg; + struct rtentry rt; +@@ -414,18 +426,18 @@ + return -EFAULT; + + rtnl_lock(); +- err = rtentry_to_fib_config(cmd, &rt, &cfg); ++ err = rtentry_to_fib_config(net, cmd, &rt, &cfg); + if (err == 0) { + struct fib_table *tb; + + if (cmd == SIOCDELRT) { +- tb = fib_get_table(cfg.fc_table); ++ tb = fib_get_table(net, cfg.fc_table); + if (tb) + err = tb->tb_delete(tb, &cfg); + else + err = -ESRCH; + } else { +- tb = fib_new_table(cfg.fc_table); ++ tb = fib_new_table(net, cfg.fc_table); + if (tb) + err = tb->tb_insert(tb, &cfg); + else +@@ -453,7 +465,6 @@ + [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PROTOINFO] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, +- [RTA_MP_ALGO] = { .type = NLA_U32 }, + }; + + static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, +@@ -481,6 +492,7 @@ + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; ++ cfg->fc_nlinfo.net = skb->sk->sk_net; + + if (cfg->fc_type > RTN_MAX) { + err = -EINVAL; +@@ -515,9 +527,6 @@ + case RTA_FLOW: + cfg->fc_flow = nla_get_u32(attr); + break; +- case RTA_MP_ALGO: +- cfg->fc_mp_alg = nla_get_u32(attr); +- break; + case RTA_TABLE: + cfg->fc_table = nla_get_u32(attr); + break; +@@ -531,6 +540,7 @@ + + static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib_config cfg; + struct fib_table *tb; + int err; +@@ -539,7 +549,7 @@ + if (err < 0) + goto errout; + +- tb = fib_get_table(cfg.fc_table); ++ tb = fib_get_table(net, cfg.fc_table); + if (tb == NULL) { + err = -ESRCH; + goto errout; +@@ -552,6 +562,7 @@ + + static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib_config cfg; + struct fib_table *tb; + int err; +@@ -560,7 +571,7 @@ + if (err < 0) + goto errout; + +- tb = fib_new_table(cfg.fc_table); ++ tb = fib_new_table(net, cfg.fc_table); + if (tb == NULL) { + err = -ENOBUFS; + goto errout; +@@ -573,6 +584,7 @@ + + static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct fib_table *tb; +@@ -587,8 +599,9 @@ + s_e = cb->args[1]; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { ++ struct hlist_head *head = &net->ip_fib_table_hash[h]; + e = 0; +- hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { ++ hlist_for_each_entry(tb, node, head, tb_hlist) { + if (e < s_e) + goto next; + if (dumped) +@@ -617,6 +630,7 @@ + + static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) + { ++ struct net *net = ifa->ifa_dev->dev->nd_net; + struct fib_table *tb; + struct fib_config cfg = { + .fc_protocol = RTPROT_KERNEL, +@@ -626,12 +640,13 @@ + .fc_prefsrc = ifa->ifa_local, + .fc_oif = ifa->ifa_dev->dev->ifindex, + .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, ++ .fc_nlinfo.net = net, + }; + + if (type == RTN_UNICAST) +- tb = fib_new_table(RT_TABLE_MAIN); ++ tb = fib_new_table(net, RT_TABLE_MAIN); + else +- tb = fib_new_table(RT_TABLE_LOCAL); ++ tb = fib_new_table(net, RT_TABLE_LOCAL); + + if (tb == NULL) + return; +@@ -692,6 +707,7 @@ + { + struct in_device *in_dev = ifa->ifa_dev; + struct net_device *dev = in_dev->dev; ++ struct net *net = dev->nd_net; + struct 
in_ifaddr *ifa1; + struct in_ifaddr *prim = ifa; + __be32 brd = ifa->ifa_address|~ifa->ifa_mask; +@@ -740,15 +756,15 @@ + fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); + + /* Check, that this local address finally disappeared. */ +- if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) { ++ if (inet_addr_type(net, ifa->ifa_local) != RTN_LOCAL) { + /* And the last, but not the least thing. + We must flush stray FIB entries. + + First of all, we scan fib_info list searching + for stray nexthop entries, then ignite fib_flush. + */ +- if (fib_sync_down(ifa->ifa_local, NULL, 0)) +- fib_flush(); ++ if (fib_sync_down(net, ifa->ifa_local, NULL, 0)) ++ fib_flush(net); + } + } + #undef LOCAL_OK +@@ -757,11 +773,12 @@ + #undef BRD1_OK + } + +-static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) ++static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn, struct fib_table *tb ) + { + + struct fib_result res; +- struct flowi fl = { .mark = frn->fl_mark, ++ struct flowi fl = { .fl_net = net, ++ .mark = frn->fl_mark, + .nl_u = { .ip4_u = { .daddr = frn->fl_addr, + .tos = frn->fl_tos, + .scope = frn->fl_scope } } }; +@@ -790,6 +807,7 @@ + + static void nl_fib_input(struct sock *sk, int len) + { ++ struct net *net = sk->sk_net; + struct sk_buff *skb = NULL; + struct nlmsghdr *nlh = NULL; + struct fib_result_nl *frn; +@@ -808,9 +826,9 @@ + } + + frn = (struct fib_result_nl *) NLMSG_DATA(nlh); +- tb = fib_get_table(frn->tb_id_in); ++ tb = fib_get_table(net, frn->tb_id_in); + +- nl_fib_lookup(frn, tb); ++ nl_fib_lookup(net, frn, tb); + + pid = NETLINK_CB(skb).pid; /* pid of sending process */ + NETLINK_CB(skb).pid = 0; /* from kernel */ +@@ -818,16 +836,36 @@ + netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + } + +-static void nl_fib_lookup_init(void) ++static int nl_fib_lookup_init(struct net *net) + { +- netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL, +- THIS_MODULE); ++ int error = -ENOMEM; ++ struct sock *sk; ++ sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, nl_fib_input, ++ NULL, THIS_MODULE); ++ if (sk) { ++ /* Don't hold an extra reference on the namespace */ ++ put_net(sk->sk_net); ++ net->nlfl = sk; ++ error = 0; ++ } ++ return error; ++} ++ ++static void nl_fib_lookup_exit(struct net *net) ++{ ++ /* At the last minute lie and say this is a socket for the ++ * initial network namespace. So the socket will be safe to ++ * free. 
++ */ ++ net->nlfl->sk_net = get_net(&init_net); ++ sock_put(net->nlfl); + } + + static void fib_disable_ip(struct net_device *dev, int force) + { +- if (fib_sync_down(0, dev, force)) +- fib_flush(); ++ struct net *net = dev->nd_net; ++ if (fib_sync_down(net, 0, dev, force)) ++ fib_flush(net); + rt_cache_flush(0); + arp_ifdown(dev); + } +@@ -864,6 +902,9 @@ + struct net_device *dev = ptr; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_UNREGISTER) { + fib_disable_ip(dev, 2); + return NOTIFY_DONE; +@@ -893,6 +934,85 @@ + return NOTIFY_DONE; + } + ++static int ip_fib_net_init(struct net *net) ++{ ++ unsigned int i; ++ ++ net->ip_fib_table_hash = kzalloc( ++ sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); ++ if (!net->ip_fib_table_hash) ++ return -ENOMEM; ++ ++ for (i = 0; i < FIB_TABLE_HASHSZ; i++) ++ INIT_HLIST_HEAD(&net->ip_fib_table_hash[i]); ++#ifndef CONFIG_IP_MULTIPLE_TABLES ++ net->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); ++ hlist_add_head_rcu(&net->ip_fib_local_table->tb_hlist, ++ &net->ip_fib_table_hash[0]); ++ net->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); ++ hlist_add_head_rcu(&net->ip_fib_main_table->tb_hlist, ++ &net->ip_fib_table_hash[0]); ++#else ++ fib4_rules_init(net); ++#endif ++ return 0; ++} ++ ++static void ip_fib_net_exit(struct net *net) ++{ ++ unsigned int i; ++ ++#ifdef CONFIG_IP_MULTIPLE_TABLES ++ fib4_rules_exit(net); ++#endif ++ ++ synchronize_rcu(); /* needed? */ ++ for (i = 0; i < FIB_TABLE_HASHSZ; i++) { ++ struct fib_table *tb; ++ struct hlist_head *head; ++ struct hlist_node *node, *tmp; ++ ++ head = &net->ip_fib_table_hash[i]; ++ hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { ++ hlist_del(node); ++ fib_hash_exit(tb); ++ } ++ } ++ kfree(net->ip_fib_table_hash); ++} ++ ++static int fib_net_init(struct net *net) ++{ ++ int error; ++ ++ error = 0; ++ if ((error = ip_fib_net_init(net))) ++ goto out; ++ if ((error = fib_info_init(net))) ++ goto out_info; ++ if ((error = nl_fib_lookup_init(net))) ++ goto out_nlfl; ++ if ((error = fib_proc_init(net))) ++ goto out_proc; ++out: ++ return error; ++out_proc: ++ nl_fib_lookup_exit(net); ++out_nlfl: ++ fib_info_exit(net); ++out_info: ++ ip_fib_net_exit(net); ++ goto out; ++} ++ ++static void fib_net_exit(struct net *net) ++{ ++ fib_proc_exit(net); ++ nl_fib_lookup_exit(net); ++ fib_info_exit(net); ++ ip_fib_net_exit(net); ++} ++ + static struct notifier_block fib_inetaddr_notifier = { + .notifier_call =fib_inetaddr_event, + }; +@@ -901,28 +1021,20 @@ + .notifier_call =fib_netdev_event, + }; + ++static struct pernet_operations fib_net_ops = { ++ .init = fib_net_init, ++ .exit = fib_net_exit, ++}; ++ + void __init ip_fib_init(void) + { +- unsigned int i; +- +- for (i = 0; i < FIB_TABLE_HASHSZ; i++) +- INIT_HLIST_HEAD(&fib_table_hash[i]); +-#ifndef CONFIG_IP_MULTIPLE_TABLES +- ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); +- hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); +- ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); +- hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); +-#else +- fib4_rules_init(); +-#endif +- +- register_netdevice_notifier(&fib_netdev_notifier); +- register_inetaddr_notifier(&fib_inetaddr_notifier); +- nl_fib_lookup_init(); +- + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); ++ ++ 
register_pernet_subsys(&fib_net_ops); ++ register_netdevice_notifier(&fib_netdev_notifier); ++ register_inetaddr_notifier(&fib_inetaddr_notifier); + } + + EXPORT_SYMBOL(inet_addr_type); +diff -Nurb linux-2.6.22-570/net/ipv4/fib_hash.c linux-2.6.22-591/net/ipv4/fib_hash.c +--- linux-2.6.22-570/net/ipv4/fib_hash.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/fib_hash.c 2007-12-21 15:36:15.000000000 -0500 +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + + #include "fib_lookup.h" +@@ -274,11 +275,10 @@ + return err; + } + +-static int fn_hash_last_dflt=-1; +- + static void + fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) + { ++ struct net *net = flp->fl_net; + int order, last_idx; + struct hlist_node *node; + struct fib_node *f; +@@ -316,12 +316,12 @@ + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, +- &last_idx, &fn_hash_last_dflt)) { ++ &last_idx, &net->fn_hash_last_dflt)) { + if (res->fi) + fib_info_put(res->fi); + res->fi = fi; + atomic_inc(&fi->fib_clntref); +- fn_hash_last_dflt = order; ++ net->fn_hash_last_dflt = order; + goto out; + } + fi = next_fi; +@@ -330,16 +330,16 @@ + } + + if (order <= 0 || fi == NULL) { +- fn_hash_last_dflt = -1; ++ net->fn_hash_last_dflt = -1; + goto out; + } + +- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) { ++ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) { + if (res->fi) + fib_info_put(res->fi); + res->fi = fi; + atomic_inc(&fi->fib_clntref); +- fn_hash_last_dflt = order; ++ net->fn_hash_last_dflt = order; + goto out; + } + +@@ -350,7 +350,7 @@ + if (last_resort) + atomic_inc(&last_resort->fib_clntref); + } +- fn_hash_last_dflt = last_idx; ++ net->fn_hash_last_dflt = last_idx; + out: + read_unlock(&fib_hash_lock); + } +@@ -759,11 +759,15 @@ + return skb->len; + } + +-#ifdef CONFIG_IP_MULTIPLE_TABLES ++void fib_hash_exit(struct fib_table *tb) ++{ ++ if (!tb) ++ return; ++ fn_hash_flush(tb); ++ kfree(tb); ++} ++ + struct fib_table * fib_hash_init(u32 id) +-#else +-struct fib_table * __init fib_hash_init(u32 id) +-#endif + { + struct fib_table *tb; + +@@ -799,6 +803,7 @@ + #ifdef CONFIG_PROC_FS + + struct fib_iter_state { ++ struct net *net; + struct fn_zone *zone; + int bucket; + struct hlist_head *hash_head; +@@ -812,7 +817,8 @@ + static struct fib_alias *fib_get_first(struct seq_file *seq) + { + struct fib_iter_state *iter = seq->private; +- struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data; ++ struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN); ++ struct fn_hash *table = (struct fn_hash *) main_table->tb_data; + + iter->bucket = 0; + iter->hash_head = NULL; +@@ -948,10 +954,11 @@ + + static void *fib_seq_start(struct seq_file *seq, loff_t *pos) + { ++ struct fib_iter_state *iter = seq->private; + void *v = NULL; + + read_lock(&fib_hash_lock); +- if (ip_fib_main_table) ++ if (fib_get_table(iter->net, RT_TABLE_MAIN)) + v = *pos ? 
fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + return v; + } +@@ -1051,6 +1058,7 @@ + + seq = file->private_data; + seq->private = s; ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -1058,23 +1066,32 @@ + goto out; + } + ++static int fib_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct fib_iter_state *iter = seq->private; ++ put_net(iter->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations fib_seq_fops = { + .owner = THIS_MODULE, + .open = fib_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = fib_seq_release, + }; + +-int __init fib_proc_init(void) ++int fib_proc_init(struct net *net) + { +- if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops)) ++ net->fn_hash_last_dflt = -1; ++ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops)) + return -ENOMEM; + return 0; + } + +-void __init fib_proc_exit(void) ++void fib_proc_exit(struct net *net) + { +- proc_net_remove("route"); ++ proc_net_remove(net, "route"); + } + #endif /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/ipv4/fib_rules.c linux-2.6.22-591/net/ipv4/fib_rules.c +--- linux-2.6.22-570/net/ipv4/fib_rules.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/fib_rules.c 2007-12-21 15:36:15.000000000 -0500 +@@ -32,8 +32,6 @@ + #include + #include + +-static struct fib_rules_ops fib4_rules_ops; +- + struct fib4_rule + { + struct fib_rule common; +@@ -49,35 +47,14 @@ + #endif + }; + +-static struct fib4_rule default_rule = { +- .common = { +- .refcnt = ATOMIC_INIT(2), +- .pref = 0x7FFF, +- .table = RT_TABLE_DEFAULT, +- .action = FR_ACT_TO_TBL, +- }, ++struct fib4_rule_table { ++ struct list_head fib4_rules; ++ struct fib4_rule default_rule; ++ struct fib4_rule main_rule; ++ struct fib4_rule local_rule; ++ struct fib_rules_ops fib4_rules_ops; + }; + +-static struct fib4_rule main_rule = { +- .common = { +- .refcnt = ATOMIC_INIT(2), +- .pref = 0x7FFE, +- .table = RT_TABLE_MAIN, +- .action = FR_ACT_TO_TBL, +- }, +-}; +- +-static struct fib4_rule local_rule = { +- .common = { +- .refcnt = ATOMIC_INIT(2), +- .table = RT_TABLE_LOCAL, +- .action = FR_ACT_TO_TBL, +- .flags = FIB_RULE_PERMANENT, +- }, +-}; +- +-static LIST_HEAD(fib4_rules); +- + #ifdef CONFIG_NET_CLS_ROUTE + u32 fib_rules_tclass(struct fib_result *res) + { +@@ -87,12 +64,14 @@ + + int fib_lookup(struct flowi *flp, struct fib_result *res) + { ++ struct net *net = flp->fl_net; ++ struct fib4_rule_table *table = net->fib4_table; + struct fib_lookup_arg arg = { + .result = res, + }; + int err; + +- err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); ++ err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg); + res->r = arg.rule; + + return err; +@@ -122,7 +101,7 @@ + goto errout; + } + +- if ((tbl = fib_get_table(rule->table)) == NULL) ++ if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL) + goto errout; + + err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); +@@ -138,7 +117,7 @@ + if (res->r && res->r->action == FR_ACT_TO_TBL && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; +- if ((tb = fib_get_table(res->r->table)) != NULL) ++ if ((tb = fib_get_table(flp->fl_net, res->r->table)) != NULL) + tb->tb_select_default(tb, flp, res); + } + } +@@ -159,13 +138,13 @@ + return 1; + } + +-static struct fib_table *fib_empty_table(void) ++static struct fib_table *fib_empty_table(struct net *net) + { + 
u32 id; + + for (id = 1; id <= RT_TABLE_MAX; id++) +- if (fib_get_table(id) == NULL) +- return fib_new_table(id); ++ if (fib_get_table(net, id) == NULL) ++ return fib_new_table(net, id); + return NULL; + } + +@@ -178,6 +157,7 @@ + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) + { ++ struct net *net = skb->sk->sk_net; + int err = -EINVAL; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; + +@@ -188,7 +168,7 @@ + if (rule->action == FR_ACT_TO_TBL) { + struct fib_table *table; + +- table = fib_empty_table(); ++ table = fib_empty_table(net); + if (table == NULL) { + err = -ENOBUFS; + goto errout; +@@ -274,14 +254,15 @@ + return -ENOBUFS; + } + +-static u32 fib4_rule_default_pref(void) ++static u32 fib4_rule_default_pref(struct fib_rules_ops *ops) + { +- struct list_head *pos; ++ struct list_head *list, *pos; + struct fib_rule *rule; + +- if (!list_empty(&fib4_rules)) { +- pos = fib4_rules.next; +- if (pos->next != &fib4_rules) { ++ list = ops->rules_list; ++ if (!list_empty(list)) { ++ pos = list->next; ++ if (pos->next != list) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; +@@ -298,12 +279,37 @@ + + nla_total_size(4); /* flow */ + } + +-static void fib4_rule_flush_cache(void) ++static void fib4_rule_flush_cache(struct fib_rules_ops *ops) + { + rt_cache_flush(-1); + } + +-static struct fib_rules_ops fib4_rules_ops = { ++static struct fib4_rule_table fib4_rule_table = { ++ .default_rule = { ++ .common = { ++ .refcnt = ATOMIC_INIT(2), ++ .pref = 0x7FFF, ++ .table = RT_TABLE_DEFAULT, ++ .action = FR_ACT_TO_TBL, ++ }, ++ }, ++ .main_rule = { ++ .common = { ++ .refcnt = ATOMIC_INIT(2), ++ .pref = 0x7FFE, ++ .table = RT_TABLE_MAIN, ++ .action = FR_ACT_TO_TBL, ++ }, ++ }, ++ .local_rule = { ++ .common = { ++ .refcnt = ATOMIC_INIT(2), ++ .table = RT_TABLE_LOCAL, ++ .action = FR_ACT_TO_TBL, ++ .flags = FIB_RULE_PERMANENT, ++ }, ++ }, ++ .fib4_rules_ops = { + .family = AF_INET, + .rule_size = sizeof(struct fib4_rule), + .addr_size = sizeof(u32), +@@ -317,15 +323,34 @@ + .flush_cache = fib4_rule_flush_cache, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, +- .rules_list = &fib4_rules, ++ .rules_list = &fib4_rule_table.fib4_rules, /* &fib4_rules, */ + .owner = THIS_MODULE, ++ }, + }; + +-void __init fib4_rules_init(void) ++ ++void fib4_rules_init(struct net *net) + { +- list_add_tail(&local_rule.common.list, &fib4_rules); +- list_add_tail(&main_rule.common.list, &fib4_rules); +- list_add_tail(&default_rule.common.list, &fib4_rules); ++ struct fib4_rule_table *table; ++ table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL); ++ if (!table) ++ return; ++ INIT_LIST_HEAD(&table->fib4_rules); ++ list_add_tail(&table->local_rule.common.list, &table->fib4_rules); ++ list_add_tail(&table->main_rule.common.list, &table->fib4_rules); ++ list_add_tail(&table->default_rule.common.list, &table->fib4_rules); ++ table->fib4_rules_ops.rules_list = &table->fib4_rules; ++ if (fib_rules_register(net, &table->fib4_rules_ops)) { ++ kfree(table); ++ return; ++ } ++ net->fib4_table = table; ++} + +- fib_rules_register(&fib4_rules_ops); ++void fib4_rules_exit(struct net *net) ++{ ++ struct fib4_rule_table *table = net->fib4_table; ++ if (table) ++ fib_rules_unregister(net, &table->fib4_rules_ops); ++ kfree(table); + } +diff -Nurb linux-2.6.22-570/net/ipv4/fib_semantics.c linux-2.6.22-591/net/ipv4/fib_semantics.c +--- linux-2.6.22-570/net/ipv4/fib_semantics.c 2007-07-08 19:32:17.000000000 -0400 ++++ 
linux-2.6.22-591/net/ipv4/fib_semantics.c 2007-12-21 15:36:15.000000000 -0500 +@@ -42,7 +42,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -51,14 +50,9 @@ + #define FSprintk(a...) + + static DEFINE_SPINLOCK(fib_info_lock); +-static struct hlist_head *fib_info_hash; +-static struct hlist_head *fib_info_laddrhash; +-static unsigned int fib_hash_size; +-static unsigned int fib_info_cnt; + + #define DEVINDEX_HASHBITS 8 + #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) +-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; + + #ifdef CONFIG_IP_ROUTE_MULTIPATH + +@@ -154,7 +148,8 @@ + dev_put(nh->nh_dev); + nh->nh_dev = NULL; + } endfor_nexthops(fi); +- fib_info_cnt--; ++ fi->fib_net->fib_info_cnt--; ++ release_net(fi->fib_net); + kfree(fi); + } + +@@ -197,9 +192,9 @@ + return 0; + } + +-static inline unsigned int fib_info_hashfn(const struct fib_info *fi) ++static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi) + { +- unsigned int mask = (fib_hash_size - 1); ++ unsigned int mask = net->fib_info_hash_size - 1; + unsigned int val = fi->fib_nhs; + + val ^= fi->fib_protocol; +@@ -209,15 +204,15 @@ + return (val ^ (val >> 7) ^ (val >> 12)) & mask; + } + +-static struct fib_info *fib_find_info(const struct fib_info *nfi) ++static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi) + { + struct hlist_head *head; + struct hlist_node *node; + struct fib_info *fi; + unsigned int hash; + +- hash = fib_info_hashfn(nfi); +- head = &fib_info_hash[hash]; ++ hash = fib_info_hashfn(net, nfi); ++ head = &net->fib_info_hash[hash]; + + hlist_for_each_entry(fi, node, head, fib_hash) { + if (fi->fib_nhs != nfi->fib_nhs) +@@ -250,6 +245,7 @@ + + int ip_fib_check_default(__be32 gw, struct net_device *dev) + { ++ struct net *net = dev->nd_net; + struct hlist_head *head; + struct hlist_node *node; + struct fib_nh *nh; +@@ -258,7 +254,7 @@ + spin_lock(&fib_info_lock); + + hash = fib_devindex_hashfn(dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = &net->fib_info_devhash[hash]; + hlist_for_each_entry(nh, node, head, nh_hash) { + if (nh->nh_dev == dev && + nh->nh_gw == gw && +@@ -321,11 +317,11 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, ++ err = rtnl_notify(skb, info->net, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); ++ rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err); + } + + /* Return the first fib alias matching TOS with +@@ -518,6 +514,7 @@ + static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, + struct fib_nh *nh) + { ++ struct net *net = cfg->fc_nlinfo.net; + int err; + + if (nh->nh_gw) { +@@ -532,9 +529,9 @@ + + if (cfg->fc_scope >= RT_SCOPE_LINK) + return -EINVAL; +- if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) ++ if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) + return -EINVAL; +- if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) ++ if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) + return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; +@@ -545,6 +542,7 @@ + } + { + struct flowi fl = { ++ .fl_net = net, + .nl_u = { + .ip4_u = { + .daddr = nh->nh_gw, +@@ -581,7 +579,7 @@ + if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + return -EINVAL; + +- in_dev = inetdev_by_index(nh->nh_oif); ++ in_dev = inetdev_by_index(net, nh->nh_oif); + if (in_dev == NULL) + return -ENODEV; + if (!(in_dev->dev->flags&IFF_UP)) { +@@ -596,9 +594,9 @@ + 
return 0; + } + +-static inline unsigned int fib_laddr_hashfn(__be32 val) ++static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val) + { +- unsigned int mask = (fib_hash_size - 1); ++ unsigned int mask = net->fib_info_hash_size - 1; + + return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; + } +@@ -623,21 +621,22 @@ + free_pages((unsigned long) hash, get_order(bytes)); + } + +-static void fib_hash_move(struct hlist_head *new_info_hash, ++static void fib_hash_move(struct net *net, ++ struct hlist_head *new_info_hash, + struct hlist_head *new_laddrhash, + unsigned int new_size) + { + struct hlist_head *old_info_hash, *old_laddrhash; +- unsigned int old_size = fib_hash_size; ++ unsigned int old_size = net->fib_info_hash_size; + unsigned int i, bytes; + + spin_lock_bh(&fib_info_lock); +- old_info_hash = fib_info_hash; +- old_laddrhash = fib_info_laddrhash; +- fib_hash_size = new_size; ++ old_info_hash = net->fib_info_hash; ++ old_laddrhash = net->fib_info_laddrhash; ++ net->fib_info_hash_size = new_size; + + for (i = 0; i < old_size; i++) { +- struct hlist_head *head = &fib_info_hash[i]; ++ struct hlist_head *head = &net->fib_info_hash[i]; + struct hlist_node *node, *n; + struct fib_info *fi; + +@@ -647,15 +646,15 @@ + + hlist_del(&fi->fib_hash); + +- new_hash = fib_info_hashfn(fi); ++ new_hash = fib_info_hashfn(net, fi); + dest = &new_info_hash[new_hash]; + hlist_add_head(&fi->fib_hash, dest); + } + } +- fib_info_hash = new_info_hash; ++ net->fib_info_hash = new_info_hash; + + for (i = 0; i < old_size; i++) { +- struct hlist_head *lhead = &fib_info_laddrhash[i]; ++ struct hlist_head *lhead = &net->fib_info_laddrhash[i]; + struct hlist_node *node, *n; + struct fib_info *fi; + +@@ -665,12 +664,12 @@ + + hlist_del(&fi->fib_lhash); + +- new_hash = fib_laddr_hashfn(fi->fib_prefsrc); ++ new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc); + ldest = &new_laddrhash[new_hash]; + hlist_add_head(&fi->fib_lhash, ldest); + } + } +- fib_info_laddrhash = new_laddrhash; ++ net->fib_info_laddrhash = new_laddrhash; + + spin_unlock_bh(&fib_info_lock); + +@@ -681,6 +680,7 @@ + + struct fib_info *fib_create_info(struct fib_config *cfg) + { ++ struct net *net = cfg->fc_nlinfo.net; + int err; + struct fib_info *fi = NULL; + struct fib_info *ofi; +@@ -697,17 +697,10 @@ + goto err_inval; + } + #endif +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- if (cfg->fc_mp_alg) { +- if (cfg->fc_mp_alg < IP_MP_ALG_NONE || +- cfg->fc_mp_alg > IP_MP_ALG_MAX) +- goto err_inval; +- } +-#endif + + err = -ENOBUFS; +- if (fib_info_cnt >= fib_hash_size) { +- unsigned int new_size = fib_hash_size << 1; ++ if (net->fib_info_cnt >= net->fib_info_hash_size) { ++ unsigned int new_size = net->fib_info_hash_size << 1; + struct hlist_head *new_info_hash; + struct hlist_head *new_laddrhash; + unsigned int bytes; +@@ -724,18 +717,19 @@ + memset(new_info_hash, 0, bytes); + memset(new_laddrhash, 0, bytes); + +- fib_hash_move(new_info_hash, new_laddrhash, new_size); ++ fib_hash_move(net, new_info_hash, new_laddrhash, new_size); + } + +- if (!fib_hash_size) ++ if (!net->fib_info_hash_size) + goto failure; + } + + fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); + if (fi == NULL) + goto failure; +- fib_info_cnt++; ++ net->fib_info_cnt++; + ++ fi->fib_net = hold_net(net); + fi->fib_protocol = cfg->fc_protocol; + fi->fib_flags = cfg->fc_flags; + fi->fib_priority = cfg->fc_priority; +@@ -791,10 +785,6 @@ + #endif + } + +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- fi->fib_mp_alg = cfg->fc_mp_alg; 
+-#endif +- + if (fib_props[cfg->fc_type].error) { + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) + goto err_inval; +@@ -811,7 +801,7 @@ + if (nhs != 1 || nh->nh_gw) + goto err_inval; + nh->nh_scope = RT_SCOPE_NOWHERE; +- nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); ++ nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); + err = -ENODEV; + if (nh->nh_dev == NULL) + goto failure; +@@ -825,12 +815,12 @@ + if (fi->fib_prefsrc) { + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fi->fib_prefsrc != cfg->fc_dst) +- if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) ++ if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) + goto err_inval; + } + + link_it: +- if ((ofi = fib_find_info(fi)) != NULL) { ++ if ((ofi = fib_find_info(net, fi)) != NULL) { + fi->fib_dead = 1; + free_fib_info(fi); + ofi->fib_treeref++; +@@ -841,11 +831,13 @@ + atomic_inc(&fi->fib_clntref); + spin_lock_bh(&fib_info_lock); + hlist_add_head(&fi->fib_hash, +- &fib_info_hash[fib_info_hashfn(fi)]); ++ &net->fib_info_hash[fib_info_hashfn(net, fi)]); + if (fi->fib_prefsrc) { + struct hlist_head *head; ++ unsigned int hash; + +- head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; ++ hash = fib_laddr_hashfn(net, fi->fib_prefsrc); ++ head = &net->fib_info_laddrhash[hash]; + hlist_add_head(&fi->fib_lhash, head); + } + change_nexthops(fi) { +@@ -855,7 +847,7 @@ + if (!nh->nh_dev) + continue; + hash = fib_devindex_hashfn(nh->nh_dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = &net->fib_info_devhash[hash]; + hlist_add_head(&nh->nh_hash, head); + } endfor_nexthops(fi) + spin_unlock_bh(&fib_info_lock); +@@ -940,10 +932,6 @@ + res->type = fa->fa_type; + res->scope = fa->fa_scope; + res->fi = fa->fa_info; +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- res->netmask = mask; +- res->network = zone & inet_make_mask(prefixlen); +-#endif + atomic_inc(&res->fi->fib_clntref); + return 0; + } +@@ -1046,7 +1034,7 @@ + - device went down -> we must shutdown all nexthops going via it. 
+ */ + +-int fib_sync_down(__be32 local, struct net_device *dev, int force) ++int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force) + { + int ret = 0; + int scope = RT_SCOPE_NOWHERE; +@@ -1054,9 +1042,9 @@ + if (force) + scope = -1; + +- if (local && fib_info_laddrhash) { +- unsigned int hash = fib_laddr_hashfn(local); +- struct hlist_head *head = &fib_info_laddrhash[hash]; ++ if (local && net->fib_info_laddrhash) { ++ unsigned int hash = fib_laddr_hashfn(net, local); ++ struct hlist_head *head = &net->fib_info_laddrhash[hash]; + struct hlist_node *node; + struct fib_info *fi; + +@@ -1071,7 +1059,7 @@ + if (dev) { + struct fib_info *prev_fi = NULL; + unsigned int hash = fib_devindex_hashfn(dev->ifindex); +- struct hlist_head *head = &fib_info_devhash[hash]; ++ struct hlist_head *head = &net->fib_info_devhash[hash]; + struct hlist_node *node; + struct fib_nh *nh; + +@@ -1124,6 +1112,7 @@ + + int fib_sync_up(struct net_device *dev) + { ++ struct net *net = dev->nd_net; + struct fib_info *prev_fi; + unsigned int hash; + struct hlist_head *head; +@@ -1136,7 +1125,7 @@ + + prev_fi = NULL; + hash = fib_devindex_hashfn(dev->ifindex); +- head = &fib_info_devhash[hash]; ++ head = &net->fib_info_devhash[hash]; + ret = 0; + + hlist_for_each_entry(nh, node, head, nh_hash) { +@@ -1226,3 +1215,17 @@ + spin_unlock_bh(&fib_multipath_lock); + } + #endif ++ ++int fib_info_init(struct net *net) ++{ ++ net->fib_info_devhash = kzalloc( ++ sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL); ++ if (!net->fib_info_devhash) ++ return -ENOMEM; ++ return 0; ++} ++ ++void fib_info_exit(struct net *net) ++{ ++ kfree(net->fib_info_devhash); ++} +diff -Nurb linux-2.6.22-570/net/ipv4/fib_trie.c linux-2.6.22-591/net/ipv4/fib_trie.c +--- linux-2.6.22-570/net/ipv4/fib_trie.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/fib_trie.c 2007-12-21 15:36:15.000000000 -0500 +@@ -78,6 +78,7 @@ + #include + #include + #include ++#include + #include + #include "fib_lookup.h" + +@@ -172,7 +173,6 @@ + static void tnode_free(struct tnode *tn); + + static struct kmem_cache *fn_alias_kmem __read_mostly; +-static struct trie *trie_local = NULL, *trie_main = NULL; + + + /* rcu_read_lock needs to be hold by caller from readside */ +@@ -290,11 +290,10 @@ + WARN_ON(tn && tn->pos+tn->bits > 32); + } + +-static int halve_threshold = 25; +-static int inflate_threshold = 50; +-static int halve_threshold_root = 8; +-static int inflate_threshold_root = 15; +- ++static const int halve_threshold = 25; ++static const int inflate_threshold = 50; ++static const int halve_threshold_root = 15; ++static const int inflate_threshold_root = 25; + + static void __alias_free_mem(struct rcu_head *head) + { +@@ -1771,11 +1770,10 @@ + return found; + } + +-static int trie_last_dflt = -1; +- + static void + fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) + { ++ struct net *net = flp->fl_net; + struct trie *t = (struct trie *) tb->tb_data; + int order, last_idx; + struct fib_info *fi = NULL; +@@ -1819,28 +1817,28 @@ + if (next_fi != res->fi) + break; + } else if (!fib_detect_death(fi, order, &last_resort, +- &last_idx, &trie_last_dflt)) { ++ &last_idx, &net->trie_last_dflt)) { + if (res->fi) + fib_info_put(res->fi); + res->fi = fi; + atomic_inc(&fi->fib_clntref); +- trie_last_dflt = order; ++ net->trie_last_dflt = order; + goto out; + } + fi = next_fi; + order++; + } + if (order <= 0 || fi == NULL) { +- trie_last_dflt = -1; ++ net->trie_last_dflt = -1; + goto out; + 
} + +- if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) { ++ if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) { + if (res->fi) + fib_info_put(res->fi); + res->fi = fi; + atomic_inc(&fi->fib_clntref); +- trie_last_dflt = order; ++ net->trie_last_dflt = order; + goto out; + } + if (last_idx >= 0) { +@@ -1850,7 +1848,7 @@ + if (last_resort) + atomic_inc(&last_resort->fib_clntref); + } +- trie_last_dflt = last_idx; ++ net->trie_last_dflt = last_idx; + out:; + rcu_read_unlock(); + } +@@ -1957,11 +1955,15 @@ + + /* Fix more generic FIB names for init later */ + +-#ifdef CONFIG_IP_MULTIPLE_TABLES ++void fib_hash_exit(struct fib_table *tb) ++{ ++ if (!tb) ++ return; ++ fn_trie_flush(tb); ++ kfree(tb); ++} ++ + struct fib_table * fib_hash_init(u32 id) +-#else +-struct fib_table * __init fib_hash_init(u32 id) +-#endif + { + struct fib_table *tb; + struct trie *t; +@@ -1991,11 +1993,6 @@ + trie_init(t); + + if (id == RT_TABLE_LOCAL) +- trie_local = t; +- else if (id == RT_TABLE_MAIN) +- trie_main = t; +- +- if (id == RT_TABLE_LOCAL) + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); + + return tb; +@@ -2004,6 +2001,8 @@ + #ifdef CONFIG_PROC_FS + /* Depth first Trie walk iterator */ + struct fib_trie_iter { ++ struct net *net; ++ struct trie *trie_local, *trie_main; + struct tnode *tnode; + struct trie *trie; + unsigned index; +@@ -2170,7 +2169,21 @@ + + static int fib_triestat_seq_show(struct seq_file *seq, void *v) + { ++ struct net *net = seq->private; ++ struct trie *trie_local, *trie_main; + struct trie_stat *stat; ++ struct fib_table *tb; ++ ++ trie_local = NULL; ++ tb = fib_get_table(net, RT_TABLE_LOCAL); ++ if (tb) ++ trie_local = (struct trie *) tb->tb_data; ++ ++ trie_main = NULL; ++ tb = fib_get_table(net, RT_TABLE_MAIN); ++ if (tb) ++ trie_main = (struct trie *) tb->tb_data; ++ + + stat = kmalloc(sizeof(*stat), GFP_KERNEL); + if (!stat) +@@ -2197,7 +2210,15 @@ + + static int fib_triestat_seq_open(struct inode *inode, struct file *file) + { +- return single_open(file, fib_triestat_seq_show, NULL); ++ return single_open(file, fib_triestat_seq_show, ++ get_net(PROC_NET(inode))); ++} ++ ++static int fib_triestat_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ put_net(seq->private); ++ return single_release(inode, file); + } + + static const struct file_operations fib_triestat_fops = { +@@ -2205,7 +2226,7 @@ + .open = fib_triestat_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = single_release, ++ .release = fib_triestat_seq_release, + }; + + static struct node *fib_trie_get_idx(struct fib_trie_iter *iter, +@@ -2214,13 +2235,13 @@ + loff_t idx = 0; + struct node *n; + +- for (n = fib_trie_get_first(iter, trie_local); ++ for (n = fib_trie_get_first(iter, iter->trie_local); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; + } + +- for (n = fib_trie_get_first(iter, trie_main); ++ for (n = fib_trie_get_first(iter, iter->trie_main); + n; ++idx, n = fib_trie_get_next(iter)) { + if (pos == idx) + return n; +@@ -2230,10 +2251,23 @@ + + static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos) + { ++ struct fib_trie_iter *iter = seq->private; ++ struct fib_table *tb; ++ ++ if (!iter->trie_local) { ++ tb = fib_get_table(iter->net, RT_TABLE_LOCAL); ++ if (tb) ++ iter->trie_local = (struct trie *) tb->tb_data; ++ } ++ if (!iter->trie_main) { ++ tb = fib_get_table(iter->net, RT_TABLE_MAIN); ++ if (tb) ++ iter->trie_main = 
(struct trie *) tb->tb_data; ++ }
+ rcu_read_lock();
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+- return fib_trie_get_idx(seq->private, *pos - 1);
++ return fib_trie_get_idx(iter, *pos - 1);
+ }
+ 
+ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+@@ -2251,8 +2285,8 @@
+ return v;
+ 
+ /* continue scan in next trie */
+- if (iter->trie == trie_local)
+- return fib_trie_get_first(iter, trie_main);
++ if (iter->trie == iter->trie_local)
++ return fib_trie_get_first(iter, iter->trie_main);
+ 
+ return NULL;
+ }
+@@ -2318,7 +2352,7 @@
+ return 0;
+ 
+ if (!NODE_PARENT(n)) {
+- if (iter->trie == trie_local)
++ if (iter->trie == iter->trie_local)
+ seq_puts(seq, "<local>:\n");
+ else
+ seq_puts(seq, "<main>
:\n"); +@@ -2384,6 +2418,7 @@ + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -2391,12 +2426,20 @@ + goto out; + } + ++static int fib_trie_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct fib_trie_iter *iter = seq->private; ++ put_net(iter->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations fib_trie_fops = { + .owner = THIS_MODULE, + .open = fib_trie_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = fib_trie_seq_release, + }; + + static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) +@@ -2434,7 +2477,7 @@ + return 0; + } + +- if (iter->trie == trie_local) ++ if (iter->trie == iter->trie_local) + return 0; + if (IS_TNODE(l)) + return 0; +@@ -2505,6 +2548,7 @@ + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -2517,35 +2561,37 @@ + .open = fib_route_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = fib_trie_seq_release, + }; + +-int __init fib_proc_init(void) ++int fib_proc_init(struct net *net) + { +- if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops)) ++ net->trie_last_dflt = -1; ++ ++ if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops)) + goto out1; + +- if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops)) ++ if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops)) + goto out2; + +- if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops)) ++ if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops)) + goto out3; + + return 0; + + out3: +- proc_net_remove("fib_triestat"); ++ proc_net_remove(net, "fib_triestat"); + out2: +- proc_net_remove("fib_trie"); ++ proc_net_remove(net, "fib_trie"); + out1: + return -ENOMEM; + } + +-void __init fib_proc_exit(void) ++void fib_proc_exit(struct net *net) + { +- proc_net_remove("fib_trie"); +- proc_net_remove("fib_triestat"); +- proc_net_remove("route"); ++ proc_net_remove(net, "fib_trie"); ++ proc_net_remove(net, "fib_triestat"); ++ proc_net_remove(net, "route"); + } + + #endif /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/ipv4/icmp.c linux-2.6.22-591/net/ipv4/icmp.c +--- linux-2.6.22-570/net/ipv4/icmp.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/icmp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -229,14 +229,13 @@ + * + * On SMP we have one ICMP socket per-cpu. + */ +-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL; +-#define icmp_socket __get_cpu_var(__icmp_socket) ++#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id())) + +-static __inline__ int icmp_xmit_lock(void) ++static __inline__ int icmp_xmit_lock(struct net *net) + { + local_bh_disable(); + +- if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) { ++ if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) { + /* This can happen if the output path signals a + * dst_link_failure() for an outgoing ICMP packet. 
+ */ +@@ -246,9 +245,9 @@ + return 0; + } + +-static void icmp_xmit_unlock(void) ++static void icmp_xmit_unlock(struct net *net) + { +- spin_unlock_bh(&icmp_socket->sk->sk_lock.slock); ++ spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock); + } + + /* +@@ -347,19 +346,20 @@ + static void icmp_push_reply(struct icmp_bxm *icmp_param, + struct ipcm_cookie *ipc, struct rtable *rt) + { ++ struct net *net = icmp_param->skb->dev->nd_net; + struct sk_buff *skb; + +- if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, ++ if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param, + icmp_param->data_len+icmp_param->head_len, + icmp_param->head_len, + ipc, rt, MSG_DONTWAIT) < 0) +- ip_flush_pending_frames(icmp_socket->sk); +- else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { ++ ip_flush_pending_frames(icmp_socket(net)->sk); ++ else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) { + struct icmphdr *icmph = icmp_hdr(skb); + __wsum csum = 0; + struct sk_buff *skb1; + +- skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) { ++ skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) { + csum = csum_add(csum, skb1->csum); + } + csum = csum_partial_copy_nocheck((void *)&icmp_param->data, +@@ -367,7 +367,7 @@ + icmp_param->head_len, csum); + icmph->checksum = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; +- ip_push_pending_frames(icmp_socket->sk); ++ ip_push_pending_frames(icmp_socket(net)->sk); + } + } + +@@ -377,7 +377,8 @@ + + static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) + { +- struct sock *sk = icmp_socket->sk; ++ struct net *net = icmp_param->skb->dev->nd_net; ++ struct sock *sk = icmp_socket(net)->sk; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct rtable *rt = (struct rtable *)skb->dst; +@@ -386,7 +387,7 @@ + if (ip_options_echo(&icmp_param->replyopts, skb)) + return; + +- if (icmp_xmit_lock()) ++ if (icmp_xmit_lock(net)) + return; + + icmp_param->data.icmph.checksum = 0; +@@ -401,7 +402,8 @@ + daddr = icmp_param->replyopts.faddr; + } + { +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = net, ++ .nl_u = { .ip4_u = + { .daddr = daddr, + .saddr = rt->rt_spec_dst, + .tos = RT_TOS(ip_hdr(skb)->tos) } }, +@@ -415,7 +417,7 @@ + icmp_push_reply(icmp_param, &ipc, rt); + ip_rt_put(rt); + out_unlock: +- icmp_xmit_unlock(); ++ icmp_xmit_unlock(net); + } + + +@@ -436,6 +438,7 @@ + int room; + struct icmp_bxm icmp_param; + struct rtable *rt = (struct rtable *)skb_in->dst; ++ struct net *net; + struct ipcm_cookie ipc; + __be32 saddr; + u8 tos; +@@ -443,6 +446,7 @@ + if (!rt) + goto out; + ++ net = rt->fl.fl_net; + /* + * Find the original header. It is expected to be valid, of course. 
+ * Check this, icmp_send is called from the most obscure devices +@@ -505,7 +509,7 @@ + } + } + +- if (icmp_xmit_lock()) ++ if (icmp_xmit_lock(net)) + return; + + /* +@@ -517,7 +521,7 @@ + struct net_device *dev = NULL; + + if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr) +- dev = dev_get_by_index(rt->fl.iif); ++ dev = dev_get_by_index(&init_net, rt->fl.iif); + + if (dev) { + saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK); +@@ -545,12 +549,13 @@ + icmp_param.skb = skb_in; + icmp_param.offset = skb_network_offset(skb_in); + icmp_out_count(icmp_param.data.icmph.type); +- inet_sk(icmp_socket->sk)->tos = tos; ++ inet_sk(icmp_socket(net)->sk)->tos = tos; + ipc.addr = iph->saddr; + ipc.opt = &icmp_param.replyopts; + + { + struct flowi fl = { ++ .fl_net = net, + .nl_u = { + .ip4_u = { + .daddr = icmp_param.replyopts.srr ? +@@ -593,7 +598,7 @@ + ende: + ip_rt_put(rt); + out_unlock: +- icmp_xmit_unlock(); ++ icmp_xmit_unlock(net); + out:; + } + +@@ -604,6 +609,7 @@ + + static void icmp_unreach(struct sk_buff *skb) + { ++ struct net *net = skb->dev->nd_net; + struct iphdr *iph; + struct icmphdr *icmph; + int hash, protocol; +@@ -634,7 +640,7 @@ + case ICMP_PORT_UNREACH: + break; + case ICMP_FRAG_NEEDED: +- if (ipv4_config.no_pmtu_disc) { ++ if (net->sysctl_ipv4_no_pmtu_disc) { + LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: " + "fragmentation needed " + "and DF set.\n", +@@ -678,7 +684,7 @@ + */ + + if (!sysctl_icmp_ignore_bogus_error_responses && +- inet_addr_type(iph->daddr) == RTN_BROADCAST) { ++ inet_addr_type(net, iph->daddr) == RTN_BROADCAST) { + if (net_ratelimit()) + printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP " + "type %u, code %u " +@@ -707,7 +713,7 @@ + hash = protocol & (MAX_INET_PROTOS - 1); + read_lock(&raw_v4_lock); + if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) { +- while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr, ++ while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, + iph->saddr, + skb->dev->ifindex, skb->skb_tag)) != NULL) { + raw_err(raw_sk, skb, info); +@@ -1179,29 +1185,54 @@ + }, + }; + +-void __init icmp_init(struct net_proto_family *ops) ++static void icmp_net_exit(struct net *net) + { +- struct inet_sock *inet; ++ struct socket **sock; + int i; + + for_each_possible_cpu(i) { ++ sock = percpu_ptr(net->__icmp_socket, i); ++ if (!*sock) ++ continue; ++ /* At the last minute lie and say this is a socket for ++ * the initial network namespace. So the socket will ++ * be safe to free. ++ */ ++ (*sock)->sk->sk_net = get_net(&init_net); ++ sock_release(*sock); ++ *sock = NULL; ++ } ++ percpu_free(net->__icmp_socket); ++} ++ ++static int icmp_net_init(struct net *net) ++{ ++ struct socket **sock; ++ struct inet_sock *inet; + int err; ++ int i; ++ ++ net->__icmp_socket = alloc_percpu(struct socket *); ++ if (!net->__icmp_socket) ++ return -ENOMEM; ++ ++ for_each_possible_cpu(i) { + +- err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, +- &per_cpu(__icmp_socket, i)); ++ sock = percpu_ptr(net->__icmp_socket, i); + ++ err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock); + if (err < 0) +- panic("Failed to create the ICMP control socket.\n"); ++ goto fail; + +- per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC; ++ (*sock)->sk->sk_allocation = GFP_ATOMIC; + + /* Enough space for 2 64K ICMP packets, including + * sk_buff struct overhead. 
+ */ +- per_cpu(__icmp_socket, i)->sk->sk_sndbuf = ++ (*sock)->sk->sk_sndbuf = + (2 * ((64 * 1024) + sizeof(struct sk_buff))); + +- inet = inet_sk(per_cpu(__icmp_socket, i)->sk); ++ inet = inet_sk((*sock)->sk); + inet->uc_ttl = -1; + inet->pmtudisc = IP_PMTUDISC_DONT; + +@@ -1209,8 +1240,27 @@ + * see it, we do not wish this socket to see incoming + * packets. + */ +- per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk); ++ (*sock)->sk->sk_prot->unhash((*sock)->sk); ++ ++ /* Don't hold an extra reference on the namespace */ ++ put_net((*sock)->sk->sk_net); + } ++ return 0; ++fail: ++ icmp_net_exit(net); ++ return err; ++ ++} ++ ++static struct pernet_operations icmp_net_ops = { ++ .init = icmp_net_init, ++ .exit = icmp_net_exit, ++}; ++ ++void __init icmp_init(struct net_proto_family *ops) ++{ ++ if (register_pernet_subsys(&icmp_net_ops)) ++ panic("Failed to create the ICMP control socket.\n"); + } + + EXPORT_SYMBOL(icmp_err_convert); +diff -Nurb linux-2.6.22-570/net/ipv4/igmp.c linux-2.6.22-591/net/ipv4/igmp.c +--- linux-2.6.22-570/net/ipv4/igmp.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/igmp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -97,6 +97,7 @@ + #include + #include + #include ++#include + #include + #ifdef CONFIG_IP_MROUTE + #include +@@ -129,12 +130,12 @@ + */ + + #define IGMP_V1_SEEN(in_dev) \ +- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \ ++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \ + ((in_dev)->mr_v1_seen && \ + time_before(jiffies, (in_dev)->mr_v1_seen))) + #define IGMP_V2_SEEN(in_dev) \ +- (IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \ ++ (IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \ + IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \ + ((in_dev)->mr_v2_seen && \ + time_before(jiffies, (in_dev)->mr_v2_seen))) +@@ -296,7 +297,8 @@ + return NULL; + + { +- struct flowi fl = { .oif = dev->ifindex, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = dev->ifindex, + .nl_u = { .ip4_u = { + .daddr = IGMPV3_ALL_MCR } }, + .proto = IPPROTO_IGMP }; +@@ -646,7 +648,8 @@ + dst = group; + + { +- struct flowi fl = { .oif = dev->ifindex, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = dev->ifindex, + .nl_u = { .ip4_u = { .daddr = dst } }, + .proto = IPPROTO_IGMP }; + if (ip_route_output_key(&rt, &fl)) +@@ -929,6 +932,11 @@ + struct in_device *in_dev = in_dev_get(skb->dev); + int len = skb->len; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + if (in_dev==NULL) { + kfree_skb(skb); + return 0; +@@ -1393,20 +1401,22 @@ + + static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) + { +- struct flowi fl = { .nl_u = { .ip4_u = +- { .daddr = imr->imr_multiaddr.s_addr } } }; ++ struct flowi fl = { ++ .fl_net = &init_net, ++ .nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } } ++ }; + struct rtable *rt; + struct net_device *dev = NULL; + struct in_device *idev = NULL; + + if (imr->imr_ifindex) { +- idev = inetdev_by_index(imr->imr_ifindex); ++ idev = inetdev_by_index(&init_net, imr->imr_ifindex); + if (idev) + __in_dev_put(idev); + return idev; + } + if (imr->imr_address.s_addr) { +- dev = ip_dev_find(imr->imr_address.s_addr); ++ dev = ip_dev_find(&init_net, imr->imr_address.s_addr); + if (!dev) + return NULL; + dev_put(dev); +@@ -2234,7 +2244,7 @@ + struct in_device *in_dev; + inet->mc_list = iml->next; + +- in_dev = inetdev_by_index(iml->multi.imr_ifindex); ++ in_dev = 
inetdev_by_index(&init_net, iml->multi.imr_ifindex); + (void) ip_mc_leave_src(sk, iml, in_dev); + if (in_dev != NULL) { + ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); +@@ -2291,7 +2301,7 @@ + struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); + + state->in_dev = NULL; +- for_each_netdev(state->dev) { ++ for_each_netdev(&init_net, state->dev) { + struct in_device *in_dev; + in_dev = in_dev_get(state->dev); + if (!in_dev) +@@ -2453,7 +2463,7 @@ + + state->idev = NULL; + state->im = NULL; +- for_each_netdev(state->dev) { ++ for_each_netdev(&init_net, state->dev) { + struct in_device *idev; + idev = in_dev_get(state->dev); + if (unlikely(idev == NULL)) +@@ -2613,8 +2623,8 @@ + + int __init igmp_mc_proc_init(void) + { +- proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops); +- proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops); ++ proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops); ++ proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops); + return 0; + } + #endif +diff -Nurb linux-2.6.22-570/net/ipv4/inet_connection_sock.c linux-2.6.22-591/net/ipv4/inet_connection_sock.c +--- linux-2.6.22-570/net/ipv4/inet_connection_sock.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/inet_connection_sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -32,7 +32,7 @@ + /* + * This array holds the first and last local port number. + */ +-int sysctl_local_port_range[2] = { 32768, 61000 }; ++//int sysctl_local_port_range[2] = { 32768, 61000 }; + + int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) + { +@@ -74,6 +74,7 @@ + + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && ++ (sk->sk_net == sk2->sk_net) && + !inet_v6_ipv6only(sk2) && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || +@@ -98,6 +99,7 @@ + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)) + { ++ struct net *net = sk->sk_net; + struct inet_bind_hashbucket *head; + struct hlist_node *node; + struct inet_bind_bucket *tb; +@@ -105,16 +107,16 @@ + + local_bh_disable(); + if (!snum) { +- int low = sysctl_local_port_range[0]; +- int high = sysctl_local_port_range[1]; ++ int low = sk->sk_net->sysctl_local_port_range[0]; ++ int high = sk->sk_net->sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + int rover = net_random() % (high - low) + low; + + do { +- head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; ++ head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) +- if (tb->port == rover) ++ if ((tb->port == rover) && (tb->net == net)) + goto next; + break; + next: +@@ -138,10 +140,10 @@ + */ + snum = rover; + } else { +- head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; ++ head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) +- if (tb->port == snum) ++ if ((tb->port == snum) && (tb->net==net)) + goto tb_found; + } + tb = NULL; +@@ -161,7 +163,7 @@ + } + tb_not_found: + ret = 1; +- if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) ++ if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) +@@ -341,7 +343,8 @@ + struct rtable *rt; + const struct inet_request_sock *ireq = 
inet_rsk(req); + struct ip_options *opt = inet_rsk(req)->opt; +- struct flowi fl = { .oif = sk->sk_bound_dev_if, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = sk->sk_bound_dev_if, + .nl_u = { .ip4_u = + { .daddr = ((opt && opt->srr) ? + opt->faddr : +diff -Nurb linux-2.6.22-570/net/ipv4/inet_diag.c linux-2.6.22-591/net/ipv4/inet_diag.c +--- linux-2.6.22-570/net/ipv4/inet_diag.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/inet_diag.c 2007-12-21 15:36:15.000000000 -0500 +@@ -227,6 +227,7 @@ + static int inet_diag_get_exact(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) + { ++ struct net *net = in_skb->sk->sk_net; + int err; + struct sock *sk; + struct inet_diag_req *req = NLMSG_DATA(nlh); +@@ -242,7 +243,7 @@ + /* TODO: lback */ + sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], +- req->id.idiag_sport, req->id.idiag_if); ++ req->id.idiag_sport, req->id.idiag_if, net); + } + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else if (req->idiag_family == AF_INET6) { +@@ -251,7 +252,7 @@ + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, +- req->id.idiag_if); ++ req->id.idiag_if, net); + } + #endif + else { +@@ -906,8 +907,8 @@ + if (!inet_diag_table) + goto out; + +- idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, +- NULL, THIS_MODULE); ++ idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, ++ inet_diag_rcv, NULL, THIS_MODULE); + if (idiagnl == NULL) + goto out_free_table; + err = 0; +diff -Nurb linux-2.6.22-570/net/ipv4/inet_hashtables.c linux-2.6.22-591/net/ipv4/inet_hashtables.c +--- linux-2.6.22-570/net/ipv4/inet_hashtables.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/inet_hashtables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -29,11 +29,13 @@ + */ + struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, + struct inet_bind_hashbucket *head, ++ struct net *net, + const unsigned short snum) + { + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); + + if (tb != NULL) { ++ tb->net = net; + tb->port = snum; + tb->fastreuse = 0; + INIT_HLIST_HEAD(&tb->owners); +@@ -66,7 +68,7 @@ + */ + static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) + { +- const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); ++ const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size); + struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; + struct inet_bind_bucket *tb; + +@@ -127,7 +129,7 @@ + static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, + const __be32 daddr, + const unsigned short hnum, +- const int dif) ++ const int dif, struct net *net) + { + struct sock *result = NULL, *sk; + const struct hlist_node *node; +@@ -149,6 +151,8 @@ + continue; + score += 2; + } ++ if (sk->sk_net != net) ++ continue; + if (score == 5) + return sk; + if (score > hiscore) { +@@ -163,22 +167,22 @@ + /* Optimize the common listener case. 
*/
+ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ const __be32 daddr, const unsigned short hnum,
+- const int dif)
++ const int dif, struct net *net)
+ {
+ struct sock *sk = NULL;
+ const struct hlist_head *head;
+
+ read_lock(&hashinfo->lhash_lock);
+- head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
++ head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)];
+ if (!hlist_empty(head)) {
+ const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+
+ if (inet->num == hnum && !sk->sk_node.next &&
+ v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
+ (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+- !sk->sk_bound_dev_if)
++ !sk->sk_bound_dev_if && (sk->sk_net == net))
+ goto sherry_cache;
+- sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
++ sk = inet_lookup_listener_slow(head, daddr, hnum, dif, net);
+ }
+ if (sk) {
+ sherry_cache:
+@@ -196,12 +200,13 @@
+ {
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ struct inet_sock *inet = inet_sk(sk);
++ struct net *net = sk->sk_net;
+ __be32 daddr = inet->rcv_saddr;
+ __be32 saddr = inet->daddr;
+ int dif = sk->sk_bound_dev_if;
+ INET_ADDR_COOKIE(acookie, saddr, daddr)
+ const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+- unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
++ unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
+ struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ struct sock *sk2;
+ const struct hlist_node *node;
+@@ -214,7 +219,7 @@
+ sk_for_each(sk2, node, &head->twchain) {
+ tw = inet_twsk(sk2);
+
+- if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
++ if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) {
+ if (twsk_unique(sk, sk2, twp))
+ goto unique;
+ else
+@@ -225,7 +230,7 @@
+
+ /* And established part... */
+ sk_for_each(sk2, node, &head->chain) {
+- if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++ if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net))
+ goto not_unique;
+ }
+
+@@ -271,6 +276,7 @@
+ int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ struct sock *sk)
+ {
++ struct net *net = sk->sk_net;
+ struct inet_hashinfo *hinfo = death_row->hashinfo;
+ const unsigned short snum = inet_sk(sk)->num;
+ struct inet_bind_hashbucket *head;
+@@ -278,8 +284,8 @@
+ int ret;
+
+ if (!snum) {
+- int low = sysctl_local_port_range[0];
+- int high = sysctl_local_port_range[1];
++ int low = sk->sk_net->sysctl_local_port_range[0];
++ int high = sk->sk_net->sysctl_local_port_range[1];
+ int range = high - low;
+ int i;
+ int port;
+@@ -291,7 +297,7 @@
+ local_bh_disable();
+ for (i = 1; i <= range; i++) {
+ port = low + (i + offset) % range;
+- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
++ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
+ spin_lock(&head->lock);
+
+ /* Does not bother with rcv_saddr checks,
+ * unique enough. 
+ */ + inet_bind_bucket_for_each(tb, node, &head->chain) { +- if (tb->port == port) { ++ if ((tb->port == port) && (tb->net == net)) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; +@@ -311,7 +317,7 @@ + } + } + +- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port); ++ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port); + if (!tb) { + spin_unlock(&head->lock); + break; +@@ -346,7 +352,7 @@ + goto out; + } + +- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { +diff -Nurb linux-2.6.22-570/net/ipv4/inet_timewait_sock.c linux-2.6.22-591/net/ipv4/inet_timewait_sock.c +--- linux-2.6.22-570/net/ipv4/inet_timewait_sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/inet_timewait_sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -31,7 +31,7 @@ + write_unlock(&ehead->lock); + + /* Disassociate with bind bucket. */ +- bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; ++ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tb = tw->tw_tb; + __hlist_del(&tw->tw_bind_node); +@@ -65,7 +65,7 @@ + Note, that any socket with inet->num != 0 MUST be bound in + binding cache, even if it is closed. + */ +- bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; ++ bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)]; + spin_lock(&bhead->lock); + tw->tw_tb = icsk->icsk_bind_hash; + BUG_TRAP(icsk->icsk_bind_hash); +diff -Nurb linux-2.6.22-570/net/ipv4/inetpeer.c linux-2.6.22-591/net/ipv4/inetpeer.c +--- linux-2.6.22-570/net/ipv4/inetpeer.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/inetpeer.c 2007-12-21 15:36:15.000000000 -0500 +@@ -81,71 +81,94 @@ + .avl_height = 0 + }; + #define peer_avl_empty (&peer_fake_node) +-static struct inet_peer *peer_root = peer_avl_empty; + static DEFINE_RWLOCK(peer_pool_lock); + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ + +-static int peer_total; +-/* Exported for sysctl_net_ipv4. 
*/ +-int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more +- * aggressively at this stage */ +-int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ +-int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ +-int inet_peer_gc_mintime __read_mostly = 10 * HZ; +-int inet_peer_gc_maxtime __read_mostly = 120 * HZ; +- +-static struct inet_peer *inet_peer_unused_head; +-static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; + static DEFINE_SPINLOCK(inet_peer_unused_lock); + + static void peer_check_expire(unsigned long dummy); +-static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); + ++static int inet_peers_net_init(struct net *net); ++static void inet_peers_net_exit(struct net *net); ++static struct pernet_operations inet_peers_net_ops = { ++ .init = inet_peers_net_init, ++ .exit = inet_peers_net_exit, ++}; + + /* Called from ip_output.c:ip_init */ + void __init inet_initpeers(void) + { ++ peer_cachep = kmem_cache_create("inet_peer_cache", ++ sizeof(struct inet_peer), ++ 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, ++ NULL, NULL); ++ ++ register_pernet_subsys(&inet_peers_net_ops); ++} ++ ++static int inet_peers_net_init(struct net *net) ++{ + struct sysinfo si; + ++ net->peer_root = peer_avl_empty; ++ net->inet_peer_unused_tailp = &net->inet_peer_unused_head; ++ ++ net->inet_peer_threshold = 65536 + 128; /* start to throw entries more ++ * aggressively at this stage */ ++ net->inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ ++ net->inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ ++ net->inet_peer_gc_mintime = 10 * HZ; ++ net->inet_peer_gc_maxtime = 120 * HZ; ++ + /* Use the straight interface to information about memory. */ + si_meminfo(&si); ++ + /* The values below were suggested by Alexey Kuznetsov + * . I don't have any opinion about the values + * myself. --SAW + */ + if (si.totalram <= (32768*1024)/PAGE_SIZE) +- inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ ++ net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ + if (si.totalram <= (16384*1024)/PAGE_SIZE) +- inet_peer_threshold >>= 1; /* about 512KB */ ++ net->inet_peer_threshold >>= 1; /* about 512KB */ + if (si.totalram <= (8192*1024)/PAGE_SIZE) +- inet_peer_threshold >>= 2; /* about 128KB */ ++ net->inet_peer_threshold >>= 2; /* about 128KB */ + +- peer_cachep = kmem_cache_create("inet_peer_cache", +- sizeof(struct inet_peer), +- 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, +- NULL, NULL); + ++ init_timer(&net->peer_periodic_timer); ++ net->peer_periodic_timer.function = peer_check_expire; + /* All the timers, started at system startup tend + to synchronize. Perturb it a bit. + */ +- peer_periodic_timer.expires = jiffies +- + net_random() % inet_peer_gc_maxtime +- + inet_peer_gc_maxtime; +- add_timer(&peer_periodic_timer); ++ net->peer_periodic_timer.expires = jiffies ++ + net_random() % net->inet_peer_gc_maxtime ++ + net->inet_peer_gc_maxtime; ++ /* Remember our namespace */ ++ net->peer_periodic_timer.data = (unsigned long)net; ++ add_timer(&net->peer_periodic_timer); ++ ++ return 0; ++} ++ ++static void inet_peers_net_exit(struct net *net) ++{ ++ del_timer(&net->peer_periodic_timer); ++ /* CHECKME do I need to do something to release all of the peers */ + } + + /* Called with or without local BH being disabled. 
*/ +-static void unlink_from_unused(struct inet_peer *p) ++static void unlink_from_unused(struct net *net, struct inet_peer *p) + { + spin_lock_bh(&inet_peer_unused_lock); + if (p->unused_prevp != NULL) { + /* On unused list. */ +- *p->unused_prevp = p->unused_next; +- if (p->unused_next != NULL) +- p->unused_next->unused_prevp = p->unused_prevp; ++ *p->unused_prevp = p->u.unused_next; ++ if (p->u.unused_next != NULL) ++ p->u.unused_next->unused_prevp = p->unused_prevp; + else +- inet_peer_unused_tailp = p->unused_prevp; ++ net->inet_peer_unused_tailp = p->unused_prevp; + p->unused_prevp = NULL; /* mark it as removed */ ++ p->u.net = hold_net(net); /* Remember the net */ + } + spin_unlock_bh(&inet_peer_unused_lock); + } +@@ -160,9 +183,9 @@ + struct inet_peer *u, **v; \ + if (_stack) { \ + stackptr = _stack; \ +- *stackptr++ = &peer_root; \ ++ *stackptr++ = &net->peer_root; \ + } \ +- for (u = peer_root; u != peer_avl_empty; ) { \ ++ for (u = net->peer_root; u != peer_avl_empty; ) { \ + if (_daddr == u->v4daddr) \ + break; \ + if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ +@@ -279,7 +302,7 @@ + } while(0) + + /* May be called with local BH enabled. */ +-static void unlink_from_pool(struct inet_peer *p) ++static void unlink_from_pool(struct net *net, struct inet_peer *p) + { + int do_free; + +@@ -317,7 +340,7 @@ + delp[1] = &t->avl_left; /* was &p->avl_left */ + } + peer_avl_rebalance(stack, stackptr); +- peer_total--; ++ net->peer_total--; + do_free = 1; + } + write_unlock_bh(&peer_pool_lock); +@@ -335,13 +358,13 @@ + } + + /* May be called with local BH enabled. */ +-static int cleanup_once(unsigned long ttl) ++static int cleanup_once(struct net *net, unsigned long ttl) + { + struct inet_peer *p; + + /* Remove the first entry from the list of unused nodes. */ + spin_lock_bh(&inet_peer_unused_lock); +- p = inet_peer_unused_head; ++ p = net->inet_peer_unused_head; + if (p != NULL) { + __u32 delta = (__u32)jiffies - p->dtime; + if (delta < ttl) { +@@ -349,12 +372,13 @@ + spin_unlock_bh(&inet_peer_unused_lock); + return -1; + } +- inet_peer_unused_head = p->unused_next; +- if (p->unused_next != NULL) +- p->unused_next->unused_prevp = p->unused_prevp; ++ net->inet_peer_unused_head = p->u.unused_next; ++ if (p->u.unused_next != NULL) ++ p->u.unused_next->unused_prevp = p->unused_prevp; + else +- inet_peer_unused_tailp = p->unused_prevp; ++ net->inet_peer_unused_tailp = p->unused_prevp; + p->unused_prevp = NULL; /* mark as not on the list */ ++ p->u.net = hold_net(net); + /* Grab an extra reference to prevent node disappearing + * before unlink_from_pool() call. */ + atomic_inc(&p->refcnt); +@@ -367,12 +391,12 @@ + * happen because of entry limits in route cache. */ + return -1; + +- unlink_from_pool(p); ++ unlink_from_pool(net, p); + return 0; + } + + /* Called with or without local BH being disabled. */ +-struct inet_peer *inet_getpeer(__be32 daddr, int create) ++struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create) + { + struct inet_peer *p, *n; + struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; +@@ -387,7 +411,7 @@ + if (p != peer_avl_empty) { + /* The existing node has been found. */ + /* Remove the entry from unused list if it was there. */ +- unlink_from_unused(p); ++ unlink_from_unused(net, p); + return p; + } + +@@ -413,13 +437,13 @@ + /* Link the node. 
*/
+ link_to_pool(n);
+ n->unused_prevp = NULL; /* not on the list */
+- peer_total++;
++ n->u.net = hold_net(net); /* Remember the net */
++ net->peer_total++;
+ write_unlock_bh(&peer_pool_lock);
+
+- if (peer_total >= inet_peer_threshold)
++ if (net->peer_total >= net->inet_peer_threshold)
+ /* Remove one less-recently-used entry. */
+- cleanup_once(0);
+-
++ cleanup_once(net, 0);
+ return n;
+
+ out_free:
+@@ -427,25 +451,26 @@
+ atomic_inc(&p->refcnt);
+ write_unlock_bh(&peer_pool_lock);
+ /* Remove the entry from unused list if it was there. */
+- unlink_from_unused(p);
++ unlink_from_unused(net, p);
+ /* Free the preallocated node. */
+ kmem_cache_free(peer_cachep, n);
+ return p;
+ }
+
+ /* Called with local BH disabled. */
+-static void peer_check_expire(unsigned long dummy)
++static void peer_check_expire(unsigned long arg)
+ {
++ struct net *net = (void *)arg;
+ unsigned long now = jiffies;
+ int ttl;
+
+- if (peer_total >= inet_peer_threshold)
+- ttl = inet_peer_minttl;
++ if (net->peer_total >= net->inet_peer_threshold)
++ ttl = net->inet_peer_minttl;
+ else
+- ttl = inet_peer_maxttl
+- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
+- peer_total / inet_peer_threshold * HZ;
+- while (!cleanup_once(ttl)) {
++ ttl = net->inet_peer_maxttl
++ - (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ *
++ net->peer_total / net->inet_peer_threshold * HZ;
++ while (!cleanup_once(net, ttl)) {
+ if (jiffies != now)
+ break;
+ }
+@@ -453,25 +478,30 @@
+ /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
+ * interval depending on the total number of entries (more entries,
+ * less interval). */
+- if (peer_total >= inet_peer_threshold)
+- peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
++ if (net->peer_total >= net->inet_peer_threshold)
++ net->peer_periodic_timer.expires = jiffies
++ + net->inet_peer_gc_mintime;
+ else
+- peer_periodic_timer.expires = jiffies
+- + inet_peer_gc_maxtime
+- - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+- peer_total / inet_peer_threshold * HZ;
+- add_timer(&peer_periodic_timer);
++ net->peer_periodic_timer.expires = jiffies
++ + net->inet_peer_gc_maxtime
++ - (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ *
++ net->peer_total / net->inet_peer_threshold * HZ;
++ add_timer(&net->peer_periodic_timer);
+ }
+
+ void inet_putpeer(struct inet_peer *p)
+ {
+ spin_lock_bh(&inet_peer_unused_lock);
+ if (atomic_dec_and_test(&p->refcnt)) {
+- p->unused_prevp = inet_peer_unused_tailp;
+- p->unused_next = NULL;
+- *inet_peer_unused_tailp = p;
+- inet_peer_unused_tailp = &p->unused_next;
++ struct net *net = p->u.net;
++
++ p->unused_prevp = net->inet_peer_unused_tailp;
++ p->u.unused_next = NULL;
++ *net->inet_peer_unused_tailp = p;
++ net->inet_peer_unused_tailp = &p->u.unused_next;
+ p->dtime = (__u32)jiffies;
++
++ release_net(net);
+ }
+ spin_unlock_bh(&inet_peer_unused_lock);
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_fragment.c linux-2.6.22-591/net/ipv4/ip_fragment.c
+--- linux-2.6.22-570/net/ipv4/ip_fragment.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ip_fragment.c 2007-12-21 15:36:15.000000000 -0500
+@@ -49,21 +49,6 @@
+ * as well. Or notify me, at least. --ANK
+ */
+
+-/* Fragment cache limits. We will commit 256K at one time. Should we
+- * cross that limit we will prune down to 192K. This should cope with
+- * even the most extreme cases without allowing an attacker to measurably
+- * harm machine performance. 
+- */ +-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; +- +-int sysctl_ipfrag_max_dist __read_mostly = 64; +- +-/* Important NOTE! Fragment queue must be destroyed before MSL expires. +- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. +- */ +-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; +- + struct ipfrag_skb_cb + { + struct inet_skb_parm h; +@@ -96,6 +81,7 @@ + int iif; + unsigned int rid; + struct inet_peer *peer; ++ struct net *net; + }; + + /* Hash table. */ +@@ -103,17 +89,13 @@ + #define IPQ_HASHSZ 64 + + /* Per-bucket lock is easy to add now. */ +-static struct hlist_head ipq_hash[IPQ_HASHSZ]; + static DEFINE_RWLOCK(ipfrag_lock); +-static u32 ipfrag_hash_rnd; +-static LIST_HEAD(ipq_lru_list); +-int ip_frag_nqueues = 0; + + static __inline__ void __ipq_unlink(struct ipq *qp) + { + hlist_del(&qp->list); + list_del(&qp->lru_list); +- ip_frag_nqueues--; ++ qp->net->ip_frag_nqueues--; + } + + static __inline__ void ipq_unlink(struct ipq *ipq) +@@ -123,70 +105,71 @@ + write_unlock(&ipfrag_lock); + } + +-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) ++static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot) + { + return jhash_3words((__force u32)id << 16 | prot, + (__force u32)saddr, (__force u32)daddr, +- ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); ++ net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); + } + +-static struct timer_list ipfrag_secret_timer; +-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; +- +-static void ipfrag_secret_rebuild(unsigned long dummy) ++static void ipfrag_secret_rebuild(unsigned long arg) + { ++ struct net *net = (void *)arg; + unsigned long now = jiffies; + int i; + + write_lock(&ipfrag_lock); +- get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); ++ get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32)); + for (i = 0; i < IPQ_HASHSZ; i++) { + struct ipq *q; ++ struct hlist_head *head; + struct hlist_node *p, *n; + +- hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { +- unsigned int hval = ipqhashfn(q->id, q->saddr, ++ head = &net->ipq_hash[i]; ++ hlist_for_each_entry_safe(q, p, n, head, list) { ++ unsigned int hval = ipqhashfn(net, q->id, q->saddr, + q->daddr, q->protocol); + + if (hval != i) { + hlist_del(&q->list); + + /* Relink to new hash chain. */ +- hlist_add_head(&q->list, &ipq_hash[hval]); ++ hlist_add_head(&q->list, &net->ipq_hash[hval]); + } + } + } + write_unlock(&ipfrag_lock); + +- mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); ++ mod_timer(&net->ipfrag_secret_timer, ++ now + net->sysctl_ipfrag_secret_interval); + } + +-atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ +- + /* Memory Tracking Functions. 
*/ +-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) ++static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work) + { + if (work) + *work -= skb->truesize; +- atomic_sub(skb->truesize, &ip_frag_mem); ++ atomic_sub(skb->truesize, &net->ip_frag_mem); + kfree_skb(skb); + } + + static __inline__ void frag_free_queue(struct ipq *qp, int *work) + { ++ struct net *net = qp->net; + if (work) + *work -= sizeof(struct ipq); +- atomic_sub(sizeof(struct ipq), &ip_frag_mem); ++ atomic_sub(sizeof(struct ipq), &net->ip_frag_mem); ++ release_net(net); + kfree(qp); + } + +-static __inline__ struct ipq *frag_alloc_queue(void) ++static __inline__ struct ipq *frag_alloc_queue(struct net *net) + { + struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); + + if (!qp) + return NULL; +- atomic_add(sizeof(struct ipq), &ip_frag_mem); ++ atomic_add(sizeof(struct ipq), &net->ip_frag_mem); + return qp; + } + +@@ -209,7 +192,7 @@ + while (fp) { + struct sk_buff *xp = fp->next; + +- frag_kfree_skb(fp, work); ++ frag_kfree_skb(qp->net, fp, work); + fp = xp; + } + +@@ -241,23 +224,23 @@ + /* Memory limiting on fragments. Evictor trashes the oldest + * fragment queue until we are back under the threshold. + */ +-static void ip_evictor(void) ++static void ip_evictor(struct net *net) + { + struct ipq *qp; + struct list_head *tmp; + int work; + +- work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; ++ work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh; + if (work <= 0) + return; + + while (work > 0) { + read_lock(&ipfrag_lock); +- if (list_empty(&ipq_lru_list)) { ++ if (list_empty(&net->ipq_lru_list)) { + read_unlock(&ipfrag_lock); + return; + } +- tmp = ipq_lru_list.next; ++ tmp = net->ipq_lru_list.next; + qp = list_entry(tmp, struct ipq, lru_list); + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); +@@ -292,7 +275,7 @@ + if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { + struct sk_buff *head = qp->fragments; + /* Send an ICMP "Fragment Reassembly Timeout" message. */ +- if ((head->dev = dev_get_by_index(qp->iif)) != NULL) { ++ if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) { + icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); + dev_put(head->dev); + } +@@ -304,7 +287,7 @@ + + /* Creation primitives. */ + +-static struct ipq *ip_frag_intern(struct ipq *qp_in) ++static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in) + { + struct ipq *qp; + #ifdef CONFIG_SMP +@@ -313,14 +296,14 @@ + unsigned int hash; + + write_lock(&ipfrag_lock); +- hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, ++ hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr, + qp_in->protocol); + #ifdef CONFIG_SMP + /* With SMP race we have to recheck hash table, because + * such entry could be created on other cpu, while we + * promoted read lock to write lock. 
+ */ +- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { ++ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { + if (qp->id == qp_in->id && + qp->saddr == qp_in->saddr && + qp->daddr == qp_in->daddr && +@@ -336,26 +319,27 @@ + #endif + qp = qp_in; + +- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) ++ if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time)) + atomic_inc(&qp->refcnt); + + atomic_inc(&qp->refcnt); +- hlist_add_head(&qp->list, &ipq_hash[hash]); ++ hlist_add_head(&qp->list, &net->ipq_hash[hash]); + INIT_LIST_HEAD(&qp->lru_list); +- list_add_tail(&qp->lru_list, &ipq_lru_list); +- ip_frag_nqueues++; ++ list_add_tail(&qp->lru_list, &net->ipq_lru_list); ++ net->ip_frag_nqueues++; + write_unlock(&ipfrag_lock); + return qp; + } + + /* Add an entry to the 'ipq' queue for a newly received IP datagram. */ +-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) ++static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user) + { + struct ipq *qp; + +- if ((qp = frag_alloc_queue()) == NULL) ++ if ((qp = frag_alloc_queue(net)) == NULL) + goto out_nomem; + ++ qp->net = hold_net(net); + qp->protocol = iph->protocol; + qp->last_in = 0; + qp->id = iph->id; +@@ -366,7 +350,8 @@ + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; +- qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; ++ qp->peer = net->sysctl_ipfrag_max_dist ? ++ inet_getpeer(net, iph->saddr, 1) : NULL; + + /* Initialize a timer for this entry. */ + init_timer(&qp->timer); +@@ -375,7 +360,7 @@ + spin_lock_init(&qp->lock); + atomic_set(&qp->refcnt, 1); + +- return ip_frag_intern(qp); ++ return ip_frag_intern(net, qp); + + out_nomem: + LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n"); +@@ -385,7 +370,7 @@ + /* Find the correct entry in the "incomplete datagrams" queue for + * this IP datagram, and create new one, if nothing is found. + */ +-static inline struct ipq *ip_find(struct iphdr *iph, u32 user) ++static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user) + { + __be16 id = iph->id; + __be32 saddr = iph->saddr; +@@ -396,8 +381,8 @@ + struct hlist_node *n; + + read_lock(&ipfrag_lock); +- hash = ipqhashfn(id, saddr, daddr, protocol); +- hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { ++ hash = ipqhashfn(net, id, saddr, daddr, protocol); ++ hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) { + if (qp->id == id && + qp->saddr == saddr && + qp->daddr == daddr && +@@ -410,14 +395,14 @@ + } + read_unlock(&ipfrag_lock); + +- return ip_frag_create(iph, user); ++ return ip_frag_create(net, iph, user); + } + + /* Is the fragment too far ahead to be part of ipq? */ + static inline int ip_frag_too_far(struct ipq *qp) + { + struct inet_peer *peer = qp->peer; +- unsigned int max = sysctl_ipfrag_max_dist; ++ unsigned int max = qp->net->sysctl_ipfrag_max_dist; + unsigned int start, end; + + int rc; +@@ -442,7 +427,7 @@ + { + struct sk_buff *fp; + +- if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { ++ if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); + return -ETIMEDOUT; + } +@@ -450,7 +435,7 @@ + fp = qp->fragments; + do { + struct sk_buff *xp = fp->next; +- frag_kfree_skb(fp, NULL); ++ frag_kfree_skb(qp->net, fp, NULL); + fp = xp; + } while (fp); + +@@ -466,6 +451,7 @@ + /* Add new segment to existing queue. 
*/ + static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) + { ++ struct net *net = qp->net; + struct sk_buff *prev, *next; + int flags, offset; + int ihl, end; +@@ -576,7 +562,7 @@ + qp->fragments = next; + + qp->meat -= free_it->len; +- frag_kfree_skb(free_it, NULL); ++ frag_kfree_skb(net, free_it, NULL); + } + } + +@@ -594,12 +580,12 @@ + skb->dev = NULL; + qp->stamp = skb->tstamp; + qp->meat += skb->len; +- atomic_add(skb->truesize, &ip_frag_mem); ++ atomic_add(skb->truesize, &net->ip_frag_mem); + if (offset == 0) + qp->last_in |= FIRST_IN; + + write_lock(&ipfrag_lock); +- list_move_tail(&qp->lru_list, &ipq_lru_list); ++ list_move_tail(&qp->lru_list, &net->ipq_lru_list); + write_unlock(&ipfrag_lock); + + return; +@@ -613,6 +599,7 @@ + + static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) + { ++ struct net *net = qp->net; + struct iphdr *iph; + struct sk_buff *fp, *head = qp->fragments; + int len; +@@ -654,12 +641,12 @@ + head->len -= clone->len; + clone->csum = 0; + clone->ip_summed = head->ip_summed; +- atomic_add(clone->truesize, &ip_frag_mem); ++ atomic_add(clone->truesize, &net->ip_frag_mem); + } + + skb_shinfo(head)->frag_list = head->next; + skb_push(head, head->data - skb_network_header(head)); +- atomic_sub(head->truesize, &ip_frag_mem); ++ atomic_sub(head->truesize, &net->ip_frag_mem); + + for (fp=head->next; fp; fp = fp->next) { + head->data_len += fp->len; +@@ -669,7 +656,7 @@ + else if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; +- atomic_sub(fp->truesize, &ip_frag_mem); ++ atomic_sub(fp->truesize, &net->ip_frag_mem); + } + + head->next = NULL; +@@ -700,19 +687,20 @@ + /* Process an incoming IP datagram fragment. */ + struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) + { ++ struct net *net = skb->dev->nd_net; + struct ipq *qp; + struct net_device *dev; + + IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); + + /* Start by cleaning up the memory. */ +- if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) +- ip_evictor(); ++ if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh) ++ ip_evictor(net); + + dev = skb->dev; + + /* Lookup (or create) queue header */ +- if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { ++ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { + struct sk_buff *ret = NULL; + + spin_lock(&qp->lock); +@@ -733,15 +721,70 @@ + return NULL; + } + +-void __init ipfrag_init(void) ++static int ipfrag_net_init(struct net *net) + { +- ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ ++ struct timer_list *secret_timer; ++ int i; ++ ++ /* Fragment cache limits. We will commit 256K at one time. Should we ++ * cross that limit we will prune down to 192K. This should cope with ++ * even the most extreme cases without allowing an attacker to measurably ++ * harm machine performance. ++ */ ++ net->sysctl_ipfrag_high_thresh = 256*1024; ++ net->sysctl_ipfrag_low_thresh = 192*1024; ++ net->sysctl_ipfrag_max_dist = 64; ++ ++ /* Important NOTE! Fragment queue must be destroyed before MSL expires. ++ * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. 
++ */ ++ net->sysctl_ipfrag_time = IP_FRAG_TIME; ++ ++ net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ; ++ ++ net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL); ++ if (!net->ipq_hash) ++ return -ENOMEM; ++ ++ for (i = 0; i < IPQ_HASHSZ; i++) ++ INIT_HLIST_HEAD(&net->ipq_hash[i]); ++ INIT_LIST_HEAD(&net->ipq_lru_list); ++ net->ip_frag_nqueues = 0; ++ atomic_set(&net->ip_frag_mem, 0); ++ ++ ++ net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + +- init_timer(&ipfrag_secret_timer); +- ipfrag_secret_timer.function = ipfrag_secret_rebuild; +- ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; +- add_timer(&ipfrag_secret_timer); ++ secret_timer = &net->ipfrag_secret_timer; ++ init_timer(secret_timer); ++ secret_timer->function = ipfrag_secret_rebuild; ++ secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval; ++ secret_timer->data = (unsigned long)net; ++ add_timer(secret_timer); ++ ++ return 0; ++} ++ ++static void ipfrag_net_exit(struct net *net) ++{ ++ del_timer(&net->ipfrag_secret_timer); ++ ++ net->sysctl_ipfrag_low_thresh = 0; ++ while (atomic_read(&net->ip_frag_mem)) ++ ip_evictor(net); ++ ++ kfree(net->ipq_hash); ++} ++ ++static struct pernet_operations ipfrag_net_ops = { ++ .init = ipfrag_net_init, ++ .exit = ipfrag_net_exit, ++}; ++ ++void ipfrag_init(void) ++{ ++ register_pernet_subsys(&ipfrag_net_ops); + } + + EXPORT_SYMBOL(ip_defrag); +diff -Nurb linux-2.6.22-570/net/ipv4/ip_gre.c linux-2.6.22-591/net/ipv4/ip_gre.c +--- linux-2.6.22-570/net/ipv4/ip_gre.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ip_gre.c 2007-12-21 15:36:15.000000000 -0500 +@@ -262,7 +262,7 @@ + int i; + for (i=1; i<100; i++) { + sprintf(name, "gre%d", i); +- if (__dev_get_by_name(name) == NULL) ++ if (__dev_get_by_name(&init_net, name) == NULL) + break; + } + if (i==100) +@@ -397,6 +397,9 @@ + struct flowi fl; + struct rtable *rt; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (p[1] != htons(ETH_P_IP)) + return; + +@@ -475,6 +478,7 @@ + + /* Try to guess incoming interface */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.fl4_dst = eiph->saddr; + fl.fl4_tos = RT_TOS(eiph->tos); + fl.proto = IPPROTO_GRE; +@@ -559,6 +563,10 @@ + struct ip_tunnel *tunnel; + int offset = 4; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + if (!pskb_may_pull(skb, 16)) + goto drop_nolock; + +@@ -740,7 +748,8 @@ + } + + { +- struct flowi fl = { .oif = tunnel->parms.link, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = tunnel->parms.link, + .nl_u = { .ip4_u = + { .daddr = dst, + .saddr = tiph->saddr, +@@ -1095,7 +1104,8 @@ + struct ip_tunnel *t = netdev_priv(dev); + + if (MULTICAST(t->parms.iph.daddr)) { +- struct flowi fl = { .oif = t->parms.link, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = t->parms.link, + .nl_u = { .ip4_u = + { .daddr = t->parms.iph.daddr, + .saddr = t->parms.iph.saddr, +@@ -1118,7 +1128,7 @@ + { + struct ip_tunnel *t = netdev_priv(dev); + if (MULTICAST(t->parms.iph.daddr) && t->mlink) { +- struct in_device *in_dev = inetdev_by_index(t->mlink); ++ struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink); + if (in_dev) { + ip_mc_dec_group(in_dev, t->parms.iph.daddr); + in_dev_put(in_dev); +@@ -1168,7 +1178,8 @@ + /* Guess output device to choose reasonable mtu and hard_header_len */ + + if (iph->daddr) { +- struct flowi fl = { .oif = tunnel->parms.link, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = 
tunnel->parms.link, + .nl_u = { .ip4_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, +@@ -1195,7 +1206,7 @@ + } + + if (!tdev && tunnel->parms.link) +- tdev = __dev_get_by_index(tunnel->parms.link); ++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + + if (tdev) { + hlen = tdev->hard_header_len; +diff -Nurb linux-2.6.22-570/net/ipv4/ip_input.c linux-2.6.22-591/net/ipv4/ip_input.c +--- linux-2.6.22-570/net/ipv4/ip_input.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ip_input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -280,6 +280,10 @@ + struct iphdr *iph; + struct net_device *dev = skb->dev; + ++ ++ if (skb->dev->nd_net != &init_net) ++ goto drop; ++ + /* It looks as overkill, because not all + IP options require packet mangling. + But it is the easiest for now, especially taking +diff -Nurb linux-2.6.22-570/net/ipv4/ip_options.c linux-2.6.22-591/net/ipv4/ip_options.c +--- linux-2.6.22-570/net/ipv4/ip_options.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/ip_options.c 2007-12-21 15:36:15.000000000 -0500 +@@ -151,7 +151,7 @@ + __be32 addr; + + memcpy(&addr, sptr+soffset-1, 4); +- if (inet_addr_type(addr) != RTN_LOCAL) { ++ if (inet_addr_type(&init_net, addr) != RTN_LOCAL) { + dopt->ts_needtime = 1; + soffset += 8; + } +@@ -400,7 +400,7 @@ + { + __be32 addr; + memcpy(&addr, &optptr[optptr[2]-1], 4); +- if (inet_addr_type(addr) == RTN_UNICAST) ++ if (inet_addr_type(&init_net, addr) == RTN_UNICAST) + break; + if (skb) + timeptr = (__be32*)&optptr[optptr[2]+3]; +diff -Nurb linux-2.6.22-570/net/ipv4/ip_output.c linux-2.6.22-591/net/ipv4/ip_output.c +--- linux-2.6.22-570/net/ipv4/ip_output.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ip_output.c 2007-12-21 15:36:15.000000000 -0500 +@@ -83,8 +83,6 @@ + #include + #include + +-int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; +- + /* Generate a checksum for an outgoing IP datagram. 
*/ + __inline__ void ip_send_check(struct iphdr *iph) + { +@@ -317,7 +315,8 @@ + daddr = opt->faddr; + + { +- struct flowi fl = { .oif = sk->sk_bound_dev_if, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = sk->sk_bound_dev_if, + .nl_u = { .ip4_u = + { .daddr = daddr, + .saddr = inet->saddr, +@@ -837,7 +836,7 @@ + */ + if (transhdrlen && + length + fragheaderlen <= mtu && +- rt->u.dst.dev->features & NETIF_F_ALL_CSUM && ++ rt->u.dst.dev->features & NETIF_F_V4_CSUM && + !exthdrlen) + csummode = CHECKSUM_PARTIAL; + +@@ -1352,7 +1351,8 @@ + } + + { +- struct flowi fl = { .oif = arg->bound_dev_if, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = arg->bound_dev_if, + .nl_u = { .ip4_u = + { .daddr = daddr, + .saddr = rt->rt_spec_dst, +diff -Nurb linux-2.6.22-570/net/ipv4/ip_sockglue.c linux-2.6.22-591/net/ipv4/ip_sockglue.c +--- linux-2.6.22-570/net/ipv4/ip_sockglue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ip_sockglue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -411,6 +411,7 @@ + static int do_ip_setsockopt(struct sock *sk, int level, + int optname, char __user *optval, int optlen) + { ++ struct net *net = sk->sk_net; + struct inet_sock *inet = inet_sk(sk); + int val=0,err; + +@@ -596,13 +597,13 @@ + err = 0; + break; + } +- dev = ip_dev_find(mreq.imr_address.s_addr); ++ dev = ip_dev_find(net, mreq.imr_address.s_addr); + if (dev) { + mreq.imr_ifindex = dev->ifindex; + dev_put(dev); + } + } else +- dev = __dev_get_by_index(mreq.imr_ifindex); ++ dev = __dev_get_by_index(net, mreq.imr_ifindex); + + + err = -EADDRNOTAVAIL; +@@ -956,6 +957,7 @@ + static int do_ip_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) + { ++ struct net *net = sk->sk_net; + struct inet_sock *inet = inet_sk(sk); + int val; + int len; +@@ -1023,7 +1025,7 @@ + break; + case IP_TTL: + val = (inet->uc_ttl == -1 ? 
+- sysctl_ip_default_ttl : ++ net->sysctl_ip_default_ttl : + inet->uc_ttl); + break; + case IP_HDRINCL: +diff -Nurb linux-2.6.22-570/net/ipv4/ipcomp.c linux-2.6.22-591/net/ipv4/ipcomp.c +--- linux-2.6.22-570/net/ipv4/ipcomp.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/ipcomp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -175,6 +175,9 @@ + struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); + struct xfrm_state *x; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH || + icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) + return; +@@ -486,3 +489,4 @@ + MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173"); + MODULE_AUTHOR("James Morris "); + ++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP); +diff -Nurb linux-2.6.22-570/net/ipv4/ipconfig.c linux-2.6.22-591/net/ipv4/ipconfig.c +--- linux-2.6.22-570/net/ipv4/ipconfig.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipconfig.c 2007-12-21 15:36:15.000000000 -0500 +@@ -59,6 +59,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -184,16 +185,18 @@ + struct ic_device *d, **last; + struct net_device *dev; + unsigned short oflags; ++ struct net_device *lo; + + last = &ic_first_dev; + rtnl_lock(); + + /* bring loopback device up first */ +- if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0) +- printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name); ++ lo = &init_net.loopback_dev; ++ if (dev_change_flags(lo, lo->flags | IFF_UP) < 0) ++ printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name); + +- for_each_netdev(dev) { +- if (dev == &loopback_dev) ++ for_each_netdev(&init_net, dev) { ++ if (dev == lo) + continue; + if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) : + (!(dev->flags & IFF_LOOPBACK) && +@@ -283,7 +286,7 @@ + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); +- res = devinet_ioctl(cmd, (struct ifreq __user *) arg); ++ res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg); + set_fs(oldfs); + return res; + } +@@ -294,7 +297,7 @@ + + mm_segment_t oldfs = get_fs(); + set_fs(get_ds()); +- res = ip_rt_ioctl(cmd, (void __user *) arg); ++ res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg); + set_fs(oldfs); + return res; + } +@@ -425,6 +428,9 @@ + unsigned char *sha, *tha; /* s for "source", t for "target" */ + struct ic_device *d; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NET_RX_DROP; + +@@ -834,6 +840,9 @@ + struct ic_device *d; + int len, ext_len; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + /* Perform verifications before taking the lock. 
*/ + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop; +@@ -1253,7 +1262,7 @@ + __be32 addr; + + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops); ++ proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops); + #endif /* CONFIG_PROC_FS */ + + if (!ic_enable) +diff -Nurb linux-2.6.22-570/net/ipv4/ipip.c linux-2.6.22-591/net/ipv4/ipip.c +--- linux-2.6.22-570/net/ipv4/ipip.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipip.c 2007-12-21 15:36:15.000000000 -0500 +@@ -225,7 +225,7 @@ + int i; + for (i=1; i<100; i++) { + sprintf(name, "tunl%d", i); +- if (__dev_get_by_name(name) == NULL) ++ if (__dev_get_by_name(&init_net, name) == NULL) + break; + } + if (i==100) +@@ -403,6 +403,7 @@ + + /* Try to guess incoming interface */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.fl4_daddr = eiph->saddr; + fl.fl4_tos = RT_TOS(eiph->tos); + fl.proto = IPPROTO_IPIP; +@@ -542,7 +543,8 @@ + } + + { +- struct flowi fl = { .oif = tunnel->parms.link, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = tunnel->parms.link, + .nl_u = { .ip4_u = + { .daddr = dst, + .saddr = tiph->saddr, +@@ -806,7 +808,8 @@ + memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + + if (iph->daddr) { +- struct flowi fl = { .oif = tunnel->parms.link, ++ struct flowi fl = { .fl_net = &init_net, ++ .oif = tunnel->parms.link, + .nl_u = { .ip4_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, +@@ -821,7 +824,7 @@ + } + + if (!tdev && tunnel->parms.link) +- tdev = __dev_get_by_index(tunnel->parms.link); ++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); +diff -Nurb linux-2.6.22-570/net/ipv4/ipmr.c linux-2.6.22-591/net/ipv4/ipmr.c +--- linux-2.6.22-570/net/ipv4/ipmr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipmr.c 2007-12-21 15:36:15.000000000 -0500 +@@ -62,6 +62,7 @@ + #include + #include + #include ++#include + #include + + #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) +@@ -124,7 +125,7 @@ + { + struct net_device *dev; + +- dev = __dev_get_by_name("tunl0"); ++ dev = __dev_get_by_name(&init_net, "tunl0"); + + if (dev) { + int err; +@@ -148,7 +149,7 @@ + + dev = NULL; + +- if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) { ++ if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { + dev->flags |= IFF_MULTICAST; + + in_dev = __in_dev_get_rtnl(dev); +@@ -320,7 +321,7 @@ + e->error = -ETIMEDOUT; + memset(&e->msg, 0, sizeof(e->msg)); + +- rtnl_unicast(skb, NETLINK_CB(skb).pid); ++ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + } else + kfree_skb(skb); + } +@@ -422,7 +423,7 @@ + return -ENOBUFS; + break; + case 0: +- dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr); ++ dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); + if (!dev) + return -EADDRNOTAVAIL; + dev_put(dev); +@@ -532,7 +533,7 @@ + memset(&e->msg, 0, sizeof(e->msg)); + } + +- rtnl_unicast(skb, NETLINK_CB(skb).pid); ++ rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + } else + ip_mr_forward(skb, c, 0); + } +@@ -848,7 +849,7 @@ + { + rtnl_lock(); + if (sk == mroute_socket) { +- IPV4_DEVCONF_ALL(MC_FORWARDING)--; ++ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--; + + write_lock_bh(&mrt_lock); + mroute_socket=NULL; +@@ -897,7 +898,7 @@ + mroute_socket=sk; + write_unlock_bh(&mrt_lock); + +- IPV4_DEVCONF_ALL(MC_FORWARDING)++; ++ IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++; + } + rtnl_unlock(); + return ret; +@@ -1082,13 +1083,18 @@ + + 
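/*
 * [Editor's illustrative sketch -- not part of trellis.patch] The ipmr hunk
 * that follows applies the guard used throughout this patch: handlers that
 * still depend on global state bail out early unless the object belongs to
 * the initial network namespace (init_net). A minimal standalone netdevice
 * notifier built around that guard might look like the code below; the
 * example_* names are hypothetical, and the 2.6.22-era APIs this patch
 * itself uses (dev->nd_net, register_netdevice_notifier) are assumed.
 */
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <net/net_namespace.h>

static int example_dev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* Ignore devices living in any namespace other than init_net. */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	if (event == NETDEV_UNREGISTER)
		printk(KERN_INFO "example: %s unregistered\n", dev->name);

	return NOTIFY_DONE;
}

static struct notifier_block example_dev_notifier = {
	.notifier_call = example_dev_event,
};
/* Registered once, e.g. from an __init hook:
 *	register_netdevice_notifier(&example_dev_notifier);
 */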
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
++ struct net_device *dev = ptr;
+ struct vif_device *v;
+ int ct;
++
++ if (dev->nd_net != &init_net)
++ return NOTIFY_DONE;
++
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+ v=&vif_table[0];
+ for (ct=0;ct<maxvif;ct++,v++) {
+- if (v->dev==ptr)
++ if (v->dev==dev)
+ vif_delete(ct);
+ }
+ return NOTIFY_DONE;
+@@ -1171,7 +1177,8 @@
+ #endif
+
+ if (vif->flags&VIFF_TUNNEL) {
+- struct flowi fl = { .oif = vif->link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = vif->remote,
+ .saddr = vif->local,
+@@ -1181,7 +1188,8 @@
+ goto out_free;
+ encap = sizeof(struct iphdr);
+ } else {
+- struct flowi fl = { .oif = vif->link,
++ struct flowi fl = { .fl_net = &init_net,
++ .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = iph->daddr,
+ .tos = RT_TOS(iph->tos) } },
+@@ -1498,6 +1506,10 @@
+ struct iphdr *encap;
+ struct net_device *reg_dev = NULL;
+
++ if (skb->dev->nd_net != &init_net) {
++ kfree_skb(skb);
++ return 0;
++ }
+ if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+ goto drop;
+
+@@ -1922,7 +1934,7 @@
+ ipmr_expire_timer.function=ipmr_expire_process;
+ register_netdevice_notifier(&ip_mr_notifier);
+ #ifdef CONFIG_PROC_FS
+- proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
+- proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
++ proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
++ proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+ #endif
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_app.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_app.c 2007-12-21 15:36:15.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+
+@@ -616,12 +617,12 @@
+ int ip_vs_app_init(void)
+ {
+ /* we will replace it with proc_net_ipvs_create() soon */
+- proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
++ proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+ return 0;
+ }
+
+
+ void ip_vs_app_cleanup(void)
+ {
+- proc_net_remove("ip_vs_app");
++ proc_net_remove(&init_net, "ip_vs_app");
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_conn.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_conn.c 2007-12-21 15:36:15.000000000 -0500
+@@ -34,6 +34,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+
+@@ -922,7 +923,7 @@
+ rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ }
+
+- proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
++ proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+
+ /* calculate the random value for connection hash */
+ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+@@ -938,6 +939,6 @@
+
+ /* Release the empty cache */
+ kmem_cache_destroy(ip_vs_conn_cachep);
+- proc_net_remove("ip_vs_conn");
++ proc_net_remove(&init_net, "ip_vs_conn");
+ vfree(ip_vs_conn_tab);
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c
+--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_core.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_core.c 2007-12-21 15:36:15.000000000 -0500
+@@ -460,7 +460,7 @@
+ and the destination is RTN_UNICAST (and not local), then create
+ a cache_bypass connection entry */
+ if 
(sysctl_ip_vs_cache_bypass && svc->fwmark +- && (inet_addr_type(iph->daddr) == RTN_UNICAST)) { ++ && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { + int ret, cs; + struct ip_vs_conn *cp; + +@@ -530,6 +530,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (!((*pskb)->ipvs_property)) + return NF_ACCEPT; + /* The packet was sent from IPVS, exit this chain */ +@@ -734,6 +738,10 @@ + struct ip_vs_conn *cp; + int ihl; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + EnterFunction(11); + + if (skb->ipvs_property) +@@ -818,7 +826,7 @@ + * if it came from this machine itself. So re-compute + * the routing information. + */ +- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) ++ if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0) + goto drop; + skb = *pskb; + +@@ -956,12 +964,16 @@ + int ret, restart; + int ihl; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* + * Big tappo: only PACKET_HOST (neither loopback nor mcasts) + * ... don't know why 1st test DOES NOT include 2nd (?) + */ + if (unlikely(skb->pkt_type != PACKET_HOST +- || skb->dev == &loopback_dev || skb->sk)) { ++ || skb->dev == &init_net.loopback_dev || skb->sk)) { + IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", + skb->pkt_type, + ip_hdr(skb)->protocol, +@@ -1062,6 +1074,10 @@ + { + int r; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) + return NF_ACCEPT; + +diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c +--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_ctl.c 2007-12-21 15:36:15.000000000 -0500 +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + + #include + +@@ -679,7 +680,7 @@ + conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; + + /* check if local node and update the flags */ +- if (inet_addr_type(udest->addr) == RTN_LOCAL) { ++ if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } +@@ -731,7 +732,7 @@ + + EnterFunction(2); + +- atype = inet_addr_type(udest->addr); ++ atype = inet_addr_type(&init_net, udest->addr); + if (atype != RTN_LOCAL && atype != RTN_UNICAST) + return -EINVAL; + +@@ -1932,6 +1933,9 @@ + struct ip_vs_service *svc; + struct ip_vs_dest_user *udest; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -2196,6 +2200,9 @@ + unsigned char arg[128]; + int ret = 0; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -2356,8 +2363,8 @@ + return ret; + } + +- proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); +- proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); ++ proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); ++ proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); + + sysctl_header = register_sysctl_table(vs_root_table); + +@@ -2390,8 +2397,8 @@ + cancel_work_sync(&defense_work.work); + ip_vs_kill_estimator(&ip_vs_stats); + 
unregister_sysctl_table(sysctl_header); +- proc_net_remove("ip_vs_stats"); +- proc_net_remove("ip_vs"); ++ proc_net_remove(&init_net, "ip_vs_stats"); ++ proc_net_remove(&init_net, "ip_vs"); + nf_unregister_sockopt(&ip_vs_sockopts); + LeaveFunction(2); + } +diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c +--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_lblcr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_lblcr.c 2007-12-21 15:36:15.000000000 -0500 +@@ -843,7 +843,7 @@ + INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list); + sysctl_header = register_sysctl_table(lblcr_root_table); + #ifdef CONFIG_IP_VS_LBLCR_DEBUG +- proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); ++ proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo); + #endif + return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); + } +@@ -852,7 +852,7 @@ + static void __exit ip_vs_lblcr_cleanup(void) + { + #ifdef CONFIG_IP_VS_LBLCR_DEBUG +- proc_net_remove("ip_vs_lblcr"); ++ proc_net_remove(&init_net, "ip_vs_lblcr"); + #endif + unregister_sysctl_table(sysctl_header); + unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); +diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c +--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_sync.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_sync.c 2007-12-21 15:36:15.000000000 -0500 +@@ -387,7 +387,7 @@ + struct net_device *dev; + struct inet_sock *inet = inet_sk(sk); + +- if ((dev = __dev_get_by_name(ifname)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + return -ENODEV; + + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) +@@ -412,7 +412,7 @@ + int num; + + if (sync_state == IP_VS_STATE_MASTER) { +- if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) + return -ENODEV; + + num = (dev->mtu - sizeof(struct iphdr) - +@@ -423,7 +423,7 @@ + IP_VS_DBG(7, "setting the maximum length of sync sending " + "message %d.\n", sync_send_mesg_maxlen); + } else if (sync_state == IP_VS_STATE_BACKUP) { +- if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) + return -ENODEV; + + sync_recv_mesg_maxlen = dev->mtu - +@@ -451,7 +451,7 @@ + memset(&mreq, 0, sizeof(mreq)); + memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); + +- if ((dev = __dev_get_by_name(ifname)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + return -ENODEV; + if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) + return -EINVAL; +@@ -472,7 +472,7 @@ + __be32 addr; + struct sockaddr_in sin; + +- if ((dev = __dev_get_by_name(ifname)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + return -ENODEV; + + addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); +diff -Nurb linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c +--- linux-2.6.22-570/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/ipvs/ip_vs_xmit.c 2007-12-21 15:36:15.000000000 -0500 +@@ -70,6 +70,7 @@ + if (!(rt = (struct rtable *) + __ip_vs_dst_check(dest, rtos, 0))) { + struct flowi fl = { ++ .fl_net = &init_net, + .oif = 0, + .nl_u = { + .ip4_u = { +@@ -93,6 +94,7 @@ + spin_unlock(&dest->dst_lock); + } else { + struct flowi fl = { ++ .fl_net = &init_net, + .oif = 0, + .nl_u = { + 
.ip4_u = { +@@ -160,6 +162,7 @@ + u8 tos = iph->tos; + int mtu; + struct flowi fl = { ++ .fl_net = &init_net, + .oif = 0, + .nl_u = { + .ip4_u = { +diff -Nurb linux-2.6.22-570/net/ipv4/multipath.c linux-2.6.22-591/net/ipv4/multipath.c +--- linux-2.6.22-570/net/ipv4/multipath.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/multipath.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,55 +0,0 @@ +-/* multipath.c: IPV4 multipath algorithm support. +- * +- * Copyright (C) 2004, 2005 Einar Lueck +- * Copyright (C) 2005 David S. Miller +- */ +- +-#include +-#include +-#include +-#include +- +-#include +- +-static DEFINE_SPINLOCK(alg_table_lock); +-struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1]; +- +-int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) +-{ +- struct ip_mp_alg_ops **slot; +- int err; +- +- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX || +- !ops->mp_alg_select_route) +- return -EINVAL; +- +- spin_lock(&alg_table_lock); +- slot = &ip_mp_alg_table[n]; +- if (*slot != NULL) { +- err = -EBUSY; +- } else { +- *slot = ops; +- err = 0; +- } +- spin_unlock(&alg_table_lock); +- +- return err; +-} +-EXPORT_SYMBOL(multipath_alg_register); +- +-void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n) +-{ +- struct ip_mp_alg_ops **slot; +- +- if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX) +- return; +- +- spin_lock(&alg_table_lock); +- slot = &ip_mp_alg_table[n]; +- if (*slot == ops) +- *slot = NULL; +- spin_unlock(&alg_table_lock); +- +- synchronize_net(); +-} +-EXPORT_SYMBOL(multipath_alg_unregister); +diff -Nurb linux-2.6.22-570/net/ipv4/multipath_drr.c linux-2.6.22-591/net/ipv4/multipath_drr.c +--- linux-2.6.22-570/net/ipv4/multipath_drr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/multipath_drr.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,249 +0,0 @@ +-/* +- * Device round robin policy for multipath. +- * +- * +- * Version: $Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $ +- * +- * Authors: Einar Lueck +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-struct multipath_device { +- int ifi; /* interface index of device */ +- atomic_t usecount; +- int allocated; +-}; +- +-#define MULTIPATH_MAX_DEVICECANDIDATES 10 +- +-static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; +-static DEFINE_SPINLOCK(state_lock); +- +-static int inline __multipath_findslot(void) +-{ +- int i; +- +- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { +- if (state[i].allocated == 0) +- return i; +- } +- return -1; +-} +- +-static int inline __multipath_finddev(int ifindex) +-{ +- int i; +- +- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) { +- if (state[i].allocated != 0 && +- state[i].ifi == ifindex) +- return i; +- } +- return -1; +-} +- +-static int drr_dev_event(struct notifier_block *this, +- unsigned long event, void *ptr) +-{ +- struct net_device *dev = ptr; +- int devidx; +- +- switch (event) { +- case NETDEV_UNREGISTER: +- case NETDEV_DOWN: +- spin_lock_bh(&state_lock); +- +- devidx = __multipath_finddev(dev->ifindex); +- if (devidx != -1) { +- state[devidx].allocated = 0; +- state[devidx].ifi = 0; +- atomic_set(&state[devidx].usecount, 0); +- } +- +- spin_unlock_bh(&state_lock); +- break; +- } +- +- return NOTIFY_DONE; +-} +- +-static struct notifier_block drr_dev_notifier = { +- .notifier_call = drr_dev_event, +-}; +- +- +-static void drr_safe_inc(atomic_t *usecount) +-{ +- int n; +- +- atomic_inc(usecount); +- +- n = atomic_read(usecount); +- if (n <= 0) { +- int i; +- +- spin_lock_bh(&state_lock); +- +- for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) +- atomic_set(&state[i].usecount, 0); +- +- spin_unlock_bh(&state_lock); +- } +-} +- +-static void drr_select_route(const struct flowi *flp, +- struct rtable *first, struct rtable **rp) +-{ +- struct rtable *nh, *result, *cur_min; +- int min_usecount = -1; +- int devidx = -1; +- int cur_min_devidx = -1; +- +- /* 1. make sure all alt. nexthops have the same GC related data */ +- /* 2. 
determine the new candidate to be returned */ +- result = NULL; +- cur_min = NULL; +- for (nh = rcu_dereference(first); nh; +- nh = rcu_dereference(nh->u.dst.rt_next)) { +- if ((nh->u.dst.flags & DST_BALANCED) != 0 && +- multipath_comparekeys(&nh->fl, flp)) { +- int nh_ifidx = nh->u.dst.dev->ifindex; +- +- nh->u.dst.lastuse = jiffies; +- nh->u.dst.__use++; +- if (result != NULL) +- continue; +- +- /* search for the output interface */ +- +- /* this is not SMP safe, only add/remove are +- * SMP safe as wrong usecount updates have no big +- * impact +- */ +- devidx = __multipath_finddev(nh_ifidx); +- if (devidx == -1) { +- /* add the interface to the array +- * SMP safe +- */ +- spin_lock_bh(&state_lock); +- +- /* due to SMP: search again */ +- devidx = __multipath_finddev(nh_ifidx); +- if (devidx == -1) { +- /* add entry for device */ +- devidx = __multipath_findslot(); +- if (devidx == -1) { +- /* unlikely but possible */ +- continue; +- } +- +- state[devidx].allocated = 1; +- state[devidx].ifi = nh_ifidx; +- atomic_set(&state[devidx].usecount, 0); +- min_usecount = 0; +- } +- +- spin_unlock_bh(&state_lock); +- } +- +- if (min_usecount == 0) { +- /* if the device has not been used it is +- * the primary target +- */ +- drr_safe_inc(&state[devidx].usecount); +- result = nh; +- } else { +- int count = +- atomic_read(&state[devidx].usecount); +- +- if (min_usecount == -1 || +- count < min_usecount) { +- cur_min = nh; +- cur_min_devidx = devidx; +- min_usecount = count; +- } +- } +- } +- } +- +- if (!result) { +- if (cur_min) { +- drr_safe_inc(&state[cur_min_devidx].usecount); +- result = cur_min; +- } else { +- result = first; +- } +- } +- +- *rp = result; +-} +- +-static struct ip_mp_alg_ops drr_ops = { +- .mp_alg_select_route = drr_select_route, +-}; +- +-static int __init drr_init(void) +-{ +- int err = register_netdevice_notifier(&drr_dev_notifier); +- +- if (err) +- return err; +- +- err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); +- if (err) +- goto fail; +- +- return 0; +- +-fail: +- unregister_netdevice_notifier(&drr_dev_notifier); +- return err; +-} +- +-static void __exit drr_exit(void) +-{ +- unregister_netdevice_notifier(&drr_dev_notifier); +- multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR); +-} +- +-module_init(drr_init); +-module_exit(drr_exit); +-MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/net/ipv4/multipath_random.c linux-2.6.22-591/net/ipv4/multipath_random.c +--- linux-2.6.22-570/net/ipv4/multipath_random.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/multipath_random.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,114 +0,0 @@ +-/* +- * Random policy for multipath. +- * +- * +- * Version: $Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $ +- * +- * Authors: Einar Lueck +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. 
+- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#define MULTIPATH_MAX_CANDIDATES 40 +- +-static void random_select_route(const struct flowi *flp, +- struct rtable *first, +- struct rtable **rp) +-{ +- struct rtable *rt; +- struct rtable *decision; +- unsigned char candidate_count = 0; +- +- /* count all candidate */ +- for (rt = rcu_dereference(first); rt; +- rt = rcu_dereference(rt->u.dst.rt_next)) { +- if ((rt->u.dst.flags & DST_BALANCED) != 0 && +- multipath_comparekeys(&rt->fl, flp)) +- ++candidate_count; +- } +- +- /* choose a random candidate */ +- decision = first; +- if (candidate_count > 1) { +- unsigned char i = 0; +- unsigned char candidate_no = (unsigned char) +- (random32() % candidate_count); +- +- /* find chosen candidate and adjust GC data for all candidates +- * to ensure they stay in cache +- */ +- for (rt = first; rt; rt = rt->u.dst.rt_next) { +- if ((rt->u.dst.flags & DST_BALANCED) != 0 && +- multipath_comparekeys(&rt->fl, flp)) { +- rt->u.dst.lastuse = jiffies; +- +- if (i == candidate_no) +- decision = rt; +- +- if (i >= candidate_count) +- break; +- +- i++; +- } +- } +- } +- +- decision->u.dst.__use++; +- *rp = decision; +-} +- +-static struct ip_mp_alg_ops random_ops = { +- .mp_alg_select_route = random_select_route, +-}; +- +-static int __init random_init(void) +-{ +- return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM); +-} +- +-static void __exit random_exit(void) +-{ +- multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM); +-} +- +-module_init(random_init); +-module_exit(random_exit); +-MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/net/ipv4/multipath_rr.c linux-2.6.22-591/net/ipv4/multipath_rr.c +--- linux-2.6.22-570/net/ipv4/multipath_rr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/multipath_rr.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,95 +0,0 @@ +-/* +- * Round robin policy for multipath. +- * +- * +- * Version: $Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $ +- * +- * Authors: Einar Lueck +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static void rr_select_route(const struct flowi *flp, +- struct rtable *first, struct rtable **rp) +-{ +- struct rtable *nh, *result, *min_use_cand = NULL; +- int min_use = -1; +- +- /* 1. make sure all alt. nexthops have the same GC related data +- * 2. 
determine the new candidate to be returned +- */ +- result = NULL; +- for (nh = rcu_dereference(first); nh; +- nh = rcu_dereference(nh->u.dst.rt_next)) { +- if ((nh->u.dst.flags & DST_BALANCED) != 0 && +- multipath_comparekeys(&nh->fl, flp)) { +- nh->u.dst.lastuse = jiffies; +- +- if (min_use == -1 || nh->u.dst.__use < min_use) { +- min_use = nh->u.dst.__use; +- min_use_cand = nh; +- } +- } +- } +- result = min_use_cand; +- if (!result) +- result = first; +- +- result->u.dst.__use++; +- *rp = result; +-} +- +-static struct ip_mp_alg_ops rr_ops = { +- .mp_alg_select_route = rr_select_route, +-}; +- +-static int __init rr_init(void) +-{ +- return multipath_alg_register(&rr_ops, IP_MP_ALG_RR); +-} +- +-static void __exit rr_exit(void) +-{ +- multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR); +-} +- +-module_init(rr_init); +-module_exit(rr_exit); +-MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/net/ipv4/multipath_wrandom.c linux-2.6.22-591/net/ipv4/multipath_wrandom.c +--- linux-2.6.22-570/net/ipv4/multipath_wrandom.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/multipath_wrandom.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,329 +0,0 @@ +-/* +- * Weighted random policy for multipath. +- * +- * +- * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ +- * +- * Authors: Einar Lueck +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#define MULTIPATH_STATE_SIZE 15 +- +-struct multipath_candidate { +- struct multipath_candidate *next; +- int power; +- struct rtable *rt; +-}; +- +-struct multipath_dest { +- struct list_head list; +- +- const struct fib_nh *nh_info; +- __be32 netmask; +- __be32 network; +- unsigned char prefixlen; +- +- struct rcu_head rcu; +-}; +- +-struct multipath_bucket { +- struct list_head head; +- spinlock_t lock; +-}; +- +-struct multipath_route { +- struct list_head list; +- +- int oif; +- __be32 gw; +- struct list_head dests; +- +- struct rcu_head rcu; +-}; +- +-/* state: primarily weight per route information */ +-static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; +- +-static unsigned char __multipath_lookup_weight(const struct flowi *fl, +- const struct rtable *rt) +-{ +- const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; +- struct multipath_route *r; +- struct multipath_route *target_route = NULL; +- struct multipath_dest *d; +- int weight = 1; +- +- /* lookup the weight information for a certain route */ +- rcu_read_lock(); +- +- /* find state entry for gateway or add one if necessary */ +- list_for_each_entry_rcu(r, &state[state_idx].head, list) { +- if (r->gw == rt->rt_gateway && +- r->oif == rt->idev->dev->ifindex) { +- target_route = r; +- break; +- } +- } +- +- if (!target_route) { +- /* this should not happen... 
but we are prepared */ +- printk( KERN_CRIT"%s: missing state for gateway: %u and " \ +- "device %d\n", __FUNCTION__, rt->rt_gateway, +- rt->idev->dev->ifindex); +- goto out; +- } +- +- /* find state entry for destination */ +- list_for_each_entry_rcu(d, &target_route->dests, list) { +- __be32 targetnetwork = fl->fl4_dst & +- inet_make_mask(d->prefixlen); +- +- if ((targetnetwork & d->netmask) == d->network) { +- weight = d->nh_info->nh_weight; +- goto out; +- } +- } +- +-out: +- rcu_read_unlock(); +- return weight; +-} +- +-static void wrandom_init_state(void) +-{ +- int i; +- +- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { +- INIT_LIST_HEAD(&state[i].head); +- spin_lock_init(&state[i].lock); +- } +-} +- +-static void wrandom_select_route(const struct flowi *flp, +- struct rtable *first, +- struct rtable **rp) +-{ +- struct rtable *rt; +- struct rtable *decision; +- struct multipath_candidate *first_mpc = NULL; +- struct multipath_candidate *mpc, *last_mpc = NULL; +- int power = 0; +- int last_power; +- int selector; +- const size_t size_mpc = sizeof(struct multipath_candidate); +- +- /* collect all candidates and identify their weights */ +- for (rt = rcu_dereference(first); rt; +- rt = rcu_dereference(rt->u.dst.rt_next)) { +- if ((rt->u.dst.flags & DST_BALANCED) != 0 && +- multipath_comparekeys(&rt->fl, flp)) { +- struct multipath_candidate* mpc = +- (struct multipath_candidate*) +- kmalloc(size_mpc, GFP_ATOMIC); +- +- if (!mpc) +- return; +- +- power += __multipath_lookup_weight(flp, rt) * 10000; +- +- mpc->power = power; +- mpc->rt = rt; +- mpc->next = NULL; +- +- if (!first_mpc) +- first_mpc = mpc; +- else +- last_mpc->next = mpc; +- +- last_mpc = mpc; +- } +- } +- +- /* choose a weighted random candidate */ +- decision = first; +- selector = random32() % power; +- last_power = 0; +- +- /* select candidate, adjust GC data and cleanup local state */ +- decision = first; +- last_mpc = NULL; +- for (mpc = first_mpc; mpc; mpc = mpc->next) { +- mpc->rt->u.dst.lastuse = jiffies; +- if (last_power <= selector && selector < mpc->power) +- decision = mpc->rt; +- +- last_power = mpc->power; +- kfree(last_mpc); +- last_mpc = mpc; +- } +- +- /* concurrent __multipath_flush may lead to !last_mpc */ +- kfree(last_mpc); +- +- decision->u.dst.__use++; +- *rp = decision; +-} +- +-static void wrandom_set_nhinfo(__be32 network, +- __be32 netmask, +- unsigned char prefixlen, +- const struct fib_nh *nh) +-{ +- const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; +- struct multipath_route *r, *target_route = NULL; +- struct multipath_dest *d, *target_dest = NULL; +- +- /* store the weight information for a certain route */ +- spin_lock_bh(&state[state_idx].lock); +- +- /* find state entry for gateway or add one if necessary */ +- list_for_each_entry_rcu(r, &state[state_idx].head, list) { +- if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { +- target_route = r; +- break; +- } +- } +- +- if (!target_route) { +- const size_t size_rt = sizeof(struct multipath_route); +- target_route = (struct multipath_route *) +- kmalloc(size_rt, GFP_ATOMIC); +- +- target_route->gw = nh->nh_gw; +- target_route->oif = nh->nh_oif; +- memset(&target_route->rcu, 0, sizeof(struct rcu_head)); +- INIT_LIST_HEAD(&target_route->dests); +- +- list_add_rcu(&target_route->list, &state[state_idx].head); +- } +- +- /* find state entry for destination or add one if necessary */ +- list_for_each_entry_rcu(d, &target_route->dests, list) { +- if (d->nh_info == nh) { +- target_dest = d; +- break; +- } +- } +- +- if (!target_dest) { +- 
const size_t size_dst = sizeof(struct multipath_dest); +- target_dest = (struct multipath_dest*) +- kmalloc(size_dst, GFP_ATOMIC); +- +- target_dest->nh_info = nh; +- target_dest->network = network; +- target_dest->netmask = netmask; +- target_dest->prefixlen = prefixlen; +- memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); +- +- list_add_rcu(&target_dest->list, &target_route->dests); +- } +- /* else: we already stored this info for another destination => +- * we are finished +- */ +- +- spin_unlock_bh(&state[state_idx].lock); +-} +- +-static void __multipath_free(struct rcu_head *head) +-{ +- struct multipath_route *rt = container_of(head, struct multipath_route, +- rcu); +- kfree(rt); +-} +- +-static void __multipath_free_dst(struct rcu_head *head) +-{ +- struct multipath_dest *dst = container_of(head, +- struct multipath_dest, +- rcu); +- kfree(dst); +-} +- +-static void wrandom_flush(void) +-{ +- int i; +- +- /* defere delete to all entries */ +- for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { +- struct multipath_route *r; +- +- spin_lock_bh(&state[i].lock); +- list_for_each_entry_rcu(r, &state[i].head, list) { +- struct multipath_dest *d; +- list_for_each_entry_rcu(d, &r->dests, list) { +- list_del_rcu(&d->list); +- call_rcu(&d->rcu, +- __multipath_free_dst); +- } +- list_del_rcu(&r->list); +- call_rcu(&r->rcu, +- __multipath_free); +- } +- +- spin_unlock_bh(&state[i].lock); +- } +-} +- +-static struct ip_mp_alg_ops wrandom_ops = { +- .mp_alg_select_route = wrandom_select_route, +- .mp_alg_flush = wrandom_flush, +- .mp_alg_set_nhinfo = wrandom_set_nhinfo, +-}; +- +-static int __init wrandom_init(void) +-{ +- wrandom_init_state(); +- +- return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); +-} +- +-static void __exit wrandom_exit(void) +-{ +- multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); +-} +- +-module_init(wrandom_init); +-module_exit(wrandom_exit); +-MODULE_LICENSE("GPL"); +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c +--- linux-2.6.22-570/net/ipv4/netfilter/arp_tables.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/arp_tables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -773,7 +774,7 @@ + int ret; + struct arpt_table *t; + +- t = xt_find_table_lock(NF_ARP, entries->name); ++ t = xt_find_table_lock(&init_net, NF_ARP, entries->name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", +@@ -843,7 +844,7 @@ + + duprintf("arp_tables: Translated table\n"); + +- t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name), ++ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name), + "arptable_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; +@@ -936,7 +937,7 @@ + goto free; + } + +- t = xt_find_table_lock(NF_ARP, tmp.name); ++ t = xt_find_table_lock(&init_net, NF_ARP, tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? 
PTR_ERR(t) : -ENOENT; + goto free; +@@ -971,6 +972,9 @@ + { + int ret; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -995,6 +999,9 @@ + { + int ret; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -1016,7 +1023,7 @@ + } + name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; + +- t = try_then_request_module(xt_find_table_lock(NF_ARP, name), ++ t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name), + "arptable_%s", name); + if (t && !IS_ERR(t)) { + struct arpt_getinfo info; +@@ -1116,7 +1123,7 @@ + return ret; + } + +- ret = xt_register_table(table, &bootstrap, newinfo); ++ ret = xt_register_table(&init_net, table, &bootstrap, newinfo); + if (ret != 0) { + xt_free_table_info(newinfo); + return ret; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c +--- linux-2.6.22-570/net/ipv4/netfilter/arptable_filter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/arptable_filter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -61,6 +61,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return arpt_do_table(pskb, hook, in, out, &packet_filter); + } + +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c +--- linux-2.6.22-570/net/ipv4/netfilter/ip_queue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ip_queue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #define IPQ_QMAX_DEFAULT 1024 + #define IPQ_PROC_FS_NAME "ip_queue" +@@ -556,6 +557,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + ipq_dev_drop(dev->ifindex); +@@ -575,7 +579,7 @@ + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_FIREWALL && n->pid) { + write_lock_bh(&queue_lock); +- if (n->pid == peer_pid) ++ if ((n->net == &init_net) && (n->pid == peer_pid)) + __ipq_reset(); + write_unlock_bh(&queue_lock); + } +@@ -667,14 +671,14 @@ + struct proc_dir_entry *proc; + + netlink_register_notifier(&ipq_nl_notifier); +- ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, +- NULL, THIS_MODULE); ++ ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, ++ ipq_rcv_sk, NULL, THIS_MODULE); + if (ipqnl == NULL) { + printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); ++ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); + if (proc) + proc->owner = THIS_MODULE; + else { +@@ -695,8 +699,7 @@ + cleanup_sysctl: + unregister_sysctl_table(ipq_sysctl_header); + unregister_netdevice_notifier(&ipq_dev_notifier); +- proc_net_remove(IPQ_PROC_FS_NAME); +- ++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME); + cleanup_ipqnl: + sock_release(ipqnl->sk_socket); + mutex_lock(&ipqnl_mutex); +@@ -715,7 +718,7 @@ + + unregister_sysctl_table(ipq_sysctl_header); + unregister_netdevice_notifier(&ipq_dev_notifier); +- proc_net_remove(IPQ_PROC_FS_NAME); ++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME); + + sock_release(ipqnl->sk_socket); + mutex_lock(&ipqnl_mutex); +diff -Nurb 
linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c +--- linux-2.6.22-570/net/ipv4/netfilter/ip_tables.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ip_tables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -1039,7 +1039,7 @@ + } + #endif + +-static int get_info(void __user *user, int *len, int compat) ++static int get_info(struct net *net, void __user *user, int *len, int compat) + { + char name[IPT_TABLE_MAXNAMELEN]; + struct xt_table *t; +@@ -1059,7 +1059,7 @@ + if (compat) + xt_compat_lock(AF_INET); + #endif +- t = try_then_request_module(xt_find_table_lock(AF_INET, name), ++ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (t && !IS_ERR(t)) { + struct ipt_getinfo info; +@@ -1099,7 +1099,7 @@ + } + + static int +-get_entries(struct ipt_get_entries __user *uptr, int *len) ++get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len) + { + int ret; + struct ipt_get_entries get; +@@ -1119,7 +1119,7 @@ + return -EINVAL; + } + +- t = xt_find_table_lock(AF_INET, get.name); ++ t = xt_find_table_lock(net, AF_INET, get.name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", +@@ -1142,7 +1142,7 @@ + } + + static int +-__do_replace(const char *name, unsigned int valid_hooks, ++__do_replace(struct net *net, const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) + { +@@ -1159,7 +1159,7 @@ + goto out; + } + +- t = try_then_request_module(xt_find_table_lock(AF_INET, name), ++ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; +@@ -1211,7 +1211,7 @@ + } + + static int +-do_replace(void __user *user, unsigned int len) ++do_replace(struct net *net, void __user *user, unsigned int len) + { + int ret; + struct ipt_replace tmp; +@@ -1252,7 +1252,7 @@ + + duprintf("ip_tables: Translated table\n"); + +- ret = __do_replace(tmp.name, tmp.valid_hooks, ++ ret = __do_replace(net, tmp.name, tmp.valid_hooks, + newinfo, tmp.num_counters, + tmp.counters); + if (ret) +@@ -1289,7 +1289,7 @@ + } + + static int +-do_add_counters(void __user *user, unsigned int len, int compat) ++do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) + { + unsigned int i; + struct xt_counters_info tmp; +@@ -1341,7 +1341,7 @@ + goto free; + } + +- t = xt_find_table_lock(AF_INET, name); ++ t = xt_find_table_lock(net, AF_INET, name); + if (!t || IS_ERR(t)) { + ret = t ? 
PTR_ERR(t) : -ENOENT; + goto free; +@@ -1745,7 +1745,7 @@ + } + + static int +-compat_do_replace(void __user *user, unsigned int len) ++compat_do_replace(struct net *net, void __user *user, unsigned int len) + { + int ret; + struct compat_ipt_replace tmp; +@@ -1786,7 +1786,7 @@ + + duprintf("compat_do_replace: Translated table\n"); + +- ret = __do_replace(tmp.name, tmp.valid_hooks, ++ ret = __do_replace(net, tmp.name, tmp.valid_hooks, + newinfo, tmp.num_counters, + compat_ptr(tmp.counters)); + if (ret) +@@ -1811,11 +1811,11 @@ + + switch (cmd) { + case IPT_SO_SET_REPLACE: +- ret = compat_do_replace(user, len); ++ ret = compat_do_replace(sk->sk_net, user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: +- ret = do_add_counters(user, len, 1); ++ ret = do_add_counters(sk->sk_net, user, len, 1); + break; + + default: +@@ -1904,7 +1904,7 @@ + } + + static int +-compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) ++compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len) + { + int ret; + struct compat_ipt_get_entries get; +@@ -1928,7 +1928,7 @@ + } + + xt_compat_lock(AF_INET); +- t = xt_find_table_lock(AF_INET, get.name); ++ t = xt_find_table_lock(net, AF_INET, get.name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + struct xt_table_info info; +@@ -1966,10 +1966,10 @@ + + switch (cmd) { + case IPT_SO_GET_INFO: +- ret = get_info(user, len, 1); ++ ret = get_info(sk->sk_net, user, len, 1); + break; + case IPT_SO_GET_ENTRIES: +- ret = compat_get_entries(user, len); ++ ret = compat_get_entries(sk->sk_net, user, len); + break; + default: + ret = do_ipt_get_ctl(sk, cmd, user, len); +@@ -1988,11 +1988,11 @@ + + switch (cmd) { + case IPT_SO_SET_REPLACE: +- ret = do_replace(user, len); ++ ret = do_replace(sk->sk_net, user, len); + break; + + case IPT_SO_SET_ADD_COUNTERS: +- ret = do_add_counters(user, len, 0); ++ ret = do_add_counters(sk->sk_net, user, len, 0); + break; + + default: +@@ -2013,11 +2013,11 @@ + + switch (cmd) { + case IPT_SO_GET_INFO: +- ret = get_info(user, len, 0); ++ ret = get_info(sk->sk_net, user, len, 0); + break; + + case IPT_SO_GET_ENTRIES: +- ret = get_entries(user, len); ++ ret = get_entries(sk->sk_net, user, len); + break; + + case IPT_SO_GET_REVISION_MATCH: +@@ -2054,7 +2054,7 @@ + return ret; + } + +-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) ++int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl) + { + int ret; + struct xt_table_info *newinfo; +@@ -2082,7 +2082,7 @@ + return ret; + } + +- ret = xt_register_table(table, &bootstrap, newinfo); ++ ret = xt_register_table(net, table, &bootstrap, newinfo); + if (ret != 0) { + xt_free_table_info(newinfo); + return ret; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_CLUSTERIP.c 2007-12-21 15:36:15.000000000 -0500 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + #define CLUSTERIP_VERSION "0.8" + +@@ -427,7 +428,7 @@ + return 0; + } + +- dev = dev_get_by_name(e->ip.iniface); ++ dev = dev_get_by_name(&init_net, e->ip.iniface); + if (!dev) { + printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); + return 0; +@@ -523,6 +524,10 @@ + struct arp_payload *payload; + struct clusterip_config *c; + ++ /* Only filter packets in the initial network 
namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* we don't care about non-ethernet and non-ipv4 ARP */ + if (arp->ar_hrd != htons(ARPHRD_ETHER) + || arp->ar_pro != htons(ETH_P_IP) +@@ -735,7 +740,7 @@ + goto cleanup_target; + + #ifdef CONFIG_PROC_FS +- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); ++ clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net); + if (!clusterip_procdir) { + printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); + ret = -ENOMEM; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-12-21 15:36:15.000000000 -0500 +@@ -131,6 +131,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event == NETDEV_DOWN) { + /* Device was downed. Search entire table for + conntracks which were associated with that device, +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_REJECT.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_REJECT.c 2007-12-21 15:36:15.000000000 -0500 +@@ -137,7 +137,7 @@ + ) + addr_type = RTN_LOCAL; + +- if (ip_route_me_harder(&nskb, addr_type)) ++ if (ip_route_me_harder(&init_net, &nskb, addr_type)) + goto free_nskb; + + nskb->ip_summed = CHECKSUM_NONE; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_ULOG.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_ULOG.c 2007-12-21 15:36:15.000000000 -0500 +@@ -419,7 +419,8 @@ + for (i = 0; i < ULOG_MAXNLGROUPS; i++) + setup_timer(&ulog_buffers[i].timer, ulog_timer, i); + +- nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, ++ nflognl = netlink_kernel_create(&init_net, ++ NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + NULL, THIS_MODULE); + if (!nflognl) + return -ENOMEM; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_addrtype.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_addrtype.c 2007-12-21 15:36:15.000000000 -0500 +@@ -24,7 +24,7 @@ + + static inline int match_type(__be32 addr, u_int16_t mask) + { +- return !!(mask & (1 << inet_addr_type(addr))); ++ return !!(mask & (1 << inet_addr_type(&init_net, addr))); + } + + static int match(const struct sk_buff *skb, +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c +--- linux-2.6.22-570/net/ipv4/netfilter/ipt_recent.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/ipt_recent.c 2007-12-21 15:36:15.000000000 -0500 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -485,7 +486,7 @@ + #ifdef CONFIG_PROC_FS + if (err) + return err; +- proc_dir = proc_mkdir("ipt_recent", proc_net); ++ proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); + if (proc_dir == NULL) { + xt_unregister_match(&recent_match); + err = -ENOMEM; +@@ -499,7 +500,7 @@ + BUG_ON(!list_empty(&tables)); + xt_unregister_match(&recent_match); + #ifdef CONFIG_PROC_FS +- remove_proc_entry("ipt_recent", proc_net); ++ 
remove_proc_entry("ipt_recent", init_net.proc_net); + #endif + } + +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c +--- linux-2.6.22-570/net/ipv4/netfilter/iptable_filter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_filter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -26,7 +26,7 @@ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +-} initial_table __initdata = { ++} initial_table = { + .repl = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, +@@ -51,7 +51,7 @@ + .term = IPT_ERROR_INIT, /* ERROR */ + }; + +-static struct xt_table packet_filter = { ++static struct xt_table ip_packet_filter_dflt = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, +@@ -67,7 +67,9 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { +- return ipt_do_table(pskb, hook, in, out, &packet_filter); ++ struct net *net = (in?in:out)->nd_net; ++ ++ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); + } + + static unsigned int +@@ -77,6 +79,8 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ struct net *net = (in?in:out)->nd_net; ++ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { +@@ -86,7 +90,7 @@ + return NF_ACCEPT; + } + +- return ipt_do_table(pskb, hook, in, out, &packet_filter); ++ return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter); + } + + static struct nf_hook_ops ipt_ops[] = { +@@ -117,6 +121,30 @@ + static int forward = NF_ACCEPT; + module_param(forward, bool, 0000); + ++static int iptable_filter_net_init(struct net *net) ++{ ++ /* Allocate the table */ ++ net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt, ++ sizeof(*net->ip_packet_filter), ++ GFP_KERNEL); ++ if (!net->ip_packet_filter) ++ return -ENOMEM; ++ ++ /* Register table */ ++ return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl); ++} ++ ++static void iptable_filter_net_exit(struct net *net) ++{ ++ ipt_unregister_table(net->ip_packet_filter); ++ kfree(net->ip_packet_filter); ++} ++ ++static struct pernet_operations iptable_filter_net_ops = { ++ .init = iptable_filter_net_init, ++ .exit = iptable_filter_net_exit, ++}; ++ + static int __init iptable_filter_init(void) + { + int ret; +@@ -130,7 +158,7 @@ + initial_table.entries[1].target.verdict = -forward - 1; + + /* Register table */ +- ret = ipt_register_table(&packet_filter, &initial_table.repl); ++ ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + return ret; + +@@ -142,14 +170,14 @@ + return ret; + + cleanup_table: +- ipt_unregister_table(&packet_filter); ++ unregister_pernet_subsys(&iptable_filter_net_ops); + return ret; + } + + static void __exit iptable_filter_fini(void) + { + nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); +- ipt_unregister_table(&packet_filter); ++ unregister_pernet_subsys(&iptable_filter_net_ops); + } + + module_init(iptable_filter_init); +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c +--- linux-2.6.22-570/net/ipv4/netfilter/iptable_mangle.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_mangle.c 2007-12-21 15:36:15.000000000 -0500 +@@ -80,6 +80,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ 
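Almost every netfilter hook touched in these hunks gains the same three-line guard, so it is worth seeing in isolation. Below is a minimal sketch of that idiom, outside the diff: the hook name `example_hook` is invented for illustration, and the header providing `init_net` is assumed to match the later mainline net-namespace work (this tree may spell it differently).

	#include <linux/netfilter.h>
	#include <linux/netdevice.h>
	#include <linux/skbuff.h>
	#include <net/net_namespace.h>	/* assumed location of init_net */

	static unsigned int example_hook(unsigned int hooknum,
					 struct sk_buff **pskb,
					 const struct net_device *in,
					 const struct net_device *out,
					 int (*okfn)(struct sk_buff *))
	{
		/* Depending on the hook point, only one of in/out is
		 * non-NULL, so read the namespace off whichever device
		 * the packet actually has (the (in?in:out) pattern used
		 * throughout this patch). */
		if ((in ? in : out)->nd_net != &init_net)
			return NF_ACCEPT;	/* pass other namespaces through untouched */

		/* ... init_net-only processing would go here ... */
		return NF_ACCEPT;
	}

The guard keeps not-yet-converted subsystems safe during the transition: packets belonging to freshly created namespaces simply bypass them instead of reaching state that is still global.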
++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ipt_do_table(pskb, hook, in, out, &packet_mangler); + } + +@@ -96,6 +100,10 @@ + __be32 saddr, daddr; + u_int32_t mark; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { +@@ -121,7 +129,7 @@ + iph->daddr != daddr || + (*pskb)->mark != mark || + iph->tos != tos) +- if (ip_route_me_harder(pskb, RTN_UNSPEC)) ++ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) + ret = NF_DROP; + } + +@@ -171,7 +179,7 @@ + int ret; + + /* Register table */ +- ret = ipt_register_table(&packet_mangler, &initial_table.repl); ++ ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl); + if (ret < 0) + return ret; + +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c +--- linux-2.6.22-570/net/ipv4/netfilter/iptable_raw.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/iptable_raw.c 2007-12-21 15:36:15.000000000 -0500 +@@ -52,6 +52,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ipt_do_table(pskb, hook, in, out, &packet_raw); + } + +@@ -96,7 +100,7 @@ + int ret; + + /* Register table */ +- ret = ipt_register_table(&packet_raw, &initial_table.repl); ++ ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl); + if (ret < 0) + return ret; + +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +--- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -120,6 +120,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(pskb); + } +@@ -135,6 +139,10 @@ + struct nf_conn_help *help; + struct nf_conntrack_helper *helper; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(*pskb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) +@@ -157,6 +165,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* Previously seen (loopback)? Ignore. Do this before + fragment check. 
*/ + if ((*pskb)->nfct) +@@ -180,6 +192,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return nf_conntrack_in(PF_INET, hooknum, pskb); + } + +@@ -189,6 +205,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { +@@ -325,6 +345,9 @@ + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + NF_CT_TUPLE_U_BLANK(&tuple); + tuple.src.u3.ip = inet->rcv_saddr; + tuple.src.u.tcp.port = inet->sport; +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +--- linux-2.6.22-570/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 2007-12-21 15:36:15.000000000 -0500 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -378,16 +379,16 @@ + { + struct proc_dir_entry *proc, *proc_exp, *proc_stat; + +- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); ++ proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); + if (!proc) + goto err1; + +- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, ++ proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, + &ip_exp_file_ops); + if (!proc_exp) + goto err2; + +- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); ++ proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); + if (!proc_stat) + goto err3; + +@@ -397,16 +398,16 @@ + return 0; + + err3: +- proc_net_remove("ip_conntrack_expect"); ++ proc_net_remove(&init_net, "ip_conntrack_expect"); + err2: +- proc_net_remove("ip_conntrack"); ++ proc_net_remove(&init_net, "ip_conntrack"); + err1: + return -ENOMEM; + } + + void __exit nf_conntrack_ipv4_compat_fini(void) + { +- remove_proc_entry("ip_conntrack", proc_net_stat); +- proc_net_remove("ip_conntrack_expect"); +- proc_net_remove("ip_conntrack"); ++ remove_proc_entry("ip_conntrack", init_net.proc_net_stat); ++ proc_net_remove(&init_net, "ip_conntrack_expect"); ++ proc_net_remove(&init_net, "ip_conntrack"); + } +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c +--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_helper.c 2007-12-21 15:36:12.000000000 -0500 +@@ -178,7 +178,7 @@ + datalen = (*pskb)->len - iph->ihl*4; + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && +- (*pskb)->dev->features & NETIF_F_ALL_CSUM) { ++ (*pskb)->dev->features & NETIF_F_V4_CSUM) { + (*pskb)->ip_summed = CHECKSUM_PARTIAL; + (*pskb)->csum_start = skb_headroom(*pskb) + + skb_network_offset(*pskb) + +@@ -265,7 +265,7 @@ + + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && +- (*pskb)->dev->features & NETIF_F_ALL_CSUM) { ++ (*pskb)->dev->features & NETIF_F_V4_CSUM) { + (*pskb)->ip_summed = CHECKSUM_PARTIAL; + 
(*pskb)->csum_start = skb_headroom(*pskb) + + skb_network_offset(*pskb) + +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c +--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_rule.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_rule.c 2007-12-21 15:36:15.000000000 -0500 +@@ -98,7 +98,10 @@ + static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) + { + static int warned = 0; +- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; ++ struct flowi fl = { ++ .fl_net = &init_net, ++ .nl_u = { .ip4_u = { .daddr = dstip } } ++ }; + struct rtable *rt; + + if (ip_route_output_key(&rt, &fl) != 0) +@@ -252,7 +255,7 @@ + { + int ret; + +- ret = ipt_register_table(&nat_table, &nat_initial_table.repl); ++ ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl); + if (ret != 0) + return ret; + ret = xt_register_target(&ipt_snat_reg); +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c +--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter/nf_nat_standalone.c 2007-12-21 15:36:15.000000000 -0500 +@@ -83,6 +83,10 @@ + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* We never see fragments: conntrack defrags on pre-routing + and local-out, and nf_nat_out protects post-routing. */ + NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); +@@ -172,6 +176,10 @@ + unsigned int ret; + __be32 daddr = ip_hdr(*pskb)->daddr; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != ip_hdr(*pskb)->daddr) { +@@ -194,6 +202,10 @@ + #endif + unsigned int ret; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) +@@ -227,6 +239,10 @@ + enum ip_conntrack_info ctinfo; + unsigned int ret; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* root is playing with raw sockets. 
*/ + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) +@@ -239,7 +255,7 @@ + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { +- if (ip_route_me_harder(pskb, RTN_UNSPEC)) ++ if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC)) + ret = NF_DROP; + } + #ifdef CONFIG_XFRM +@@ -262,6 +278,10 @@ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + ct = nf_ct_get(*pskb, &ctinfo); + if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { + DEBUGP("nf_nat_standalone: adjusting sequence number\n"); +diff -Nurb linux-2.6.22-570/net/ipv4/netfilter.c linux-2.6.22-591/net/ipv4/netfilter.c +--- linux-2.6.22-570/net/ipv4/netfilter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/netfilter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -8,7 +8,7 @@ + #include + + /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ +-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) ++int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type) + { + const struct iphdr *iph = ip_hdr(*pskb); + struct rtable *rt; +@@ -17,7 +17,8 @@ + unsigned int hh_len; + unsigned int type; + +- type = inet_addr_type(iph->saddr); ++ fl.fl_net = net; ++ type = inet_addr_type(net, iph->saddr); + if (addr_type == RTN_UNSPEC) + addr_type = type; + +@@ -155,12 +156,13 @@ + const struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { ++ struct net *net = (info->indev?info->indev:info->outdev)->nd_net; + const struct iphdr *iph = ip_hdr(*pskb); + + if (!(iph->tos == rt_info->tos + && iph->daddr == rt_info->daddr + && iph->saddr == rt_info->saddr)) +- return ip_route_me_harder(pskb, RTN_UNSPEC); ++ return ip_route_me_harder(net, pskb, RTN_UNSPEC); + } + return 0; + } +diff -Nurb linux-2.6.22-570/net/ipv4/proc.c linux-2.6.22-591/net/ipv4/proc.c +--- linux-2.6.22-570/net/ipv4/proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + + static int fold_prot_inuse(struct proto *proto) + { +@@ -69,8 +70,9 @@ + seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); +- seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, +- atomic_read(&ip_frag_mem)); ++ seq_printf(seq, "FRAG: inuse %d memory %d\n", ++ init_net.ip_frag_nqueues, ++ atomic_read(&init_net.ip_frag_mem)); + return 0; + } + +@@ -260,7 +262,8 @@ + seq_printf(seq, " %s", snmp4_ipstats_list[i].name); + + seq_printf(seq, "\nIp: %d %d", +- IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl); ++ IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 
1 : 2, ++ init_net.sysctl_ip_default_ttl); + + for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + seq_printf(seq, " %lu", +@@ -380,20 +383,20 @@ + { + int rc = 0; + +- if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops)) + goto out_netstat; + +- if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops)) + goto out_snmp; + +- if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops)) + goto out_sockstat; + out: + return rc; + out_sockstat: +- proc_net_remove("snmp"); ++ proc_net_remove(&init_net, "snmp"); + out_snmp: +- proc_net_remove("netstat"); ++ proc_net_remove(&init_net, "netstat"); + out_netstat: + rc = -ENOMEM; + goto out; +diff -Nurb linux-2.6.22-570/net/ipv4/raw.c linux-2.6.22-591/net/ipv4/raw.c +--- linux-2.6.22-570/net/ipv4/raw.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/raw.c 2007-12-21 15:36:15.000000000 -0500 +@@ -73,6 +73,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -101,7 +102,7 @@ + write_unlock_bh(&raw_v4_lock); + } + +-struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, ++struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, + int dif, int tag) + { +@@ -110,6 +111,9 @@ + sk_for_each_from(sk, node) { + struct inet_sock *inet = inet_sk(sk); + ++ if (sk->sk_net != net) ++ continue; ++ + if (inet->num == num && + !(inet->daddr && inet->daddr != raddr) && + (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag) && +@@ -152,6 +156,7 @@ + */ + int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash) + { ++ struct net *net = skb->dev->nd_net; + struct sock *sk; + struct hlist_head *head; + int delivered = 0; +@@ -160,7 +165,7 @@ + head = &raw_v4_htable[hash]; + if (hlist_empty(head)) + goto out; +- sk = __raw_v4_lookup(__sk_head(head), iph->protocol, ++ sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, + iph->saddr, iph->daddr, + skb->dev->ifindex, skb->skb_tag); + +@@ -173,7 +178,7 @@ + if (clone) + raw_rcv(sk, clone); + } +- sk = __raw_v4_lookup(sk_next(sk), iph->protocol, ++ sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, + iph->saddr, iph->daddr, + skb->dev->ifindex, skb->skb_tag); + } +@@ -484,7 +489,8 @@ + } + + { +- struct flowi fl = { .oif = ipc.oif, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = ipc.oif, + .nl_u = { .ip4_u = + { .daddr = daddr, + .saddr = saddr, +@@ -574,7 +580,7 @@ + if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) + goto out; + v4_map_sock_addr(inet, addr, &nsa); +- chk_addr_ret = inet_addr_type(nsa.saddr); ++ chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr); + ret = -EADDRNOTAVAIL; + if (nsa.saddr && chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) +@@ -798,6 +804,7 @@ + + #ifdef CONFIG_PROC_FS + struct raw_iter_state { ++ struct net *net; + int bucket; + }; + +@@ -811,11 +818,14 @@ + for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) { + struct hlist_node *node; + +- sk_for_each(sk, node, &raw_v4_htable[state->bucket]) ++ sk_for_each(sk, node, &raw_v4_htable[state->bucket]) { ++ if (sk->sk_net != state->net) ++ continue; + if (sk->sk_family == PF_INET && + nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) + goto found; + } ++ } + sk = 
NULL; + found: + return sk; +@@ -830,7 +840,7 @@ + try_again: + ; + } while (sk && (sk->sk_family != PF_INET || +- !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); ++ !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net))); + + if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) { + sk = sk_head(&raw_v4_htable[state->bucket]); +@@ -933,6 +943,7 @@ + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -940,23 +951,46 @@ + goto out; + } + ++static int raw_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct raw_iter_state *state = seq->private; ++ put_net(state->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations raw_seq_fops = { + .owner = THIS_MODULE, + .open = raw_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = raw_seq_release, + }; + +-int __init raw_proc_init(void) ++static int raw_proc_net_init(struct net *net) + { +- if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops)) ++ if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops)) + return -ENOMEM; + return 0; + } + ++static void raw_proc_net_exit(struct net *net) ++{ ++ proc_net_remove(net, "raw"); ++} ++ ++static struct pernet_operations raw_proc_net_ops = { ++ .init = raw_proc_net_init, ++ .exit = raw_proc_net_exit, ++}; ++ ++int __init raw_proc_init(void) ++{ ++ return register_pernet_subsys(&raw_proc_net_ops); ++} ++ + void __init raw_proc_exit(void) + { +- proc_net_remove("raw"); ++ unregister_pernet_subsys(&raw_proc_net_ops); + } + #endif /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/ipv4/route.c linux-2.6.22-591/net/ipv4/route.c +--- linux-2.6.22-570/net/ipv4/route.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -101,8 +101,8 @@ + #include + #include + #include +-#include + #include ++#include + #include + #ifdef CONFIG_SYSCTL + #include +@@ -266,6 +266,7 @@ + + #ifdef CONFIG_PROC_FS + struct rt_cache_iter_state { ++ struct net *net; + int bucket; + }; + +@@ -334,6 +335,7 @@ + + static int rt_cache_seq_show(struct seq_file *seq, void *v) + { ++ struct rt_cache_iter_state *st = seq->private; + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%-127s\n", + "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" +@@ -343,6 +345,9 @@ + struct rtable *r = v; + char temp[256]; + ++ if (r->fl.fl_net != st->net) ++ return 0; ++ + sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t" + "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X", + r->u.dst.dev ? 
r->u.dst.dev->name : "*", +@@ -385,6 +390,7 @@ + seq = file->private_data; + seq->private = s; + memset(s, 0, sizeof(*s)); ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -392,12 +398,20 @@ + goto out; + } + ++static int rt_cache_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct rt_cache_iter_state *st = seq->private; ++ put_net(st->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations rt_cache_seq_fops = { + .owner = THIS_MODULE, + .open = rt_cache_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = rt_cache_seq_release, + }; + + +@@ -495,13 +509,11 @@ + + static __inline__ void rt_free(struct rtable *rt) + { +- multipath_remove(rt); + call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + } + + static __inline__ void rt_drop(struct rtable *rt) + { +- multipath_remove(rt); + ip_rt_put(rt); + call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); + } +@@ -565,61 +577,16 @@ + + static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) + { +- return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | ++ return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | + (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | + (fl1->mark ^ fl2->mark) | + (*(u16 *)&fl1->nl_u.ip4_u.tos ^ + *(u16 *)&fl2->nl_u.ip4_u.tos) | + (fl1->oif ^ fl2->oif) | +- (fl1->iif ^ fl2->iif)) == 0; ++ (fl1->iif ^ fl2->iif)) == 0) && ++ fl1->fl_net == fl2->fl_net; + } + +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +-static struct rtable **rt_remove_balanced_route(struct rtable **chain_head, +- struct rtable *expentry, +- int *removed_count) +-{ +- int passedexpired = 0; +- struct rtable **nextstep = NULL; +- struct rtable **rthp = chain_head; +- struct rtable *rth; +- +- if (removed_count) +- *removed_count = 0; +- +- while ((rth = *rthp) != NULL) { +- if (rth == expentry) +- passedexpired = 1; +- +- if (((*rthp)->u.dst.flags & DST_BALANCED) != 0 && +- compare_keys(&(*rthp)->fl, &expentry->fl)) { +- if (*rthp == expentry) { +- *rthp = rth->u.dst.rt_next; +- continue; +- } else { +- *rthp = rth->u.dst.rt_next; +- rt_free(rth); +- if (removed_count) +- ++(*removed_count); +- } +- } else { +- if (!((*rthp)->u.dst.flags & DST_BALANCED) && +- passedexpired && !nextstep) +- nextstep = &rth->u.dst.rt_next; +- +- rthp = &rth->u.dst.rt_next; +- } +- } +- +- rt_free(expentry); +- if (removed_count) +- ++(*removed_count); +- +- return nextstep; +-} +-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- +- + /* This runs via a timer and thus is always in BH context. */ + static void rt_check_expire(unsigned long dummy) + { +@@ -658,23 +625,9 @@ + } + + /* Cleanup aged off entries. */ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- /* remove all related balanced entries if necessary */ +- if (rth->u.dst.flags & DST_BALANCED) { +- rthp = rt_remove_balanced_route( +- &rt_hash_table[i].chain, +- rth, NULL); +- if (!rthp) +- break; +- } else { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + } +-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- *rthp = rth->u.dst.rt_next; +- rt_free(rth); +-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- } + spin_unlock(rt_hash_lock_addr(i)); + + /* Fallback loop breaker. 
*/ +@@ -721,9 +674,6 @@ + if (delay < 0) + delay = ip_rt_min_delay; + +- /* flush existing multipath state*/ +- multipath_flush(); +- + spin_lock_bh(&rt_flush_lock); + + if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) { +@@ -842,31 +792,10 @@ + rthp = &rth->u.dst.rt_next; + continue; + } +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- /* remove all related balanced entries +- * if necessary +- */ +- if (rth->u.dst.flags & DST_BALANCED) { +- int r; +- +- rthp = rt_remove_balanced_route( +- &rt_hash_table[k].chain, +- rth, +- &r); +- goal -= r; +- if (!rthp) +- break; +- } else { + *rthp = rth->u.dst.rt_next; + rt_free(rth); + goal--; + } +-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- *rthp = rth->u.dst.rt_next; +- rt_free(rth); +- goal--; +-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- } + spin_unlock_bh(rt_hash_lock_addr(k)); + if (goal <= 0) + break; +@@ -939,12 +868,7 @@ + + spin_lock_bh(rt_hash_lock_addr(hash)); + while ((rth = *rthp) != NULL) { +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- if (!(rth->u.dst.flags & DST_BALANCED) && +- compare_keys(&rth->fl, &rt->fl)) { +-#else + if (compare_keys(&rth->fl, &rt->fl)) { +-#endif + /* Put it first */ + *rthp = rth->u.dst.rt_next; + /* +@@ -1055,7 +979,7 @@ + static DEFINE_SPINLOCK(rt_peer_lock); + struct inet_peer *peer; + +- peer = inet_getpeer(rt->rt_dst, create); ++ peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create); + + spin_lock_bh(&rt_peer_lock); + if (rt->peer == NULL) { +@@ -1148,7 +1072,7 @@ + if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) + goto reject_redirect; + } else { +- if (inet_addr_type(new_gw) != RTN_UNICAST) ++ if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST) + goto reject_redirect; + } + +@@ -1189,6 +1113,7 @@ + + /* Copy all the information. 
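The redirect path that follows clones a cached rtable, so the clone must take its own reference on the namespace (hold_net()), released later in ipv4_dst_destroy() (release_net()). A toy refcount model of that pairing, with a plain counter standing in for the kernel's accounting:

    /* Reduced model of the hold_net()/release_net() pairing the patch
     * adds around route copies; the kernel versions manipulate a
     * namespace refcount, modeled here with a plain counter. */
    #include <assert.h>

    struct net { int use_count; };

    static struct net *hold_net(struct net *net)
    {
        net->use_count++;            /* the route now pins the namespace */
        return net;
    }

    static void release_net(struct net *net)
    {
        assert(net->use_count > 0);
        net->use_count--;            /* dropped in ipv4_dst_destroy() */
    }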
*/ + *rt = *rth; ++ hold_net(rt->fl.fl_net); + INIT_RCU_HEAD(&rt->u.dst.rcu_head); + rt->u.dst.__use = 1; + atomic_set(&rt->u.dst.__refcnt, 1); +@@ -1407,7 +1332,7 @@ + __be32 daddr = iph->daddr; + unsigned short est_mtu = 0; + +- if (ipv4_config.no_pmtu_disc) ++ if (init_net.sysctl_ipv4_no_pmtu_disc) + return 0; + + for (i = 0; i < 2; i++) { +@@ -1489,6 +1414,7 @@ + rt->idev = NULL; + in_dev_put(idev); + } ++ release_net(rt->fl.fl_net); + } + + static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, +@@ -1496,8 +1422,9 @@ + { + struct rtable *rt = (struct rtable *) dst; + struct in_device *idev = rt->idev; +- if (dev != &loopback_dev && idev && idev->dev == dev) { +- struct in_device *loopback_idev = in_dev_get(&loopback_dev); ++ struct net *net = dev->nd_net; ++ if (dev != &net->loopback_dev && idev && idev->dev == dev) { ++ struct in_device *loopback_idev = in_dev_get(&net->loopback_dev); + if (loopback_idev) { + rt->idev = loopback_idev; + in_dev_put(idev); +@@ -1584,7 +1511,7 @@ + rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu; + + if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) +- rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; ++ rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl; + if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU) + rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; + if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0) +@@ -1605,6 +1532,7 @@ + static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, int our) + { ++ struct net *net = dev->nd_net; + unsigned hash; + struct rtable *rth; + __be32 spec_dst; +@@ -1638,6 +1566,7 @@ + rth->u.dst.flags= DST_HOST; + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) + rth->u.dst.flags |= DST_NOPOLICY; ++ rth->fl.fl_net = hold_net(net); + rth->fl.fl4_dst = daddr; + rth->rt_dst = daddr; + rth->fl.fl4_tos = tos; +@@ -1649,7 +1578,7 @@ + #endif + rth->rt_iif = + rth->fl.iif = dev->ifindex; +- rth->u.dst.dev = &loopback_dev; ++ rth->u.dst.dev = &net->loopback_dev; + dev_hold(rth->u.dst.dev); + rth->idev = in_dev_get(rth->u.dst.dev); + rth->fl.oif = 0; +@@ -1774,14 +1703,11 @@ + + atomic_set(&rth->u.dst.__refcnt, 1); + rth->u.dst.flags= DST_HOST; +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- if (res->fi->fib_nhs > 1) +- rth->u.dst.flags |= DST_BALANCED; +-#endif + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) + rth->u.dst.flags |= DST_NOPOLICY; + if (IN_DEV_CONF_GET(out_dev, NOXFRM)) + rth->u.dst.flags |= DST_NOXFRM; ++ rth->fl.fl_net = hold_net(in_dev->dev->nd_net); + rth->fl.fl4_dst = daddr; + rth->rt_dst = daddr; + rth->fl.fl4_tos = tos; +@@ -1812,7 +1738,7 @@ + return err; + } + +-static inline int ip_mkroute_input_def(struct sk_buff *skb, ++static inline int ip_mkroute_input(struct sk_buff *skb, + struct fib_result* res, + const struct flowi *fl, + struct in_device *in_dev, +@@ -1837,63 +1763,6 @@ + return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst); + } + +-static inline int ip_mkroute_input(struct sk_buff *skb, +- struct fib_result* res, +- const struct flowi *fl, +- struct in_device *in_dev, +- __be32 daddr, __be32 saddr, u32 tos) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- struct rtable* rth = NULL, *rtres; +- unsigned char hop, hopcount; +- int err = -EINVAL; +- unsigned int hash; +- +- if (res->fi) +- hopcount = res->fi->fib_nhs; +- else +- hopcount = 1; +- +- /* distinguish between multipath and singlepath */ +- if (hopcount < 2) +- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, +- saddr, tos); +- +- /* add all alternatives to the 
routing cache */ +- for (hop = 0; hop < hopcount; hop++) { +- res->nh_sel = hop; +- +- /* put reference to previous result */ +- if (hop) +- ip_rt_put(rtres); +- +- /* create a routing cache entry */ +- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, +- &rth); +- if (err) +- return err; +- +- /* put it into the cache */ +- hash = rt_hash(daddr, saddr, fl->iif); +- err = rt_intern_hash(hash, rth, &rtres); +- if (err) +- return err; +- +- /* forward hop information to multipath impl. */ +- multipath_set_nhinfo(rth, +- FIB_RES_NETWORK(*res), +- FIB_RES_NETMASK(*res), +- res->prefixlen, +- &FIB_RES_NH(*res)); +- } +- skb->dst = &rtres->u.dst; +- return err; +-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos); +-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +-} +- +- + /* + * NOTE. We drop all the packets that has local source + * addresses, because every properly looped back packet +@@ -1907,9 +1776,11 @@ + static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev) + { ++ struct net *net = dev->nd_net; + struct fib_result res; + struct in_device *in_dev = in_dev_get(dev); +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = net, ++ .nl_u = { .ip4_u = + { .daddr = daddr, + .saddr = saddr, + .tos = tos, +@@ -1967,7 +1838,7 @@ + if (res.type == RTN_LOCAL) { + int result; + result = fib_validate_source(saddr, daddr, tos, +- loopback_dev.ifindex, ++ net->loopback_dev.ifindex, + dev, &spec_dst, &itag); + if (result < 0) + goto martian_source; +@@ -2023,6 +1894,7 @@ + rth->u.dst.flags= DST_HOST; + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) + rth->u.dst.flags |= DST_NOPOLICY; ++ rth->fl.fl_net = hold_net(net); + rth->fl.fl4_dst = daddr; + rth->rt_dst = daddr; + rth->fl.fl4_tos = tos; +@@ -2034,7 +1906,7 @@ + #endif + rth->rt_iif = + rth->fl.iif = dev->ifindex; +- rth->u.dst.dev = &loopback_dev; ++ rth->u.dst.dev = &net->loopback_dev; + dev_hold(rth->u.dst.dev); + rth->idev = in_dev_get(rth->u.dst.dev); + rth->rt_gateway = daddr; +@@ -2092,6 +1964,7 @@ + struct rtable * rth; + unsigned hash; + int iif = dev->ifindex; ++ struct net *net = dev->nd_net; + + tos &= IPTOS_RT_MASK; + hash = rt_hash(daddr, saddr, iif); +@@ -2104,7 +1977,8 @@ + rth->fl.iif == iif && + rth->fl.oif == 0 && + rth->fl.mark == skb->mark && +- rth->fl.fl4_tos == tos) { ++ rth->fl.fl4_tos == tos && ++ rth->fl.fl_net == net) { + rth->u.dst.lastuse = jiffies; + dst_hold(&rth->u.dst); + rth->u.dst.__use++; +@@ -2211,18 +2085,12 @@ + + atomic_set(&rth->u.dst.__refcnt, 1); + rth->u.dst.flags= DST_HOST; +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- if (res->fi) { +- rth->rt_multipath_alg = res->fi->fib_mp_alg; +- if (res->fi->fib_nhs > 1) +- rth->u.dst.flags |= DST_BALANCED; +- } +-#endif + if (IN_DEV_CONF_GET(in_dev, NOXFRM)) + rth->u.dst.flags |= DST_NOXFRM; + if (IN_DEV_CONF_GET(in_dev, NOPOLICY)) + rth->u.dst.flags |= DST_NOPOLICY; + ++ rth->fl.fl_net = hold_net(oldflp->fl_net); + rth->fl.fl4_dst = oldflp->fl4_dst; + rth->fl.fl4_tos = tos; + rth->fl.fl4_src = oldflp->fl4_src; +@@ -2277,7 +2145,7 @@ + return err; + } + +-static inline int ip_mkroute_output_def(struct rtable **rp, ++static inline int ip_mkroute_output(struct rtable **rp, + struct fib_result* res, + const struct flowi *fl, + const struct flowi *oldflp, +@@ -2295,68 +2163,6 @@ + return err; + } + +-static inline int ip_mkroute_output(struct rtable** rp, +- struct fib_result* res, +- const struct flowi *fl, +- const struct 
flowi *oldflp, +- struct net_device *dev_out, +- unsigned flags) +-{ +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- unsigned char hop; +- unsigned hash; +- int err = -EINVAL; +- struct rtable *rth = NULL; +- +- if (res->fi && res->fi->fib_nhs > 1) { +- unsigned char hopcount = res->fi->fib_nhs; +- +- for (hop = 0; hop < hopcount; hop++) { +- struct net_device *dev2nexthop; +- +- res->nh_sel = hop; +- +- /* hold a work reference to the output device */ +- dev2nexthop = FIB_RES_DEV(*res); +- dev_hold(dev2nexthop); +- +- /* put reference to previous result */ +- if (hop) +- ip_rt_put(*rp); +- +- err = __mkroute_output(&rth, res, fl, oldflp, +- dev2nexthop, flags); +- +- if (err != 0) +- goto cleanup; +- +- hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, +- oldflp->oif); +- err = rt_intern_hash(hash, rth, rp); +- +- /* forward hop information to multipath impl. */ +- multipath_set_nhinfo(rth, +- FIB_RES_NETWORK(*res), +- FIB_RES_NETMASK(*res), +- res->prefixlen, +- &FIB_RES_NH(*res)); +- cleanup: +- /* release work reference to output device */ +- dev_put(dev2nexthop); +- +- if (err != 0) +- return err; +- } +- return err; +- } else { +- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, +- flags); +- } +-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ +- return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags); +-#endif +-} +- + /* + * Major route resolver routine. + */ +@@ -2364,7 +2170,9 @@ + static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) + { + u32 tos = RT_FL_TOS(oldflp); +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct net *net = oldflp->fl_net; ++ struct flowi fl = { .fl_net = net, ++ .nl_u = { .ip4_u = + { .daddr = oldflp->fl4_dst, + .saddr = oldflp->fl4_src, + .tos = tos & IPTOS_RT_MASK, +@@ -2373,7 +2181,7 @@ + RT_SCOPE_UNIVERSE), + } }, + .mark = oldflp->mark, +- .iif = loopback_dev.ifindex, ++ .iif = net->loopback_dev.ifindex, + .oif = oldflp->oif }; + struct fib_result res; + unsigned flags = 0; +@@ -2395,7 +2203,7 @@ + goto out; + + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ +- dev_out = ip_dev_find(oldflp->fl4_src); ++ dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + +@@ -2434,7 +2242,7 @@ + + + if (oldflp->oif) { +- dev_out = dev_get_by_index(oldflp->oif); ++ dev_out = dev_get_by_index(net, oldflp->oif); + err = -ENODEV; + if (dev_out == NULL) + goto out; +@@ -2467,9 +2275,9 @@ + fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK); + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &net->loopback_dev; + dev_hold(dev_out); +- fl.oif = loopback_dev.ifindex; ++ fl.oif = net->loopback_dev.ifindex; + res.type = RTN_LOCAL; + flags |= RTCF_LOCAL; + goto make_route; +@@ -2514,7 +2322,7 @@ + fl.fl4_src = fl.fl4_dst; + if (dev_out) + dev_put(dev_out); +- dev_out = &loopback_dev; ++ dev_out = &net->loopback_dev; + dev_hold(dev_out); + fl.oif = dev_out->ifindex; + if (res.fi) +@@ -2568,19 +2376,9 @@ + rth->fl.iif == 0 && + rth->fl.oif == flp->oif && + rth->fl.mark == flp->mark && ++ rth->fl.fl_net == flp->fl_net && + !((rth->fl.fl4_tos ^ flp->fl4_tos) & + (IPTOS_RT_MASK | RTO_ONLINK))) { +- +- /* check for multipath routes and choose one if +- * necessary +- */ +- if (multipath_select_route(flp, rth, rp)) { +- dst_hold(&(*rp)->u.dst); +- RT_CACHE_STAT_INC(out_hit); +- rcu_read_unlock_bh(); +- return 0; +- } +- + rth->u.dst.lastuse = jiffies; + dst_hold(&rth->u.dst); + rth->u.dst.__use++; +@@ -2729,10 +2527,6 @@ + if (rt->u.dst.tclassid) + NLA_PUT_U32(skb, RTA_FLOW, 
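From here on the output path resolves loopback through net->loopback_dev rather than the global loopback_dev, so each namespace routes local traffic over its own device. A toy illustration, with struct net and net_device cut down to the relevant fields:

    /* Toy model of the loopback change running through these hunks:
     * instead of one global loopback_dev, each struct net owns its own
     * loopback device, so lookups resolve against the namespace's
     * device rather than a shared global. */
    #include <stdio.h>

    struct net_device { int ifindex; const char *name; };

    struct net {
        struct net_device loopback_dev;   /* per-namespace, no longer global */
    };

    int main(void)
    {
        struct net host  = { { 1, "lo" } };
        struct net guest = { { 1, "lo" } };

        /* the same code path now picks the caller's namespace device: */
        printf("host lo=%p guest lo=%p\n",
               (void *)&host.loopback_dev, (void *)&guest.loopback_dev);
        return 0;
    }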
rt->u.dst.tclassid); + #endif +-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED +- if (rt->rt_multipath_alg != IP_MP_ALG_NONE) +- NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); +-#endif + if (rt->fl.iif) + NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst); + else if (rt->rt_src != rt->fl.fl4_src) +@@ -2759,7 +2553,7 @@ + __be32 dst = rt->rt_dst; + + if (MULTICAST(dst) && !LOCAL_MCAST(dst) && +- IPV4_DEVCONF_ALL(MC_FORWARDING)) { ++ IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { + int err = ipmr_get_route(skb, r, nowait); + if (err <= 0) { + if (!nowait) { +@@ -2790,6 +2584,7 @@ + + static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = in_skb->sk->sk_net; + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + struct rtable *rt = NULL; +@@ -2828,7 +2623,7 @@ + if (iif) { + struct net_device *dev; + +- dev = __dev_get_by_index(iif); ++ dev = __dev_get_by_index(net, iif); + if (dev == NULL) { + err = -ENODEV; + goto errout_free; +@@ -2845,6 +2640,7 @@ + err = -rt->u.dst.error; + } else { + struct flowi fl = { ++ .fl_net = net, + .nl_u = { + .ip4_u = { + .daddr = dst, +@@ -2869,7 +2665,7 @@ + if (err <= 0) + goto errout_free; + +- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); ++ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + errout: + return err; + +@@ -3182,6 +2978,48 @@ + } + __setup("rhash_entries=", set_rhash_entries); + ++ ++static void ip_rt_net_exit(struct net *net) ++{ ++#ifdef CONFIG_PROC_FS ++# ifdef CONFIG_NET_CLS_ROUTE ++ proc_net_remove(net, "rt_acct"); ++# endif ++ remove_proc_entry("rt_cache", net->proc_net_stat); ++ proc_net_remove(net, "rt_cache"); ++#endif ++ rt_run_flush(0); ++} ++ ++static int ip_rt_net_init(struct net *net) ++{ ++ int error = -ENOMEM; ++#ifdef CONFIG_PROC_FS ++ struct proc_dir_entry *rtstat_pde; ++ if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops)) ++ goto out; ++ if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, ++ net->proc_net_stat))) ++ goto out; ++ rtstat_pde->proc_fops = &rt_cpu_seq_fops; ++# ifdef CONFIG_NET_CLS_ROUTE ++ if (!create_proc_read_entry("rt_acct", 0, net->proc_net, ++ ip_rt_acct_read, NULL)) ++ goto out; ++# endif ++#endif ++ error = 0; ++out: ++ if (error) ++ ip_rt_net_exit(net); ++ return error; ++} ++ ++struct pernet_operations ip_rt_net_ops = { ++ .init = ip_rt_net_init, ++ .exit = ip_rt_net_exit, ++}; ++ + int __init ip_rt_init(void) + { + int rc = 0; +@@ -3245,20 +3083,7 @@ + ip_rt_secret_interval; + add_timer(&rt_secret_timer); + +-#ifdef CONFIG_PROC_FS +- { +- struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */ +- if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) || +- !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO, +- proc_net_stat))) { +- return -ENOMEM; +- } +- rtstat_pde->proc_fops = &rt_cpu_seq_fops; +- } +-#ifdef CONFIG_NET_CLS_ROUTE +- create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL); +-#endif +-#endif ++ register_pernet_subsys(&ip_rt_net_ops); + #ifdef CONFIG_XFRM + xfrm_init(); + xfrm4_init(); +diff -Nurb linux-2.6.22-570/net/ipv4/syncookies.c linux-2.6.22-591/net/ipv4/syncookies.c +--- linux-2.6.22-570/net/ipv4/syncookies.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/syncookies.c 2007-12-21 15:36:15.000000000 -0500 +@@ -253,7 +253,8 @@ + * no easy way to do this. + */ + { +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = &init_net, ++ .nl_u = { .ip4_u = + { .daddr = ((opt && opt->srr) ? 
+ opt->faddr : + ireq->rmt_addr), +diff -Nurb linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c +--- linux-2.6.22-570/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:02.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/sysctl_net_ipv4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -29,21 +29,21 @@ + static int ip_local_port_range_max[] = { 65535, 65535 }; + #endif + +-struct ipv4_config ipv4_config; +- + #ifdef CONFIG_SYSCTL + + static + int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { +- int val = IPV4_DEVCONF_ALL(FORWARDING); ++ struct net *net = ctl->extra2; ++ int *valp = ctl->data; ++ int old = *valp; + int ret; + + ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + +- if (write && IPV4_DEVCONF_ALL(FORWARDING) != val) +- inet_forward_change(); ++ if (write && *valp != old) ++ inet_forward_change(net); + + return ret; + } +@@ -53,6 +53,7 @@ + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen) + { ++ struct net *net = table->extra2; + int *valp = table->data; + int new; + +@@ -85,7 +86,7 @@ + } + + *valp = new; +- inet_forward_change(); ++ inet_forward_change(net); + return 1; + } + +@@ -188,22 +189,6 @@ + + ctl_table ipv4_table[] = { + { +- .ctl_name = NET_IPV4_TCP_TIMESTAMPS, +- .procname = "tcp_timestamps", +- .data = &sysctl_tcp_timestamps, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, +- .procname = "tcp_window_scaling", +- .data = &sysctl_tcp_window_scaling, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { + .ctl_name = NET_IPV4_TCP_SACK, + .procname = "tcp_sack", + .data = &sysctl_tcp_sack, +@@ -220,40 +205,6 @@ + .proc_handler = &proc_dointvec + }, + { +- .ctl_name = NET_IPV4_FORWARD, +- .procname = "ip_forward", +- .data = &IPV4_DEVCONF_ALL(FORWARDING), +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &ipv4_sysctl_forward, +- .strategy = &ipv4_sysctl_forward_strategy +- }, +- { +- .ctl_name = NET_IPV4_DEFAULT_TTL, +- .procname = "ip_default_ttl", +- .data = &sysctl_ip_default_ttl, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &ipv4_doint_and_flush, +- .strategy = &ipv4_doint_and_flush_strategy, +- }, +- { +- .ctl_name = NET_IPV4_NO_PMTU_DISC, +- .procname = "ip_no_pmtu_disc", +- .data = &ipv4_config.no_pmtu_disc, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_IPV4_NONLOCAL_BIND, +- .procname = "ip_nonlocal_bind", +- .data = &sysctl_ip_nonlocal_bind, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { + .ctl_name = NET_IPV4_TCP_SYN_RETRIES, + .procname = "tcp_syn_retries", + .data = &sysctl_tcp_syn_retries, +@@ -286,39 +237,6 @@ + .proc_handler = &proc_dointvec + }, + { +- .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, +- .procname = "ipfrag_high_thresh", +- .data = &sysctl_ipfrag_high_thresh, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, +- .procname = "ipfrag_low_thresh", +- .data = &sysctl_ipfrag_low_thresh, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_IPV4_DYNADDR, +- .procname = "ip_dynaddr", +- .data = &sysctl_ip_dynaddr, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec +- }, +- { +- .ctl_name = NET_IPV4_IPFRAG_TIME, +- 
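In the sysctl hunks here, one handler serves every namespace: the ctl_table entry carries its per-net data pointer in .data and the owning struct net in .extra2, which is how ipv4_sysctl_forward() above finds the right namespace to notify. A compact model of that convention, with hypothetical names (forward_changed is illustrative only, not a kernel function):

    /* Sketch of the handler convention introduced above: the ctl_table
     * entry points .data into the per-net struct and stores the owning
     * namespace in .extra2, so one handler serves every namespace.
     * Types are pared down for illustration. */
    #include <stdio.h>

    struct net { int forwarding; };

    struct ctl_table {
        const char *procname;
        int  *data;       /* points into the per-net struct */
        void *extra2;     /* the struct net that owns this entry */
    };

    static void forward_changed(struct ctl_table *ctl, int newval)
    {
        struct net *net = ctl->extra2;
        int old = *ctl->data;

        *ctl->data = newval;
        if (newval != old)
            printf("inet_forward_change() for net %p\n", (void *)net);
    }

    int main(void)
    {
        struct net n = { 0 };
        struct ctl_table t = { "ip_forward", &n.forwarding, &n };
        forward_changed(&t, 1);
        return 0;
    }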
.procname = "ipfrag_time",
+- .data = &sysctl_ipfrag_time,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+ .ctl_name = NET_IPV4_TCP_KEEPALIVE_TIME,
+ .procname = "tcp_keepalive_time",
+ .data = &sysctl_tcp_keepalive_time,
+@@ -422,17 +340,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
+- .procname = "ip_local_port_range",
+- .data = &sysctl_local_port_range,
+- .maxlen = sizeof(sysctl_local_port_range),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .strategy = &sysctl_intvec,
+- .extra1 = ip_local_port_range_min,
+- .extra2 = ip_local_port_range_max
+- },
+- {
+ .ctl_name = NET_IPV4_ICMP_ECHO_IGNORE_ALL,
+ .procname = "icmp_echo_ignore_all",
+ .data = &sysctl_icmp_echo_ignore_all,
+@@ -534,50 +441,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
+- .procname = "inet_peer_threshold",
+- .data = &inet_peer_threshold,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_MINTTL,
+- .procname = "inet_peer_minttl",
+- .data = &inet_peer_minttl,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
+- .procname = "inet_peer_maxttl",
+- .data = &inet_peer_maxttl,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME,
+- .procname = "inet_peer_gc_mintime",
+- .data = &inet_peer_gc_mintime,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME,
+- .procname = "inet_peer_gc_maxtime",
+- .data = &inet_peer_gc_maxtime,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+ .ctl_name = NET_TCP_ORPHAN_RETRIES,
+ .procname = "tcp_orphan_retries",
+ .data = &sysctl_tcp_orphan_retries,
+@@ -706,24 +569,6 @@
+ .proc_handler = &proc_dointvec
+ },
+ {
+- .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
+- .procname = "ipfrag_secret_interval",
+- .data = &sysctl_ipfrag_secret_interval,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_jiffies,
+- .strategy = &sysctl_jiffies
+- },
+- {
+- .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
+- .procname = "ipfrag_max_dist",
+- .data = &sysctl_ipfrag_max_dist,
+- .maxlen = sizeof(int),
+- .mode = 0644,
+- .proc_handler = &proc_dointvec_minmax,
+- .extra1 = &zero
+- },
+- {
+ .ctl_name = NET_TCP_NO_METRICS_SAVE,
+ .procname = "tcp_no_metrics_save",
+ .data = &sysctl_tcp_nometrics_save,
+@@ -865,6 +710,170 @@
+ { .ctl_name = 0 }
+ };
+
+-#endif /* CONFIG_SYSCTL */
++struct ctl_table multi_ipv4_table[] = {
++ {
++ /* .data is filled in by devinet_net_init.
++ * As a consequence this table entry must be the first
++ * entry in multi_ipv4_table.
++ */
++ .ctl_name = NET_IPV4_FORWARD,
++ .procname = "ip_forward",
++ .data = NULL,
++ .extra2 = &init_net,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_sysctl_forward,
++ .strategy = &ipv4_sysctl_forward_strategy
++ },
++ {
++ .ctl_name = NET_IPV4_DEFAULT_TTL,
++ .procname = "ip_default_ttl",
++ .data = &init_net.sysctl_ip_default_ttl,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &ipv4_doint_and_flush,
++ .strategy = &ipv4_doint_and_flush_strategy,
++ },
++ {
++ .ctl_name = NET_IPV4_NO_PMTU_DISC,
++ .procname = "ip_no_pmtu_disc",
++ .data = &init_net.sysctl_ipv4_no_pmtu_disc,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_NONLOCAL_BIND,
++ .procname = "ip_nonlocal_bind",
++ .data = &init_net.sysctl_ip_nonlocal_bind,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_LOCAL_PORT_RANGE,
++ .procname = "ip_local_port_range",
++ .data = &init_net.sysctl_local_port_range,
++ .maxlen = sizeof(init_net.sysctl_local_port_range),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .strategy = &sysctl_intvec,
++ .extra1 = ip_local_port_range_min,
++ .extra2 = ip_local_port_range_max
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH,
++ .procname = "ipfrag_high_thresh",
++ .data = &init_net.sysctl_ipfrag_high_thresh,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH,
++ .procname = "ipfrag_low_thresh",
++ .data = &init_net.sysctl_ipfrag_low_thresh,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_TIME,
++ .procname = "ipfrag_time",
++ .data = &init_net.sysctl_ipfrag_time,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL,
++ .procname = "ipfrag_secret_interval",
++ .data = &init_net.sysctl_ipfrag_secret_interval,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_IPFRAG_MAX_DIST,
++ .procname = "ipfrag_max_dist",
++ .data = &init_net.sysctl_ipfrag_max_dist,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_minmax,
++ .extra1 = &zero
++ },
++ {
++ .ctl_name = NET_IPV4_DYNADDR,
++ .procname = "ip_dynaddr",
++ .data = &init_net.sysctl_ip_dynaddr,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_THRESHOLD,
++ .procname = "inet_peer_threshold",
++ .data = &init_net.inet_peer_threshold,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_MINTTL,
++ .procname = "inet_peer_minttl",
++ .data = &init_net.inet_peer_minttl,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec_jiffies,
++ .strategy = &sysctl_jiffies
++ },
++ {
++ .ctl_name = NET_IPV4_INET_PEER_MAXTTL,
++ .procname = "inet_peer_maxttl",
++ .data = 
&init_net.inet_peer_maxttl, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_jiffies, ++ .strategy = &sysctl_jiffies ++ }, ++ { ++ .ctl_name = NET_IPV4_INET_PEER_GC_MINTIME, ++ .procname = "inet_peer_gc_mintime", ++ .data = &init_net.inet_peer_gc_mintime, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_jiffies, ++ .strategy = &sysctl_jiffies ++ }, ++ { ++ .ctl_name = NET_IPV4_INET_PEER_GC_MAXTIME, ++ .procname = "inet_peer_gc_maxtime", ++ .data = &init_net.inet_peer_gc_maxtime, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_jiffies, ++ .strategy = &sysctl_jiffies ++ }, ++ { ++ .ctl_name = NET_IPV4_TCP_TIMESTAMPS, ++ .procname = "tcp_timestamps", ++ .data = &init_net.sysctl_tcp_timestamps, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ ++ }, ++ { ++ .ctl_name = NET_IPV4_TCP_WINDOW_SCALING, ++ .procname = "tcp_window_scaling", ++ .data = &init_net.sysctl_tcp_window_scaling, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec ++ }, ++ {} ++}; + +-EXPORT_SYMBOL(ipv4_config); ++#endif /* CONFIG_SYSCTL */ +diff -Nurb linux-2.6.22-570/net/ipv4/tcp.c linux-2.6.22-591/net/ipv4/tcp.c +--- linux-2.6.22-570/net/ipv4/tcp.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/tcp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -2409,6 +2409,23 @@ + } + __setup("thash_entries=", set_thash_entries); + ++static int tcp_net_init(struct net *net) ++{ ++ /* ++ * This array holds the first and last local port number. ++ */ ++ net->sysctl_local_port_range[0] = 32768; ++ net->sysctl_local_port_range[1] = 61000; ++ ++ net->sysctl_tcp_timestamps = 1; ++ net->sysctl_tcp_window_scaling = 1; ++ return 0; ++} ++ ++static struct pernet_operations tcp_net_ops = { ++ .init = tcp_net_init, ++}; ++ + void __init tcp_init(void) + { + struct sk_buff *skb = NULL; +@@ -2502,6 +2519,8 @@ + sysctl_tcp_rmem[1] = 87380; + sysctl_tcp_rmem[2] = max(87380, max_share); + ++ register_pernet_subsys(&tcp_net_ops); ++ + printk(KERN_INFO "TCP: Hash tables configured " + "(established %d bind %d)\n", + tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size); +diff -Nurb linux-2.6.22-570/net/ipv4/tcp_input.c linux-2.6.22-591/net/ipv4/tcp_input.c +--- linux-2.6.22-570/net/ipv4/tcp_input.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/tcp_input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -72,8 +72,6 @@ + #include + #include + +-int sysctl_tcp_timestamps __read_mostly = 1; +-int sysctl_tcp_window_scaling __read_mostly = 1; + int sysctl_tcp_sack __read_mostly = 1; + int sysctl_tcp_fack __read_mostly = 1; + int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; +@@ -2922,7 +2920,7 @@ + break; + case TCPOPT_WINDOW: + if (opsize==TCPOLEN_WINDOW && th->syn && !estab) +- if (sysctl_tcp_window_scaling) { ++ if (init_net.sysctl_tcp_window_scaling) { + __u8 snd_wscale = *(__u8 *) ptr; + opt_rx->wscale_ok = 1; + if (snd_wscale > 14) { +@@ -2938,7 +2936,7 @@ + case TCPOPT_TIMESTAMP: + if (opsize==TCPOLEN_TIMESTAMP) { + if ((estab && opt_rx->tstamp_ok) || +- (!estab && sysctl_tcp_timestamps)) { ++ (!estab && init_net.sysctl_tcp_timestamps)) { + opt_rx->saw_tstamp = 1; + opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr)); + opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4))); +diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-591/net/ipv4/tcp_ipv4.c +--- linux-2.6.22-570/net/ipv4/tcp_ipv4.c 2007-12-21 15:35:59.000000000 -0500 ++++ 
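tcp_net_init() above seeds each new namespace with the historical global defaults instead of sharing one set of tunables across the system. A condensed sketch of that initialization, with the per-net struct abbreviated to the fields this hunk touches:

    /* Sketch of the tcp_net_init() pattern above: every new namespace
     * gets its own copy of the tunables, seeded with the old global
     * defaults. Field names follow the patch; the struct is abbreviated. */
    struct net_sysctls {
        int local_port_range[2];
        int tcp_timestamps;
        int tcp_window_scaling;
    };

    static int tcp_net_init_model(struct net_sysctls *net)
    {
        net->local_port_range[0] = 32768;   /* first ephemeral port */
        net->local_port_range[1] = 61000;   /* last ephemeral port */
        net->tcp_timestamps      = 1;       /* RFC 1323 timestamps on */
        net->tcp_window_scaling  = 1;       /* RFC 1323 window scaling on */
        return 0;
    }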
linux-2.6.22-591/net/ipv4/tcp_ipv4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -71,6 +71,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -353,6 +354,7 @@ + + void tcp_v4_err(struct sk_buff *skb, u32 info) + { ++ struct net *net = skb->dev->nd_net; + struct iphdr *iph = (struct iphdr *)skb->data; + struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); + struct tcp_sock *tp; +@@ -369,7 +371,7 @@ + } + + sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, +- th->source, inet_iif(skb)); ++ th->source, inet_iif(skb), net); + if (!sk) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; +@@ -1499,7 +1501,8 @@ + return tcp_check_req(sk, skb, req, prev); + + nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, +- iph->daddr, th->dest, inet_iif(skb)); ++ iph->daddr, th->dest, inet_iif(skb), ++ sk->sk_net); + + if (nsk) { + if (nsk->sk_state != TCP_TIME_WAIT) { +@@ -1618,6 +1621,7 @@ + + int tcp_v4_rcv(struct sk_buff *skb) + { ++ struct net *net = skb->dev->nd_net; + const struct iphdr *iph; + struct tcphdr *th; + struct sock *sk; +@@ -1657,7 +1661,7 @@ + TCP_SKB_CB(skb)->sacked = 0; + + sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, +- iph->daddr, th->dest, inet_iif(skb)); ++ iph->daddr, th->dest, inet_iif(skb), net); + if (!sk) + goto no_tcp_socket; + +@@ -1732,7 +1736,7 @@ + case TCP_TW_SYN: { + struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, + iph->daddr, th->dest, +- inet_iif(skb)); ++ inet_iif(skb), net); + if (sk2) { + inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_put(inet_twsk(sk)); +@@ -1766,7 +1770,7 @@ + int release_it = 0; + + if (!rt || rt->rt_dst != inet->daddr) { +- peer = inet_getpeer(inet->daddr, 1); ++ peer = inet_getpeer(sk->sk_net, inet->daddr, 1); + release_it = 1; + } else { + if (!rt->peer) +@@ -1791,7 +1795,7 @@ + + int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) + { +- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); ++ struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1); + + if (peer) { + const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); +@@ -1980,7 +1984,8 @@ + if (req->sk && + !nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT)) + continue; +- if (req->rsk_ops->family == st->family) { ++ if ((req->rsk_ops->family == st->family) && ++ (req->sk->sk_net == st->net)) { + cur = req; + goto out; + } +@@ -2004,6 +2009,8 @@ + } + get_sk: + sk_for_each_from(sk, node) { ++ if (sk->sk_net != st->net) ++ continue; + vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) +@@ -2054,11 +2061,10 @@ + struct hlist_node *node; + struct inet_timewait_sock *tw; + +- /* We can reschedule _before_ having picked the target: */ +- cond_resched_softirq(); +- +- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); ++ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { ++ if (sk->sk_net != st->net) ++ continue; + vxdprintk(VXD_CBIT(net, 6), + "sk,egf: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); +@@ -2072,6 +2078,8 @@ + st->state = TCP_SEQ_STATE_TIME_WAIT; + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket].twchain) { ++ if (tw->tw_net != st->net) ++ continue; + vxdprintk(VXD_CBIT(net, 6), + "tw: %p [#%d] (from %d)", + tw, tw->tw_nid, nx_current_nid()); +@@ -2082,7 +2090,7 @@ + rc = tw; + goto out; + } +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); ++ 
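The tcp_v4_err()/tcp_v4_rcv() hunks above thread the receiving device's namespace into every socket lookup, so a demultiplexed packet can only match sockets that belong to its own namespace. A stand-alone model of the extra predicate, with an illustrative hash-chain layout:

    /* Minimal model of namespace-aware socket lookup: the existing hash
     * walk gains one extra predicate, sk->sk_net == net. The chain and
     * socket layout here are illustrative only. */
    #include <stddef.h>
    #include <stdint.h>

    struct net;

    struct sock {
        uint32_t daddr;
        uint16_t dport;
        const struct net *sk_net;
        struct sock *next;
    };

    static struct sock *lookup(struct sock *chain, uint32_t daddr,
                               uint16_t dport, const struct net *net)
    {
        for (struct sock *sk = chain; sk; sk = sk->next)
            if (sk->daddr == daddr && sk->dport == dport && sk->sk_net == net)
                return sk;   /* only sockets owned by this namespace match */
        return NULL;
    }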
read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + st->state = TCP_SEQ_STATE_ESTABLISHED; + } + out: +@@ -2102,7 +2110,8 @@ + tw = cur; + tw = tw_next(tw); + get_tw: +- while (tw && (tw->tw_family != st->family || ++ while (tw && ((tw->tw_net != st->net) || ++ (tw->tw_family != st->family) || + !nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) { + tw = tw_next(tw); + } +@@ -2110,14 +2119,11 @@ + cur = tw; + goto out; + } +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); ++ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + st->state = TCP_SEQ_STATE_ESTABLISHED; + +- /* We can reschedule between buckets: */ +- cond_resched_softirq(); +- + if (++st->bucket < tcp_hashinfo.ehash_size) { +- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); ++ read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); + } else { + cur = NULL; +@@ -2130,6 +2136,8 @@ + vxdprintk(VXD_CBIT(net, 6), + "sk,egn: %p [#%d] (from %d)", + sk, sk->sk_nid, nx_current_nid()); ++ if (sk->sk_net != st->net) ++ continue; + if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)) + continue; + if (sk->sk_family == st->family) +@@ -2167,7 +2175,6 @@ + + if (!rc) { + inet_listen_unlock(&tcp_hashinfo); +- local_bh_disable(); + st->state = TCP_SEQ_STATE_ESTABLISHED; + rc = established_get_idx(seq, pos); + } +@@ -2200,7 +2207,6 @@ + rc = listening_get_next(seq, v); + if (!rc) { + inet_listen_unlock(&tcp_hashinfo); +- local_bh_disable(); + st->state = TCP_SEQ_STATE_ESTABLISHED; + rc = established_get_first(seq); + } +@@ -2232,8 +2238,7 @@ + case TCP_SEQ_STATE_TIME_WAIT: + case TCP_SEQ_STATE_ESTABLISHED: + if (v) +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); +- local_bh_enable(); ++ read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + break; + } + } +@@ -2262,6 +2267,7 @@ + goto out_kfree; + seq = file->private_data; + seq->private = s; ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -2269,20 +2275,30 @@ + goto out; + } + +-int tcp_proc_register(struct tcp_seq_afinfo *afinfo) ++static int tcp_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct tcp_iter_state *st = seq->private; ++ put_net(st->net); ++ return seq_release_private(inode, file); ++} ++ ++int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo) + { + int rc = 0; + struct proc_dir_entry *p; + + if (!afinfo) + return -EINVAL; ++ if (net == &init_net) { + afinfo->seq_fops->owner = afinfo->owner; + afinfo->seq_fops->open = tcp_seq_open; + afinfo->seq_fops->read = seq_read; + afinfo->seq_fops->llseek = seq_lseek; +- afinfo->seq_fops->release = seq_release_private; ++ afinfo->seq_fops->release = tcp_seq_release; ++ } + +- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); ++ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); + if (p) + p->data = afinfo; + else +@@ -2290,11 +2306,12 @@ + return rc; + } + +-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) ++void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) + { + if (!afinfo) + return; +- proc_net_remove(afinfo->name); ++ proc_net_remove(net, afinfo->name); ++ if (net == &init_net) + memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); + } + +@@ -2439,14 +2456,29 @@ + .seq_fops = &tcp4_seq_fops, + }; + ++static int tcp4_proc_net_init(struct net *net) ++{ ++ return tcp_proc_register(net, &tcp4_seq_afinfo); ++} ++ ++static void tcp4_proc_net_exit(struct net *net) ++{ ++ tcp_proc_unregister(net, 
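The tcp_seq_open() hunk above and the tcp_seq_release() that follows pin the namespace with get_net()/put_net() for as long as the /proc file stays open, mirroring the raw and route iterators earlier in the patch. A reduced open/release pairing, modeling the refcount with a plain counter:

    /* Model of the get_net()/put_net() pairing added to the seq_file
     * open/release paths: the /proc reader holds a namespace reference
     * for the whole lifetime of the open file. Simplified types. */
    #include <stdlib.h>

    struct net { int count; };

    static struct net *get_net(struct net *net) { net->count++; return net; }
    static void put_net(struct net *net)        { net->count--; }

    struct iter_state { struct net *net; };

    static struct iter_state *seq_open(struct net *proc_net)
    {
        struct iter_state *st = calloc(1, sizeof(*st));
        if (st)
            st->net = get_net(proc_net);  /* pin namespace while file is open */
        return st;
    }

    static void seq_release(struct iter_state *st)
    {
        put_net(st->net);                 /* dropped when the reader closes */
        free(st);
    }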
&tcp4_seq_afinfo); ++} ++ ++static struct pernet_operations tcp4_proc_net_ops = { ++ .init = tcp4_proc_net_init, ++ .exit = tcp4_proc_net_exit, ++}; ++ + int __init tcp4_proc_init(void) + { +- return tcp_proc_register(&tcp4_seq_afinfo); ++ return register_pernet_subsys(&tcp4_proc_net_ops); + } + + void tcp4_proc_exit(void) + { +- tcp_proc_unregister(&tcp4_seq_afinfo); ++ unregister_pernet_subsys(&tcp4_proc_net_ops); + } + #endif /* CONFIG_PROC_FS */ + +@@ -2508,6 +2540,5 @@ + EXPORT_SYMBOL(tcp_proc_register); + EXPORT_SYMBOL(tcp_proc_unregister); + #endif +-EXPORT_SYMBOL(sysctl_local_port_range); + EXPORT_SYMBOL(sysctl_tcp_low_latency); + +diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig +--- linux-2.6.22-570/net/ipv4/tcp_ipv4.c.orig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/tcp_ipv4.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,2483 +0,0 @@ +-/* +- * INET An implementation of the TCP/IP protocol suite for the LINUX +- * operating system. INET is implemented using the BSD Socket +- * interface as the means of communication with the user level. +- * +- * Implementation of the Transmission Control Protocol(TCP). +- * +- * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $ +- * +- * IPv4 specific functions +- * +- * +- * code split from: +- * linux/ipv4/tcp.c +- * linux/ipv4/tcp_input.c +- * linux/ipv4/tcp_output.c +- * +- * See tcp.c for author information +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- */ +- +-/* +- * Changes: +- * David S. Miller : New socket lookup architecture. +- * This code is dedicated to John Dyson. +- * David S. Miller : Change semantics of established hash, +- * half is devoted to TIME_WAIT sockets +- * and the rest go in the other half. +- * Andi Kleen : Add support for syncookies and fixed +- * some bugs: ip options weren't passed to +- * the TCP layer, missed a check for an +- * ACK bit. +- * Andi Kleen : Implemented fast path mtu discovery. +- * Fixed many serious bugs in the +- * request_sock handling and moved +- * most of it into the af independent code. +- * Added tail drop and some other bugfixes. +- * Added new listen semantics. +- * Mike McLagan : Routing by source +- * Juan Jose Ciarlante: ip_dynaddr bits +- * Andi Kleen: various fixes. +- * Vitaly E. Lavrov : Transparent proxy revived after year +- * coma. +- * Andi Kleen : Fix new listen. +- * Andi Kleen : Fix accept error reporting. +- * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which +- * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind +- * a single port at the same time. +- */ +- +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-int sysctl_tcp_tw_reuse __read_mostly; +-int sysctl_tcp_low_latency __read_mostly; +- +-/* Check TCP sequence numbers in ICMP packets. 
*/ +-#define ICMP_MIN_LENGTH 8 +- +-/* Socket used for sending RSTs */ +-static struct socket *tcp_socket __read_mostly; +- +-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb); +- +-#ifdef CONFIG_TCP_MD5SIG +-static struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, +- __be32 addr); +-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, +- __be32 saddr, __be32 daddr, +- struct tcphdr *th, int protocol, +- int tcplen); +-#endif +- +-struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { +- .lhash_lock = __RW_LOCK_UNLOCKED(tcp_hashinfo.lhash_lock), +- .lhash_users = ATOMIC_INIT(0), +- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), +-}; +- +-static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +-{ +- return inet_csk_get_port(&tcp_hashinfo, sk, snum, +- inet_csk_bind_conflict); +-} +- +-static void tcp_v4_hash(struct sock *sk) +-{ +- inet_hash(&tcp_hashinfo, sk); +-} +- +-void tcp_unhash(struct sock *sk) +-{ +- inet_unhash(&tcp_hashinfo, sk); +-} +- +-static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) +-{ +- return secure_tcp_sequence_number(ip_hdr(skb)->daddr, +- ip_hdr(skb)->saddr, +- tcp_hdr(skb)->dest, +- tcp_hdr(skb)->source); +-} +- +-int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) +-{ +- const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); +- struct tcp_sock *tp = tcp_sk(sk); +- +- /* With PAWS, it is safe from the viewpoint +- of data integrity. Even without PAWS it is safe provided sequence +- spaces do not overlap i.e. at data rates <= 80Mbit/sec. +- +- Actually, the idea is close to VJ's one, only timestamp cache is +- held not per host, but per port pair and TW bucket is used as state +- holder. +- +- If TW bucket has been already destroyed we fall back to VJ's scheme +- and use initial timestamp retrieved from peer table. +- */ +- if (tcptw->tw_ts_recent_stamp && +- (twp == NULL || (sysctl_tcp_tw_reuse && +- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) { +- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; +- if (tp->write_seq == 0) +- tp->write_seq = 1; +- tp->rx_opt.ts_recent = tcptw->tw_ts_recent; +- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; +- sock_hold(sktw); +- return 1; +- } +- +- return 0; +-} +- +-EXPORT_SYMBOL_GPL(tcp_twsk_unique); +- +-/* This will initiate an outgoing connection. 
*/ +-int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +-{ +- struct inet_sock *inet = inet_sk(sk); +- struct tcp_sock *tp = tcp_sk(sk); +- struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; +- struct rtable *rt; +- __be32 daddr, nexthop; +- int tmp; +- int err; +- +- if (addr_len < sizeof(struct sockaddr_in)) +- return -EINVAL; +- +- if (usin->sin_family != AF_INET) +- return -EAFNOSUPPORT; +- +- nexthop = daddr = usin->sin_addr.s_addr; +- if (inet->opt && inet->opt->srr) { +- if (!daddr) +- return -EINVAL; +- nexthop = inet->opt->faddr; +- } +- +- tmp = ip_route_connect(&rt, nexthop, inet->saddr, +- RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, +- IPPROTO_TCP, +- inet->sport, usin->sin_port, sk, 1); +- if (tmp < 0) { +- if (tmp == -ENETUNREACH) +- IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); +- return tmp; +- } +- +- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { +- ip_rt_put(rt); +- return -ENETUNREACH; +- } +- +- if (!inet->opt || !inet->opt->srr) +- daddr = rt->rt_dst; +- +- if (!inet->saddr) +- inet->saddr = rt->rt_src; +- inet->rcv_saddr = inet->saddr; +- +- if (tp->rx_opt.ts_recent_stamp && inet->daddr != daddr) { +- /* Reset inherited state */ +- tp->rx_opt.ts_recent = 0; +- tp->rx_opt.ts_recent_stamp = 0; +- tp->write_seq = 0; +- } +- +- if (tcp_death_row.sysctl_tw_recycle && +- !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { +- struct inet_peer *peer = rt_get_peer(rt); +- /* +- * VJ's idea. We save last timestamp seen from +- * the destination in peer table, when entering state +- * TIME-WAIT * and initialize rx_opt.ts_recent from it, +- * when trying new connection. +- */ +- if (peer != NULL && +- peer->tcp_ts_stamp + TCP_PAWS_MSL >= get_seconds()) { +- tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp; +- tp->rx_opt.ts_recent = peer->tcp_ts; +- } +- } +- +- inet->dport = usin->sin_port; +- inet->daddr = daddr; +- +- inet_csk(sk)->icsk_ext_hdr_len = 0; +- if (inet->opt) +- inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; +- +- tp->rx_opt.mss_clamp = 536; +- +- /* Socket identity is still unknown (sport may be zero). +- * However we set state to SYN-SENT and not releasing socket +- * lock select source port, enter ourselves into the hash tables and +- * complete initialization after this. +- */ +- tcp_set_state(sk, TCP_SYN_SENT); +- err = inet_hash_connect(&tcp_death_row, sk); +- if (err) +- goto failure; +- +- err = ip_route_newports(&rt, IPPROTO_TCP, +- inet->sport, inet->dport, sk); +- if (err) +- goto failure; +- +- /* OK, now commit destination to socket. */ +- sk->sk_gso_type = SKB_GSO_TCPV4; +- sk_setup_caps(sk, &rt->u.dst); +- +- if (!tp->write_seq) +- tp->write_seq = secure_tcp_sequence_number(inet->saddr, +- inet->daddr, +- inet->sport, +- usin->sin_port); +- +- inet->id = tp->write_seq ^ jiffies; +- +- err = tcp_connect(sk); +- rt = NULL; +- if (err) +- goto failure; +- +- return 0; +- +-failure: +- /* +- * This unhashes the socket and releases the local port, +- * if necessary. +- */ +- tcp_set_state(sk, TCP_CLOSE); +- ip_rt_put(rt); +- sk->sk_route_caps = 0; +- inet->dport = 0; +- return err; +-} +- +-/* +- * This routine does path mtu discovery as defined in RFC1191. +- */ +-static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu) +-{ +- struct dst_entry *dst; +- struct inet_sock *inet = inet_sk(sk); +- +- /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs +- * send out by Linux are always <576bytes so they should go through +- * unfragmented). 
+- */ +- if (sk->sk_state == TCP_LISTEN) +- return; +- +- /* We don't check in the destentry if pmtu discovery is forbidden +- * on this route. We just assume that no packet_to_big packets +- * are send back when pmtu discovery is not active. +- * There is a small race when the user changes this flag in the +- * route, but I think that's acceptable. +- */ +- if ((dst = __sk_dst_check(sk, 0)) == NULL) +- return; +- +- dst->ops->update_pmtu(dst, mtu); +- +- /* Something is about to be wrong... Remember soft error +- * for the case, if this connection will not able to recover. +- */ +- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) +- sk->sk_err_soft = EMSGSIZE; +- +- mtu = dst_mtu(dst); +- +- if (inet->pmtudisc != IP_PMTUDISC_DONT && +- inet_csk(sk)->icsk_pmtu_cookie > mtu) { +- tcp_sync_mss(sk, mtu); +- +- /* Resend the TCP packet because it's +- * clear that the old packet has been +- * dropped. This is the new "fast" path mtu +- * discovery. +- */ +- tcp_simple_retransmit(sk); +- } /* else let the usual retransmit timer handle it */ +-} +- +-/* +- * This routine is called by the ICMP module when it gets some +- * sort of error condition. If err < 0 then the socket should +- * be closed and the error returned to the user. If err > 0 +- * it's just the icmp type << 8 | icmp code. After adjustment +- * header points to the first 8 bytes of the tcp header. We need +- * to find the appropriate port. +- * +- * The locking strategy used here is very "optimistic". When +- * someone else accesses the socket the ICMP is just dropped +- * and for some paths there is no check at all. +- * A more general error queue to queue errors for later handling +- * is probably better. +- * +- */ +- +-void tcp_v4_err(struct sk_buff *skb, u32 info) +-{ +- struct iphdr *iph = (struct iphdr *)skb->data; +- struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); +- struct tcp_sock *tp; +- struct inet_sock *inet; +- const int type = icmp_hdr(skb)->type; +- const int code = icmp_hdr(skb)->code; +- struct sock *sk; +- __u32 seq; +- int err; +- +- if (skb->len < (iph->ihl << 2) + 8) { +- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); +- return; +- } +- +- sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, +- th->source, inet_iif(skb)); +- if (!sk) { +- ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); +- return; +- } +- if (sk->sk_state == TCP_TIME_WAIT) { +- inet_twsk_put(inet_twsk(sk)); +- return; +- } +- +- bh_lock_sock(sk); +- /* If too many ICMPs get dropped on busy +- * servers this needs to be solved differently. +- */ +- if (sock_owned_by_user(sk)) +- NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); +- +- if (sk->sk_state == TCP_CLOSE) +- goto out; +- +- tp = tcp_sk(sk); +- seq = ntohl(th->seq); +- if (sk->sk_state != TCP_LISTEN && +- !between(seq, tp->snd_una, tp->snd_nxt)) { +- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); +- goto out; +- } +- +- switch (type) { +- case ICMP_SOURCE_QUENCH: +- /* Just silently ignore these. 
*/ +- goto out; +- case ICMP_PARAMETERPROB: +- err = EPROTO; +- break; +- case ICMP_DEST_UNREACH: +- if (code > NR_ICMP_UNREACH) +- goto out; +- +- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ +- if (!sock_owned_by_user(sk)) +- do_pmtu_discovery(sk, iph, info); +- goto out; +- } +- +- err = icmp_err_convert[code].errno; +- break; +- case ICMP_TIME_EXCEEDED: +- err = EHOSTUNREACH; +- break; +- default: +- goto out; +- } +- +- switch (sk->sk_state) { +- struct request_sock *req, **prev; +- case TCP_LISTEN: +- if (sock_owned_by_user(sk)) +- goto out; +- +- req = inet_csk_search_req(sk, &prev, th->dest, +- iph->daddr, iph->saddr); +- if (!req) +- goto out; +- +- /* ICMPs are not backlogged, hence we cannot get +- an established socket here. +- */ +- BUG_TRAP(!req->sk); +- +- if (seq != tcp_rsk(req)->snt_isn) { +- NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); +- goto out; +- } +- +- /* +- * Still in SYN_RECV, just remove it silently. +- * There is no good way to pass the error to the newly +- * created socket, and POSIX does not want network +- * errors returned from accept(). +- */ +- inet_csk_reqsk_queue_drop(sk, req, prev); +- goto out; +- +- case TCP_SYN_SENT: +- case TCP_SYN_RECV: /* Cannot happen. +- It can f.e. if SYNs crossed. +- */ +- if (!sock_owned_by_user(sk)) { +- sk->sk_err = err; +- +- sk->sk_error_report(sk); +- +- tcp_done(sk); +- } else { +- sk->sk_err_soft = err; +- } +- goto out; +- } +- +- /* If we've already connected we will keep trying +- * until we time out, or the user gives up. +- * +- * rfc1122 4.2.3.9 allows to consider as hard errors +- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, +- * but it is obsoleted by pmtu discovery). +- * +- * Note, that in modern internet, where routing is unreliable +- * and in each dark corner broken firewalls sit, sending random +- * errors ordered by their masters even this two messages finally lose +- * their original sense (even Linux sends invalid PORT_UNREACHs) +- * +- * Now we are in compliance with RFCs. +- * --ANK (980905) +- */ +- +- inet = inet_sk(sk); +- if (!sock_owned_by_user(sk) && inet->recverr) { +- sk->sk_err = err; +- sk->sk_error_report(sk); +- } else { /* Only an error on timeout */ +- sk->sk_err_soft = err; +- } +- +-out: +- bh_unlock_sock(sk); +- sock_put(sk); +-} +- +-/* This routine computes an IPv4 TCP checksum. */ +-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +-{ +- struct inet_sock *inet = inet_sk(sk); +- struct tcphdr *th = tcp_hdr(skb); +- +- if (skb->ip_summed == CHECKSUM_PARTIAL) { +- th->check = ~tcp_v4_check(len, inet->saddr, +- inet->daddr, 0); +- skb->csum_start = skb_transport_header(skb) - skb->head; +- skb->csum_offset = offsetof(struct tcphdr, check); +- } else { +- th->check = tcp_v4_check(len, inet->saddr, inet->daddr, +- csum_partial((char *)th, +- th->doff << 2, +- skb->csum)); +- } +-} +- +-int tcp_v4_gso_send_check(struct sk_buff *skb) +-{ +- const struct iphdr *iph; +- struct tcphdr *th; +- +- if (!pskb_may_pull(skb, sizeof(*th))) +- return -EINVAL; +- +- iph = ip_hdr(skb); +- th = tcp_hdr(skb); +- +- th->check = 0; +- th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0); +- skb->csum_start = skb_transport_header(skb) - skb->head; +- skb->csum_offset = offsetof(struct tcphdr, check); +- skb->ip_summed = CHECKSUM_PARTIAL; +- return 0; +-} +- +-/* +- * This routine will send an RST to the other tcp. +- * +- * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.) +- * for reset. 
+- * Answer: if a packet caused RST, it is not for a socket +- * existing in our system, if it is matched to a socket, +- * it is just duplicate segment or bug in other side's TCP. +- * So that we build reply only basing on parameters +- * arrived with segment. +- * Exception: precedence violation. We do not implement it in any case. +- */ +- +-static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) +-{ +- struct tcphdr *th = tcp_hdr(skb); +- struct { +- struct tcphdr th; +-#ifdef CONFIG_TCP_MD5SIG +- __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)]; +-#endif +- } rep; +- struct ip_reply_arg arg; +-#ifdef CONFIG_TCP_MD5SIG +- struct tcp_md5sig_key *key; +-#endif +- +- /* Never send a reset in response to a reset. */ +- if (th->rst) +- return; +- +- if (((struct rtable *)skb->dst)->rt_type != RTN_LOCAL) +- return; +- +- /* Swap the send and the receive. */ +- memset(&rep, 0, sizeof(rep)); +- rep.th.dest = th->source; +- rep.th.source = th->dest; +- rep.th.doff = sizeof(struct tcphdr) / 4; +- rep.th.rst = 1; +- +- if (th->ack) { +- rep.th.seq = th->ack_seq; +- } else { +- rep.th.ack = 1; +- rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin + +- skb->len - (th->doff << 2)); +- } +- +- memset(&arg, 0, sizeof(arg)); +- arg.iov[0].iov_base = (unsigned char *)&rep; +- arg.iov[0].iov_len = sizeof(rep.th); +- +-#ifdef CONFIG_TCP_MD5SIG +- key = sk ? tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr) : NULL; +- if (key) { +- rep.opt[0] = htonl((TCPOPT_NOP << 24) | +- (TCPOPT_NOP << 16) | +- (TCPOPT_MD5SIG << 8) | +- TCPOLEN_MD5SIG); +- /* Update length and the length the header thinks exists */ +- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; +- rep.th.doff = arg.iov[0].iov_len / 4; +- +- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[1], +- key, +- ip_hdr(skb)->daddr, +- ip_hdr(skb)->saddr, +- &rep.th, IPPROTO_TCP, +- arg.iov[0].iov_len); +- } +-#endif +- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, +- ip_hdr(skb)->saddr, /* XXX */ +- sizeof(struct tcphdr), IPPROTO_TCP, 0); +- arg.csumoffset = offsetof(struct tcphdr, check) / 2; +- +- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); +- +- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); +- TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); +-} +- +-/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states +- outside socket context is ugly, certainly. What can I do? +- */ +- +-static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, +- struct sk_buff *skb, u32 seq, u32 ack, +- u32 win, u32 ts) +-{ +- struct tcphdr *th = tcp_hdr(skb); +- struct { +- struct tcphdr th; +- __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) +-#ifdef CONFIG_TCP_MD5SIG +- + (TCPOLEN_MD5SIG_ALIGNED >> 2) +-#endif +- ]; +- } rep; +- struct ip_reply_arg arg; +-#ifdef CONFIG_TCP_MD5SIG +- struct tcp_md5sig_key *key; +- struct tcp_md5sig_key tw_key; +-#endif +- +- memset(&rep.th, 0, sizeof(struct tcphdr)); +- memset(&arg, 0, sizeof(arg)); +- +- arg.iov[0].iov_base = (unsigned char *)&rep; +- arg.iov[0].iov_len = sizeof(rep.th); +- if (ts) { +- rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | +- (TCPOPT_TIMESTAMP << 8) | +- TCPOLEN_TIMESTAMP); +- rep.opt[1] = htonl(tcp_time_stamp); +- rep.opt[2] = htonl(ts); +- arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED; +- } +- +- /* Swap the send and the receive. 
*/ +- rep.th.dest = th->source; +- rep.th.source = th->dest; +- rep.th.doff = arg.iov[0].iov_len / 4; +- rep.th.seq = htonl(seq); +- rep.th.ack_seq = htonl(ack); +- rep.th.ack = 1; +- rep.th.window = htons(win); +- +-#ifdef CONFIG_TCP_MD5SIG +- /* +- * The SKB holds an imcoming packet, but may not have a valid ->sk +- * pointer. This is especially the case when we're dealing with a +- * TIME_WAIT ack, because the sk structure is long gone, and only +- * the tcp_timewait_sock remains. So the md5 key is stashed in that +- * structure, and we use it in preference. I believe that (twsk || +- * skb->sk) holds true, but we program defensively. +- */ +- if (!twsk && skb->sk) { +- key = tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr); +- } else if (twsk && twsk->tw_md5_keylen) { +- tw_key.key = twsk->tw_md5_key; +- tw_key.keylen = twsk->tw_md5_keylen; +- key = &tw_key; +- } else +- key = NULL; +- +- if (key) { +- int offset = (ts) ? 3 : 0; +- +- rep.opt[offset++] = htonl((TCPOPT_NOP << 24) | +- (TCPOPT_NOP << 16) | +- (TCPOPT_MD5SIG << 8) | +- TCPOLEN_MD5SIG); +- arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED; +- rep.th.doff = arg.iov[0].iov_len/4; +- +- tcp_v4_do_calc_md5_hash((__u8 *)&rep.opt[offset], +- key, +- ip_hdr(skb)->daddr, +- ip_hdr(skb)->saddr, +- &rep.th, IPPROTO_TCP, +- arg.iov[0].iov_len); +- } +-#endif +- arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, +- ip_hdr(skb)->saddr, /* XXX */ +- arg.iov[0].iov_len, IPPROTO_TCP, 0); +- arg.csumoffset = offsetof(struct tcphdr, check) / 2; +- if (twsk) +- arg.bound_dev_if = twsk->tw_sk.tw_bound_dev_if; +- +- ip_send_reply(tcp_socket->sk, skb, &arg, arg.iov[0].iov_len); +- +- TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); +-} +- +-static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) +-{ +- struct inet_timewait_sock *tw = inet_twsk(sk); +- struct tcp_timewait_sock *tcptw = tcp_twsk(sk); +- +- tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, +- tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, +- tcptw->tw_ts_recent); +- +- inet_twsk_put(tw); +-} +- +-static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, +- struct request_sock *req) +-{ +- tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, +- tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, +- req->ts_recent); +-} +- +-/* +- * Send a SYN-ACK after having received an ACK. +- * This still operates on a request_sock only, not on a big +- * socket. +- */ +-static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +- struct dst_entry *dst) +-{ +- const struct inet_request_sock *ireq = inet_rsk(req); +- int err = -1; +- struct sk_buff * skb; +- +- /* First, grab a route. */ +- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) +- goto out; +- +- skb = tcp_make_synack(sk, dst, req); +- +- if (skb) { +- struct tcphdr *th = tcp_hdr(skb); +- +- th->check = tcp_v4_check(skb->len, +- ireq->loc_addr, +- ireq->rmt_addr, +- csum_partial((char *)th, skb->len, +- skb->csum)); +- +- err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, +- ireq->rmt_addr, +- ireq->opt); +- err = net_xmit_eval(err); +- } +- +-out: +- dst_release(dst); +- return err; +-} +- +-/* +- * IPv4 request_sock destructor. +- */ +-static void tcp_v4_reqsk_destructor(struct request_sock *req) +-{ +- kfree(inet_rsk(req)->opt); +-} +- +-#ifdef CONFIG_SYN_COOKIES +-static void syn_flood_warning(struct sk_buff *skb) +-{ +- static unsigned long warntime; +- +- if (time_after(jiffies, (warntime + HZ * 60))) { +- warntime = jiffies; +- printk(KERN_INFO +- "possible SYN flooding on port %d. 
Sending cookies.\n", +- ntohs(tcp_hdr(skb)->dest)); +- } +-} +-#endif +- +-/* +- * Save and compile IPv4 options into the request_sock if needed. +- */ +-static struct ip_options *tcp_v4_save_options(struct sock *sk, +- struct sk_buff *skb) +-{ +- struct ip_options *opt = &(IPCB(skb)->opt); +- struct ip_options *dopt = NULL; +- +- if (opt && opt->optlen) { +- int opt_size = optlength(opt); +- dopt = kmalloc(opt_size, GFP_ATOMIC); +- if (dopt) { +- if (ip_options_echo(dopt, skb)) { +- kfree(dopt); +- dopt = NULL; +- } +- } +- } +- return dopt; +-} +- +-#ifdef CONFIG_TCP_MD5SIG +-/* +- * RFC2385 MD5 checksumming requires a mapping of +- * IP address->MD5 Key. +- * We need to maintain these in the sk structure. +- */ +- +-/* Find the Key structure for an address. */ +-static struct tcp_md5sig_key * +- tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) +-{ +- struct tcp_sock *tp = tcp_sk(sk); +- int i; +- +- if (!tp->md5sig_info || !tp->md5sig_info->entries4) +- return NULL; +- for (i = 0; i < tp->md5sig_info->entries4; i++) { +- if (tp->md5sig_info->keys4[i].addr == addr) +- return &tp->md5sig_info->keys4[i].base; +- } +- return NULL; +-} +- +-struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, +- struct sock *addr_sk) +-{ +- return tcp_v4_md5_do_lookup(sk, inet_sk(addr_sk)->daddr); +-} +- +-EXPORT_SYMBOL(tcp_v4_md5_lookup); +- +-static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk, +- struct request_sock *req) +-{ +- return tcp_v4_md5_do_lookup(sk, inet_rsk(req)->rmt_addr); +-} +- +-/* This can be called on a newly created socket, from other files */ +-int tcp_v4_md5_do_add(struct sock *sk, __be32 addr, +- u8 *newkey, u8 newkeylen) +-{ +- /* Add Key to the list */ +- struct tcp4_md5sig_key *key; +- struct tcp_sock *tp = tcp_sk(sk); +- struct tcp4_md5sig_key *keys; +- +- key = (struct tcp4_md5sig_key *)tcp_v4_md5_do_lookup(sk, addr); +- if (key) { +- /* Pre-existing entry - just update that one. 
*/ +- kfree(key->base.key); +- key->base.key = newkey; +- key->base.keylen = newkeylen; +- } else { +- struct tcp_md5sig_info *md5sig; +- +- if (!tp->md5sig_info) { +- tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), +- GFP_ATOMIC); +- if (!tp->md5sig_info) { +- kfree(newkey); +- return -ENOMEM; +- } +- sk->sk_route_caps &= ~NETIF_F_GSO_MASK; +- } +- if (tcp_alloc_md5sig_pool() == NULL) { +- kfree(newkey); +- return -ENOMEM; +- } +- md5sig = tp->md5sig_info; +- +- if (md5sig->alloced4 == md5sig->entries4) { +- keys = kmalloc((sizeof(*keys) * +- (md5sig->entries4 + 1)), GFP_ATOMIC); +- if (!keys) { +- kfree(newkey); +- tcp_free_md5sig_pool(); +- return -ENOMEM; +- } +- +- if (md5sig->entries4) +- memcpy(keys, md5sig->keys4, +- sizeof(*keys) * md5sig->entries4); +- +- /* Free old key list, and reference new one */ +- if (md5sig->keys4) +- kfree(md5sig->keys4); +- md5sig->keys4 = keys; +- md5sig->alloced4++; +- } +- md5sig->entries4++; +- md5sig->keys4[md5sig->entries4 - 1].addr = addr; +- md5sig->keys4[md5sig->entries4 - 1].base.key = newkey; +- md5sig->keys4[md5sig->entries4 - 1].base.keylen = newkeylen; +- } +- return 0; +-} +- +-EXPORT_SYMBOL(tcp_v4_md5_do_add); +- +-static int tcp_v4_md5_add_func(struct sock *sk, struct sock *addr_sk, +- u8 *newkey, u8 newkeylen) +-{ +- return tcp_v4_md5_do_add(sk, inet_sk(addr_sk)->daddr, +- newkey, newkeylen); +-} +- +-int tcp_v4_md5_do_del(struct sock *sk, __be32 addr) +-{ +- struct tcp_sock *tp = tcp_sk(sk); +- int i; +- +- for (i = 0; i < tp->md5sig_info->entries4; i++) { +- if (tp->md5sig_info->keys4[i].addr == addr) { +- /* Free the key */ +- kfree(tp->md5sig_info->keys4[i].base.key); +- tp->md5sig_info->entries4--; +- +- if (tp->md5sig_info->entries4 == 0) { +- kfree(tp->md5sig_info->keys4); +- tp->md5sig_info->keys4 = NULL; +- tp->md5sig_info->alloced4 = 0; +- } else if (tp->md5sig_info->entries4 != i) { +- /* Need to do some manipulation */ +- memcpy(&tp->md5sig_info->keys4[i], +- &tp->md5sig_info->keys4[i+1], +- (tp->md5sig_info->entries4 - i) * +- sizeof(struct tcp4_md5sig_key)); +- } +- tcp_free_md5sig_pool(); +- return 0; +- } +- } +- return -ENOENT; +-} +- +-EXPORT_SYMBOL(tcp_v4_md5_do_del); +- +-static void tcp_v4_clear_md5_list(struct sock *sk) +-{ +- struct tcp_sock *tp = tcp_sk(sk); +- +- /* Free each key, then the set of key keys, +- * the crypto element, and then decrement our +- * hold on the last resort crypto. 
+- */ +- if (tp->md5sig_info->entries4) { +- int i; +- for (i = 0; i < tp->md5sig_info->entries4; i++) +- kfree(tp->md5sig_info->keys4[i].base.key); +- tp->md5sig_info->entries4 = 0; +- tcp_free_md5sig_pool(); +- } +- if (tp->md5sig_info->keys4) { +- kfree(tp->md5sig_info->keys4); +- tp->md5sig_info->keys4 = NULL; +- tp->md5sig_info->alloced4 = 0; +- } +-} +- +-static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, +- int optlen) +-{ +- struct tcp_md5sig cmd; +- struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; +- u8 *newkey; +- +- if (optlen < sizeof(cmd)) +- return -EINVAL; +- +- if (copy_from_user(&cmd, optval, sizeof(cmd))) +- return -EFAULT; +- +- if (sin->sin_family != AF_INET) +- return -EINVAL; +- +- if (!cmd.tcpm_key || !cmd.tcpm_keylen) { +- if (!tcp_sk(sk)->md5sig_info) +- return -ENOENT; +- return tcp_v4_md5_do_del(sk, sin->sin_addr.s_addr); +- } +- +- if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) +- return -EINVAL; +- +- if (!tcp_sk(sk)->md5sig_info) { +- struct tcp_sock *tp = tcp_sk(sk); +- struct tcp_md5sig_info *p = kzalloc(sizeof(*p), GFP_KERNEL); +- +- if (!p) +- return -EINVAL; +- +- tp->md5sig_info = p; +- sk->sk_route_caps &= ~NETIF_F_GSO_MASK; +- } +- +- newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); +- if (!newkey) +- return -ENOMEM; +- return tcp_v4_md5_do_add(sk, sin->sin_addr.s_addr, +- newkey, cmd.tcpm_keylen); +-} +- +-static int tcp_v4_do_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, +- __be32 saddr, __be32 daddr, +- struct tcphdr *th, int protocol, +- int tcplen) +-{ +- struct scatterlist sg[4]; +- __u16 data_len; +- int block = 0; +- __sum16 old_checksum; +- struct tcp_md5sig_pool *hp; +- struct tcp4_pseudohdr *bp; +- struct hash_desc *desc; +- int err; +- unsigned int nbytes = 0; +- +- /* +- * Okay, so RFC2385 is turned on for this connection, +- * so we need to generate the MD5 hash for the packet now. +- */ +- +- hp = tcp_get_md5sig_pool(); +- if (!hp) +- goto clear_hash_noput; +- +- bp = &hp->md5_blk.ip4; +- desc = &hp->md5_desc; +- +- /* +- * 1. the TCP pseudo-header (in the order: source IP address, +- * destination IP address, zero-padded protocol number, and +- * segment length) +- */ +- bp->saddr = saddr; +- bp->daddr = daddr; +- bp->pad = 0; +- bp->protocol = protocol; +- bp->len = htons(tcplen); +- sg_set_buf(&sg[block++], bp, sizeof(*bp)); +- nbytes += sizeof(*bp); +- +- /* 2. the TCP header, excluding options, and assuming a +- * checksum of zero/ +- */ +- old_checksum = th->check; +- th->check = 0; +- sg_set_buf(&sg[block++], th, sizeof(struct tcphdr)); +- nbytes += sizeof(struct tcphdr); +- +- /* 3. the TCP segment data (if any) */ +- data_len = tcplen - (th->doff << 2); +- if (data_len > 0) { +- unsigned char *data = (unsigned char *)th + (th->doff << 2); +- sg_set_buf(&sg[block++], data, data_len); +- nbytes += data_len; +- } +- +- /* 4. 
an independently-specified key or password, known to both +- * TCPs and presumably connection-specific +- */ +- sg_set_buf(&sg[block++], key->key, key->keylen); +- nbytes += key->keylen; +- +- /* Now store the Hash into the packet */ +- err = crypto_hash_init(desc); +- if (err) +- goto clear_hash; +- err = crypto_hash_update(desc, sg, nbytes); +- if (err) +- goto clear_hash; +- err = crypto_hash_final(desc, md5_hash); +- if (err) +- goto clear_hash; +- +- /* Reset header, and free up the crypto */ +- tcp_put_md5sig_pool(); +- th->check = old_checksum; +- +-out: +- return 0; +-clear_hash: +- tcp_put_md5sig_pool(); +-clear_hash_noput: +- memset(md5_hash, 0, 16); +- goto out; +-} +- +-int tcp_v4_calc_md5_hash(char *md5_hash, struct tcp_md5sig_key *key, +- struct sock *sk, +- struct dst_entry *dst, +- struct request_sock *req, +- struct tcphdr *th, int protocol, +- int tcplen) +-{ +- __be32 saddr, daddr; +- +- if (sk) { +- saddr = inet_sk(sk)->saddr; +- daddr = inet_sk(sk)->daddr; +- } else { +- struct rtable *rt = (struct rtable *)dst; +- BUG_ON(!rt); +- saddr = rt->rt_src; +- daddr = rt->rt_dst; +- } +- return tcp_v4_do_calc_md5_hash(md5_hash, key, +- saddr, daddr, +- th, protocol, tcplen); +-} +- +-EXPORT_SYMBOL(tcp_v4_calc_md5_hash); +- +-static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) +-{ +- /* +- * This gets called for each TCP segment that arrives +- * so we want to be efficient. +- * We have 3 drop cases: +- * o No MD5 hash and one expected. +- * o MD5 hash and we're not expecting one. +- * o MD5 hash and its wrong. +- */ +- __u8 *hash_location = NULL; +- struct tcp_md5sig_key *hash_expected; +- const struct iphdr *iph = ip_hdr(skb); +- struct tcphdr *th = tcp_hdr(skb); +- int length = (th->doff << 2) - sizeof(struct tcphdr); +- int genhash; +- unsigned char *ptr; +- unsigned char newhash[16]; +- +- hash_expected = tcp_v4_md5_do_lookup(sk, iph->saddr); +- +- /* +- * If the TCP option length is less than the TCP_MD5SIG +- * option length, then we can shortcut +- */ +- if (length < TCPOLEN_MD5SIG) { +- if (hash_expected) +- return 1; +- else +- return 0; +- } +- +- /* Okay, we can't shortcut - we have to grub through the options */ +- ptr = (unsigned char *)(th + 1); +- while (length > 0) { +- int opcode = *ptr++; +- int opsize; +- +- switch (opcode) { +- case TCPOPT_EOL: +- goto done_opts; +- case TCPOPT_NOP: +- length--; +- continue; +- default: +- opsize = *ptr++; +- if (opsize < 2) +- goto done_opts; +- if (opsize > length) +- goto done_opts; +- +- if (opcode == TCPOPT_MD5SIG) { +- hash_location = ptr; +- goto done_opts; +- } +- } +- ptr += opsize-2; +- length -= opsize; +- } +-done_opts: +- /* We've parsed the options - do we have a hash? */ +- if (!hash_expected && !hash_location) +- return 0; +- +- if (hash_expected && !hash_location) { +- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash expected but NOT found " +- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", +- NIPQUAD(iph->saddr), ntohs(th->source), +- NIPQUAD(iph->daddr), ntohs(th->dest)); +- return 1; +- } +- +- if (!hash_expected && hash_location) { +- LIMIT_NETDEBUG(KERN_INFO "MD5 Hash NOT expected but found " +- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)\n", +- NIPQUAD(iph->saddr), ntohs(th->source), +- NIPQUAD(iph->daddr), ntohs(th->dest)); +- return 1; +- } +- +- /* Okay, so this is hash_expected and hash_location - +- * so we need to calculate the checksum. 
+- */ +- genhash = tcp_v4_do_calc_md5_hash(newhash, +- hash_expected, +- iph->saddr, iph->daddr, +- th, sk->sk_protocol, +- skb->len); +- +- if (genhash || memcmp(hash_location, newhash, 16) != 0) { +- if (net_ratelimit()) { +- printk(KERN_INFO "MD5 Hash failed for " +- "(" NIPQUAD_FMT ", %d)->(" NIPQUAD_FMT ", %d)%s\n", +- NIPQUAD(iph->saddr), ntohs(th->source), +- NIPQUAD(iph->daddr), ntohs(th->dest), +- genhash ? " tcp_v4_calc_md5_hash failed" : ""); +- } +- return 1; +- } +- return 0; +-} +- +-#endif +- +-struct request_sock_ops tcp_request_sock_ops __read_mostly = { +- .family = PF_INET, +- .obj_size = sizeof(struct tcp_request_sock), +- .rtx_syn_ack = tcp_v4_send_synack, +- .send_ack = tcp_v4_reqsk_send_ack, +- .destructor = tcp_v4_reqsk_destructor, +- .send_reset = tcp_v4_send_reset, +-}; +- +-#ifdef CONFIG_TCP_MD5SIG +-static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { +- .md5_lookup = tcp_v4_reqsk_md5_lookup, +-}; +-#endif +- +-static struct timewait_sock_ops tcp_timewait_sock_ops = { +- .twsk_obj_size = sizeof(struct tcp_timewait_sock), +- .twsk_unique = tcp_twsk_unique, +- .twsk_destructor= tcp_twsk_destructor, +-}; +- +-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +-{ +- struct inet_request_sock *ireq; +- struct tcp_options_received tmp_opt; +- struct request_sock *req; +- __be32 saddr = ip_hdr(skb)->saddr; +- __be32 daddr = ip_hdr(skb)->daddr; +- __u32 isn = TCP_SKB_CB(skb)->when; +- struct dst_entry *dst = NULL; +-#ifdef CONFIG_SYN_COOKIES +- int want_cookie = 0; +-#else +-#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ +-#endif +- +- /* Never answer to SYNs send to broadcast or multicast */ +- if (((struct rtable *)skb->dst)->rt_flags & +- (RTCF_BROADCAST | RTCF_MULTICAST)) +- goto drop; +- +- /* TW buckets are converted to open requests without +- * limitations, they conserve resources and peer is +- * evidently real one. +- */ +- if (inet_csk_reqsk_queue_is_full(sk) && !isn) { +-#ifdef CONFIG_SYN_COOKIES +- if (sysctl_tcp_syncookies) { +- want_cookie = 1; +- } else +-#endif +- goto drop; +- } +- +- /* Accept backlog is full. If we have already queued enough +- * of warm entries in syn queue, drop request. It is better than +- * clogging syn queue with openreqs with exponentially increasing +- * timeout. +- */ +- if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) +- goto drop; +- +- req = reqsk_alloc(&tcp_request_sock_ops); +- if (!req) +- goto drop; +- +-#ifdef CONFIG_TCP_MD5SIG +- tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops; +-#endif +- +- tcp_clear_options(&tmp_opt); +- tmp_opt.mss_clamp = 536; +- tmp_opt.user_mss = tcp_sk(sk)->rx_opt.user_mss; +- +- tcp_parse_options(skb, &tmp_opt, 0); +- +- if (want_cookie) { +- tcp_clear_options(&tmp_opt); +- tmp_opt.saw_tstamp = 0; +- } +- +- if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { +- /* Some OSes (unknown ones, but I see them on web server, which +- * contains information interesting only for windows' +- * users) do not send their stamp in SYN. It is easy case. +- * We simply do not advertise TS support. 
+- */ +- tmp_opt.saw_tstamp = 0; +- tmp_opt.tstamp_ok = 0; +- } +- tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; +- +- tcp_openreq_init(req, &tmp_opt, skb); +- +- if (security_inet_conn_request(sk, skb, req)) +- goto drop_and_free; +- +- ireq = inet_rsk(req); +- ireq->loc_addr = daddr; +- ireq->rmt_addr = saddr; +- ireq->opt = tcp_v4_save_options(sk, skb); +- if (!want_cookie) +- TCP_ECN_create_request(req, tcp_hdr(skb)); +- +- if (want_cookie) { +-#ifdef CONFIG_SYN_COOKIES +- syn_flood_warning(skb); +-#endif +- isn = cookie_v4_init_sequence(sk, skb, &req->mss); +- } else if (!isn) { +- struct inet_peer *peer = NULL; +- +- /* VJ's idea. We save last timestamp seen +- * from the destination in peer table, when entering +- * state TIME-WAIT, and check against it before +- * accepting new connection request. +- * +- * If "isn" is not zero, this request hit alive +- * timewait bucket, so that all the necessary checks +- * are made in the function processing timewait state. +- */ +- if (tmp_opt.saw_tstamp && +- tcp_death_row.sysctl_tw_recycle && +- (dst = inet_csk_route_req(sk, req)) != NULL && +- (peer = rt_get_peer((struct rtable *)dst)) != NULL && +- peer->v4daddr == saddr) { +- if (get_seconds() < peer->tcp_ts_stamp + TCP_PAWS_MSL && +- (s32)(peer->tcp_ts - req->ts_recent) > +- TCP_PAWS_WINDOW) { +- NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED); +- dst_release(dst); +- goto drop_and_free; +- } +- } +- /* Kill the following clause, if you dislike this way. */ +- else if (!sysctl_tcp_syncookies && +- (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < +- (sysctl_max_syn_backlog >> 2)) && +- (!peer || !peer->tcp_ts_stamp) && +- (!dst || !dst_metric(dst, RTAX_RTT))) { +- /* Without syncookies last quarter of +- * backlog is filled with destinations, +- * proven to be alive. +- * It means that we continue to communicate +- * to destinations, already remembered +- * to the moment of synflood. +- */ +- LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open " +- "request from %u.%u.%u.%u/%u\n", +- NIPQUAD(saddr), +- ntohs(tcp_hdr(skb)->source)); +- dst_release(dst); +- goto drop_and_free; +- } +- +- isn = tcp_v4_init_sequence(skb); +- } +- tcp_rsk(req)->snt_isn = isn; +- +- if (tcp_v4_send_synack(sk, req, dst)) +- goto drop_and_free; +- +- if (want_cookie) { +- reqsk_free(req); +- } else { +- inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); +- } +- return 0; +- +-drop_and_free: +- reqsk_free(req); +-drop: +- return 0; +-} +- +- +-/* +- * The three way handshake has completed - we got a valid synack - +- * now create the new socket. 
+- */ +-struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +- struct request_sock *req, +- struct dst_entry *dst) +-{ +- struct inet_request_sock *ireq; +- struct inet_sock *newinet; +- struct tcp_sock *newtp; +- struct sock *newsk; +-#ifdef CONFIG_TCP_MD5SIG +- struct tcp_md5sig_key *key; +-#endif +- +- if (sk_acceptq_is_full(sk)) +- goto exit_overflow; +- +- if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) +- goto exit; +- +- newsk = tcp_create_openreq_child(sk, req, skb); +- if (!newsk) +- goto exit; +- +- newsk->sk_gso_type = SKB_GSO_TCPV4; +- sk_setup_caps(newsk, dst); +- +- newtp = tcp_sk(newsk); +- newinet = inet_sk(newsk); +- ireq = inet_rsk(req); +- newinet->daddr = ireq->rmt_addr; +- newinet->rcv_saddr = ireq->loc_addr; +- newinet->saddr = ireq->loc_addr; +- newinet->opt = ireq->opt; +- ireq->opt = NULL; +- newinet->mc_index = inet_iif(skb); +- newinet->mc_ttl = ip_hdr(skb)->ttl; +- inet_csk(newsk)->icsk_ext_hdr_len = 0; +- if (newinet->opt) +- inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen; +- newinet->id = newtp->write_seq ^ jiffies; +- +- tcp_mtup_init(newsk); +- tcp_sync_mss(newsk, dst_mtu(dst)); +- newtp->advmss = dst_metric(dst, RTAX_ADVMSS); +- tcp_initialize_rcv_mss(newsk); +- +-#ifdef CONFIG_TCP_MD5SIG +- /* Copy over the MD5 key from the original socket */ +- if ((key = tcp_v4_md5_do_lookup(sk, newinet->daddr)) != NULL) { +- /* +- * We're using one, so create a matching key +- * on the newsk structure. If we fail to get +- * memory, then we end up not copying the key +- * across. Shucks. +- */ +- char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC); +- if (newkey != NULL) +- tcp_v4_md5_do_add(newsk, inet_sk(sk)->daddr, +- newkey, key->keylen); +- } +-#endif +- +- __inet_hash(&tcp_hashinfo, newsk, 0); +- __inet_inherit_port(&tcp_hashinfo, sk, newsk); +- +- return newsk; +- +-exit_overflow: +- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); +-exit: +- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); +- dst_release(dst); +- return NULL; +-} +- +-static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) +-{ +- struct tcphdr *th = tcp_hdr(skb); +- const struct iphdr *iph = ip_hdr(skb); +- struct sock *nsk; +- struct request_sock **prev; +- /* Find possible connection requests. */ +- struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, +- iph->saddr, iph->daddr); +- if (req) +- return tcp_check_req(sk, skb, req, prev); +- +- nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source, +- iph->daddr, th->dest, inet_iif(skb)); +- +- if (nsk) { +- if (nsk->sk_state != TCP_TIME_WAIT) { +- bh_lock_sock(nsk); +- return nsk; +- } +- inet_twsk_put(inet_twsk(nsk)); +- return NULL; +- } +- +-#ifdef CONFIG_SYN_COOKIES +- if (!th->rst && !th->syn && th->ack) +- sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt)); +-#endif +- return sk; +-} +- +-static __sum16 tcp_v4_checksum_init(struct sk_buff *skb) +-{ +- const struct iphdr *iph = ip_hdr(skb); +- +- if (skb->ip_summed == CHECKSUM_COMPLETE) { +- if (!tcp_v4_check(skb->len, iph->saddr, +- iph->daddr, skb->csum)) { +- skb->ip_summed = CHECKSUM_UNNECESSARY; +- return 0; +- } +- } +- +- skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, +- skb->len, IPPROTO_TCP, 0); +- +- if (skb->len <= 76) { +- return __skb_checksum_complete(skb); +- } +- return 0; +-} +- +- +-/* The socket must have it's spinlock held when we get +- * here. +- * +- * We have a potential double-lock case here, so even when +- * doing backlog processing we use the BH locking scheme. 
+- * This is because we cannot sleep with the original spinlock +- * held. +- */ +-int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +-{ +- struct sock *rsk; +-#ifdef CONFIG_TCP_MD5SIG +- /* +- * We really want to reject the packet as early as possible +- * if: +- * o We're expecting an MD5'd packet and this is no MD5 tcp option +- * o There is an MD5 option and we're not expecting one +- */ +- if (tcp_v4_inbound_md5_hash(sk, skb)) +- goto discard; +-#endif +- +- if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ +- TCP_CHECK_TIMER(sk); +- if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { +- rsk = sk; +- goto reset; +- } +- TCP_CHECK_TIMER(sk); +- return 0; +- } +- +- if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) +- goto csum_err; +- +- if (sk->sk_state == TCP_LISTEN) { +- struct sock *nsk = tcp_v4_hnd_req(sk, skb); +- if (!nsk) +- goto discard; +- +- if (nsk != sk) { +- if (tcp_child_process(sk, nsk, skb)) { +- rsk = nsk; +- goto reset; +- } +- return 0; +- } +- } +- +- TCP_CHECK_TIMER(sk); +- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { +- rsk = sk; +- goto reset; +- } +- TCP_CHECK_TIMER(sk); +- return 0; +- +-reset: +- tcp_v4_send_reset(rsk, skb); +-discard: +- kfree_skb(skb); +- /* Be careful here. If this function gets more complicated and +- * gcc suffers from register pressure on the x86, sk (in %ebx) +- * might be destroyed here. This current version compiles correctly, +- * but you have been warned. +- */ +- return 0; +- +-csum_err: +- TCP_INC_STATS_BH(TCP_MIB_INERRS); +- goto discard; +-} +- +-/* +- * From tcp_input.c +- */ +- +-int tcp_v4_rcv(struct sk_buff *skb) +-{ +- const struct iphdr *iph; +- struct tcphdr *th; +- struct sock *sk; +- int ret; +- +- if (skb->pkt_type != PACKET_HOST) +- goto discard_it; +- +- /* Count it even if it's bad */ +- TCP_INC_STATS_BH(TCP_MIB_INSEGS); +- +- if (!pskb_may_pull(skb, sizeof(struct tcphdr))) +- goto discard_it; +- +- th = tcp_hdr(skb); +- +- if (th->doff < sizeof(struct tcphdr) / 4) +- goto bad_packet; +- if (!pskb_may_pull(skb, th->doff * 4)) +- goto discard_it; +- +- /* An explanation is required here, I think. +- * Packet length and doff are validated by header prediction, +- * provided case of th->doff==0 is eliminated. +- * So, we defer the checks. 
*/ +- if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) +- goto bad_packet; +- +- th = tcp_hdr(skb); +- iph = ip_hdr(skb); +- TCP_SKB_CB(skb)->seq = ntohl(th->seq); +- TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + +- skb->len - th->doff * 4); +- TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); +- TCP_SKB_CB(skb)->when = 0; +- TCP_SKB_CB(skb)->flags = iph->tos; +- TCP_SKB_CB(skb)->sacked = 0; +- +- sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source, +- iph->daddr, th->dest, inet_iif(skb)); +- if (!sk) +- goto no_tcp_socket; +- +-process: +- if (sk->sk_state == TCP_TIME_WAIT) +- goto do_time_wait; +- +- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) +- goto discard_and_relse; +- nf_reset(skb); +- +- if (sk_filter(sk, skb)) +- goto discard_and_relse; +- +- skb->dev = NULL; +- +- bh_lock_sock_nested(sk); +- ret = 0; +- if (!sock_owned_by_user(sk)) { +-#ifdef CONFIG_NET_DMA +- struct tcp_sock *tp = tcp_sk(sk); +- if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) +- tp->ucopy.dma_chan = get_softnet_dma(); +- if (tp->ucopy.dma_chan) +- ret = tcp_v4_do_rcv(sk, skb); +- else +-#endif +- { +- if (!tcp_prequeue(sk, skb)) +- ret = tcp_v4_do_rcv(sk, skb); +- } +- } else +- sk_add_backlog(sk, skb); +- bh_unlock_sock(sk); +- +- sock_put(sk); +- +- return ret; +- +-no_tcp_socket: +- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) +- goto discard_it; +- +- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { +-bad_packet: +- TCP_INC_STATS_BH(TCP_MIB_INERRS); +- } else { +- tcp_v4_send_reset(NULL, skb); +- } +- +-discard_it: +- /* Discard frame. */ +- kfree_skb(skb); +- return 0; +- +-discard_and_relse: +- sock_put(sk); +- goto discard_it; +- +-do_time_wait: +- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { +- inet_twsk_put(inet_twsk(sk)); +- goto discard_it; +- } +- +- if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { +- TCP_INC_STATS_BH(TCP_MIB_INERRS); +- inet_twsk_put(inet_twsk(sk)); +- goto discard_it; +- } +- switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { +- case TCP_TW_SYN: { +- struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, +- iph->daddr, th->dest, +- inet_iif(skb)); +- if (sk2) { +- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); +- inet_twsk_put(inet_twsk(sk)); +- sk = sk2; +- goto process; +- } +- /* Fall through to ACK */ +- } +- case TCP_TW_ACK: +- tcp_v4_timewait_ack(sk, skb); +- break; +- case TCP_TW_RST: +- goto no_tcp_socket; +- case TCP_TW_SUCCESS:; +- } +- goto discard_it; +-} +- +-/* VJ's idea. Save last timestamp seen from this destination +- * and hold it at least for normal timewait interval to use for duplicate +- * segment detection in subsequent connections, before they enter synchronized +- * state. 
+- */ +- +-int tcp_v4_remember_stamp(struct sock *sk) +-{ +- struct inet_sock *inet = inet_sk(sk); +- struct tcp_sock *tp = tcp_sk(sk); +- struct rtable *rt = (struct rtable *)__sk_dst_get(sk); +- struct inet_peer *peer = NULL; +- int release_it = 0; +- +- if (!rt || rt->rt_dst != inet->daddr) { +- peer = inet_getpeer(inet->daddr, 1); +- release_it = 1; +- } else { +- if (!rt->peer) +- rt_bind_peer(rt, 1); +- peer = rt->peer; +- } +- +- if (peer) { +- if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || +- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && +- peer->tcp_ts_stamp <= tp->rx_opt.ts_recent_stamp)) { +- peer->tcp_ts_stamp = tp->rx_opt.ts_recent_stamp; +- peer->tcp_ts = tp->rx_opt.ts_recent; +- } +- if (release_it) +- inet_putpeer(peer); +- return 1; +- } +- +- return 0; +-} +- +-int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) +-{ +- struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); +- +- if (peer) { +- const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); +- +- if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || +- (peer->tcp_ts_stamp + TCP_PAWS_MSL < get_seconds() && +- peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { +- peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; +- peer->tcp_ts = tcptw->tw_ts_recent; +- } +- inet_putpeer(peer); +- return 1; +- } +- +- return 0; +-} +- +-struct inet_connection_sock_af_ops ipv4_specific = { +- .queue_xmit = ip_queue_xmit, +- .send_check = tcp_v4_send_check, +- .rebuild_header = inet_sk_rebuild_header, +- .conn_request = tcp_v4_conn_request, +- .syn_recv_sock = tcp_v4_syn_recv_sock, +- .remember_stamp = tcp_v4_remember_stamp, +- .net_header_len = sizeof(struct iphdr), +- .setsockopt = ip_setsockopt, +- .getsockopt = ip_getsockopt, +- .addr2sockaddr = inet_csk_addr2sockaddr, +- .sockaddr_len = sizeof(struct sockaddr_in), +-#ifdef CONFIG_COMPAT +- .compat_setsockopt = compat_ip_setsockopt, +- .compat_getsockopt = compat_ip_getsockopt, +-#endif +-}; +- +-#ifdef CONFIG_TCP_MD5SIG +-static struct tcp_sock_af_ops tcp_sock_ipv4_specific = { +- .md5_lookup = tcp_v4_md5_lookup, +- .calc_md5_hash = tcp_v4_calc_md5_hash, +- .md5_add = tcp_v4_md5_add_func, +- .md5_parse = tcp_v4_parse_md5_keys, +-}; +-#endif +- +-/* NOTE: A lot of things set to zero explicitly by call to +- * sk_alloc() so need not be done here. +- */ +-static int tcp_v4_init_sock(struct sock *sk) +-{ +- struct inet_connection_sock *icsk = inet_csk(sk); +- struct tcp_sock *tp = tcp_sk(sk); +- +- skb_queue_head_init(&tp->out_of_order_queue); +- tcp_init_xmit_timers(sk); +- tcp_prequeue_init(tp); +- +- icsk->icsk_rto = TCP_TIMEOUT_INIT; +- tp->mdev = TCP_TIMEOUT_INIT; +- +- /* So many TCP implementations out there (incorrectly) count the +- * initial SYN frame in their delayed-ACK and congestion control +- * algorithms that we must have the following bandaid to talk +- * efficiently to them. -DaveM +- */ +- tp->snd_cwnd = 2; +- +- /* See draft-stevens-tcpca-spec-01 for discussion of the +- * initialization of these values. 
+- */ +- tp->snd_ssthresh = 0x7fffffff; /* Infinity */ +- tp->snd_cwnd_clamp = ~0; +- tp->mss_cache = 536; +- +- tp->reordering = sysctl_tcp_reordering; +- icsk->icsk_ca_ops = &tcp_init_congestion_ops; +- +- sk->sk_state = TCP_CLOSE; +- +- sk->sk_write_space = sk_stream_write_space; +- sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); +- +- icsk->icsk_af_ops = &ipv4_specific; +- icsk->icsk_sync_mss = tcp_sync_mss; +-#ifdef CONFIG_TCP_MD5SIG +- tp->af_specific = &tcp_sock_ipv4_specific; +-#endif +- +- sk->sk_sndbuf = sysctl_tcp_wmem[1]; +- sk->sk_rcvbuf = sysctl_tcp_rmem[1]; +- +- atomic_inc(&tcp_sockets_allocated); +- +- return 0; +-} +- +-int tcp_v4_destroy_sock(struct sock *sk) +-{ +- struct tcp_sock *tp = tcp_sk(sk); +- +- tcp_clear_xmit_timers(sk); +- +- tcp_cleanup_congestion_control(sk); +- +- /* Cleanup up the write buffer. */ +- tcp_write_queue_purge(sk); +- +- /* Cleans up our, hopefully empty, out_of_order_queue. */ +- __skb_queue_purge(&tp->out_of_order_queue); +- +-#ifdef CONFIG_TCP_MD5SIG +- /* Clean up the MD5 key list, if any */ +- if (tp->md5sig_info) { +- tcp_v4_clear_md5_list(sk); +- kfree(tp->md5sig_info); +- tp->md5sig_info = NULL; +- } +-#endif +- +-#ifdef CONFIG_NET_DMA +- /* Cleans up our sk_async_wait_queue */ +- __skb_queue_purge(&sk->sk_async_wait_queue); +-#endif +- +- /* Clean prequeue, it must be empty really */ +- __skb_queue_purge(&tp->ucopy.prequeue); +- +- /* Clean up a referenced TCP bind bucket. */ +- if (inet_csk(sk)->icsk_bind_hash) +- inet_put_port(&tcp_hashinfo, sk); +- +- /* +- * If sendmsg cached page exists, toss it. +- */ +- if (sk->sk_sndmsg_page) { +- __free_page(sk->sk_sndmsg_page); +- sk->sk_sndmsg_page = NULL; +- } +- +- atomic_dec(&tcp_sockets_allocated); +- +- return 0; +-} +- +-EXPORT_SYMBOL(tcp_v4_destroy_sock); +- +-#ifdef CONFIG_PROC_FS +-/* Proc filesystem TCP sock list dumping. */ +- +-static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) +-{ +- return hlist_empty(head) ? NULL : +- list_entry(head->first, struct inet_timewait_sock, tw_node); +-} +- +-static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) +-{ +- return tw->tw_node.next ? 
+- hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; +-} +- +-static void *listening_get_next(struct seq_file *seq, void *cur) +-{ +- struct inet_connection_sock *icsk; +- struct hlist_node *node; +- struct sock *sk = cur; +- struct tcp_iter_state* st = seq->private; +- +- if (!sk) { +- st->bucket = 0; +- sk = sk_head(&tcp_hashinfo.listening_hash[0]); +- goto get_sk; +- } +- +- ++st->num; +- +- if (st->state == TCP_SEQ_STATE_OPENREQ) { +- struct request_sock *req = cur; +- +- icsk = inet_csk(st->syn_wait_sk); +- req = req->dl_next; +- while (1) { +- while (req) { +- if (req->rsk_ops->family == st->family) { +- cur = req; +- goto out; +- } +- req = req->dl_next; +- } +- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) +- break; +-get_req: +- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; +- } +- sk = sk_next(st->syn_wait_sk); +- st->state = TCP_SEQ_STATE_LISTENING; +- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- } else { +- icsk = inet_csk(sk); +- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- if (reqsk_queue_len(&icsk->icsk_accept_queue)) +- goto start_req; +- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- sk = sk_next(sk); +- } +-get_sk: +- sk_for_each_from(sk, node) { +- if (sk->sk_family == st->family) { +- cur = sk; +- goto out; +- } +- icsk = inet_csk(sk); +- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- if (reqsk_queue_len(&icsk->icsk_accept_queue)) { +-start_req: +- st->uid = sock_i_uid(sk); +- st->syn_wait_sk = sk; +- st->state = TCP_SEQ_STATE_OPENREQ; +- st->sbucket = 0; +- goto get_req; +- } +- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- } +- if (++st->bucket < INET_LHTABLE_SIZE) { +- sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); +- goto get_sk; +- } +- cur = NULL; +-out: +- return cur; +-} +- +-static void *listening_get_idx(struct seq_file *seq, loff_t *pos) +-{ +- void *rc = listening_get_next(seq, NULL); +- +- while (rc && *pos) { +- rc = listening_get_next(seq, rc); +- --*pos; +- } +- return rc; +-} +- +-static void *established_get_first(struct seq_file *seq) +-{ +- struct tcp_iter_state* st = seq->private; +- void *rc = NULL; +- +- for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { +- struct sock *sk; +- struct hlist_node *node; +- struct inet_timewait_sock *tw; +- +- /* We can reschedule _before_ having picked the target: */ +- cond_resched_softirq(); +- +- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); +- sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { +- if (sk->sk_family != st->family) { +- continue; +- } +- rc = sk; +- goto out; +- } +- st->state = TCP_SEQ_STATE_TIME_WAIT; +- inet_twsk_for_each(tw, node, +- &tcp_hashinfo.ehash[st->bucket].twchain) { +- if (tw->tw_family != st->family) { +- continue; +- } +- rc = tw; +- goto out; +- } +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); +- st->state = TCP_SEQ_STATE_ESTABLISHED; +- } +-out: +- return rc; +-} +- +-static void *established_get_next(struct seq_file *seq, void *cur) +-{ +- struct sock *sk = cur; +- struct inet_timewait_sock *tw; +- struct hlist_node *node; +- struct tcp_iter_state* st = seq->private; +- +- ++st->num; +- +- if (st->state == TCP_SEQ_STATE_TIME_WAIT) { +- tw = cur; +- tw = tw_next(tw); +-get_tw: +- while (tw && tw->tw_family != st->family) { +- tw = tw_next(tw); +- } +- if (tw) { +- cur = tw; +- goto out; +- } +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); +- st->state = TCP_SEQ_STATE_ESTABLISHED; +- +- /* We can 
reschedule between buckets: */ +- cond_resched_softirq(); +- +- if (++st->bucket < tcp_hashinfo.ehash_size) { +- read_lock(&tcp_hashinfo.ehash[st->bucket].lock); +- sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); +- } else { +- cur = NULL; +- goto out; +- } +- } else +- sk = sk_next(sk); +- +- sk_for_each_from(sk, node) { +- if (sk->sk_family == st->family) +- goto found; +- } +- +- st->state = TCP_SEQ_STATE_TIME_WAIT; +- tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); +- goto get_tw; +-found: +- cur = sk; +-out: +- return cur; +-} +- +-static void *established_get_idx(struct seq_file *seq, loff_t pos) +-{ +- void *rc = established_get_first(seq); +- +- while (rc && pos) { +- rc = established_get_next(seq, rc); +- --pos; +- } +- return rc; +-} +- +-static void *tcp_get_idx(struct seq_file *seq, loff_t pos) +-{ +- void *rc; +- struct tcp_iter_state* st = seq->private; +- +- inet_listen_lock(&tcp_hashinfo); +- st->state = TCP_SEQ_STATE_LISTENING; +- rc = listening_get_idx(seq, &pos); +- +- if (!rc) { +- inet_listen_unlock(&tcp_hashinfo); +- local_bh_disable(); +- st->state = TCP_SEQ_STATE_ESTABLISHED; +- rc = established_get_idx(seq, pos); +- } +- +- return rc; +-} +- +-static void *tcp_seq_start(struct seq_file *seq, loff_t *pos) +-{ +- struct tcp_iter_state* st = seq->private; +- st->state = TCP_SEQ_STATE_LISTENING; +- st->num = 0; +- return *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; +-} +- +-static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +-{ +- void *rc = NULL; +- struct tcp_iter_state* st; +- +- if (v == SEQ_START_TOKEN) { +- rc = tcp_get_idx(seq, 0); +- goto out; +- } +- st = seq->private; +- +- switch (st->state) { +- case TCP_SEQ_STATE_OPENREQ: +- case TCP_SEQ_STATE_LISTENING: +- rc = listening_get_next(seq, v); +- if (!rc) { +- inet_listen_unlock(&tcp_hashinfo); +- local_bh_disable(); +- st->state = TCP_SEQ_STATE_ESTABLISHED; +- rc = established_get_first(seq); +- } +- break; +- case TCP_SEQ_STATE_ESTABLISHED: +- case TCP_SEQ_STATE_TIME_WAIT: +- rc = established_get_next(seq, v); +- break; +- } +-out: +- ++*pos; +- return rc; +-} +- +-static void tcp_seq_stop(struct seq_file *seq, void *v) +-{ +- struct tcp_iter_state* st = seq->private; +- +- switch (st->state) { +- case TCP_SEQ_STATE_OPENREQ: +- if (v) { +- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); +- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); +- } +- case TCP_SEQ_STATE_LISTENING: +- if (v != SEQ_START_TOKEN) +- inet_listen_unlock(&tcp_hashinfo); +- break; +- case TCP_SEQ_STATE_TIME_WAIT: +- case TCP_SEQ_STATE_ESTABLISHED: +- if (v) +- read_unlock(&tcp_hashinfo.ehash[st->bucket].lock); +- local_bh_enable(); +- break; +- } +-} +- +-static int tcp_seq_open(struct inode *inode, struct file *file) +-{ +- struct tcp_seq_afinfo *afinfo = PDE(inode)->data; +- struct seq_file *seq; +- struct tcp_iter_state *s; +- int rc; +- +- if (unlikely(afinfo == NULL)) +- return -EINVAL; +- +- s = kzalloc(sizeof(*s), GFP_KERNEL); +- if (!s) +- return -ENOMEM; +- s->family = afinfo->family; +- s->seq_ops.start = tcp_seq_start; +- s->seq_ops.next = tcp_seq_next; +- s->seq_ops.show = afinfo->seq_show; +- s->seq_ops.stop = tcp_seq_stop; +- +- rc = seq_open(file, &s->seq_ops); +- if (rc) +- goto out_kfree; +- seq = file->private_data; +- seq->private = s; +-out: +- return rc; +-out_kfree: +- kfree(s); +- goto out; +-} +- +-int tcp_proc_register(struct tcp_seq_afinfo *afinfo) +-{ +- int rc = 0; +- struct proc_dir_entry *p; +- +- if (!afinfo) +- return -EINVAL; +- 
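[Editor's note] The seq_file plumbing being removed above (listening_get_next(), established_get_next(), tcp_seq_start()/next()/stop(), tcp_seq_open()) follows the kernel's standard four-callback iterator contract. As a reference point, here is a minimal, self-contained sketch of that contract against the 2.6.22-era proc API. All demo_* names are hypothetical; this module is illustrative only and is not part of the patch.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int demo_items[] = { 10, 20, 30 };

static void *demo_start(struct seq_file *seq, loff_t *pos)
{
	/* Position the cursor; seq_file may call this repeatedly with
	 * increasing *pos as userspace read()s in chunks. */
	if (*pos >= ARRAY_SIZE(demo_items))
		return NULL;
	return &demo_items[*pos];
}

static void *demo_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	if (*pos >= ARRAY_SIZE(demo_items))
		return NULL;
	return &demo_items[*pos];
}

static void demo_stop(struct seq_file *seq, void *v)
{
	/* The TCP walker drops its bucket locks here; nothing to do. */
}

static int demo_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%d\n", *(int *)v);
	return 0;
}

static struct seq_operations demo_seq_ops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

static int demo_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &demo_seq_ops);
}

static struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static int __init demo_init(void)
{
	/* Pre-netns signature, as in the code being removed here. */
	return proc_net_fops_create("seqfile_demo", S_IRUGO, &demo_fops) ?
		0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	proc_net_remove("seqfile_demo");
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The removed TCP version has the same shape, except that start()/next() additionally walk the listening, established, and timewait hash buckets and stash the iteration cursor in struct tcp_iter_state.
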
afinfo->seq_fops->owner = afinfo->owner; +- afinfo->seq_fops->open = tcp_seq_open; +- afinfo->seq_fops->read = seq_read; +- afinfo->seq_fops->llseek = seq_lseek; +- afinfo->seq_fops->release = seq_release_private; +- +- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); +- if (p) +- p->data = afinfo; +- else +- rc = -ENOMEM; +- return rc; +-} +- +-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo) +-{ +- if (!afinfo) +- return; +- proc_net_remove(afinfo->name); +- memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); +-} +- +-static void get_openreq4(struct sock *sk, struct request_sock *req, +- char *tmpbuf, int i, int uid) +-{ +- const struct inet_request_sock *ireq = inet_rsk(req); +- int ttd = req->expires - jiffies; +- +- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" +- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p", +- i, +- ireq->loc_addr, +- ntohs(inet_sk(sk)->sport), +- ireq->rmt_addr, +- ntohs(ireq->rmt_port), +- TCP_SYN_RECV, +- 0, 0, /* could print option size, but that is af dependent. */ +- 1, /* timers active (only the expire timer) */ +- jiffies_to_clock_t(ttd), +- req->retrans, +- uid, +- 0, /* non standard timer */ +- 0, /* open_requests have no inode */ +- atomic_read(&sk->sk_refcnt), +- req); +-} +- +-static void get_tcp4_sock(struct sock *sk, char *tmpbuf, int i) +-{ +- int timer_active; +- unsigned long timer_expires; +- struct tcp_sock *tp = tcp_sk(sk); +- const struct inet_connection_sock *icsk = inet_csk(sk); +- struct inet_sock *inet = inet_sk(sk); +- __be32 dest = inet->daddr; +- __be32 src = inet->rcv_saddr; +- __u16 destp = ntohs(inet->dport); +- __u16 srcp = ntohs(inet->sport); +- +- if (icsk->icsk_pending == ICSK_TIME_RETRANS) { +- timer_active = 1; +- timer_expires = icsk->icsk_timeout; +- } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { +- timer_active = 4; +- timer_expires = icsk->icsk_timeout; +- } else if (timer_pending(&sk->sk_timer)) { +- timer_active = 2; +- timer_expires = sk->sk_timer.expires; +- } else { +- timer_active = 0; +- timer_expires = jiffies; +- } +- +- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX " +- "%08X %5d %8d %lu %d %p %u %u %u %u %d", +- i, src, srcp, dest, destp, sk->sk_state, +- tp->write_seq - tp->snd_una, +- sk->sk_state == TCP_LISTEN ? sk->sk_ack_backlog : +- (tp->rcv_nxt - tp->copied_seq), +- timer_active, +- jiffies_to_clock_t(timer_expires - jiffies), +- icsk->icsk_retransmits, +- sock_i_uid(sk), +- icsk->icsk_probes_out, +- sock_i_ino(sk), +- atomic_read(&sk->sk_refcnt), sk, +- icsk->icsk_rto, +- icsk->icsk_ack.ato, +- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, +- tp->snd_cwnd, +- tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh); +-} +- +-static void get_timewait4_sock(struct inet_timewait_sock *tw, +- char *tmpbuf, int i) +-{ +- __be32 dest, src; +- __u16 destp, srcp; +- int ttd = tw->tw_ttd - jiffies; +- +- if (ttd < 0) +- ttd = 0; +- +- dest = tw->tw_daddr; +- src = tw->tw_rcv_saddr; +- destp = ntohs(tw->tw_dport); +- srcp = ntohs(tw->tw_sport); +- +- sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" +- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p", +- i, src, srcp, dest, destp, tw->tw_substate, 0, 0, +- 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, +- atomic_read(&tw->tw_refcnt), tw); +-} +- +-#define TMPSZ 150 +- +-static int tcp4_seq_show(struct seq_file *seq, void *v) +-{ +- struct tcp_iter_state* st; +- char tmpbuf[TMPSZ + 1]; +- +- if (v == SEQ_START_TOKEN) { +- seq_printf(seq, "%-*s\n", TMPSZ - 1, +- " sl local_address rem_address st tx_queue " +- "rx_queue tr tm->when retrnsmt uid timeout " +- "inode"); +- goto out; +- } +- st = seq->private; +- +- switch (st->state) { +- case TCP_SEQ_STATE_LISTENING: +- case TCP_SEQ_STATE_ESTABLISHED: +- get_tcp4_sock(v, tmpbuf, st->num); +- break; +- case TCP_SEQ_STATE_OPENREQ: +- get_openreq4(st->syn_wait_sk, v, tmpbuf, st->num, st->uid); +- break; +- case TCP_SEQ_STATE_TIME_WAIT: +- get_timewait4_sock(v, tmpbuf, st->num); +- break; +- } +- seq_printf(seq, "%-*s\n", TMPSZ - 1, tmpbuf); +-out: +- return 0; +-} +- +-static struct file_operations tcp4_seq_fops; +-static struct tcp_seq_afinfo tcp4_seq_afinfo = { +- .owner = THIS_MODULE, +- .name = "tcp", +- .family = AF_INET, +- .seq_show = tcp4_seq_show, +- .seq_fops = &tcp4_seq_fops, +-}; +- +-int __init tcp4_proc_init(void) +-{ +- return tcp_proc_register(&tcp4_seq_afinfo); +-} +- +-void tcp4_proc_exit(void) +-{ +- tcp_proc_unregister(&tcp4_seq_afinfo); +-} +-#endif /* CONFIG_PROC_FS */ +- +-struct proto tcp_prot = { +- .name = "TCP", +- .owner = THIS_MODULE, +- .close = tcp_close, +- .connect = tcp_v4_connect, +- .disconnect = tcp_disconnect, +- .accept = inet_csk_accept, +- .ioctl = tcp_ioctl, +- .init = tcp_v4_init_sock, +- .destroy = tcp_v4_destroy_sock, +- .shutdown = tcp_shutdown, +- .setsockopt = tcp_setsockopt, +- .getsockopt = tcp_getsockopt, +- .recvmsg = tcp_recvmsg, +- .backlog_rcv = tcp_v4_do_rcv, +- .hash = tcp_v4_hash, +- .unhash = tcp_unhash, +- .get_port = tcp_v4_get_port, +- .enter_memory_pressure = tcp_enter_memory_pressure, +- .sockets_allocated = &tcp_sockets_allocated, +- .orphan_count = &tcp_orphan_count, +- .memory_allocated = &tcp_memory_allocated, +- .memory_pressure = &tcp_memory_pressure, +- .sysctl_mem = sysctl_tcp_mem, +- .sysctl_wmem = sysctl_tcp_wmem, +- .sysctl_rmem = sysctl_tcp_rmem, +- .max_header = MAX_TCP_HEADER, +- .obj_size = sizeof(struct tcp_sock), +- .twsk_prot = &tcp_timewait_sock_ops, +- .rsk_prot = &tcp_request_sock_ops, +-#ifdef CONFIG_COMPAT +- .compat_setsockopt = compat_tcp_setsockopt, +- .compat_getsockopt = compat_tcp_getsockopt, +-#endif +-}; +- +-void __init tcp_v4_init(struct net_proto_family *ops) +-{ +- if (inet_csk_ctl_sock_create(&tcp_socket, PF_INET, SOCK_RAW, +- IPPROTO_TCP) < 0) +- panic("Failed to create the TCP control socket.\n"); +-} +- +-EXPORT_SYMBOL(ipv4_specific); +-EXPORT_SYMBOL(tcp_hashinfo); +-EXPORT_SYMBOL(tcp_prot); +-EXPORT_SYMBOL(tcp_unhash); +-EXPORT_SYMBOL(tcp_v4_conn_request); +-EXPORT_SYMBOL(tcp_v4_connect); +-EXPORT_SYMBOL(tcp_v4_do_rcv); +-EXPORT_SYMBOL(tcp_v4_remember_stamp); +-EXPORT_SYMBOL(tcp_v4_send_check); +-EXPORT_SYMBOL(tcp_v4_syn_recv_sock); +- +-#ifdef CONFIG_PROC_FS +-EXPORT_SYMBOL(tcp_proc_register); 
+-EXPORT_SYMBOL(tcp_proc_unregister); +-#endif +-EXPORT_SYMBOL(sysctl_local_port_range); +-EXPORT_SYMBOL(sysctl_tcp_low_latency); +- +diff -Nurb linux-2.6.22-570/net/ipv4/tcp_output.c linux-2.6.22-591/net/ipv4/tcp_output.c +--- linux-2.6.22-570/net/ipv4/tcp_output.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/tcp_output.c 2007-12-21 15:36:15.000000000 -0500 +@@ -432,11 +432,11 @@ + sysctl_flags = 0; + if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) { + tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; +- if (sysctl_tcp_timestamps) { ++ if (sk->sk_net->sysctl_tcp_timestamps) { + tcp_header_size += TCPOLEN_TSTAMP_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_TSTAMPS; + } +- if (sysctl_tcp_window_scaling) { ++ if (sk->sk_net->sysctl_tcp_window_scaling) { + tcp_header_size += TCPOLEN_WSCALE_ALIGNED; + sysctl_flags |= SYSCTL_FLAG_WSCALE; + } +@@ -2215,7 +2215,7 @@ + * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT. + */ + tp->tcp_header_len = sizeof(struct tcphdr) + +- (sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); ++ (sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0); + + #ifdef CONFIG_TCP_MD5SIG + if (tp->af_specific->md5_lookup(sk, sk) != NULL) +@@ -2238,7 +2238,7 @@ + tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), + &tp->rcv_wnd, + &tp->window_clamp, +- sysctl_tcp_window_scaling, ++ sk->sk_net->sysctl_tcp_window_scaling, + &rcv_wscale); + + tp->rx_opt.rcv_wscale = rcv_wscale; +diff -Nurb linux-2.6.22-570/net/ipv4/tcp_probe.c linux-2.6.22-591/net/ipv4/tcp_probe.c +--- linux-2.6.22-570/net/ipv4/tcp_probe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/tcp_probe.c 2007-12-21 15:36:15.000000000 -0500 +@@ -172,7 +172,7 @@ + if (IS_ERR(tcpw.fifo)) + return PTR_ERR(tcpw.fifo); + +- if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops)) ++ if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops)) + goto err0; + + ret = register_jprobe(&tcp_probe); +@@ -182,7 +182,7 @@ + pr_info("TCP watch registered (port=%d)\n", port); + return 0; + err1: +- proc_net_remove(procname); ++ proc_net_remove(&init_net, procname); + err0: + kfifo_free(tcpw.fifo); + return ret; +@@ -192,7 +192,7 @@ + static __exit void tcpprobe_exit(void) + { + kfifo_free(tcpw.fifo); +- proc_net_remove(procname); ++ proc_net_remove(&init_net, procname); + unregister_jprobe(&tcp_probe); + + } +diff -Nurb linux-2.6.22-570/net/ipv4/tunnel4.c linux-2.6.22-591/net/ipv4/tunnel4.c +--- linux-2.6.22-570/net/ipv4/tunnel4.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/tunnel4.c 2007-12-21 15:36:15.000000000 -0500 +@@ -75,6 +75,10 @@ + { + struct xfrm_tunnel *handler; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + +@@ -113,6 +117,9 @@ + { + struct xfrm_tunnel *handler; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + for (handler = tunnel4_handlers; handler; handler = handler->next) + if (!handler->err_handler(skb, info)) + break; +diff -Nurb linux-2.6.22-570/net/ipv4/udp.c linux-2.6.22-591/net/ipv4/udp.c +--- linux-2.6.22-570/net/ipv4/udp.c 2007-12-21 15:36:02.000000000 -0500 ++++ linux-2.6.22-591/net/ipv4/udp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -101,6 +101,7 @@ + #include + #include + #include ++#include + #include "udp_impl.h" + + /* +@@ -112,16 +113,17 @@ + struct hlist_head udp_hash[UDP_HTABLE_SIZE]; + DEFINE_RWLOCK(udp_hash_lock); + +-static int udp_port_rover; +- +-static inline 
int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) ++static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[]) + { + struct sock *sk; + struct hlist_node *node; + +- sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) ++ sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) { ++ if (sk->sk_net != net) ++ continue; + if (sk->sk_hash == num) + return 1; ++ } + return 0; + } + +@@ -148,9 +150,9 @@ + if (snum == 0) { + int best_size_so_far, best, result, i; + +- if (*port_rover > sysctl_local_port_range[1] || +- *port_rover < sysctl_local_port_range[0]) +- *port_rover = sysctl_local_port_range[0]; ++ if (*port_rover > sk->sk_net->sysctl_local_port_range[1] || ++ *port_rover < sk->sk_net->sysctl_local_port_range[0]) ++ *port_rover = sk->sk_net->sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = *port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { +@@ -158,9 +160,9 @@ + + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) { +- if (result > sysctl_local_port_range[1]) +- result = sysctl_local_port_range[0] + +- ((result - sysctl_local_port_range[0]) & ++ if (result > sk->sk_net->sysctl_local_port_range[1]) ++ result = sk->sk_net->sysctl_local_port_range[0] + ++ ((result - sk->sk_net->sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; + } +@@ -177,11 +179,11 @@ + result = best; + for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; + i++, result += UDP_HTABLE_SIZE) { +- if (result > sysctl_local_port_range[1]) +- result = sysctl_local_port_range[0] +- + ((result - sysctl_local_port_range[0]) & ++ if (result > sk->sk_net->sysctl_local_port_range[1]) ++ result = sk->sk_net->sysctl_local_port_range[0] ++ + ((result - sk->sk_net->sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); +- if (! __udp_lib_lport_inuse(result, udptable)) ++ if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable)) + break; + } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) +@@ -194,6 +196,7 @@ + sk_for_each(sk2, node, head) + if (sk2->sk_hash == snum && + sk2 != sk && ++ sk->sk_net == sk2->sk_net && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && +@@ -216,7 +219,7 @@ + int udp_get_port(struct sock *sk, unsigned short snum, + int (*scmp)(const struct sock *, const struct sock *)) + { +- return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); ++ return __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp); + } + + extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2); +@@ -229,7 +232,8 @@ + /* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +-static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, ++static struct sock *__udp4_lib_lookup(struct net *net, ++ __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) + { +@@ -243,6 +247,9 @@ + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet = inet_sk(sk); + ++ if (sk->sk_net != net) ++ continue; ++ + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 
1 : 0); + +@@ -299,6 +306,9 @@ + sk_for_each_from(s, node) { + struct inet_sock *inet = inet_sk(s); + ++ if (s->sk_net != sk->sk_net) ++ continue; ++ + if (s->sk_hash != hnum || + (inet->daddr && inet->daddr != rmt_addr) || + (inet->dport != rmt_port && inet->dport) || +@@ -328,6 +338,7 @@ + + void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) + { ++ struct net *net = skb->dev->nd_net; + struct inet_sock *inet; + struct iphdr *iph = (struct iphdr*)skb->data; + struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); +@@ -337,7 +348,7 @@ + int harderr; + int err; + +- sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, ++ sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, + skb->dev->ifindex, udptable ); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); +@@ -623,7 +634,8 @@ + rt = (struct rtable*)sk_dst_check(sk, 0); + + if (rt == NULL) { +- struct flowi fl = { .oif = ipc.oif, ++ struct flowi fl = { .fl_net = sk->sk_net, ++ .oif = ipc.oif, + .nl_u = { .ip4_u = + { .daddr = faddr, + .saddr = saddr, +@@ -1288,6 +1300,7 @@ + int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int proto) + { ++ struct net *net = skb->dev->nd_net; + struct sock *sk; + struct udphdr *uh = udp_hdr(skb); + unsigned short ulen; +@@ -1322,7 +1335,7 @@ + udp_ping_of_death(skb, uh, saddr); + #endif + +- sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, ++ sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest, + skb->dev->ifindex, udptable ); + + if (sk != NULL) { +@@ -1651,7 +1664,7 @@ + sk = sk_next(sk); + try_again: + ; +- } while (sk && (sk->sk_family != state->family || ++ } while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family || + !nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))); + + if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { +@@ -1717,6 +1730,7 @@ + + seq = file->private_data; + seq->private = s; ++ s->net = get_net(PROC_NET(inode)); + out: + return rc; + out_kfree: +@@ -1724,21 +1738,31 @@ + goto out; + } + ++static int udp_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct udp_iter_state *state = seq->private; ++ put_net(state->net); ++ return seq_release_private(inode, file); ++} ++ + /* ------------------------------------------------------------------------ */ +-int udp_proc_register(struct udp_seq_afinfo *afinfo) ++int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) + { + struct proc_dir_entry *p; + int rc = 0; + + if (!afinfo) + return -EINVAL; ++ if (net == &init_net) { + afinfo->seq_fops->owner = afinfo->owner; + afinfo->seq_fops->open = udp_seq_open; + afinfo->seq_fops->read = seq_read; + afinfo->seq_fops->llseek = seq_lseek; +- afinfo->seq_fops->release = seq_release_private; ++ afinfo->seq_fops->release = udp_seq_release; ++ } + +- p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops); ++ p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); + if (p) + p->data = afinfo; + else +@@ -1746,11 +1770,12 @@ + return rc; + } + +-void udp_proc_unregister(struct udp_seq_afinfo *afinfo) ++void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) + { + if (!afinfo) + return; +- proc_net_remove(afinfo->name); ++ proc_net_remove(net, afinfo->name); ++ if (net == &init_net) + memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); + } + +@@ -1803,14 +1828,30 @@ + .seq_fops = &udp4_seq_fops, + }; + ++ ++static int udp4_proc_net_init(struct net 
*net) ++{ ++ return udp_proc_register(net, &udp4_seq_afinfo); ++} ++ ++static void udp4_proc_net_exit(struct net *net) ++{ ++ udp_proc_unregister(net, &udp4_seq_afinfo); ++} ++ ++static struct pernet_operations udp4_proc_net_ops = { ++ .init = udp4_proc_net_init, ++ .exit = udp4_proc_net_exit, ++}; ++ + int __init udp4_proc_init(void) + { +- return udp_proc_register(&udp4_seq_afinfo); ++ return register_pernet_subsys(&udp4_proc_net_ops); + } + + void udp4_proc_exit(void) + { +- udp_proc_unregister(&udp4_seq_afinfo); ++ unregister_pernet_subsys(&udp4_proc_net_ops); + } + #endif /* CONFIG_PROC_FS */ + +diff -Nurb linux-2.6.22-570/net/ipv4/udplite.c linux-2.6.22-591/net/ipv4/udplite.c +--- linux-2.6.22-570/net/ipv4/udplite.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/udplite.c 2007-12-21 15:36:15.000000000 -0500 +@@ -31,11 +31,18 @@ + + static int udplite_rcv(struct sk_buff *skb) + { ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + } + + static void udplite_err(struct sk_buff *skb, u32 info) + { ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + return __udp4_lib_err(skb, info, udplite_hash); + } + +@@ -103,7 +110,7 @@ + inet_register_protosw(&udplite4_protosw); + + #ifdef CONFIG_PROC_FS +- if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */ ++ if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */ + printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__); + #endif + return; +diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_input.c linux-2.6.22-591/net/ipv4/xfrm4_input.c +--- linux-2.6.22-570/net/ipv4/xfrm4_input.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/xfrm4_input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -18,6 +18,10 @@ + + int xfrm4_rcv(struct sk_buff *skb) + { ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + return xfrm4_rcv_encap(skb, 0); + } + +diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_policy.c linux-2.6.22-591/net/ipv4/xfrm4_policy.c +--- linux-2.6.22-570/net/ipv4/xfrm4_policy.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/xfrm4_policy.c 2007-12-21 15:36:15.000000000 -0500 +@@ -25,6 +25,7 @@ + { + struct rtable *rt; + struct flowi fl_tunnel = { ++ .fl_net = &init_net, + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, +@@ -73,6 +74,7 @@ + struct rtable *rt0 = (struct rtable*)(*dst_p); + struct rtable *rt = rt0; + struct flowi fl_tunnel = { ++ .fl_net = &init_net, + .nl_u = { + .ip4_u = { + .saddr = fl->fl4_src, +@@ -213,6 +215,7 @@ + u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + + memset(fl, 0, sizeof(struct flowi)); ++ fl->fl_net = &init_net; + if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { + switch (iph->protocol) { + case IPPROTO_UDP: +@@ -306,7 +309,7 @@ + + xdst = (struct xfrm_dst *)dst; + if (xdst->u.rt.idev->dev == dev) { +- struct in_device *loopback_idev = in_dev_get(&loopback_dev); ++ struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev); + BUG_ON(!loopback_idev); + + do { +diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_state.c linux-2.6.22-591/net/ipv4/xfrm4_state.c +--- linux-2.6.22-570/net/ipv4/xfrm4_state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/xfrm4_state.c 2007-12-21 15:36:15.000000000 -0500 +@@ -16,7 +16,7 @@ + + static int xfrm4_init_flags(struct xfrm_state *x) + { +- if (ipv4_config.no_pmtu_disc) ++ if (init_net.sysctl_ipv4_no_pmtu_disc) + x->props.flags |= 
XFRM_STATE_NOPMTUDISC; + return 0; + } +diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c +--- linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv4/xfrm4_tunnel.c 2007-12-21 15:36:12.000000000 -0500 +@@ -109,3 +109,4 @@ + module_init(ipip_init); + module_exit(ipip_fini); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP); +diff -Nurb linux-2.6.22-570/net/ipv6/Kconfig linux-2.6.22-591/net/ipv6/Kconfig +--- linux-2.6.22-570/net/ipv6/Kconfig 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/Kconfig 2007-12-21 15:36:12.000000000 -0500 +@@ -109,7 +109,7 @@ + If unsure, say Y. + + config IPV6_MIP6 +- bool "IPv6: Mobility (EXPERIMENTAL)" ++ tristate "IPv6: Mobility (EXPERIMENTAL)" + depends on IPV6 && EXPERIMENTAL + select XFRM + ---help--- +diff -Nurb linux-2.6.22-570/net/ipv6/Makefile linux-2.6.22-591/net/ipv6/Makefile +--- linux-2.6.22-570/net/ipv6/Makefile 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/Makefile 2007-12-21 15:36:12.000000000 -0500 +@@ -14,7 +14,6 @@ + xfrm6_output.o + ipv6-$(CONFIG_NETFILTER) += netfilter.o + ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o +-ipv6-$(CONFIG_IPV6_MIP6) += mip6.o + ipv6-$(CONFIG_PROC_FS) += proc.o + + ipv6-objs += $(ipv6-y) +@@ -28,6 +27,7 @@ + obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o + obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o + obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o ++obj-$(CONFIG_IPV6_MIP6) += mip6.o + obj-$(CONFIG_NETFILTER) += netfilter/ + + obj-$(CONFIG_IPV6_SIT) += sit.o +diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-591/net/ipv6/addrconf.c +--- linux-2.6.22-570/net/ipv6/addrconf.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/addrconf.c 2007-12-21 15:36:15.000000000 -0500 +@@ -73,6 +73,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -457,7 +458,7 @@ + struct inet6_dev *idev; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { +@@ -920,7 +921,7 @@ + read_lock(&dev_base_lock); + rcu_read_lock(); + +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + +@@ -1047,7 +1048,7 @@ + } + + /* Rule 4: Prefer home address */ +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + if (hiscore.rule < 4) { + if (ifa_result->flags & IFA_F_HOMEADDRESS) + hiscore.attrs |= IPV6_SADDR_SCORE_HOA; +@@ -1882,7 +1883,7 @@ + if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) + goto err_exit; + +- dev = __dev_get_by_index(ireq.ifr6_ifindex); ++ dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex); + + err = -ENODEV; + if (dev == NULL) +@@ -1913,7 +1914,7 @@ + + if (err == 0) { + err = -ENOBUFS; +- if ((dev = __dev_get_by_name(p.name)) == NULL) ++ if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL) + goto err_exit; + err = dev_open(dev); + } +@@ -1943,7 +1944,7 @@ + if (!valid_lft || prefered_lft > valid_lft) + return -EINVAL; + +- if ((dev = __dev_get_by_index(ifindex)) == NULL) ++ if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL) + return -ENODEV; + + if ((idev = addrconf_add_dev(dev)) == NULL) +@@ -1994,7 +1995,7 @@ + struct inet6_dev *idev; + struct net_device *dev; + +- if ((dev = __dev_get_by_index(ifindex)) == NULL) ++ if ((dev = 
__dev_get_by_index(&init_net, ifindex)) == NULL) + return -ENODEV; + + if ((idev = __in6_dev_get(dev)) == NULL) +@@ -2089,7 +2090,7 @@ + return; + } + +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + struct in_device * in_dev = __in_dev_get_rtnl(dev); + if (in_dev && (dev->flags & IFF_UP)) { + struct in_ifaddr * ifa; +@@ -2245,12 +2246,12 @@ + + /* first try to inherit the link-local address from the link device */ + if (idev->dev->iflink && +- (link_dev = __dev_get_by_index(idev->dev->iflink))) { ++ (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) { + if (!ipv6_inherit_linklocal(idev, link_dev)) + return; + } + /* then try to inherit it from any device */ +- for_each_netdev(link_dev) { ++ for_each_netdev(&init_net, link_dev) { + if (!ipv6_inherit_linklocal(idev, link_dev)) + return; + } +@@ -2282,6 +2283,9 @@ + struct inet6_dev *idev = __in6_dev_get(dev); + int run_pending = 0; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch(event) { + case NETDEV_REGISTER: + if (!idev && dev->mtu >= IPV6_MIN_MTU) { +@@ -2419,7 +2423,7 @@ + + ASSERT_RTNL(); + +- if (dev == &loopback_dev && how == 1) ++ if (dev == &init_net.loopback_dev && how == 1) + how = 0; + + rt6_ifdown(dev); +@@ -2850,18 +2854,18 @@ + + int __init if6_proc_init(void) + { +- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) ++ if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops)) + return -ENOMEM; + return 0; + } + + void if6_proc_exit(void) + { +- proc_net_remove("if_inet6"); ++ proc_net_remove(&init_net, "if_inet6"); + } + #endif /* CONFIG_PROC_FS */ + +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + /* Check if address is a home address configured on any interface. */ + int ipv6_chk_home_addr(struct in6_addr *addr) + { +@@ -3017,11 +3021,15 @@ + static int + inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *pfx; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; +@@ -3074,6 +3082,7 @@ + static int + inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *pfx; +@@ -3083,6 +3092,9 @@ + u8 ifa_flags; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + return err; +@@ -3103,7 +3115,7 @@ + valid_lft = INFINITY_LIFE_TIME; + } + +- dev = __dev_get_by_index(ifm->ifa_index); ++ dev = __dev_get_by_index(&init_net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + +@@ -3292,7 +3304,7 @@ + s_ip_idx = ip_idx = cb->args[1]; + + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) +@@ -3367,26 +3379,42 @@ + + static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = UNICAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return inet6_dump_addr(skb, cb, type); + } + + static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = MULTICAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return 
inet6_dump_addr(skb, cb, type); + } + + + static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + enum addr_type_t type = ANYCAST_ADDR; ++ ++ if (net != &init_net) ++ return 0; ++ + return inet6_dump_addr(skb, cb, type); + } + + static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, + void *arg) + { ++ struct net *net = in_skb->sk->sk_net; + struct ifaddrmsg *ifm; + struct nlattr *tb[IFA_MAX+1]; + struct in6_addr *addr = NULL; +@@ -3395,6 +3423,9 @@ + struct sk_buff *skb; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); + if (err < 0) + goto errout; +@@ -3407,7 +3438,7 @@ + + ifm = nlmsg_data(nlh); + if (ifm->ifa_index) +- dev = __dev_get_by_index(ifm->ifa_index); ++ dev = __dev_get_by_index(&init_net, ifm->ifa_index); + + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { + err = -EADDRNOTAVAIL; +@@ -3427,7 +3458,7 @@ + kfree_skb(skb); + goto errout_ifa; + } +- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); ++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + errout_ifa: + in6_ifa_put(ifa); + errout: +@@ -3450,10 +3481,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + } + + static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, +@@ -3612,19 +3643,22 @@ + + static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx, err; + int s_idx = cb->args[0]; + struct net_device *dev; + struct inet6_dev *idev; + struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL; + ++ if (net != &init_net) ++ return 0; + /* FIXME: maybe disable ipv6 on non v6 guests? + if (skb->sk && skb->sk->sk_vx_info) + return skb->len; */ + + read_lock(&dev_base_lock); + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (idx < s_idx) + goto cont; + if (!v6_dev_in_nx_info(dev, nxi)) +@@ -3661,10 +3695,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err); + } + + static inline size_t inet6_prefix_nlmsg_size(void) +@@ -3730,10 +3764,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); ++ err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err); + } + + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +@@ -4244,16 +4278,16 @@ + * device and it being up should be removed. 
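
The addrconf netlink hunks above all apply the same interim rule for the namespace conversion: dump handlers answer a socket outside the initial namespace with an empty dump (return 0), state-changing and lookup requests are refused with -EINVAL, and the netdevice notifier likewise returns NOTIFY_DONE for devices owned by another namespace. A compact model of that guard, assuming hypothetical names (net_ns, dump_addrs, get_addr):

    #include <stdio.h>
    #include <errno.h>

    struct net_ns { const char *name; };

    static struct net_ns init_net = { "init_net" };

    /* Dump handler: pretend there is nothing to report outside init_net. */
    static int dump_addrs(struct net_ns *net)
    {
            if (net != &init_net)
                    return 0;
            printf("dumping addresses of %s\n", net->name);
            return 1;
    }

    /* Request handler: refuse outright outside init_net. */
    static int get_addr(struct net_ns *net)
    {
            if (net != &init_net)
                    return -EINVAL;
            printf("answering getaddr in %s\n", net->name);
            return 0;
    }

    int main(void)
    {
            struct net_ns guest = { "guest0" };

            dump_addrs(&init_net);
            printf("guest dump -> %d, guest getaddr -> %d\n",
                   dump_addrs(&guest), get_addr(&guest));
            return 0;
    }

Returning 0 from a dump keeps tools such as ip(8) quiet but functional inside a guest, while the hard -EINVAL is reserved for requests that would otherwise read or mutate init_net state.
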
+ */ + rtnl_lock(); +- if (!ipv6_add_dev(&loopback_dev)) ++ if (!ipv6_add_dev(&init_net.loopback_dev)) + err = -ENOMEM; + rtnl_unlock(); + if (err) + return err; + +- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); ++ ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES +- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); +- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); ++ ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); ++ ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev); + #endif + + register_netdevice_notifier(&ipv6_dev_notf); +@@ -4304,12 +4338,12 @@ + * clean dev list. + */ + +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((idev = __in6_dev_get(dev)) == NULL) + continue; + addrconf_ifdown(dev, 1); + } +- addrconf_ifdown(&loopback_dev, 2); ++ addrconf_ifdown(&init_net.loopback_dev, 2); + + /* + * Check hash table. +@@ -4335,6 +4369,6 @@ + rtnl_unlock(); + + #ifdef CONFIG_PROC_FS +- proc_net_remove("if_inet6"); ++ proc_net_remove(&init_net, "if_inet6"); + #endif + } +diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c.orig linux-2.6.22-591/net/ipv6/addrconf.c.orig +--- linux-2.6.22-570/net/ipv6/addrconf.c.orig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/addrconf.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,4301 +0,0 @@ +-/* +- * IPv6 Address [auto]configuration +- * Linux INET6 implementation +- * +- * Authors: +- * Pedro Roque +- * Alexey Kuznetsov +- * +- * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $ +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- */ +- +-/* +- * Changes: +- * +- * Janos Farkas : delete timer on ifdown +- * +- * Andi Kleen : kill double kfree on module +- * unload. +- * Maciej W. Rozycki : FDDI support +- * sekiya@USAGI : Don't send too many RS +- * packets. +- * yoshfuji@USAGI : Fixed interval between DAD +- * packets. +- * YOSHIFUJI Hideaki @USAGI : improved accuracy of +- * address validation timer. +- * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041) +- * support. +- * Yuji SEKIYA @USAGI : Don't assign a same IPv6 +- * address on a same interface. +- * YOSHIFUJI Hideaki @USAGI : ARCnet support +- * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to +- * seq_file. +- * YOSHIFUJI Hideaki @USAGI : improved source address +- * selection; consider scope, +- * status etc. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#ifdef CONFIG_SYSCTL +-#include +-#endif +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_IPV6_PRIVACY +-#include +-#endif +- +-#include +-#include +- +-#include +-#include +- +-/* Set to 3 to get tracing... 
*/ +-#define ACONF_DEBUG 2 +- +-#if ACONF_DEBUG >= 3 +-#define ADBG(x) printk x +-#else +-#define ADBG(x) +-#endif +- +-#define INFINITY_LIFE_TIME 0xFFFFFFFF +-#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b))) +- +-#ifdef CONFIG_SYSCTL +-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p); +-static void addrconf_sysctl_unregister(struct ipv6_devconf *p); +-#endif +- +-#ifdef CONFIG_IPV6_PRIVACY +-static int __ipv6_regen_rndid(struct inet6_dev *idev); +-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +-static void ipv6_regen_rndid(unsigned long data); +- +-static int desync_factor = MAX_DESYNC_FACTOR * HZ; +-#endif +- +-static int ipv6_count_addresses(struct inet6_dev *idev); +- +-/* +- * Configured unicast address hash table +- */ +-static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; +-static DEFINE_RWLOCK(addrconf_hash_lock); +- +-static void addrconf_verify(unsigned long); +- +-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); +-static DEFINE_SPINLOCK(addrconf_verify_lock); +- +-static void addrconf_join_anycast(struct inet6_ifaddr *ifp); +-static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); +- +-static int addrconf_ifdown(struct net_device *dev, int how); +- +-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags); +-static void addrconf_dad_timer(unsigned long data); +-static void addrconf_dad_completed(struct inet6_ifaddr *ifp); +-static void addrconf_dad_run(struct inet6_dev *idev); +-static void addrconf_rs_timer(unsigned long data); +-static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); +-static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); +- +-static void inet6_prefix_notify(int event, struct inet6_dev *idev, +- struct prefix_info *pinfo); +-static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev); +- +-static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); +- +-struct ipv6_devconf ipv6_devconf __read_mostly = { +- .forwarding = 0, +- .hop_limit = IPV6_DEFAULT_HOPLIMIT, +- .mtu6 = IPV6_MIN_MTU, +- .accept_ra = 1, +- .accept_redirects = 1, +- .autoconf = 1, +- .force_mld_version = 0, +- .dad_transmits = 1, +- .rtr_solicits = MAX_RTR_SOLICITATIONS, +- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, +- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, +-#ifdef CONFIG_IPV6_PRIVACY +- .use_tempaddr = 0, +- .temp_valid_lft = TEMP_VALID_LIFETIME, +- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, +- .regen_max_retry = REGEN_MAX_RETRY, +- .max_desync_factor = MAX_DESYNC_FACTOR, +-#endif +- .max_addresses = IPV6_MAX_ADDRESSES, +- .accept_ra_defrtr = 1, +- .accept_ra_pinfo = 1, +-#ifdef CONFIG_IPV6_ROUTER_PREF +- .accept_ra_rtr_pref = 1, +- .rtr_probe_interval = 60 * HZ, +-#ifdef CONFIG_IPV6_ROUTE_INFO +- .accept_ra_rt_info_max_plen = 0, +-#endif +-#endif +- .proxy_ndp = 0, +- .accept_source_route = 0, /* we do not accept RH0 by default. 
*/ +-}; +- +-static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { +- .forwarding = 0, +- .hop_limit = IPV6_DEFAULT_HOPLIMIT, +- .mtu6 = IPV6_MIN_MTU, +- .accept_ra = 1, +- .accept_redirects = 1, +- .autoconf = 1, +- .dad_transmits = 1, +- .rtr_solicits = MAX_RTR_SOLICITATIONS, +- .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, +- .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, +-#ifdef CONFIG_IPV6_PRIVACY +- .use_tempaddr = 0, +- .temp_valid_lft = TEMP_VALID_LIFETIME, +- .temp_prefered_lft = TEMP_PREFERRED_LIFETIME, +- .regen_max_retry = REGEN_MAX_RETRY, +- .max_desync_factor = MAX_DESYNC_FACTOR, +-#endif +- .max_addresses = IPV6_MAX_ADDRESSES, +- .accept_ra_defrtr = 1, +- .accept_ra_pinfo = 1, +-#ifdef CONFIG_IPV6_ROUTER_PREF +- .accept_ra_rtr_pref = 1, +- .rtr_probe_interval = 60 * HZ, +-#ifdef CONFIG_IPV6_ROUTE_INFO +- .accept_ra_rt_info_max_plen = 0, +-#endif +-#endif +- .proxy_ndp = 0, +- .accept_source_route = 0, /* we do not accept RH0 by default. */ +-}; +- +-/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ +-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; +-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; +- +-/* Check if a valid qdisc is available */ +-static inline int addrconf_qdisc_ok(struct net_device *dev) +-{ +- return (dev->qdisc != &noop_qdisc); +-} +- +-static void addrconf_del_timer(struct inet6_ifaddr *ifp) +-{ +- if (del_timer(&ifp->timer)) +- __in6_ifa_put(ifp); +-} +- +-enum addrconf_timer_t +-{ +- AC_NONE, +- AC_DAD, +- AC_RS, +-}; +- +-static void addrconf_mod_timer(struct inet6_ifaddr *ifp, +- enum addrconf_timer_t what, +- unsigned long when) +-{ +- if (!del_timer(&ifp->timer)) +- in6_ifa_hold(ifp); +- +- switch (what) { +- case AC_DAD: +- ifp->timer.function = addrconf_dad_timer; +- break; +- case AC_RS: +- ifp->timer.function = addrconf_rs_timer; +- break; +- default:; +- } +- ifp->timer.expires = jiffies + when; +- add_timer(&ifp->timer); +-} +- +-static int snmp6_alloc_dev(struct inet6_dev *idev) +-{ +- int err = -ENOMEM; +- +- if (!idev || !idev->dev) +- return -EINVAL; +- +- if (snmp_mib_init((void **)idev->stats.ipv6, +- sizeof(struct ipstats_mib), +- __alignof__(struct ipstats_mib)) < 0) +- goto err_ip; +- if (snmp_mib_init((void **)idev->stats.icmpv6, +- sizeof(struct icmpv6_mib), +- __alignof__(struct icmpv6_mib)) < 0) +- goto err_icmp; +- +- return 0; +- +-err_icmp: +- snmp_mib_free((void **)idev->stats.ipv6); +-err_ip: +- return err; +-} +- +-static int snmp6_free_dev(struct inet6_dev *idev) +-{ +- snmp_mib_free((void **)idev->stats.icmpv6); +- snmp_mib_free((void **)idev->stats.ipv6); +- return 0; +-} +- +-/* Nobody refers to this device, we may destroy it. */ +- +-static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +-{ +- struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); +- kfree(idev); +-} +- +-void in6_dev_finish_destroy(struct inet6_dev *idev) +-{ +- struct net_device *dev = idev->dev; +- BUG_TRAP(idev->addr_list==NULL); +- BUG_TRAP(idev->mc_list==NULL); +-#ifdef NET_REFCNT_DEBUG +- printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? 
dev->name : "NIL"); +-#endif +- dev_put(dev); +- if (!idev->dead) { +- printk("Freeing alive inet6 device %p\n", idev); +- return; +- } +- snmp6_free_dev(idev); +- call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); +-} +- +-EXPORT_SYMBOL(in6_dev_finish_destroy); +- +-static struct inet6_dev * ipv6_add_dev(struct net_device *dev) +-{ +- struct inet6_dev *ndev; +- struct in6_addr maddr; +- +- ASSERT_RTNL(); +- +- if (dev->mtu < IPV6_MIN_MTU) +- return NULL; +- +- ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); +- +- if (ndev == NULL) +- return NULL; +- +- rwlock_init(&ndev->lock); +- ndev->dev = dev; +- memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf)); +- ndev->cnf.mtu6 = dev->mtu; +- ndev->cnf.sysctl = NULL; +- ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); +- if (ndev->nd_parms == NULL) { +- kfree(ndev); +- return NULL; +- } +- /* We refer to the device */ +- dev_hold(dev); +- +- if (snmp6_alloc_dev(ndev) < 0) { +- ADBG((KERN_WARNING +- "%s(): cannot allocate memory for statistics; dev=%s.\n", +- __FUNCTION__, dev->name)); +- neigh_parms_release(&nd_tbl, ndev->nd_parms); +- ndev->dead = 1; +- in6_dev_finish_destroy(ndev); +- return NULL; +- } +- +- if (snmp6_register_dev(ndev) < 0) { +- ADBG((KERN_WARNING +- "%s(): cannot create /proc/net/dev_snmp6/%s\n", +- __FUNCTION__, dev->name)); +- neigh_parms_release(&nd_tbl, ndev->nd_parms); +- ndev->dead = 1; +- in6_dev_finish_destroy(ndev); +- return NULL; +- } +- +- /* One reference from device. We must do this before +- * we invoke __ipv6_regen_rndid(). +- */ +- in6_dev_hold(ndev); +- +-#ifdef CONFIG_IPV6_PRIVACY +- init_timer(&ndev->regen_timer); +- ndev->regen_timer.function = ipv6_regen_rndid; +- ndev->regen_timer.data = (unsigned long) ndev; +- if ((dev->flags&IFF_LOOPBACK) || +- dev->type == ARPHRD_TUNNEL || +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +- dev->type == ARPHRD_SIT || +-#endif +- dev->type == ARPHRD_NONE) { +- printk(KERN_INFO +- "%s: Disabled Privacy Extensions\n", +- dev->name); +- ndev->cnf.use_tempaddr = -1; +- } else { +- in6_dev_hold(ndev); +- ipv6_regen_rndid((unsigned long) ndev); +- } +-#endif +- +- if (netif_running(dev) && addrconf_qdisc_ok(dev)) +- ndev->if_flags |= IF_READY; +- +- ipv6_mc_init_dev(ndev); +- ndev->tstamp = jiffies; +-#ifdef CONFIG_SYSCTL +- neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6, +- NET_IPV6_NEIGH, "ipv6", +- &ndisc_ifinfo_sysctl_change, +- NULL); +- addrconf_sysctl_register(ndev, &ndev->cnf); +-#endif +- /* protected by rtnl_lock */ +- rcu_assign_pointer(dev->ip6_ptr, ndev); +- +- /* Join all-node multicast group */ +- ipv6_addr_all_nodes(&maddr); +- ipv6_dev_mc_inc(dev, &maddr); +- +- return ndev; +-} +- +-static struct inet6_dev * ipv6_find_idev(struct net_device *dev) +-{ +- struct inet6_dev *idev; +- +- ASSERT_RTNL(); +- +- if ((idev = __in6_dev_get(dev)) == NULL) { +- if ((idev = ipv6_add_dev(dev)) == NULL) +- return NULL; +- } +- +- if (dev->flags&IFF_UP) +- ipv6_mc_up(idev); +- return idev; +-} +- +-#ifdef CONFIG_SYSCTL +-static void dev_forward_change(struct inet6_dev *idev) +-{ +- struct net_device *dev; +- struct inet6_ifaddr *ifa; +- struct in6_addr addr; +- +- if (!idev) +- return; +- dev = idev->dev; +- if (dev && (dev->flags & IFF_MULTICAST)) { +- ipv6_addr_all_routers(&addr); +- +- if (idev->cnf.forwarding) +- ipv6_dev_mc_inc(dev, &addr); +- else +- ipv6_dev_mc_dec(dev, &addr); +- } +- for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { +- if (ifa->flags&IFA_F_TENTATIVE) +- continue; +- if (idev->cnf.forwarding) +- 
addrconf_join_anycast(ifa); +- else +- addrconf_leave_anycast(ifa); +- } +-} +- +- +-static void addrconf_forward_change(void) +-{ +- struct net_device *dev; +- struct inet6_dev *idev; +- +- read_lock(&dev_base_lock); +- for_each_netdev(dev) { +- rcu_read_lock(); +- idev = __in6_dev_get(dev); +- if (idev) { +- int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); +- idev->cnf.forwarding = ipv6_devconf.forwarding; +- if (changed) +- dev_forward_change(idev); +- } +- rcu_read_unlock(); +- } +- read_unlock(&dev_base_lock); +-} +-#endif +- +-/* Nobody refers to this ifaddr, destroy it */ +- +-void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) +-{ +- BUG_TRAP(ifp->if_next==NULL); +- BUG_TRAP(ifp->lst_next==NULL); +-#ifdef NET_REFCNT_DEBUG +- printk(KERN_DEBUG "inet6_ifa_finish_destroy\n"); +-#endif +- +- in6_dev_put(ifp->idev); +- +- if (del_timer(&ifp->timer)) +- printk("Timer is still running, when freeing ifa=%p\n", ifp); +- +- if (!ifp->dead) { +- printk("Freeing alive inet6 address %p\n", ifp); +- return; +- } +- dst_release(&ifp->rt->u.dst); +- +- kfree(ifp); +-} +- +-static void +-ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) +-{ +- struct inet6_ifaddr *ifa, **ifap; +- int ifp_scope = ipv6_addr_src_scope(&ifp->addr); +- +- /* +- * Each device address list is sorted in order of scope - +- * global before linklocal. +- */ +- for (ifap = &idev->addr_list; (ifa = *ifap) != NULL; +- ifap = &ifa->if_next) { +- if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr)) +- break; +- } +- +- ifp->if_next = *ifap; +- *ifap = ifp; +-} +- +-/* On success it returns ifp with increased reference count */ +- +-static struct inet6_ifaddr * +-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, +- int scope, u32 flags) +-{ +- struct inet6_ifaddr *ifa = NULL; +- struct rt6_info *rt; +- int hash; +- int err = 0; +- +- rcu_read_lock_bh(); +- if (idev->dead) { +- err = -ENODEV; /*XXX*/ +- goto out2; +- } +- +- write_lock(&addrconf_hash_lock); +- +- /* Ignore adding duplicate addresses on an interface */ +- if (ipv6_chk_same_addr(addr, idev->dev)) { +- ADBG(("ipv6_add_addr: already assigned\n")); +- err = -EEXIST; +- goto out; +- } +- +- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); +- +- if (ifa == NULL) { +- ADBG(("ipv6_add_addr: malloc failed\n")); +- err = -ENOBUFS; +- goto out; +- } +- +- rt = addrconf_dst_alloc(idev, addr, 0); +- if (IS_ERR(rt)) { +- err = PTR_ERR(rt); +- goto out; +- } +- +- ipv6_addr_copy(&ifa->addr, addr); +- +- spin_lock_init(&ifa->lock); +- init_timer(&ifa->timer); +- ifa->timer.data = (unsigned long) ifa; +- ifa->scope = scope; +- ifa->prefix_len = pfxlen; +- ifa->flags = flags | IFA_F_TENTATIVE; +- ifa->cstamp = ifa->tstamp = jiffies; +- +- ifa->rt = rt; +- +- /* +- * part one of RFC 4429, section 3.3 +- * We should not configure an address as +- * optimistic if we do not yet know the link +- * layer address of our nexhop router +- */ +- +- if (rt->rt6i_nexthop == NULL) +- ifa->flags &= ~IFA_F_OPTIMISTIC; +- +- ifa->idev = idev; +- in6_dev_hold(idev); +- /* For caller */ +- in6_ifa_hold(ifa); +- +- /* Add to big hash table */ +- hash = ipv6_addr_hash(addr); +- +- ifa->lst_next = inet6_addr_lst[hash]; +- inet6_addr_lst[hash] = ifa; +- in6_ifa_hold(ifa); +- write_unlock(&addrconf_hash_lock); +- +- write_lock(&idev->lock); +- /* Add to inet6_dev unicast addr list. 
*/ +- ipv6_link_dev_addr(idev, ifa); +- +-#ifdef CONFIG_IPV6_PRIVACY +- if (ifa->flags&IFA_F_TEMPORARY) { +- ifa->tmp_next = idev->tempaddr_list; +- idev->tempaddr_list = ifa; +- in6_ifa_hold(ifa); +- } +-#endif +- +- in6_ifa_hold(ifa); +- write_unlock(&idev->lock); +-out2: +- rcu_read_unlock_bh(); +- +- if (likely(err == 0)) +- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); +- else { +- kfree(ifa); +- ifa = ERR_PTR(err); +- } +- +- return ifa; +-out: +- write_unlock(&addrconf_hash_lock); +- goto out2; +-} +- +-/* This function wants to get referenced ifp and releases it before return */ +- +-static void ipv6_del_addr(struct inet6_ifaddr *ifp) +-{ +- struct inet6_ifaddr *ifa, **ifap; +- struct inet6_dev *idev = ifp->idev; +- int hash; +- int deleted = 0, onlink = 0; +- unsigned long expires = jiffies; +- +- hash = ipv6_addr_hash(&ifp->addr); +- +- ifp->dead = 1; +- +- write_lock_bh(&addrconf_hash_lock); +- for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL; +- ifap = &ifa->lst_next) { +- if (ifa == ifp) { +- *ifap = ifa->lst_next; +- __in6_ifa_put(ifp); +- ifa->lst_next = NULL; +- break; +- } +- } +- write_unlock_bh(&addrconf_hash_lock); +- +- write_lock_bh(&idev->lock); +-#ifdef CONFIG_IPV6_PRIVACY +- if (ifp->flags&IFA_F_TEMPORARY) { +- for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL; +- ifap = &ifa->tmp_next) { +- if (ifa == ifp) { +- *ifap = ifa->tmp_next; +- if (ifp->ifpub) { +- in6_ifa_put(ifp->ifpub); +- ifp->ifpub = NULL; +- } +- __in6_ifa_put(ifp); +- ifa->tmp_next = NULL; +- break; +- } +- } +- } +-#endif +- +- for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) { +- if (ifa == ifp) { +- *ifap = ifa->if_next; +- __in6_ifa_put(ifp); +- ifa->if_next = NULL; +- if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) +- break; +- deleted = 1; +- continue; +- } else if (ifp->flags & IFA_F_PERMANENT) { +- if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, +- ifp->prefix_len)) { +- if (ifa->flags & IFA_F_PERMANENT) { +- onlink = 1; +- if (deleted) +- break; +- } else { +- unsigned long lifetime; +- +- if (!onlink) +- onlink = -1; +- +- spin_lock(&ifa->lock); +- lifetime = min_t(unsigned long, +- ifa->valid_lft, 0x7fffffffUL/HZ); +- if (time_before(expires, +- ifa->tstamp + lifetime * HZ)) +- expires = ifa->tstamp + lifetime * HZ; +- spin_unlock(&ifa->lock); +- } +- } +- } +- ifap = &ifa->if_next; +- } +- write_unlock_bh(&idev->lock); +- +- ipv6_ifa_notify(RTM_DELADDR, ifp); +- +- atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifp); +- +- addrconf_del_timer(ifp); +- +- /* +- * Purge or update corresponding prefix +- * +- * 1) we don't purge prefix here if address was not permanent. +- * prefix is managed by its own lifetime. +- * 2) if there're no addresses, delete prefix. +- * 3) if there're still other permanent address(es), +- * corresponding prefix is still permanent. +- * 4) otherwise, update prefix lifetime to the +- * longest valid lifetime among the corresponding +- * addresses on the device. +- * Note: subsequent RA will update lifetime. 
+- * +- * --yoshfuji +- */ +- if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { +- struct in6_addr prefix; +- struct rt6_info *rt; +- +- ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); +- rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1); +- +- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { +- if (onlink == 0) { +- ip6_del_rt(rt); +- rt = NULL; +- } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { +- rt->rt6i_expires = expires; +- rt->rt6i_flags |= RTF_EXPIRES; +- } +- } +- dst_release(&rt->u.dst); +- } +- +- in6_ifa_put(ifp); +-} +- +-#ifdef CONFIG_IPV6_PRIVACY +-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) +-{ +- struct inet6_dev *idev = ifp->idev; +- struct in6_addr addr, *tmpaddr; +- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_cstamp, tmp_tstamp; +- int tmp_plen; +- int ret = 0; +- int max_addresses; +- u32 addr_flags; +- +- write_lock(&idev->lock); +- if (ift) { +- spin_lock_bh(&ift->lock); +- memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); +- spin_unlock_bh(&ift->lock); +- tmpaddr = &addr; +- } else { +- tmpaddr = NULL; +- } +-retry: +- in6_dev_hold(idev); +- if (idev->cnf.use_tempaddr <= 0) { +- write_unlock(&idev->lock); +- printk(KERN_INFO +- "ipv6_create_tempaddr(): use_tempaddr is disabled.\n"); +- in6_dev_put(idev); +- ret = -1; +- goto out; +- } +- spin_lock_bh(&ifp->lock); +- if (ifp->regen_count++ >= idev->cnf.regen_max_retry) { +- idev->cnf.use_tempaddr = -1; /*XXX*/ +- spin_unlock_bh(&ifp->lock); +- write_unlock(&idev->lock); +- printk(KERN_WARNING +- "ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n"); +- in6_dev_put(idev); +- ret = -1; +- goto out; +- } +- in6_ifa_hold(ifp); +- memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); +- if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { +- spin_unlock_bh(&ifp->lock); +- write_unlock(&idev->lock); +- printk(KERN_WARNING +- "ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n"); +- in6_ifa_put(ifp); +- in6_dev_put(idev); +- ret = -1; +- goto out; +- } +- memcpy(&addr.s6_addr[8], idev->rndid, 8); +- tmp_valid_lft = min_t(__u32, +- ifp->valid_lft, +- idev->cnf.temp_valid_lft); +- tmp_prefered_lft = min_t(__u32, +- ifp->prefered_lft, +- idev->cnf.temp_prefered_lft - desync_factor / HZ); +- tmp_plen = ifp->prefix_len; +- max_addresses = idev->cnf.max_addresses; +- tmp_cstamp = ifp->cstamp; +- tmp_tstamp = ifp->tstamp; +- spin_unlock_bh(&ifp->lock); +- +- write_unlock(&idev->lock); +- +- addr_flags = IFA_F_TEMPORARY; +- /* set in addrconf_prefix_rcv() */ +- if (ifp->flags & IFA_F_OPTIMISTIC) +- addr_flags |= IFA_F_OPTIMISTIC; +- +- ift = !max_addresses || +- ipv6_count_addresses(idev) < max_addresses ? 
+- ipv6_add_addr(idev, &addr, tmp_plen, +- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, +- addr_flags) : NULL; +- if (!ift || IS_ERR(ift)) { +- in6_ifa_put(ifp); +- in6_dev_put(idev); +- printk(KERN_INFO +- "ipv6_create_tempaddr(): retry temporary address regeneration.\n"); +- tmpaddr = &addr; +- write_lock(&idev->lock); +- goto retry; +- } +- +- spin_lock_bh(&ift->lock); +- ift->ifpub = ifp; +- ift->valid_lft = tmp_valid_lft; +- ift->prefered_lft = tmp_prefered_lft; +- ift->cstamp = tmp_cstamp; +- ift->tstamp = tmp_tstamp; +- spin_unlock_bh(&ift->lock); +- +- addrconf_dad_start(ift, 0); +- in6_ifa_put(ift); +- in6_dev_put(idev); +-out: +- return ret; +-} +-#endif +- +-/* +- * Choose an appropriate source address (RFC3484) +- */ +-struct ipv6_saddr_score { +- int addr_type; +- unsigned int attrs; +- int matchlen; +- int scope; +- unsigned int rule; +-}; +- +-#define IPV6_SADDR_SCORE_LOCAL 0x0001 +-#define IPV6_SADDR_SCORE_PREFERRED 0x0004 +-#define IPV6_SADDR_SCORE_HOA 0x0008 +-#define IPV6_SADDR_SCORE_OIF 0x0010 +-#define IPV6_SADDR_SCORE_LABEL 0x0020 +-#define IPV6_SADDR_SCORE_PRIVACY 0x0040 +- +-static inline int ipv6_saddr_preferred(int type) +-{ +- if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4| +- IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED)) +- return 1; +- return 0; +-} +- +-/* static matching label */ +-static inline int ipv6_saddr_label(const struct in6_addr *addr, int type) +-{ +- /* +- * prefix (longest match) label +- * ----------------------------- +- * ::1/128 0 +- * ::/0 1 +- * 2002::/16 2 +- * ::/96 3 +- * ::ffff:0:0/96 4 +- * fc00::/7 5 +- * 2001::/32 6 +- */ +- if (type & IPV6_ADDR_LOOPBACK) +- return 0; +- else if (type & IPV6_ADDR_COMPATv4) +- return 3; +- else if (type & IPV6_ADDR_MAPPED) +- return 4; +- else if (addr->s6_addr32[0] == htonl(0x20010000)) +- return 6; +- else if (addr->s6_addr16[0] == htons(0x2002)) +- return 2; +- else if ((addr->s6_addr[0] & 0xfe) == 0xfc) +- return 5; +- return 1; +-} +- +-int ipv6_dev_get_saddr(struct net_device *daddr_dev, +- struct in6_addr *daddr, struct in6_addr *saddr) +-{ +- struct ipv6_saddr_score hiscore; +- struct inet6_ifaddr *ifa_result = NULL; +- int daddr_type = __ipv6_addr_type(daddr); +- int daddr_scope = __ipv6_addr_src_scope(daddr_type); +- u32 daddr_label = ipv6_saddr_label(daddr, daddr_type); +- struct net_device *dev; +- +- memset(&hiscore, 0, sizeof(hiscore)); +- +- read_lock(&dev_base_lock); +- rcu_read_lock(); +- +- for_each_netdev(dev) { +- struct inet6_dev *idev; +- struct inet6_ifaddr *ifa; +- +- /* Rule 0: Candidate Source Address (section 4) +- * - multicast and link-local destination address, +- * the set of candidate source address MUST only +- * include addresses assigned to interfaces +- * belonging to the same link as the outgoing +- * interface. +- * (- For site-local destination addresses, the +- * set of candidate source addresses MUST only +- * include addresses assigned to interfaces +- * belonging to the same site as the outgoing +- * interface.) 
+- */ +- if ((daddr_type & IPV6_ADDR_MULTICAST || +- daddr_scope <= IPV6_ADDR_SCOPE_LINKLOCAL) && +- daddr_dev && dev != daddr_dev) +- continue; +- +- idev = __in6_dev_get(dev); +- if (!idev) +- continue; +- +- read_lock_bh(&idev->lock); +- for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) { +- struct ipv6_saddr_score score; +- +- score.addr_type = __ipv6_addr_type(&ifa->addr); +- +- /* Rule 0: +- * - Tentative Address (RFC2462 section 5.4) +- * - A tentative address is not considered +- * "assigned to an interface" in the traditional +- * sense, unless it is also flagged as optimistic. +- * - Candidate Source Address (section 4) +- * - In any case, anycast addresses, multicast +- * addresses, and the unspecified address MUST +- * NOT be included in a candidate set. +- */ +- if ((ifa->flags & IFA_F_TENTATIVE) && +- (!(ifa->flags & IFA_F_OPTIMISTIC))) +- continue; +- if (unlikely(score.addr_type == IPV6_ADDR_ANY || +- score.addr_type & IPV6_ADDR_MULTICAST)) { +- LIMIT_NETDEBUG(KERN_DEBUG +- "ADDRCONF: unspecified / multicast address" +- "assigned as unicast address on %s", +- dev->name); +- continue; +- } +- +- score.attrs = 0; +- score.matchlen = 0; +- score.scope = 0; +- score.rule = 0; +- +- if (ifa_result == NULL) { +- /* record it if the first available entry */ +- goto record_it; +- } +- +- /* Rule 1: Prefer same address */ +- if (hiscore.rule < 1) { +- if (ipv6_addr_equal(&ifa_result->addr, daddr)) +- hiscore.attrs |= IPV6_SADDR_SCORE_LOCAL; +- hiscore.rule++; +- } +- if (ipv6_addr_equal(&ifa->addr, daddr)) { +- score.attrs |= IPV6_SADDR_SCORE_LOCAL; +- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LOCAL)) { +- score.rule = 1; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_LOCAL) +- continue; +- } +- +- /* Rule 2: Prefer appropriate scope */ +- if (hiscore.rule < 2) { +- hiscore.scope = __ipv6_addr_src_scope(hiscore.addr_type); +- hiscore.rule++; +- } +- score.scope = __ipv6_addr_src_scope(score.addr_type); +- if (hiscore.scope < score.scope) { +- if (hiscore.scope < daddr_scope) { +- score.rule = 2; +- goto record_it; +- } else +- continue; +- } else if (score.scope < hiscore.scope) { +- if (score.scope < daddr_scope) +- break; /* addresses sorted by scope */ +- else { +- score.rule = 2; +- goto record_it; +- } +- } +- +- /* Rule 3: Avoid deprecated and optimistic addresses */ +- if (hiscore.rule < 3) { +- if (ipv6_saddr_preferred(hiscore.addr_type) || +- (((ifa_result->flags & +- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) +- hiscore.attrs |= IPV6_SADDR_SCORE_PREFERRED; +- hiscore.rule++; +- } +- if (ipv6_saddr_preferred(score.addr_type) || +- (((ifa->flags & +- (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { +- score.attrs |= IPV6_SADDR_SCORE_PREFERRED; +- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { +- score.rule = 3; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED) +- continue; +- } +- +- /* Rule 4: Prefer home address */ +-#ifdef CONFIG_IPV6_MIP6 +- if (hiscore.rule < 4) { +- if (ifa_result->flags & IFA_F_HOMEADDRESS) +- hiscore.attrs |= IPV6_SADDR_SCORE_HOA; +- hiscore.rule++; +- } +- if (ifa->flags & IFA_F_HOMEADDRESS) { +- score.attrs |= IPV6_SADDR_SCORE_HOA; +- if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { +- score.rule = 4; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) +- continue; +- } +-#else +- if (hiscore.rule < 4) +- hiscore.rule++; +-#endif +- +- /* Rule 5: Prefer outgoing interface */ +- if (hiscore.rule < 5) { +- if (daddr_dev == NULL || +- daddr_dev == 
ifa_result->idev->dev) +- hiscore.attrs |= IPV6_SADDR_SCORE_OIF; +- hiscore.rule++; +- } +- if (daddr_dev == NULL || +- daddr_dev == ifa->idev->dev) { +- score.attrs |= IPV6_SADDR_SCORE_OIF; +- if (!(hiscore.attrs & IPV6_SADDR_SCORE_OIF)) { +- score.rule = 5; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_OIF) +- continue; +- } +- +- /* Rule 6: Prefer matching label */ +- if (hiscore.rule < 6) { +- if (ipv6_saddr_label(&ifa_result->addr, hiscore.addr_type) == daddr_label) +- hiscore.attrs |= IPV6_SADDR_SCORE_LABEL; +- hiscore.rule++; +- } +- if (ipv6_saddr_label(&ifa->addr, score.addr_type) == daddr_label) { +- score.attrs |= IPV6_SADDR_SCORE_LABEL; +- if (!(hiscore.attrs & IPV6_SADDR_SCORE_LABEL)) { +- score.rule = 6; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_LABEL) +- continue; +- } +- +-#ifdef CONFIG_IPV6_PRIVACY +- /* Rule 7: Prefer public address +- * Note: prefer temprary address if use_tempaddr >= 2 +- */ +- if (hiscore.rule < 7) { +- if ((!(ifa_result->flags & IFA_F_TEMPORARY)) ^ +- (ifa_result->idev->cnf.use_tempaddr >= 2)) +- hiscore.attrs |= IPV6_SADDR_SCORE_PRIVACY; +- hiscore.rule++; +- } +- if ((!(ifa->flags & IFA_F_TEMPORARY)) ^ +- (ifa->idev->cnf.use_tempaddr >= 2)) { +- score.attrs |= IPV6_SADDR_SCORE_PRIVACY; +- if (!(hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY)) { +- score.rule = 7; +- goto record_it; +- } +- } else { +- if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) +- continue; +- } +-#else +- if (hiscore.rule < 7) +- hiscore.rule++; +-#endif +- /* Rule 8: Use longest matching prefix */ +- if (hiscore.rule < 8) { +- hiscore.matchlen = ipv6_addr_diff(&ifa_result->addr, daddr); +- hiscore.rule++; +- } +- score.matchlen = ipv6_addr_diff(&ifa->addr, daddr); +- if (score.matchlen > hiscore.matchlen) { +- score.rule = 8; +- goto record_it; +- } +-#if 0 +- else if (score.matchlen < hiscore.matchlen) +- continue; +-#endif +- +- /* Final Rule: choose first available one */ +- continue; +-record_it: +- if (ifa_result) +- in6_ifa_put(ifa_result); +- in6_ifa_hold(ifa); +- ifa_result = ifa; +- hiscore = score; +- } +- read_unlock_bh(&idev->lock); +- } +- rcu_read_unlock(); +- read_unlock(&dev_base_lock); +- +- if (!ifa_result) +- return -EADDRNOTAVAIL; +- +- ipv6_addr_copy(saddr, &ifa_result->addr); +- in6_ifa_put(ifa_result); +- return 0; +-} +- +- +-int ipv6_get_saddr(struct dst_entry *dst, +- struct in6_addr *daddr, struct in6_addr *saddr) +-{ +- return ipv6_dev_get_saddr(dst ? 
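
These hunks are part of deleting a stale addrconf.c.orig backup that had been committed by accident; the same logic survives in the live net/ipv6/addrconf.c. The source-address selection walked through above ends with RFC 3484 Rule 8, "use longest matching prefix", which the kernel computes via ipv6_addr_diff(). A standalone sketch of that tie-breaker, with common_prefix_len() as a made-up stand-in:

    #include <stdio.h>
    #include <stdint.h>

    /* Number of leading bits two 128-bit addresses have in common. */
    static int common_prefix_len(const uint8_t a[16], const uint8_t b[16])
    {
            int i, bits = 0;

            for (i = 0; i < 16; i++) {
                    uint8_t x = a[i] ^ b[i];

                    if (!x) {
                            bits += 8;
                            continue;
                    }
                    while (!(x & 0x80)) {   /* count equal leading bits */
                            bits++;
                            x <<= 1;
                    }
                    break;
            }
            return bits;
    }

    int main(void)
    {
            uint8_t dst[16] = { 0x20, 0x01, 0x0d, 0xb8 };           /* 2001:db8::  */
            uint8_t c1[16]  = { 0x20, 0x01, 0x0d, 0xb8, [15] = 1 }; /* 2001:db8::1 */
            uint8_t c2[16]  = { 0xfe, 0x80 };                       /* fe80::      */

            printf("c1 shares %d bits, c2 shares %d bits\n",
                   common_prefix_len(c1, dst), common_prefix_len(c2, dst));
            return 0;
    }

Against a 2001:db8:: destination this prints 127 bits for 2001:db8::1 and 0 for fe80::, which is why the candidate on the destination's own prefix wins the final tie-break.
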
ip6_dst_idev(dst)->dev : NULL, daddr, saddr); +-} +- +-EXPORT_SYMBOL(ipv6_get_saddr); +- +-int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, +- unsigned char banned_flags) +-{ +- struct inet6_dev *idev; +- int err = -EADDRNOTAVAIL; +- +- rcu_read_lock(); +- if ((idev = __in6_dev_get(dev)) != NULL) { +- struct inet6_ifaddr *ifp; +- +- read_lock_bh(&idev->lock); +- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { +- if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { +- ipv6_addr_copy(addr, &ifp->addr); +- err = 0; +- break; +- } +- } +- read_unlock_bh(&idev->lock); +- } +- rcu_read_unlock(); +- return err; +-} +- +-static int ipv6_count_addresses(struct inet6_dev *idev) +-{ +- int cnt = 0; +- struct inet6_ifaddr *ifp; +- +- read_lock_bh(&idev->lock); +- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) +- cnt++; +- read_unlock_bh(&idev->lock); +- return cnt; +-} +- +-int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict) +-{ +- struct inet6_ifaddr * ifp; +- u8 hash = ipv6_addr_hash(addr); +- +- read_lock_bh(&addrconf_hash_lock); +- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { +- if (ipv6_addr_equal(&ifp->addr, addr) && +- !(ifp->flags&IFA_F_TENTATIVE)) { +- if (dev == NULL || ifp->idev->dev == dev || +- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) +- break; +- } +- } +- read_unlock_bh(&addrconf_hash_lock); +- return ifp != NULL; +-} +- +-EXPORT_SYMBOL(ipv6_chk_addr); +- +-static +-int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev) +-{ +- struct inet6_ifaddr * ifp; +- u8 hash = ipv6_addr_hash(addr); +- +- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { +- if (ipv6_addr_equal(&ifp->addr, addr)) { +- if (dev == NULL || ifp->idev->dev == dev) +- break; +- } +- } +- return ifp != NULL; +-} +- +-struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict) +-{ +- struct inet6_ifaddr * ifp; +- u8 hash = ipv6_addr_hash(addr); +- +- read_lock_bh(&addrconf_hash_lock); +- for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) { +- if (ipv6_addr_equal(&ifp->addr, addr)) { +- if (dev == NULL || ifp->idev->dev == dev || +- !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { +- in6_ifa_hold(ifp); +- break; +- } +- } +- } +- read_unlock_bh(&addrconf_hash_lock); +- +- return ifp; +-} +- +-int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +-{ +- const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; +- const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); +- __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; +- __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); +- int sk_ipv6only = ipv6_only_sock(sk); +- int sk2_ipv6only = inet_v6_ipv6only(sk2); +- int addr_type = ipv6_addr_type(sk_rcv_saddr6); +- int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; +- +- if (!sk2_rcv_saddr && !sk_ipv6only) +- return 1; +- +- if (addr_type2 == IPV6_ADDR_ANY && +- !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) +- return 1; +- +- if (addr_type == IPV6_ADDR_ANY && +- !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) +- return 1; +- +- if (sk2_rcv_saddr6 && +- ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) +- return 1; +- +- if (addr_type == IPV6_ADDR_MAPPED && +- !sk2_ipv6only && +- (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr)) +- return 1; +- +- return 0; +-} +- +-/* Gets referenced address, destroys ifaddr */ +- +-static void addrconf_dad_stop(struct inet6_ifaddr *ifp) +-{ +- if (ifp->flags&IFA_F_PERMANENT) { +- spin_lock_bh(&ifp->lock); +- addrconf_del_timer(ifp); +- ifp->flags |= IFA_F_TENTATIVE; +- spin_unlock_bh(&ifp->lock); +- in6_ifa_put(ifp); +-#ifdef CONFIG_IPV6_PRIVACY +- } else if (ifp->flags&IFA_F_TEMPORARY) { +- struct inet6_ifaddr *ifpub; +- spin_lock_bh(&ifp->lock); +- ifpub = ifp->ifpub; +- if (ifpub) { +- in6_ifa_hold(ifpub); +- spin_unlock_bh(&ifp->lock); +- ipv6_create_tempaddr(ifpub, ifp); +- in6_ifa_put(ifpub); +- } else { +- spin_unlock_bh(&ifp->lock); +- } +- ipv6_del_addr(ifp); +-#endif +- } else +- ipv6_del_addr(ifp); +-} +- +-void addrconf_dad_failure(struct inet6_ifaddr *ifp) +-{ +- if (net_ratelimit()) +- printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); +- addrconf_dad_stop(ifp); +-} +- +-/* Join to solicited addr multicast group. */ +- +-void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) +-{ +- struct in6_addr maddr; +- +- if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) +- return; +- +- addrconf_addr_solict_mult(addr, &maddr); +- ipv6_dev_mc_inc(dev, &maddr); +-} +- +-void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) +-{ +- struct in6_addr maddr; +- +- if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) +- return; +- +- addrconf_addr_solict_mult(addr, &maddr); +- __ipv6_dev_mc_dec(idev, &maddr); +-} +- +-static void addrconf_join_anycast(struct inet6_ifaddr *ifp) +-{ +- struct in6_addr addr; +- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); +- if (ipv6_addr_any(&addr)) +- return; +- ipv6_dev_ac_inc(ifp->idev->dev, &addr); +-} +- +-static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +-{ +- struct in6_addr addr; +- ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); +- if (ipv6_addr_any(&addr)) +- return; +- __ipv6_dev_ac_dec(ifp->idev, &addr); +-} +- +-static int addrconf_ifid_eui48(u8 *eui, struct net_device *dev) +-{ +- if (dev->addr_len != ETH_ALEN) +- return -1; +- memcpy(eui, dev->dev_addr, 3); +- memcpy(eui + 5, dev->dev_addr + 3, 3); +- +- /* +- * The zSeries OSA network cards can be shared among various +- * OS instances, but the OSA cards have only one MAC address. +- * This leads to duplicate address conflicts in conjunction +- * with IPv6 if more than one instance uses the same card. +- * +- * The driver for these cards can deliver a unique 16-bit +- * identifier for each instance sharing the same card. It is +- * placed instead of 0xFFFE in the interface identifier. The +- * "u" bit of the interface identifier is not inverted in this +- * case. Hence the resulting interface identifier has local +- * scope according to RFC2373. 
+- */ +- if (dev->dev_id) { +- eui[3] = (dev->dev_id >> 8) & 0xFF; +- eui[4] = dev->dev_id & 0xFF; +- } else { +- eui[3] = 0xFF; +- eui[4] = 0xFE; +- eui[0] ^= 2; +- } +- return 0; +-} +- +-static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev) +-{ +- /* XXX: inherit EUI-64 from other interface -- yoshfuji */ +- if (dev->addr_len != ARCNET_ALEN) +- return -1; +- memset(eui, 0, 7); +- eui[7] = *(u8*)dev->dev_addr; +- return 0; +-} +- +-static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) +-{ +- if (dev->addr_len != INFINIBAND_ALEN) +- return -1; +- memcpy(eui, dev->dev_addr + 12, 8); +- eui[0] |= 2; +- return 0; +-} +- +-static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) +-{ +- switch (dev->type) { +- case ARPHRD_ETHER: +- case ARPHRD_FDDI: +- case ARPHRD_IEEE802_TR: +- return addrconf_ifid_eui48(eui, dev); +- case ARPHRD_ARCNET: +- return addrconf_ifid_arcnet(eui, dev); +- case ARPHRD_INFINIBAND: +- return addrconf_ifid_infiniband(eui, dev); +- } +- return -1; +-} +- +-static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) +-{ +- int err = -1; +- struct inet6_ifaddr *ifp; +- +- read_lock_bh(&idev->lock); +- for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { +- if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { +- memcpy(eui, ifp->addr.s6_addr+8, 8); +- err = 0; +- break; +- } +- } +- read_unlock_bh(&idev->lock); +- return err; +-} +- +-#ifdef CONFIG_IPV6_PRIVACY +-/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ +-static int __ipv6_regen_rndid(struct inet6_dev *idev) +-{ +-regen: +- get_random_bytes(idev->rndid, sizeof(idev->rndid)); +- idev->rndid[0] &= ~0x02; +- +- /* +- * : +- * check if generated address is not inappropriate +- * +- * - Reserved subnet anycast (RFC 2526) +- * 11111101 11....11 1xxxxxxx +- * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1 +- * 00-00-5E-FE-xx-xx-xx-xx +- * - value 0 +- * - XXX: already assigned to an address on the device +- */ +- if (idev->rndid[0] == 0xfd && +- (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff && +- (idev->rndid[7]&0x80)) +- goto regen; +- if ((idev->rndid[0]|idev->rndid[1]) == 0) { +- if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe) +- goto regen; +- if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) +- goto regen; +- } +- +- return 0; +-} +- +-static void ipv6_regen_rndid(unsigned long data) +-{ +- struct inet6_dev *idev = (struct inet6_dev *) data; +- unsigned long expires; +- +- rcu_read_lock_bh(); +- write_lock_bh(&idev->lock); +- +- if (idev->dead) +- goto out; +- +- if (__ipv6_regen_rndid(idev) < 0) +- goto out; +- +- expires = jiffies + +- idev->cnf.temp_prefered_lft * HZ - +- idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor; +- if (time_before(expires, jiffies)) { +- printk(KERN_WARNING +- "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n", +- idev->dev->name); +- goto out; +- } +- +- if (!mod_timer(&idev->regen_timer, expires)) +- in6_dev_hold(idev); +- +-out: +- write_unlock_bh(&idev->lock); +- rcu_read_unlock_bh(); +- in6_dev_put(idev); +-} +- +-static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { +- int ret = 0; +- +- if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) +- ret = __ipv6_regen_rndid(idev); +- return ret; +-} +-#endif +- +-/* +- * Add prefix route. 
+- */ +- +-static void +-addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, +- unsigned long expires, u32 flags) +-{ +- struct fib6_config cfg = { +- .fc_table = RT6_TABLE_PREFIX, +- .fc_metric = IP6_RT_PRIO_ADDRCONF, +- .fc_ifindex = dev->ifindex, +- .fc_expires = expires, +- .fc_dst_len = plen, +- .fc_flags = RTF_UP | flags, +- }; +- +- ipv6_addr_copy(&cfg.fc_dst, pfx); +- +- /* Prevent useless cloning on PtP SIT. +- This thing is done here expecting that the whole +- class of non-broadcast devices need not cloning. +- */ +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +- if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) +- cfg.fc_flags |= RTF_NONEXTHOP; +-#endif +- +- ip6_route_add(&cfg); +-} +- +-/* Create "default" multicast route to the interface */ +- +-static void addrconf_add_mroute(struct net_device *dev) +-{ +- struct fib6_config cfg = { +- .fc_table = RT6_TABLE_LOCAL, +- .fc_metric = IP6_RT_PRIO_ADDRCONF, +- .fc_ifindex = dev->ifindex, +- .fc_dst_len = 8, +- .fc_flags = RTF_UP, +- }; +- +- ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); +- +- ip6_route_add(&cfg); +-} +- +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +-static void sit_route_add(struct net_device *dev) +-{ +- struct fib6_config cfg = { +- .fc_table = RT6_TABLE_MAIN, +- .fc_metric = IP6_RT_PRIO_ADDRCONF, +- .fc_ifindex = dev->ifindex, +- .fc_dst_len = 96, +- .fc_flags = RTF_UP | RTF_NONEXTHOP, +- }; +- +- /* prefix length - 96 bits "::d.d.d.d" */ +- ip6_route_add(&cfg); +-} +-#endif +- +-static void addrconf_add_lroute(struct net_device *dev) +-{ +- struct in6_addr addr; +- +- ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); +- addrconf_prefix_route(&addr, 64, dev, 0, 0); +-} +- +-static struct inet6_dev *addrconf_add_dev(struct net_device *dev) +-{ +- struct inet6_dev *idev; +- +- ASSERT_RTNL(); +- +- if ((idev = ipv6_find_idev(dev)) == NULL) +- return NULL; +- +- /* Add default multicast route */ +- addrconf_add_mroute(dev); +- +- /* Add link local route */ +- addrconf_add_lroute(dev); +- return idev; +-} +- +-void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) +-{ +- struct prefix_info *pinfo; +- __u32 valid_lft; +- __u32 prefered_lft; +- int addr_type; +- unsigned long rt_expires; +- struct inet6_dev *in6_dev; +- +- pinfo = (struct prefix_info *) opt; +- +- if (len < sizeof(struct prefix_info)) { +- ADBG(("addrconf: prefix option too short\n")); +- return; +- } +- +- /* +- * Validation checks ([ADDRCONF], page 19) +- */ +- +- addr_type = ipv6_addr_type(&pinfo->prefix); +- +- if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)) +- return; +- +- valid_lft = ntohl(pinfo->valid); +- prefered_lft = ntohl(pinfo->prefered); +- +- if (prefered_lft > valid_lft) { +- if (net_ratelimit()) +- printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n"); +- return; +- } +- +- in6_dev = in6_dev_get(dev); +- +- if (in6_dev == NULL) { +- if (net_ratelimit()) +- printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name); +- return; +- } +- +- /* +- * Two things going on here: +- * 1) Add routes for on-link prefixes +- * 2) Configure prefixes with the auto flag set +- */ +- +- /* Avoid arithmetic overflow. Really, we could +- save rt_expires in seconds, likely valid_lft, +- but it would require division in fib gc, that it +- not good. 
+- */ +- if (valid_lft >= 0x7FFFFFFF/HZ) +- rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); +- else +- rt_expires = valid_lft * HZ; +- +- /* +- * We convert this (in jiffies) to clock_t later. +- * Avoid arithmetic overflow there as well. +- * Overflow can happen only if HZ < USER_HZ. +- */ +- if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) +- rt_expires = 0x7FFFFFFF / USER_HZ; +- +- if (pinfo->onlink) { +- struct rt6_info *rt; +- rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1); +- +- if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { +- if (rt->rt6i_flags&RTF_EXPIRES) { +- if (valid_lft == 0) { +- ip6_del_rt(rt); +- rt = NULL; +- } else { +- rt->rt6i_expires = jiffies + rt_expires; +- } +- } +- } else if (valid_lft) { +- addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, +- dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); +- } +- if (rt) +- dst_release(&rt->u.dst); +- } +- +- /* Try to figure out our local address for this prefix */ +- +- if (pinfo->autoconf && in6_dev->cnf.autoconf) { +- struct inet6_ifaddr * ifp; +- struct in6_addr addr; +- int create = 0, update_lft = 0; +- +- if (pinfo->prefix_len == 64) { +- memcpy(&addr, &pinfo->prefix, 8); +- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && +- ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { +- in6_dev_put(in6_dev); +- return; +- } +- goto ok; +- } +- if (net_ratelimit()) +- printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n", +- pinfo->prefix_len); +- in6_dev_put(in6_dev); +- return; +- +-ok: +- +- ifp = ipv6_get_ifaddr(&addr, dev, 1); +- +- if (ifp == NULL && valid_lft) { +- int max_addresses = in6_dev->cnf.max_addresses; +- u32 addr_flags = 0; +- +-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- if (in6_dev->cnf.optimistic_dad && +- !ipv6_devconf.forwarding) +- addr_flags = IFA_F_OPTIMISTIC; +-#endif +- +- /* Do not allow to create too much of autoconfigured +- * addresses; this would be too easy way to crash kernel. 
+- */ +- if (!max_addresses || +- ipv6_count_addresses(in6_dev) < max_addresses) +- ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, +- addr_type&IPV6_ADDR_SCOPE_MASK, +- addr_flags); +- +- if (!ifp || IS_ERR(ifp)) { +- in6_dev_put(in6_dev); +- return; +- } +- +- update_lft = create = 1; +- ifp->cstamp = jiffies; +- addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT); +- } +- +- if (ifp) { +- int flags; +- unsigned long now; +-#ifdef CONFIG_IPV6_PRIVACY +- struct inet6_ifaddr *ift; +-#endif +- u32 stored_lft; +- +- /* update lifetime (RFC2462 5.5.3 e) */ +- spin_lock(&ifp->lock); +- now = jiffies; +- if (ifp->valid_lft > (now - ifp->tstamp) / HZ) +- stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; +- else +- stored_lft = 0; +- if (!update_lft && stored_lft) { +- if (valid_lft > MIN_VALID_LIFETIME || +- valid_lft > stored_lft) +- update_lft = 1; +- else if (stored_lft <= MIN_VALID_LIFETIME) { +- /* valid_lft <= stored_lft is always true */ +- /* XXX: IPsec */ +- update_lft = 0; +- } else { +- valid_lft = MIN_VALID_LIFETIME; +- if (valid_lft < prefered_lft) +- prefered_lft = valid_lft; +- update_lft = 1; +- } +- } +- +- if (update_lft) { +- ifp->valid_lft = valid_lft; +- ifp->prefered_lft = prefered_lft; +- ifp->tstamp = now; +- flags = ifp->flags; +- ifp->flags &= ~IFA_F_DEPRECATED; +- spin_unlock(&ifp->lock); +- +- if (!(flags&IFA_F_TENTATIVE)) +- ipv6_ifa_notify(0, ifp); +- } else +- spin_unlock(&ifp->lock); +- +-#ifdef CONFIG_IPV6_PRIVACY +- read_lock_bh(&in6_dev->lock); +- /* update all temporary addresses in the list */ +- for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) { +- /* +- * When adjusting the lifetimes of an existing +- * temporary address, only lower the lifetimes. +- * Implementations must not increase the +- * lifetimes of an existing temporary address +- * when processing a Prefix Information Option. +- */ +- spin_lock(&ift->lock); +- flags = ift->flags; +- if (ift->valid_lft > valid_lft && +- ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ) +- ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ; +- if (ift->prefered_lft > prefered_lft && +- ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ) +- ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ; +- spin_unlock(&ift->lock); +- if (!(flags&IFA_F_TENTATIVE)) +- ipv6_ifa_notify(0, ift); +- } +- +- if (create && in6_dev->cnf.use_tempaddr > 0) { +- /* +- * When a new public address is created as described in [ADDRCONF], +- * also create a new temporary address. +- */ +- read_unlock_bh(&in6_dev->lock); +- ipv6_create_tempaddr(ifp, NULL); +- } else { +- read_unlock_bh(&in6_dev->lock); +- } +-#endif +- in6_ifa_put(ifp); +- addrconf_verify(0); +- } +- } +- inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); +- in6_dev_put(in6_dev); +-} +- +-/* +- * Set destination address. +- * Special case for SIT interfaces where we create a new "virtual" +- * device. 
+- */ +-int addrconf_set_dstaddr(void __user *arg) +-{ +- struct in6_ifreq ireq; +- struct net_device *dev; +- int err = -EINVAL; +- +- rtnl_lock(); +- +- err = -EFAULT; +- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +- goto err_exit; +- +- dev = __dev_get_by_index(ireq.ifr6_ifindex); +- +- err = -ENODEV; +- if (dev == NULL) +- goto err_exit; +- +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +- if (dev->type == ARPHRD_SIT) { +- struct ifreq ifr; +- mm_segment_t oldfs; +- struct ip_tunnel_parm p; +- +- err = -EADDRNOTAVAIL; +- if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4)) +- goto err_exit; +- +- memset(&p, 0, sizeof(p)); +- p.iph.daddr = ireq.ifr6_addr.s6_addr32[3]; +- p.iph.saddr = 0; +- p.iph.version = 4; +- p.iph.ihl = 5; +- p.iph.protocol = IPPROTO_IPV6; +- p.iph.ttl = 64; +- ifr.ifr_ifru.ifru_data = (void __user *)&p; +- +- oldfs = get_fs(); set_fs(KERNEL_DS); +- err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL); +- set_fs(oldfs); +- +- if (err == 0) { +- err = -ENOBUFS; +- if ((dev = __dev_get_by_name(p.name)) == NULL) +- goto err_exit; +- err = dev_open(dev); +- } +- } +-#endif +- +-err_exit: +- rtnl_unlock(); +- return err; +-} +- +-/* +- * Manual configuration of address on an interface +- */ +-static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, +- __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) +-{ +- struct inet6_ifaddr *ifp; +- struct inet6_dev *idev; +- struct net_device *dev; +- int scope; +- u32 flags = RTF_EXPIRES; +- +- ASSERT_RTNL(); +- +- /* check the lifetime */ +- if (!valid_lft || prefered_lft > valid_lft) +- return -EINVAL; +- +- if ((dev = __dev_get_by_index(ifindex)) == NULL) +- return -ENODEV; +- +- if ((idev = addrconf_add_dev(dev)) == NULL) +- return -ENOBUFS; +- +- scope = ipv6_addr_scope(pfx); +- +- if (valid_lft == INFINITY_LIFE_TIME) { +- ifa_flags |= IFA_F_PERMANENT; +- flags = 0; +- } else if (valid_lft >= 0x7FFFFFFF/HZ) +- valid_lft = 0x7FFFFFFF/HZ; +- +- if (prefered_lft == 0) +- ifa_flags |= IFA_F_DEPRECATED; +- else if ((prefered_lft >= 0x7FFFFFFF/HZ) && +- (prefered_lft != INFINITY_LIFE_TIME)) +- prefered_lft = 0x7FFFFFFF/HZ; +- +- ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); +- +- if (!IS_ERR(ifp)) { +- spin_lock_bh(&ifp->lock); +- ifp->valid_lft = valid_lft; +- ifp->prefered_lft = prefered_lft; +- ifp->tstamp = jiffies; +- spin_unlock_bh(&ifp->lock); +- +- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, +- jiffies_to_clock_t(valid_lft * HZ), flags); +- /* +- * Note that section 3.1 of RFC 4429 indicates +- * that the Optimistic flag should not be set for +- * manually configured addresses +- */ +- addrconf_dad_start(ifp, 0); +- in6_ifa_put(ifp); +- addrconf_verify(0); +- return 0; +- } +- +- return PTR_ERR(ifp); +-} +- +-static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen) +-{ +- struct inet6_ifaddr *ifp; +- struct inet6_dev *idev; +- struct net_device *dev; +- +- if ((dev = __dev_get_by_index(ifindex)) == NULL) +- return -ENODEV; +- +- if ((idev = __in6_dev_get(dev)) == NULL) +- return -ENXIO; +- +- read_lock_bh(&idev->lock); +- for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) { +- if (ifp->prefix_len == plen && +- ipv6_addr_equal(pfx, &ifp->addr)) { +- in6_ifa_hold(ifp); +- read_unlock_bh(&idev->lock); +- +- ipv6_del_addr(ifp); +- +- /* If the last address is deleted administratively, +- disable IPv6 on this interface. 
+- */ +- if (idev->addr_list == NULL) +- addrconf_ifdown(idev->dev, 1); +- return 0; +- } +- } +- read_unlock_bh(&idev->lock); +- return -EADDRNOTAVAIL; +-} +- +- +-int addrconf_add_ifaddr(void __user *arg) +-{ +- struct in6_ifreq ireq; +- int err; +- +- if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- +- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +- return -EFAULT; +- +- rtnl_lock(); +- err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, +- IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); +- rtnl_unlock(); +- return err; +-} +- +-int addrconf_del_ifaddr(void __user *arg) +-{ +- struct in6_ifreq ireq; +- int err; +- +- if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- +- if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq))) +- return -EFAULT; +- +- rtnl_lock(); +- err = inet6_addr_del(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen); +- rtnl_unlock(); +- return err; +-} +- +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +-static void sit_add_v4_addrs(struct inet6_dev *idev) +-{ +- struct inet6_ifaddr * ifp; +- struct in6_addr addr; +- struct net_device *dev; +- int scope; +- +- ASSERT_RTNL(); +- +- memset(&addr, 0, sizeof(struct in6_addr)); +- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); +- +- if (idev->dev->flags&IFF_POINTOPOINT) { +- addr.s6_addr32[0] = htonl(0xfe800000); +- scope = IFA_LINK; +- } else { +- scope = IPV6_ADDR_COMPATv4; +- } +- +- if (addr.s6_addr32[3]) { +- ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); +- if (!IS_ERR(ifp)) { +- spin_lock_bh(&ifp->lock); +- ifp->flags &= ~IFA_F_TENTATIVE; +- spin_unlock_bh(&ifp->lock); +- ipv6_ifa_notify(RTM_NEWADDR, ifp); +- in6_ifa_put(ifp); +- } +- return; +- } +- +- for_each_netdev(dev) { +- struct in_device * in_dev = __in_dev_get_rtnl(dev); +- if (in_dev && (dev->flags & IFF_UP)) { +- struct in_ifaddr * ifa; +- +- int flag = scope; +- +- for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { +- int plen; +- +- addr.s6_addr32[3] = ifa->ifa_local; +- +- if (ifa->ifa_scope == RT_SCOPE_LINK) +- continue; +- if (ifa->ifa_scope >= RT_SCOPE_HOST) { +- if (idev->dev->flags&IFF_POINTOPOINT) +- continue; +- flag |= IFA_HOST; +- } +- if (idev->dev->flags&IFF_POINTOPOINT) +- plen = 64; +- else +- plen = 96; +- +- ifp = ipv6_add_addr(idev, &addr, plen, flag, +- IFA_F_PERMANENT); +- if (!IS_ERR(ifp)) { +- spin_lock_bh(&ifp->lock); +- ifp->flags &= ~IFA_F_TENTATIVE; +- spin_unlock_bh(&ifp->lock); +- ipv6_ifa_notify(RTM_NEWADDR, ifp); +- in6_ifa_put(ifp); +- } +- } +- } +- } +-} +-#endif +- +-static void init_loopback(struct net_device *dev) +-{ +- struct inet6_dev *idev; +- struct inet6_ifaddr * ifp; +- +- /* ::1 */ +- +- ASSERT_RTNL(); +- +- if ((idev = ipv6_find_idev(dev)) == NULL) { +- printk(KERN_DEBUG "init loopback: add_dev failed\n"); +- return; +- } +- +- ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); +- if (!IS_ERR(ifp)) { +- spin_lock_bh(&ifp->lock); +- ifp->flags &= ~IFA_F_TENTATIVE; +- spin_unlock_bh(&ifp->lock); +- ipv6_ifa_notify(RTM_NEWADDR, ifp); +- in6_ifa_put(ifp); +- } +-} +- +-static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) +-{ +- struct inet6_ifaddr * ifp; +- u32 addr_flags = IFA_F_PERMANENT; +- +-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- if (idev->cnf.optimistic_dad && +- !ipv6_devconf.forwarding) +- addr_flags |= IFA_F_OPTIMISTIC; +-#endif +- +- +- ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); +- if (!IS_ERR(ifp)) { +- 
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); +- addrconf_dad_start(ifp, 0); +- in6_ifa_put(ifp); +- } +-} +- +-static void addrconf_dev_config(struct net_device *dev) +-{ +- struct in6_addr addr; +- struct inet6_dev * idev; +- +- ASSERT_RTNL(); +- +- if ((dev->type != ARPHRD_ETHER) && +- (dev->type != ARPHRD_FDDI) && +- (dev->type != ARPHRD_IEEE802_TR) && +- (dev->type != ARPHRD_ARCNET) && +- (dev->type != ARPHRD_INFINIBAND)) { +- /* Alas, we support only Ethernet autoconfiguration. */ +- return; +- } +- +- idev = addrconf_add_dev(dev); +- if (idev == NULL) +- return; +- +- memset(&addr, 0, sizeof(struct in6_addr)); +- addr.s6_addr32[0] = htonl(0xFE800000); +- +- if (ipv6_generate_eui64(addr.s6_addr + 8, dev) == 0) +- addrconf_add_linklocal(idev, &addr); +-} +- +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +-static void addrconf_sit_config(struct net_device *dev) +-{ +- struct inet6_dev *idev; +- +- ASSERT_RTNL(); +- +- /* +- * Configure the tunnel with one of our IPv4 +- * addresses... we should configure all of +- * our v4 addrs in the tunnel +- */ +- +- if ((idev = ipv6_find_idev(dev)) == NULL) { +- printk(KERN_DEBUG "init sit: add_dev failed\n"); +- return; +- } +- +- sit_add_v4_addrs(idev); +- +- if (dev->flags&IFF_POINTOPOINT) { +- addrconf_add_mroute(dev); +- addrconf_add_lroute(dev); +- } else +- sit_route_add(dev); +-} +-#endif +- +-static inline int +-ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) +-{ +- struct in6_addr lladdr; +- +- if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { +- addrconf_add_linklocal(idev, &lladdr); +- return 0; +- } +- return -1; +-} +- +-static void ip6_tnl_add_linklocal(struct inet6_dev *idev) +-{ +- struct net_device *link_dev; +- +- /* first try to inherit the link-local address from the link device */ +- if (idev->dev->iflink && +- (link_dev = __dev_get_by_index(idev->dev->iflink))) { +- if (!ipv6_inherit_linklocal(idev, link_dev)) +- return; +- } +- /* then try to inherit it from any device */ +- for_each_netdev(link_dev) { +- if (!ipv6_inherit_linklocal(idev, link_dev)) +- return; +- } +- printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n"); +-} +- +-/* +- * Autoconfigure tunnel with a link-local address so routing protocols, +- * DHCPv6, MLD etc. can be run over the virtual link +- */ +- +-static void addrconf_ip6_tnl_config(struct net_device *dev) +-{ +- struct inet6_dev *idev; +- +- ASSERT_RTNL(); +- +- if ((idev = addrconf_add_dev(dev)) == NULL) { +- printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n"); +- return; +- } +- ip6_tnl_add_linklocal(idev); +-} +- +-static int addrconf_notify(struct notifier_block *this, unsigned long event, +- void * data) +-{ +- struct net_device *dev = (struct net_device *) data; +- struct inet6_dev *idev = __in6_dev_get(dev); +- int run_pending = 0; +- +- switch(event) { +- case NETDEV_REGISTER: +- if (!idev && dev->mtu >= IPV6_MIN_MTU) { +- idev = ipv6_add_dev(dev); +- if (!idev) +- printk(KERN_WARNING "IPv6: add_dev failed for %s\n", +- dev->name); +- } +- break; +- case NETDEV_UP: +- case NETDEV_CHANGE: +- if (event == NETDEV_UP) { +- if (!addrconf_qdisc_ok(dev)) { +- /* device is not ready yet. */ +- printk(KERN_INFO +- "ADDRCONF(NETDEV_UP): %s: " +- "link is not ready\n", +- dev->name); +- break; +- } +- +- if (idev) +- idev->if_flags |= IF_READY; +- } else { +- if (!addrconf_qdisc_ok(dev)) { +- /* device is still not ready. 
*/ +- break; +- } +- +- if (idev) { +- if (idev->if_flags & IF_READY) { +- /* device is already configured. */ +- break; +- } +- idev->if_flags |= IF_READY; +- } +- +- printk(KERN_INFO +- "ADDRCONF(NETDEV_CHANGE): %s: " +- "link becomes ready\n", +- dev->name); +- +- run_pending = 1; +- } +- +- switch(dev->type) { +-#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) +- case ARPHRD_SIT: +- addrconf_sit_config(dev); +- break; +-#endif +- case ARPHRD_TUNNEL6: +- addrconf_ip6_tnl_config(dev); +- break; +- case ARPHRD_LOOPBACK: +- init_loopback(dev); +- break; +- +- default: +- addrconf_dev_config(dev); +- break; +- } +- if (idev) { +- if (run_pending) +- addrconf_dad_run(idev); +- +- /* If the MTU changed during the interface down, when the +- interface up, the changed MTU must be reflected in the +- idev as well as routers. +- */ +- if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) { +- rt6_mtu_change(dev, dev->mtu); +- idev->cnf.mtu6 = dev->mtu; +- } +- idev->tstamp = jiffies; +- inet6_ifinfo_notify(RTM_NEWLINK, idev); +- /* If the changed mtu during down is lower than IPV6_MIN_MTU +- stop IPv6 on this interface. +- */ +- if (dev->mtu < IPV6_MIN_MTU) +- addrconf_ifdown(dev, event != NETDEV_DOWN); +- } +- break; +- +- case NETDEV_CHANGEMTU: +- if ( idev && dev->mtu >= IPV6_MIN_MTU) { +- rt6_mtu_change(dev, dev->mtu); +- idev->cnf.mtu6 = dev->mtu; +- break; +- } +- +- /* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */ +- +- case NETDEV_DOWN: +- case NETDEV_UNREGISTER: +- /* +- * Remove all addresses from this interface. +- */ +- addrconf_ifdown(dev, event != NETDEV_DOWN); +- break; +- +- case NETDEV_CHANGENAME: +- if (idev) { +- snmp6_unregister_dev(idev); +-#ifdef CONFIG_SYSCTL +- addrconf_sysctl_unregister(&idev->cnf); +- neigh_sysctl_unregister(idev->nd_parms); +- neigh_sysctl_register(dev, idev->nd_parms, +- NET_IPV6, NET_IPV6_NEIGH, "ipv6", +- &ndisc_ifinfo_sysctl_change, +- NULL); +- addrconf_sysctl_register(idev, &idev->cnf); +-#endif +- snmp6_register_dev(idev); +- } +- break; +- } +- +- return NOTIFY_OK; +-} +- +-/* +- * addrconf module should be notified of a device going up +- */ +-static struct notifier_block ipv6_dev_notf = { +- .notifier_call = addrconf_notify, +- .priority = 0 +-}; +- +-static int addrconf_ifdown(struct net_device *dev, int how) +-{ +- struct inet6_dev *idev; +- struct inet6_ifaddr *ifa, **bifa; +- int i; +- +- ASSERT_RTNL(); +- +- if (dev == &loopback_dev && how == 1) +- how = 0; +- +- rt6_ifdown(dev); +- neigh_ifdown(&nd_tbl, dev); +- +- idev = __in6_dev_get(dev); +- if (idev == NULL) +- return -ENODEV; +- +- /* Step 1: remove reference to ipv6 device from parent device. +- Do not dev_put! 
+- */
+-	if (how == 1) {
+-		idev->dead = 1;
+-
+-		/* protected by rtnl_lock */
+-		rcu_assign_pointer(dev->ip6_ptr, NULL);
+-
+-		/* Step 1.5: remove snmp6 entry */
+-		snmp6_unregister_dev(idev);
+-
+-	}
+-
+-	/* Step 2: clear hash table */
+-	for (i=0; i<IN6_ADDR_HSIZE; i++) {
+-		bifa = &inet6_addr_lst[i];
+-
+-		write_lock_bh(&addrconf_hash_lock);
+-		while ((ifa = *bifa) != NULL) {
+-			if (ifa->idev == idev) {
+-				*bifa = ifa->lst_next;
+-				ifa->lst_next = NULL;
+-				addrconf_del_timer(ifa);
+-				in6_ifa_put(ifa);
+-				continue;
+-			}
+-			bifa = &ifa->lst_next;
+-		}
+-		write_unlock_bh(&addrconf_hash_lock);
+-	}
+-
+-	write_lock_bh(&idev->lock);
+-
+-	/* Step 3: clear flags for stateless addrconf */
+-	if (how != 1)
+-		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
+-
+-	/* Step 4: clear address list */
+-#ifdef CONFIG_IPV6_PRIVACY
+-	if (how == 1 && del_timer(&idev->regen_timer))
+-		in6_dev_put(idev);
+-
+-	/* clear tempaddr list */
+-	while ((ifa = idev->tempaddr_list) != NULL) {
+-		idev->tempaddr_list = ifa->tmp_next;
+-		ifa->tmp_next = NULL;
+-		ifa->dead = 1;
+-		write_unlock_bh(&idev->lock);
+-		spin_lock_bh(&ifa->lock);
+-
+-		if (ifa->ifpub) {
+-			in6_ifa_put(ifa->ifpub);
+-			ifa->ifpub = NULL;
+-		}
+-		spin_unlock_bh(&ifa->lock);
+-		in6_ifa_put(ifa);
+-		write_lock_bh(&idev->lock);
+-	}
+-#endif
+-	while ((ifa = idev->addr_list) != NULL) {
+-		idev->addr_list = ifa->if_next;
+-		ifa->if_next = NULL;
+-		ifa->dead = 1;
+-		addrconf_del_timer(ifa);
+-		write_unlock_bh(&idev->lock);
+-
+-		__ipv6_ifa_notify(RTM_DELADDR, ifa);
+-		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+-		in6_ifa_put(ifa);
+-
+-		write_lock_bh(&idev->lock);
+-	}
+-	write_unlock_bh(&idev->lock);
+-
+-	/* Step 5: Discard multicast list */
+-
+-	if (how == 1)
+-		ipv6_mc_destroy_dev(idev);
+-	else
+-		ipv6_mc_down(idev);
+-
+-	/* Step 5: netlink notification of this interface */
+-	idev->tstamp = jiffies;
+-	inet6_ifinfo_notify(RTM_DELLINK, idev);
+-
+-	/* Shot the device (if unregistered) */
+-
+-	if (how == 1) {
+-#ifdef CONFIG_SYSCTL
+-		addrconf_sysctl_unregister(&idev->cnf);
+-		neigh_sysctl_unregister(idev->nd_parms);
+-#endif
+-		neigh_parms_release(&nd_tbl, idev->nd_parms);
+-		neigh_ifdown(&nd_tbl, dev);
+-		in6_dev_put(idev);
+-	}
+-	return 0;
+-}
+-
+-static void addrconf_rs_timer(unsigned long data)
+-{
+-	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+-
+-	if (ifp->idev->cnf.forwarding)
+-		goto out;
+-
+-	if (ifp->idev->if_flags & IF_RA_RCVD) {
+-		/*
+-		 *	Announcement received after solicitation
+-		 *	was sent
+-		 */
+-		goto out;
+-	}
+-
+-	spin_lock(&ifp->lock);
+-	if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+-		struct in6_addr all_routers;
+-
+-		/* The wait after the last probe can be shorter */
+-		addrconf_mod_timer(ifp, AC_RS,
+-				   (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
+-				   ifp->idev->cnf.rtr_solicit_delay :
+-				   ifp->idev->cnf.rtr_solicit_interval);
+-		spin_unlock(&ifp->lock);
+-
+-		ipv6_addr_all_routers(&all_routers);
+-
+-		ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+-	} else {
+-		spin_unlock(&ifp->lock);
+-		/*
+-		 * Note: we do not support deprecated "all on-link"
+-		 * assumption any longer.
+-		 */
+-		printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+-		       ifp->idev->dev->name);
+-	}
+-
+-out:
+-	in6_ifa_put(ifp);
+-}
+-
+-/*
+- *	Duplicate Address Detection
+- */
+-static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
+-{
+-	unsigned long rand_num;
+-	struct inet6_dev *idev = ifp->idev;
+-
+-	if (ifp->flags & IFA_F_OPTIMISTIC)
+-		rand_num = 0;
+-	else
+-		rand_num = net_random() % (idev->cnf.rtr_solicit_delay ?
: 1); +- +- ifp->probes = idev->cnf.dad_transmits; +- addrconf_mod_timer(ifp, AC_DAD, rand_num); +-} +- +-static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) +-{ +- struct inet6_dev *idev = ifp->idev; +- struct net_device *dev = idev->dev; +- +- addrconf_join_solict(dev, &ifp->addr); +- +- net_srandom(ifp->addr.s6_addr32[3]); +- +- read_lock_bh(&idev->lock); +- if (ifp->dead) +- goto out; +- spin_lock_bh(&ifp->lock); +- +- if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || +- !(ifp->flags&IFA_F_TENTATIVE) || +- ifp->flags & IFA_F_NODAD) { +- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); +- spin_unlock_bh(&ifp->lock); +- read_unlock_bh(&idev->lock); +- +- addrconf_dad_completed(ifp); +- return; +- } +- +- if (!(idev->if_flags & IF_READY)) { +- spin_unlock_bh(&ifp->lock); +- read_unlock_bh(&idev->lock); +- /* +- * If the defice is not ready: +- * - keep it tentative if it is a permanent address. +- * - otherwise, kill it. +- */ +- in6_ifa_hold(ifp); +- addrconf_dad_stop(ifp); +- return; +- } +- +- /* +- * Optimistic nodes can start receiving +- * Frames right away +- */ +- if(ifp->flags & IFA_F_OPTIMISTIC) +- ip6_ins_rt(ifp->rt); +- +- addrconf_dad_kick(ifp); +- spin_unlock_bh(&ifp->lock); +-out: +- read_unlock_bh(&idev->lock); +-} +- +-static void addrconf_dad_timer(unsigned long data) +-{ +- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; +- struct inet6_dev *idev = ifp->idev; +- struct in6_addr unspec; +- struct in6_addr mcaddr; +- +- read_lock_bh(&idev->lock); +- if (idev->dead) { +- read_unlock_bh(&idev->lock); +- goto out; +- } +- spin_lock_bh(&ifp->lock); +- if (ifp->probes == 0) { +- /* +- * DAD was successful +- */ +- +- ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC); +- spin_unlock_bh(&ifp->lock); +- read_unlock_bh(&idev->lock); +- +- addrconf_dad_completed(ifp); +- +- goto out; +- } +- +- ifp->probes--; +- addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); +- spin_unlock_bh(&ifp->lock); +- read_unlock_bh(&idev->lock); +- +- /* send a neighbour solicitation for our addr */ +- memset(&unspec, 0, sizeof(unspec)); +- addrconf_addr_solict_mult(&ifp->addr, &mcaddr); +- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec); +-out: +- in6_ifa_put(ifp); +-} +- +-static void addrconf_dad_completed(struct inet6_ifaddr *ifp) +-{ +- struct net_device * dev = ifp->idev->dev; +- +- /* +- * Configure the address for reception. Now it is valid. +- */ +- +- ipv6_ifa_notify(RTM_NEWADDR, ifp); +- +- /* If added prefix is link local and forwarding is off, +- start sending router solicitations. +- */ +- +- if (ifp->idev->cnf.forwarding == 0 && +- ifp->idev->cnf.rtr_solicits > 0 && +- (dev->flags&IFF_LOOPBACK) == 0 && +- (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { +- struct in6_addr all_routers; +- +- ipv6_addr_all_routers(&all_routers); +- +- /* +- * If a host as already performed a random delay +- * [...] as part of DAD [...] 
there is no need +- * to delay again before sending the first RS +- */ +- ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers); +- +- spin_lock_bh(&ifp->lock); +- ifp->probes = 1; +- ifp->idev->if_flags |= IF_RS_SENT; +- addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); +- spin_unlock_bh(&ifp->lock); +- } +-} +- +-static void addrconf_dad_run(struct inet6_dev *idev) { +- struct inet6_ifaddr *ifp; +- +- read_lock_bh(&idev->lock); +- for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) { +- spin_lock_bh(&ifp->lock); +- if (!(ifp->flags & IFA_F_TENTATIVE)) { +- spin_unlock_bh(&ifp->lock); +- continue; +- } +- spin_unlock_bh(&ifp->lock); +- addrconf_dad_kick(ifp); +- } +- read_unlock_bh(&idev->lock); +-} +- +-#ifdef CONFIG_PROC_FS +-struct if6_iter_state { +- int bucket; +-}; +- +-static struct inet6_ifaddr *if6_get_first(struct seq_file *seq) +-{ +- struct inet6_ifaddr *ifa = NULL; +- struct if6_iter_state *state = seq->private; +- +- for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { +- ifa = inet6_addr_lst[state->bucket]; +- if (ifa) +- break; +- } +- return ifa; +-} +- +-static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa) +-{ +- struct if6_iter_state *state = seq->private; +- +- ifa = ifa->lst_next; +-try_again: +- if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) { +- ifa = inet6_addr_lst[state->bucket]; +- goto try_again; +- } +- return ifa; +-} +- +-static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos) +-{ +- struct inet6_ifaddr *ifa = if6_get_first(seq); +- +- if (ifa) +- while(pos && (ifa = if6_get_next(seq, ifa)) != NULL) +- --pos; +- return pos ? NULL : ifa; +-} +- +-static void *if6_seq_start(struct seq_file *seq, loff_t *pos) +-{ +- read_lock_bh(&addrconf_hash_lock); +- return if6_get_idx(seq, *pos); +-} +- +-static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) +-{ +- struct inet6_ifaddr *ifa; +- +- ifa = if6_get_next(seq, v); +- ++*pos; +- return ifa; +-} +- +-static void if6_seq_stop(struct seq_file *seq, void *v) +-{ +- read_unlock_bh(&addrconf_hash_lock); +-} +- +-static int if6_seq_show(struct seq_file *seq, void *v) +-{ +- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; +- seq_printf(seq, +- NIP6_SEQFMT " %02x %02x %02x %02x %8s\n", +- NIP6(ifp->addr), +- ifp->idev->dev->ifindex, +- ifp->prefix_len, +- ifp->scope, +- ifp->flags, +- ifp->idev->dev->name); +- return 0; +-} +- +-static struct seq_operations if6_seq_ops = { +- .start = if6_seq_start, +- .next = if6_seq_next, +- .show = if6_seq_show, +- .stop = if6_seq_stop, +-}; +- +-static int if6_seq_open(struct inode *inode, struct file *file) +-{ +- struct seq_file *seq; +- int rc = -ENOMEM; +- struct if6_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); +- +- if (!s) +- goto out; +- +- rc = seq_open(file, &if6_seq_ops); +- if (rc) +- goto out_kfree; +- +- seq = file->private_data; +- seq->private = s; +-out: +- return rc; +-out_kfree: +- kfree(s); +- goto out; +-} +- +-static const struct file_operations if6_fops = { +- .owner = THIS_MODULE, +- .open = if6_seq_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = seq_release_private, +-}; +- +-int __init if6_proc_init(void) +-{ +- if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops)) +- return -ENOMEM; +- return 0; +-} +- +-void if6_proc_exit(void) +-{ +- proc_net_remove("if_inet6"); +-} +-#endif /* CONFIG_PROC_FS */ +- +-#ifdef CONFIG_IPV6_MIP6 +-/* Check if address is a home address configured on any interface. 
*/ +-int ipv6_chk_home_addr(struct in6_addr *addr) +-{ +- int ret = 0; +- struct inet6_ifaddr * ifp; +- u8 hash = ipv6_addr_hash(addr); +- read_lock_bh(&addrconf_hash_lock); +- for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { +- if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && +- (ifp->flags & IFA_F_HOMEADDRESS)) { +- ret = 1; +- break; +- } +- } +- read_unlock_bh(&addrconf_hash_lock); +- return ret; +-} +-#endif +- +-/* +- * Periodic address status verification +- */ +- +-static void addrconf_verify(unsigned long foo) +-{ +- struct inet6_ifaddr *ifp; +- unsigned long now, next; +- int i; +- +- spin_lock_bh(&addrconf_verify_lock); +- now = jiffies; +- next = now + ADDR_CHECK_FREQUENCY; +- +- del_timer(&addr_chk_timer); +- +- for (i=0; i < IN6_ADDR_HSIZE; i++) { +- +-restart: +- read_lock(&addrconf_hash_lock); +- for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) { +- unsigned long age; +-#ifdef CONFIG_IPV6_PRIVACY +- unsigned long regen_advance; +-#endif +- +- if (ifp->flags & IFA_F_PERMANENT) +- continue; +- +- spin_lock(&ifp->lock); +- age = (now - ifp->tstamp) / HZ; +- +-#ifdef CONFIG_IPV6_PRIVACY +- regen_advance = ifp->idev->cnf.regen_max_retry * +- ifp->idev->cnf.dad_transmits * +- ifp->idev->nd_parms->retrans_time / HZ; +-#endif +- +- if (ifp->valid_lft != INFINITY_LIFE_TIME && +- age >= ifp->valid_lft) { +- spin_unlock(&ifp->lock); +- in6_ifa_hold(ifp); +- read_unlock(&addrconf_hash_lock); +- ipv6_del_addr(ifp); +- goto restart; +- } else if (ifp->prefered_lft == INFINITY_LIFE_TIME) { +- spin_unlock(&ifp->lock); +- continue; +- } else if (age >= ifp->prefered_lft) { +- /* jiffies - ifp->tsamp > age >= ifp->prefered_lft */ +- int deprecate = 0; +- +- if (!(ifp->flags&IFA_F_DEPRECATED)) { +- deprecate = 1; +- ifp->flags |= IFA_F_DEPRECATED; +- } +- +- if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) +- next = ifp->tstamp + ifp->valid_lft * HZ; +- +- spin_unlock(&ifp->lock); +- +- if (deprecate) { +- in6_ifa_hold(ifp); +- read_unlock(&addrconf_hash_lock); +- +- ipv6_ifa_notify(0, ifp); +- in6_ifa_put(ifp); +- goto restart; +- } +-#ifdef CONFIG_IPV6_PRIVACY +- } else if ((ifp->flags&IFA_F_TEMPORARY) && +- !(ifp->flags&IFA_F_TENTATIVE)) { +- if (age >= ifp->prefered_lft - regen_advance) { +- struct inet6_ifaddr *ifpub = ifp->ifpub; +- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) +- next = ifp->tstamp + ifp->prefered_lft * HZ; +- if (!ifp->regen_count && ifpub) { +- ifp->regen_count++; +- in6_ifa_hold(ifp); +- in6_ifa_hold(ifpub); +- spin_unlock(&ifp->lock); +- read_unlock(&addrconf_hash_lock); +- spin_lock(&ifpub->lock); +- ifpub->regen_count = 0; +- spin_unlock(&ifpub->lock); +- ipv6_create_tempaddr(ifpub, ifp); +- in6_ifa_put(ifpub); +- in6_ifa_put(ifp); +- goto restart; +- } +- } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next)) +- next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ; +- spin_unlock(&ifp->lock); +-#endif +- } else { +- /* ifp->prefered_lft <= ifp->valid_lft */ +- if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) +- next = ifp->tstamp + ifp->prefered_lft * HZ; +- spin_unlock(&ifp->lock); +- } +- } +- read_unlock(&addrconf_hash_lock); +- } +- +- addr_chk_timer.expires = time_before(next, jiffies + HZ) ? 
jiffies + HZ : next; +- add_timer(&addr_chk_timer); +- spin_unlock_bh(&addrconf_verify_lock); +-} +- +-static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +-{ +- struct in6_addr *pfx = NULL; +- +- if (addr) +- pfx = nla_data(addr); +- +- if (local) { +- if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) +- pfx = NULL; +- else +- pfx = nla_data(local); +- } +- +- return pfx; +-} +- +-static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = { +- [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, +- [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, +- [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, +-}; +- +-static int +-inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +-{ +- struct ifaddrmsg *ifm; +- struct nlattr *tb[IFA_MAX+1]; +- struct in6_addr *pfx; +- int err; +- +- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); +- if (err < 0) +- return err; +- +- ifm = nlmsg_data(nlh); +- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); +- if (pfx == NULL) +- return -EINVAL; +- +- return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); +-} +- +-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, +- u32 prefered_lft, u32 valid_lft) +-{ +- u32 flags = RTF_EXPIRES; +- +- if (!valid_lft || (prefered_lft > valid_lft)) +- return -EINVAL; +- +- if (valid_lft == INFINITY_LIFE_TIME) { +- ifa_flags |= IFA_F_PERMANENT; +- flags = 0; +- } else if (valid_lft >= 0x7FFFFFFF/HZ) +- valid_lft = 0x7FFFFFFF/HZ; +- +- if (prefered_lft == 0) +- ifa_flags |= IFA_F_DEPRECATED; +- else if ((prefered_lft >= 0x7FFFFFFF/HZ) && +- (prefered_lft != INFINITY_LIFE_TIME)) +- prefered_lft = 0x7FFFFFFF/HZ; +- +- spin_lock_bh(&ifp->lock); +- ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; +- ifp->tstamp = jiffies; +- ifp->valid_lft = valid_lft; +- ifp->prefered_lft = prefered_lft; +- +- spin_unlock_bh(&ifp->lock); +- if (!(ifp->flags&IFA_F_TENTATIVE)) +- ipv6_ifa_notify(0, ifp); +- +- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, +- jiffies_to_clock_t(valid_lft * HZ), flags); +- addrconf_verify(0); +- +- return 0; +-} +- +-static int +-inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +-{ +- struct ifaddrmsg *ifm; +- struct nlattr *tb[IFA_MAX+1]; +- struct in6_addr *pfx; +- struct inet6_ifaddr *ifa; +- struct net_device *dev; +- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; +- u8 ifa_flags; +- int err; +- +- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); +- if (err < 0) +- return err; +- +- ifm = nlmsg_data(nlh); +- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); +- if (pfx == NULL) +- return -EINVAL; +- +- if (tb[IFA_CACHEINFO]) { +- struct ifa_cacheinfo *ci; +- +- ci = nla_data(tb[IFA_CACHEINFO]); +- valid_lft = ci->ifa_valid; +- preferred_lft = ci->ifa_prefered; +- } else { +- preferred_lft = INFINITY_LIFE_TIME; +- valid_lft = INFINITY_LIFE_TIME; +- } +- +- dev = __dev_get_by_index(ifm->ifa_index); +- if (dev == NULL) +- return -ENODEV; +- +- /* We ignore other flags so far. */ +- ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); +- +- ifa = ipv6_get_ifaddr(pfx, dev, 1); +- if (ifa == NULL) { +- /* +- * It would be best to check for !NLM_F_CREATE here but +- * userspace alreay relies on not having to provide this. 
+- */ +- return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, +- ifa_flags, preferred_lft, valid_lft); +- } +- +- if (nlh->nlmsg_flags & NLM_F_EXCL || +- !(nlh->nlmsg_flags & NLM_F_REPLACE)) +- err = -EEXIST; +- else +- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); +- +- in6_ifa_put(ifa); +- +- return err; +-} +- +-static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, +- u8 scope, int ifindex) +-{ +- struct ifaddrmsg *ifm; +- +- ifm = nlmsg_data(nlh); +- ifm->ifa_family = AF_INET6; +- ifm->ifa_prefixlen = prefixlen; +- ifm->ifa_flags = flags; +- ifm->ifa_scope = scope; +- ifm->ifa_index = ifindex; +-} +- +-static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, +- unsigned long tstamp, u32 preferred, u32 valid) +-{ +- struct ifa_cacheinfo ci; +- +- ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 +- + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); +- ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 +- + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); +- ci.ifa_prefered = preferred; +- ci.ifa_valid = valid; +- +- return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); +-} +- +-static inline int rt_scope(int ifa_scope) +-{ +- if (ifa_scope & IFA_HOST) +- return RT_SCOPE_HOST; +- else if (ifa_scope & IFA_LINK) +- return RT_SCOPE_LINK; +- else if (ifa_scope & IFA_SITE) +- return RT_SCOPE_SITE; +- else +- return RT_SCOPE_UNIVERSE; +-} +- +-static inline int inet6_ifaddr_msgsize(void) +-{ +- return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +- + nla_total_size(16) /* IFA_ADDRESS */ +- + nla_total_size(sizeof(struct ifa_cacheinfo)); +-} +- +-static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, +- u32 pid, u32 seq, int event, unsigned int flags) +-{ +- struct nlmsghdr *nlh; +- u32 preferred, valid; +- +- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); +- if (nlh == NULL) +- return -EMSGSIZE; +- +- put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), +- ifa->idev->dev->ifindex); +- +- if (!(ifa->flags&IFA_F_PERMANENT)) { +- preferred = ifa->prefered_lft; +- valid = ifa->valid_lft; +- if (preferred != INFINITY_LIFE_TIME) { +- long tval = (jiffies - ifa->tstamp)/HZ; +- preferred -= tval; +- if (valid != INFINITY_LIFE_TIME) +- valid -= tval; +- } +- } else { +- preferred = INFINITY_LIFE_TIME; +- valid = INFINITY_LIFE_TIME; +- } +- +- if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || +- put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { +- nlmsg_cancel(skb, nlh); +- return -EMSGSIZE; +- } +- +- return nlmsg_end(skb, nlh); +-} +- +-static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, +- u32 pid, u32 seq, int event, u16 flags) +-{ +- struct nlmsghdr *nlh; +- u8 scope = RT_SCOPE_UNIVERSE; +- int ifindex = ifmca->idev->dev->ifindex; +- +- if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) +- scope = RT_SCOPE_SITE; +- +- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); +- if (nlh == NULL) +- return -EMSGSIZE; +- +- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); +- if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || +- put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, +- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { +- nlmsg_cancel(skb, nlh); +- return -EMSGSIZE; +- } +- +- return nlmsg_end(skb, nlh); +-} +- +-static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, +- u32 pid, u32 seq, int event, unsigned int flags) +-{ +- struct 
nlmsghdr *nlh; +- u8 scope = RT_SCOPE_UNIVERSE; +- int ifindex = ifaca->aca_idev->dev->ifindex; +- +- if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) +- scope = RT_SCOPE_SITE; +- +- nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); +- if (nlh == NULL) +- return -EMSGSIZE; +- +- put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); +- if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || +- put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, +- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { +- nlmsg_cancel(skb, nlh); +- return -EMSGSIZE; +- } +- +- return nlmsg_end(skb, nlh); +-} +- +-enum addr_type_t +-{ +- UNICAST_ADDR, +- MULTICAST_ADDR, +- ANYCAST_ADDR, +-}; +- +-static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, +- enum addr_type_t type) +-{ +- int idx, ip_idx; +- int s_idx, s_ip_idx; +- int err = 1; +- struct net_device *dev; +- struct inet6_dev *idev = NULL; +- struct inet6_ifaddr *ifa; +- struct ifmcaddr6 *ifmca; +- struct ifacaddr6 *ifaca; +- +- s_idx = cb->args[0]; +- s_ip_idx = ip_idx = cb->args[1]; +- +- idx = 0; +- for_each_netdev(dev) { +- if (idx < s_idx) +- goto cont; +- if (idx > s_idx) +- s_ip_idx = 0; +- ip_idx = 0; +- if ((idev = in6_dev_get(dev)) == NULL) +- goto cont; +- read_lock_bh(&idev->lock); +- switch (type) { +- case UNICAST_ADDR: +- /* unicast address incl. temp addr */ +- for (ifa = idev->addr_list; ifa; +- ifa = ifa->if_next, ip_idx++) { +- if (ip_idx < s_ip_idx) +- continue; +- if ((err = inet6_fill_ifaddr(skb, ifa, +- NETLINK_CB(cb->skb).pid, +- cb->nlh->nlmsg_seq, RTM_NEWADDR, +- NLM_F_MULTI)) <= 0) +- goto done; +- } +- break; +- case MULTICAST_ADDR: +- /* multicast address */ +- for (ifmca = idev->mc_list; ifmca; +- ifmca = ifmca->next, ip_idx++) { +- if (ip_idx < s_ip_idx) +- continue; +- if ((err = inet6_fill_ifmcaddr(skb, ifmca, +- NETLINK_CB(cb->skb).pid, +- cb->nlh->nlmsg_seq, RTM_GETMULTICAST, +- NLM_F_MULTI)) <= 0) +- goto done; +- } +- break; +- case ANYCAST_ADDR: +- /* anycast address */ +- for (ifaca = idev->ac_list; ifaca; +- ifaca = ifaca->aca_next, ip_idx++) { +- if (ip_idx < s_ip_idx) +- continue; +- if ((err = inet6_fill_ifacaddr(skb, ifaca, +- NETLINK_CB(cb->skb).pid, +- cb->nlh->nlmsg_seq, RTM_GETANYCAST, +- NLM_F_MULTI)) <= 0) +- goto done; +- } +- break; +- default: +- break; +- } +- read_unlock_bh(&idev->lock); +- in6_dev_put(idev); +-cont: +- idx++; +- } +-done: +- if (err <= 0) { +- read_unlock_bh(&idev->lock); +- in6_dev_put(idev); +- } +- cb->args[0] = idx; +- cb->args[1] = ip_idx; +- return skb->len; +-} +- +-static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) +-{ +- enum addr_type_t type = UNICAST_ADDR; +- return inet6_dump_addr(skb, cb, type); +-} +- +-static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb) +-{ +- enum addr_type_t type = MULTICAST_ADDR; +- return inet6_dump_addr(skb, cb, type); +-} +- +- +-static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) +-{ +- enum addr_type_t type = ANYCAST_ADDR; +- return inet6_dump_addr(skb, cb, type); +-} +- +-static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, +- void *arg) +-{ +- struct ifaddrmsg *ifm; +- struct nlattr *tb[IFA_MAX+1]; +- struct in6_addr *addr = NULL; +- struct net_device *dev = NULL; +- struct inet6_ifaddr *ifa; +- struct sk_buff *skb; +- int err; +- +- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); +- if (err < 0) +- goto errout; +- +- addr = extract_addr(tb[IFA_ADDRESS], 
tb[IFA_LOCAL]); +- if (addr == NULL) { +- err = -EINVAL; +- goto errout; +- } +- +- ifm = nlmsg_data(nlh); +- if (ifm->ifa_index) +- dev = __dev_get_by_index(ifm->ifa_index); +- +- if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { +- err = -EADDRNOTAVAIL; +- goto errout; +- } +- +- if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { +- err = -ENOBUFS; +- goto errout_ifa; +- } +- +- err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, +- nlh->nlmsg_seq, RTM_NEWADDR, 0); +- if (err < 0) { +- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ +- WARN_ON(err == -EMSGSIZE); +- kfree_skb(skb); +- goto errout_ifa; +- } +- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); +-errout_ifa: +- in6_ifa_put(ifa); +-errout: +- return err; +-} +- +-static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) +-{ +- struct sk_buff *skb; +- int err = -ENOBUFS; +- +- skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); +- if (skb == NULL) +- goto errout; +- +- err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); +- if (err < 0) { +- /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ +- WARN_ON(err == -EMSGSIZE); +- kfree_skb(skb); +- goto errout; +- } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +-errout: +- if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); +-} +- +-static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, +- __s32 *array, int bytes) +-{ +- BUG_ON(bytes < (DEVCONF_MAX * 4)); +- +- memset(array, 0, bytes); +- array[DEVCONF_FORWARDING] = cnf->forwarding; +- array[DEVCONF_HOPLIMIT] = cnf->hop_limit; +- array[DEVCONF_MTU6] = cnf->mtu6; +- array[DEVCONF_ACCEPT_RA] = cnf->accept_ra; +- array[DEVCONF_ACCEPT_REDIRECTS] = cnf->accept_redirects; +- array[DEVCONF_AUTOCONF] = cnf->autoconf; +- array[DEVCONF_DAD_TRANSMITS] = cnf->dad_transmits; +- array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; +- array[DEVCONF_RTR_SOLICIT_INTERVAL] = cnf->rtr_solicit_interval; +- array[DEVCONF_RTR_SOLICIT_DELAY] = cnf->rtr_solicit_delay; +- array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; +-#ifdef CONFIG_IPV6_PRIVACY +- array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr; +- array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft; +- array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft; +- array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry; +- array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; +-#endif +- array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; +- array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; +- array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; +-#ifdef CONFIG_IPV6_ROUTER_PREF +- array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; +- array[DEVCONF_RTR_PROBE_INTERVAL] = cnf->rtr_probe_interval; +-#ifdef CONFIG_IPV6_ROUTE_INFO +- array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; +-#endif +-#endif +- array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; +- array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; +-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad; +-#endif +-} +- +-static inline size_t inet6_if_nlmsg_size(void) +-{ +- return NLMSG_ALIGN(sizeof(struct ifinfomsg)) +- + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ +- + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ +- + nla_total_size(4) /* IFLA_MTU */ +- + nla_total_size(4) /* IFLA_LINK */ +- + nla_total_size( /* IFLA_PROTINFO */ +- nla_total_size(4) /* IFLA_INET6_FLAGS */ +- + nla_total_size(sizeof(struct ifla_cacheinfo)) +- + nla_total_size(DEVCONF_MAX * 4) /* 
IFLA_INET6_CONF */ +- + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ +- + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ +- ); +-} +- +-static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items, +- int bytes) +-{ +- int i; +- int pad = bytes - sizeof(u64) * items; +- BUG_ON(pad < 0); +- +- /* Use put_unaligned() because stats may not be aligned for u64. */ +- put_unaligned(items, &stats[0]); +- for (i = 1; i < items; i++) +- put_unaligned(snmp_fold_field(mib, i), &stats[i]); +- +- memset(&stats[items], 0, pad); +-} +- +-static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, +- int bytes) +-{ +- switch(attrtype) { +- case IFLA_INET6_STATS: +- __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes); +- break; +- case IFLA_INET6_ICMP6STATS: +- __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes); +- break; +- } +-} +- +-static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, +- u32 pid, u32 seq, int event, unsigned int flags) +-{ +- struct net_device *dev = idev->dev; +- struct nlattr *nla; +- struct ifinfomsg *hdr; +- struct nlmsghdr *nlh; +- void *protoinfo; +- struct ifla_cacheinfo ci; +- +- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); +- if (nlh == NULL) +- return -EMSGSIZE; +- +- hdr = nlmsg_data(nlh); +- hdr->ifi_family = AF_INET6; +- hdr->__ifi_pad = 0; +- hdr->ifi_type = dev->type; +- hdr->ifi_index = dev->ifindex; +- hdr->ifi_flags = dev_get_flags(dev); +- hdr->ifi_change = 0; +- +- NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); +- +- if (dev->addr_len) +- NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); +- +- NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); +- if (dev->ifindex != dev->iflink) +- NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); +- +- protoinfo = nla_nest_start(skb, IFLA_PROTINFO); +- if (protoinfo == NULL) +- goto nla_put_failure; +- +- NLA_PUT_U32(skb, IFLA_INET6_FLAGS, idev->if_flags); +- +- ci.max_reasm_len = IPV6_MAXPLEN; +- ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100 +- + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); +- ci.reachable_time = idev->nd_parms->reachable_time; +- ci.retrans_time = idev->nd_parms->retrans_time; +- NLA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci); +- +- nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); +- if (nla == NULL) +- goto nla_put_failure; +- ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); +- +- /* XXX - MC not implemented */ +- +- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); +- if (nla == NULL) +- goto nla_put_failure; +- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); +- +- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); +- if (nla == NULL) +- goto nla_put_failure; +- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); +- +- nla_nest_end(skb, protoinfo); +- return nlmsg_end(skb, nlh); +- +-nla_put_failure: +- nlmsg_cancel(skb, nlh); +- return -EMSGSIZE; +-} +- +-static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +-{ +- int idx, err; +- int s_idx = cb->args[0]; +- struct net_device *dev; +- struct inet6_dev *idev; +- +- read_lock(&dev_base_lock); +- idx = 0; +- for_each_netdev(dev) { +- if (idx < s_idx) +- goto cont; +- if ((idev = in6_dev_get(dev)) == NULL) +- goto cont; +- err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid, +- cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI); +- 
in6_dev_put(idev); +- if (err <= 0) +- break; +-cont: +- idx++; +- } +- read_unlock(&dev_base_lock); +- cb->args[0] = idx; +- +- return skb->len; +-} +- +-void inet6_ifinfo_notify(int event, struct inet6_dev *idev) +-{ +- struct sk_buff *skb; +- int err = -ENOBUFS; +- +- skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); +- if (skb == NULL) +- goto errout; +- +- err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); +- if (err < 0) { +- /* -EMSGSIZE implies BUG in inet6_if_nlmsg_size() */ +- WARN_ON(err == -EMSGSIZE); +- kfree_skb(skb); +- goto errout; +- } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); +-errout: +- if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); +-} +- +-static inline size_t inet6_prefix_nlmsg_size(void) +-{ +- return NLMSG_ALIGN(sizeof(struct prefixmsg)) +- + nla_total_size(sizeof(struct in6_addr)) +- + nla_total_size(sizeof(struct prefix_cacheinfo)); +-} +- +-static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, +- struct prefix_info *pinfo, u32 pid, u32 seq, +- int event, unsigned int flags) +-{ +- struct prefixmsg *pmsg; +- struct nlmsghdr *nlh; +- struct prefix_cacheinfo ci; +- +- nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); +- if (nlh == NULL) +- return -EMSGSIZE; +- +- pmsg = nlmsg_data(nlh); +- pmsg->prefix_family = AF_INET6; +- pmsg->prefix_pad1 = 0; +- pmsg->prefix_pad2 = 0; +- pmsg->prefix_ifindex = idev->dev->ifindex; +- pmsg->prefix_len = pinfo->prefix_len; +- pmsg->prefix_type = pinfo->type; +- pmsg->prefix_pad3 = 0; +- pmsg->prefix_flags = 0; +- if (pinfo->onlink) +- pmsg->prefix_flags |= IF_PREFIX_ONLINK; +- if (pinfo->autoconf) +- pmsg->prefix_flags |= IF_PREFIX_AUTOCONF; +- +- NLA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix); +- +- ci.preferred_time = ntohl(pinfo->prefered); +- ci.valid_time = ntohl(pinfo->valid); +- NLA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci); +- +- return nlmsg_end(skb, nlh); +- +-nla_put_failure: +- nlmsg_cancel(skb, nlh); +- return -EMSGSIZE; +-} +- +-static void inet6_prefix_notify(int event, struct inet6_dev *idev, +- struct prefix_info *pinfo) +-{ +- struct sk_buff *skb; +- int err = -ENOBUFS; +- +- skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); +- if (skb == NULL) +- goto errout; +- +- err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); +- if (err < 0) { +- /* -EMSGSIZE implies BUG in inet6_prefix_nlmsg_size() */ +- WARN_ON(err == -EMSGSIZE); +- kfree_skb(skb); +- goto errout; +- } +- err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); +-errout: +- if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); +-} +- +-static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +-{ +- inet6_ifa_notify(event ? 
: RTM_NEWADDR, ifp); +- +- switch (event) { +- case RTM_NEWADDR: +- /* +- * If the address was optimistic +- * we inserted the route at the start of +- * our DAD process, so we don't need +- * to do it again +- */ +- if (!(ifp->rt->rt6i_node)) +- ip6_ins_rt(ifp->rt); +- if (ifp->idev->cnf.forwarding) +- addrconf_join_anycast(ifp); +- break; +- case RTM_DELADDR: +- if (ifp->idev->cnf.forwarding) +- addrconf_leave_anycast(ifp); +- addrconf_leave_solict(ifp->idev, &ifp->addr); +- dst_hold(&ifp->rt->u.dst); +- if (ip6_del_rt(ifp->rt)) +- dst_free(&ifp->rt->u.dst); +- break; +- } +-} +- +-static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) +-{ +- rcu_read_lock_bh(); +- if (likely(ifp->idev->dead == 0)) +- __ipv6_ifa_notify(event, ifp); +- rcu_read_unlock_bh(); +-} +- +-#ifdef CONFIG_SYSCTL +- +-static +-int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, +- void __user *buffer, size_t *lenp, loff_t *ppos) +-{ +- int *valp = ctl->data; +- int val = *valp; +- int ret; +- +- ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); +- +- if (write && valp != &ipv6_devconf_dflt.forwarding) { +- if (valp != &ipv6_devconf.forwarding) { +- if ((!*valp) ^ (!val)) { +- struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; +- if (idev == NULL) +- return ret; +- dev_forward_change(idev); +- } +- } else { +- ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; +- addrconf_forward_change(); +- } +- if (*valp) +- rt6_purge_dflt_routers(); +- } +- +- return ret; +-} +- +-static int addrconf_sysctl_forward_strategy(ctl_table *table, +- int __user *name, int nlen, +- void __user *oldval, +- size_t __user *oldlenp, +- void __user *newval, size_t newlen) +-{ +- int *valp = table->data; +- int new; +- +- if (!newval || !newlen) +- return 0; +- if (newlen != sizeof(int)) +- return -EINVAL; +- if (get_user(new, (int __user *)newval)) +- return -EFAULT; +- if (new == *valp) +- return 0; +- if (oldval && oldlenp) { +- size_t len; +- if (get_user(len, oldlenp)) +- return -EFAULT; +- if (len) { +- if (len > table->maxlen) +- len = table->maxlen; +- if (copy_to_user(oldval, valp, len)) +- return -EFAULT; +- if (put_user(len, oldlenp)) +- return -EFAULT; +- } +- } +- +- if (valp != &ipv6_devconf_dflt.forwarding) { +- if (valp != &ipv6_devconf.forwarding) { +- struct inet6_dev *idev = (struct inet6_dev *)table->extra1; +- int changed; +- if (unlikely(idev == NULL)) +- return -ENODEV; +- changed = (!*valp) ^ (!new); +- *valp = new; +- if (changed) +- dev_forward_change(idev); +- } else { +- *valp = new; +- addrconf_forward_change(); +- } +- +- if (*valp) +- rt6_purge_dflt_routers(); +- } else +- *valp = new; +- +- return 1; +-} +- +-static struct addrconf_sysctl_table +-{ +- struct ctl_table_header *sysctl_header; +- ctl_table addrconf_vars[__NET_IPV6_MAX]; +- ctl_table addrconf_dev[2]; +- ctl_table addrconf_conf_dir[2]; +- ctl_table addrconf_proto_dir[2]; +- ctl_table addrconf_root_dir[2]; +-} addrconf_sysctl __read_mostly = { +- .sysctl_header = NULL, +- .addrconf_vars = { +- { +- .ctl_name = NET_IPV6_FORWARDING, +- .procname = "forwarding", +- .data = &ipv6_devconf.forwarding, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &addrconf_sysctl_forward, +- .strategy = &addrconf_sysctl_forward_strategy, +- }, +- { +- .ctl_name = NET_IPV6_HOP_LIMIT, +- .procname = "hop_limit", +- .data = &ipv6_devconf.hop_limit, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_MTU, +- .procname = "mtu", +- .data = 
&ipv6_devconf.mtu6, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_ACCEPT_RA, +- .procname = "accept_ra", +- .data = &ipv6_devconf.accept_ra, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_ACCEPT_REDIRECTS, +- .procname = "accept_redirects", +- .data = &ipv6_devconf.accept_redirects, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_AUTOCONF, +- .procname = "autoconf", +- .data = &ipv6_devconf.autoconf, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_DAD_TRANSMITS, +- .procname = "dad_transmits", +- .data = &ipv6_devconf.dad_transmits, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_RTR_SOLICITS, +- .procname = "router_solicitations", +- .data = &ipv6_devconf.rtr_solicits, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL, +- .procname = "router_solicitation_interval", +- .data = &ipv6_devconf.rtr_solicit_interval, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec_jiffies, +- .strategy = &sysctl_jiffies, +- }, +- { +- .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY, +- .procname = "router_solicitation_delay", +- .data = &ipv6_devconf.rtr_solicit_delay, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec_jiffies, +- .strategy = &sysctl_jiffies, +- }, +- { +- .ctl_name = NET_IPV6_FORCE_MLD_VERSION, +- .procname = "force_mld_version", +- .data = &ipv6_devconf.force_mld_version, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +-#ifdef CONFIG_IPV6_PRIVACY +- { +- .ctl_name = NET_IPV6_USE_TEMPADDR, +- .procname = "use_tempaddr", +- .data = &ipv6_devconf.use_tempaddr, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_TEMP_VALID_LFT, +- .procname = "temp_valid_lft", +- .data = &ipv6_devconf.temp_valid_lft, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_TEMP_PREFERED_LFT, +- .procname = "temp_prefered_lft", +- .data = &ipv6_devconf.temp_prefered_lft, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_REGEN_MAX_RETRY, +- .procname = "regen_max_retry", +- .data = &ipv6_devconf.regen_max_retry, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR, +- .procname = "max_desync_factor", +- .data = &ipv6_devconf.max_desync_factor, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +-#endif +- { +- .ctl_name = NET_IPV6_MAX_ADDRESSES, +- .procname = "max_addresses", +- .data = &ipv6_devconf.max_addresses, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_ACCEPT_RA_DEFRTR, +- .procname = "accept_ra_defrtr", +- .data = &ipv6_devconf.accept_ra_defrtr, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_ACCEPT_RA_PINFO, +- .procname = "accept_ra_pinfo", +- .data = &ipv6_devconf.accept_ra_pinfo, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +-#ifdef CONFIG_IPV6_ROUTER_PREF +- { 
+- .ctl_name = NET_IPV6_ACCEPT_RA_RTR_PREF, +- .procname = "accept_ra_rtr_pref", +- .data = &ipv6_devconf.accept_ra_rtr_pref, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_RTR_PROBE_INTERVAL, +- .procname = "router_probe_interval", +- .data = &ipv6_devconf.rtr_probe_interval, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec_jiffies, +- .strategy = &sysctl_jiffies, +- }, +-#ifdef CONFIG_IPV6_ROUTE_INFO +- { +- .ctl_name = NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, +- .procname = "accept_ra_rt_info_max_plen", +- .data = &ipv6_devconf.accept_ra_rt_info_max_plen, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +-#endif +-#endif +- { +- .ctl_name = NET_IPV6_PROXY_NDP, +- .procname = "proxy_ndp", +- .data = &ipv6_devconf.proxy_ndp, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +- { +- .ctl_name = NET_IPV6_ACCEPT_SOURCE_ROUTE, +- .procname = "accept_source_route", +- .data = &ipv6_devconf.accept_source_route, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- }, +-#ifdef CONFIG_IPV6_OPTIMISTIC_DAD +- { +- .ctl_name = CTL_UNNUMBERED, +- .procname = "optimistic_dad", +- .data = &ipv6_devconf.optimistic_dad, +- .maxlen = sizeof(int), +- .mode = 0644, +- .proc_handler = &proc_dointvec, +- +- }, +-#endif +- { +- .ctl_name = 0, /* sentinel */ +- } +- }, +- .addrconf_dev = { +- { +- .ctl_name = NET_PROTO_CONF_ALL, +- .procname = "all", +- .mode = 0555, +- .child = addrconf_sysctl.addrconf_vars, +- }, +- { +- .ctl_name = 0, /* sentinel */ +- } +- }, +- .addrconf_conf_dir = { +- { +- .ctl_name = NET_IPV6_CONF, +- .procname = "conf", +- .mode = 0555, +- .child = addrconf_sysctl.addrconf_dev, +- }, +- { +- .ctl_name = 0, /* sentinel */ +- } +- }, +- .addrconf_proto_dir = { +- { +- .ctl_name = NET_IPV6, +- .procname = "ipv6", +- .mode = 0555, +- .child = addrconf_sysctl.addrconf_conf_dir, +- }, +- { +- .ctl_name = 0, /* sentinel */ +- } +- }, +- .addrconf_root_dir = { +- { +- .ctl_name = CTL_NET, +- .procname = "net", +- .mode = 0555, +- .child = addrconf_sysctl.addrconf_proto_dir, +- }, +- { +- .ctl_name = 0, /* sentinel */ +- } +- }, +-}; +- +-static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p) +-{ +- int i; +- struct net_device *dev = idev ? idev->dev : NULL; +- struct addrconf_sysctl_table *t; +- char *dev_name = NULL; +- +- t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); +- if (t == NULL) +- return; +- for (i=0; t->addrconf_vars[i].data; i++) { +- t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf; +- t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */ +- } +- if (dev) { +- dev_name = dev->name; +- t->addrconf_dev[0].ctl_name = dev->ifindex; +- } else { +- dev_name = "default"; +- t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT; +- } +- +- /* +- * Make a copy of dev_name, because '.procname' is regarded as const +- * by sysctl and we wouldn't want anyone to change it under our feet +- * (see SIOCSIFNAME). 
+- */ +- dev_name = kstrdup(dev_name, GFP_KERNEL); +- if (!dev_name) +- goto free; +- +- t->addrconf_dev[0].procname = dev_name; +- +- t->addrconf_dev[0].child = t->addrconf_vars; +- t->addrconf_conf_dir[0].child = t->addrconf_dev; +- t->addrconf_proto_dir[0].child = t->addrconf_conf_dir; +- t->addrconf_root_dir[0].child = t->addrconf_proto_dir; +- +- t->sysctl_header = register_sysctl_table(t->addrconf_root_dir); +- if (t->sysctl_header == NULL) +- goto free_procname; +- else +- p->sysctl = t; +- return; +- +- /* error path */ +- free_procname: +- kfree(dev_name); +- free: +- kfree(t); +- +- return; +-} +- +-static void addrconf_sysctl_unregister(struct ipv6_devconf *p) +-{ +- if (p->sysctl) { +- struct addrconf_sysctl_table *t = p->sysctl; +- p->sysctl = NULL; +- unregister_sysctl_table(t->sysctl_header); +- kfree(t->addrconf_dev[0].procname); +- kfree(t); +- } +-} +- +- +-#endif +- +-/* +- * Device notifier +- */ +- +-int register_inet6addr_notifier(struct notifier_block *nb) +-{ +- return atomic_notifier_chain_register(&inet6addr_chain, nb); +-} +- +-EXPORT_SYMBOL(register_inet6addr_notifier); +- +-int unregister_inet6addr_notifier(struct notifier_block *nb) +-{ +- return atomic_notifier_chain_unregister(&inet6addr_chain,nb); +-} +- +-EXPORT_SYMBOL(unregister_inet6addr_notifier); +- +-/* +- * Init / cleanup code +- */ +- +-int __init addrconf_init(void) +-{ +- int err = 0; +- +- /* The addrconf netdev notifier requires that loopback_dev +- * has it's ipv6 private information allocated and setup +- * before it can bring up and give link-local addresses +- * to other devices which are up. +- * +- * Unfortunately, loopback_dev is not necessarily the first +- * entry in the global dev_base list of net devices. In fact, +- * it is likely to be the very last entry on that list. +- * So this causes the notifier registry below to try and +- * give link-local addresses to all devices besides loopback_dev +- * first, then loopback_dev, which cases all the non-loopback_dev +- * devices to fail to get a link-local address. +- * +- * So, as a temporary fix, allocate the ipv6 structure for +- * loopback_dev first by hand. +- * Longer term, all of the dependencies ipv6 has upon the loopback +- * device and it being up should be removed. 
+- */ +- rtnl_lock(); +- if (!ipv6_add_dev(&loopback_dev)) +- err = -ENOMEM; +- rtnl_unlock(); +- if (err) +- return err; +- +- ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev); +-#ifdef CONFIG_IPV6_MULTIPLE_TABLES +- ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev); +- ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev); +-#endif +- +- register_netdevice_notifier(&ipv6_dev_notf); +- +- addrconf_verify(0); +- +- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo); +- if (err < 0) +- goto errout; +- +- /* Only the first call to __rtnl_register can fail */ +- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL); +- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL); +- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, inet6_dump_ifaddr); +- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, inet6_dump_ifmcaddr); +- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, inet6_dump_ifacaddr); +- +-#ifdef CONFIG_SYSCTL +- addrconf_sysctl.sysctl_header = +- register_sysctl_table(addrconf_sysctl.addrconf_root_dir); +- addrconf_sysctl_register(NULL, &ipv6_devconf_dflt); +-#endif +- +- return 0; +-errout: +- unregister_netdevice_notifier(&ipv6_dev_notf); +- +- return err; +-} +- +-void __exit addrconf_cleanup(void) +-{ +- struct net_device *dev; +- struct inet6_dev *idev; +- struct inet6_ifaddr *ifa; +- int i; +- +- unregister_netdevice_notifier(&ipv6_dev_notf); +- +-#ifdef CONFIG_SYSCTL +- addrconf_sysctl_unregister(&ipv6_devconf_dflt); +- addrconf_sysctl_unregister(&ipv6_devconf); +-#endif +- +- rtnl_lock(); +- +- /* +- * clean dev list. +- */ +- +- for_each_netdev(dev) { +- if ((idev = __in6_dev_get(dev)) == NULL) +- continue; +- addrconf_ifdown(dev, 1); +- } +- addrconf_ifdown(&loopback_dev, 2); +- +- /* +- * Check hash table. +- */ +- +- write_lock_bh(&addrconf_hash_lock); +- for (i=0; i < IN6_ADDR_HSIZE; i++) { +- for (ifa=inet6_addr_lst[i]; ifa; ) { +- struct inet6_ifaddr *bifa; +- +- bifa = ifa; +- ifa = ifa->lst_next; +- printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa); +- /* Do not free it; something is wrong. +- Now we can investigate it with debugger. 
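The "Only the first call to __rtnl_register can fail" comment above encodes an allocation contract rather than a quirk: the first registration for a protocol family allocates that family's handler table, so only it can hit an out-of-memory error; every later call just fills a slot in memory that already exists. A toy model of the contract (hypothetical names, not the real rtnetlink API):

	#include <stdlib.h>

	typedef int (*doit_t)(void);
	#define TOY_RTM_MAX 32

	static doit_t *handlers;	/* per-family table, allocated lazily */

	static int toy_rtnl_register(int msgtype, doit_t doit)
	{
		if (handlers == NULL) {
			handlers = calloc(TOY_RTM_MAX, sizeof(*handlers));
			if (handlers == NULL)
				return -1;	/* only the first call can take this path */
		}
		handlers[msgtype] = doit;	/* cannot fail once the table exists */
		return 0;
	}

	int main(void)
	{
		if (toy_rtnl_register(0, NULL) < 0)	/* first call: must be checked */
			return 1;
		toy_rtnl_register(1, NULL);		/* later calls: safe to ignore */
		return 0;
	}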
+- */ +- } +- } +- write_unlock_bh(&addrconf_hash_lock); +- +- del_timer(&addr_chk_timer); +- +- rtnl_unlock(); +- +-#ifdef CONFIG_PROC_FS +- proc_net_remove("if_inet6"); +-#endif +-} +diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-591/net/ipv6/af_inet6.c +--- linux-2.6.22-570/net/ipv6/af_inet6.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/af_inet6.c 2007-12-21 15:36:15.000000000 -0500 +@@ -59,9 +59,6 @@ + #ifdef CONFIG_IPV6_TUNNEL + #include + #endif +-#ifdef CONFIG_IPV6_MIP6 +-#include +-#endif + + #include + #include +@@ -85,7 +82,7 @@ + return (struct ipv6_pinfo *)(((u8 *)sk) + offset); + } + +-static int inet6_create(struct socket *sock, int protocol) ++static int inet6_create(struct net *net, struct socket *sock, int protocol) + { + struct inet_sock *inet; + struct ipv6_pinfo *np; +@@ -98,6 +95,9 @@ + int try_loading_module = 0; + int err; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (sock->type != SOCK_RAW && + sock->type != SOCK_DGRAM && + !inet_ehash_secret) +@@ -166,7 +166,7 @@ + BUG_TRAP(answer_prot->slab != NULL); + + err = -ENOBUFS; +- sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); ++ sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1); + if (sk == NULL) + goto out; + +@@ -209,7 +209,7 @@ + inet->mc_index = 0; + inet->mc_list = NULL; + +- if (ipv4_config.no_pmtu_disc) ++ if (init_net.sysctl_ipv4_no_pmtu_disc) + inet->pmtudisc = IP_PMTUDISC_DONT; + else + inet->pmtudisc = IP_PMTUDISC_WANT; +@@ -290,7 +290,7 @@ + /* Check if the address belongs to the host. */ + if (addr_type == IPV6_ADDR_MAPPED) { + v4addr = addr->sin6_addr.s6_addr32[3]; +- if (inet_addr_type(v4addr) != RTN_LOCAL) { ++ if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) { + err = -EADDRNOTAVAIL; + goto out; + } +@@ -316,7 +316,7 @@ + err = -EINVAL; + goto out; + } +- dev = dev_get_by_index(sk->sk_bound_dev_if); ++ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out; +@@ -675,6 +675,7 @@ + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); +@@ -876,9 +877,6 @@ + ipv6_frag_init(); + ipv6_nodata_init(); + ipv6_destopt_init(); +-#ifdef CONFIG_IPV6_MIP6 +- mip6_init(); +-#endif + + /* Init v6 transport protocols. */ + udpv6_init(); +@@ -944,9 +942,7 @@ + + /* Cleanup code parts. 
*/ + ipv6_packet_cleanup(); +-#ifdef CONFIG_IPV6_MIP6 +- mip6_fini(); +-#endif ++ + addrconf_cleanup(); + ip6_flowlabel_cleanup(); + ip6_route_cleanup(); +diff -Nurb linux-2.6.22-570/net/ipv6/ah6.c linux-2.6.22-591/net/ipv6/ah6.c +--- linux-2.6.22-570/net/ipv6/ah6.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/ah6.c 2007-12-21 15:36:12.000000000 -0500 +@@ -74,7 +74,7 @@ + return 0; + } + +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + /** + * ipv6_rearrange_destopt - rearrange IPv6 destination options header + * @iph: IPv6 header +@@ -132,6 +132,8 @@ + bad: + return; + } ++#else ++static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {} + #endif + + /** +@@ -189,10 +191,8 @@ + while (exthdr.raw < end) { + switch (nexthdr) { + case NEXTHDR_DEST: +-#ifdef CONFIG_IPV6_MIP6 + if (dir == XFRM_POLICY_OUT) + ipv6_rearrange_destopt(iph, exthdr.opth); +-#endif + case NEXTHDR_HOP: + if (!zero_out_mutable_opts(exthdr.opth)) { + LIMIT_NETDEBUG( +@@ -228,7 +228,7 @@ + u8 nexthdr; + char tmp_base[8]; + struct { +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + struct in6_addr saddr; + #endif + struct in6_addr daddr; +@@ -255,7 +255,7 @@ + err = -ENOMEM; + goto error; + } +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + memcpy(tmp_ext, &top_iph->saddr, extlen); + #else + memcpy(tmp_ext, &top_iph->daddr, extlen); +@@ -294,7 +294,7 @@ + + memcpy(top_iph, tmp_base, sizeof(tmp_base)); + if (tmp_ext) { +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + memcpy(&top_iph->saddr, tmp_ext, extlen); + #else + memcpy(&top_iph->daddr, tmp_ext, extlen); +@@ -554,3 +554,4 @@ + module_exit(ah6_fini); + + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH); +diff -Nurb linux-2.6.22-570/net/ipv6/anycast.c linux-2.6.22-591/net/ipv6/anycast.c +--- linux-2.6.22-570/net/ipv6/anycast.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/anycast.c 2007-12-21 15:36:15.000000000 -0500 +@@ -32,6 +32,7 @@ + + #include + #include ++#include + + #include + #include +@@ -112,10 +113,10 @@ + } else { + /* router, no matching interface: just pick one */ + +- dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK); ++ dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK); + } + } else +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(&init_net, ifindex); + + if (dev == NULL) { + err = -ENODEV; +@@ -196,7 +197,7 @@ + + write_unlock_bh(&ipv6_sk_ac_lock); + +- dev = dev_get_by_index(pac->acl_ifindex); ++ dev = dev_get_by_index(&init_net, pac->acl_ifindex); + if (dev) { + ipv6_dev_ac_dec(dev, &pac->acl_addr); + dev_put(dev); +@@ -224,7 +225,7 @@ + if (pac->acl_ifindex != prev_index) { + if (dev) + dev_put(dev); +- dev = dev_get_by_index(pac->acl_ifindex); ++ dev = dev_get_by_index(&init_net, pac->acl_ifindex); + prev_index = pac->acl_ifindex; + } + if (dev) +@@ -429,7 +430,7 @@ + if (dev) + return ipv6_chk_acast_dev(dev, addr); + read_lock(&dev_base_lock); +- for_each_netdev(dev) ++ for_each_netdev(&init_net, dev) + if (ipv6_chk_acast_dev(dev, addr)) { + found = 1; + break; +@@ -453,7 +454,7 @@ + struct ac6_iter_state *state = ac6_seq_private(seq); + + state->idev = NULL; +- for_each_netdev(state->dev) { ++ for_each_netdev(&init_net, state->dev) { + struct inet6_dev *idev; + idev = in6_dev_get(state->dev); + if (!idev) +@@ -579,7 +580,7 @@ + + int __init 
ac6_proc_init(void) + { +- if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops)) + return -ENOMEM; + + return 0; +@@ -587,7 +588,7 @@ + + void ac6_proc_exit(void) + { +- proc_net_remove("anycast6"); ++ proc_net_remove(&init_net, "anycast6"); + } + #endif + +diff -Nurb linux-2.6.22-570/net/ipv6/datagram.c linux-2.6.22-591/net/ipv6/datagram.c +--- linux-2.6.22-570/net/ipv6/datagram.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/datagram.c 2007-12-21 15:36:15.000000000 -0500 +@@ -60,6 +60,7 @@ + return -EAFNOSUPPORT; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { +@@ -544,7 +545,7 @@ + if (!src_info->ipi6_ifindex) + return -EINVAL; + else { +- dev = dev_get_by_index(src_info->ipi6_ifindex); ++ dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); + if (!dev) + return -ENODEV; + } +@@ -658,7 +659,7 @@ + + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPV6_SRCRT_TYPE_2: + #endif + break; +diff -Nurb linux-2.6.22-570/net/ipv6/esp6.c linux-2.6.22-591/net/ipv6/esp6.c +--- linux-2.6.22-570/net/ipv6/esp6.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/esp6.c 2007-12-21 15:36:12.000000000 -0500 +@@ -421,3 +421,4 @@ + module_exit(esp6_fini); + + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP); +diff -Nurb linux-2.6.22-570/net/ipv6/exthdrs.c linux-2.6.22-591/net/ipv6/exthdrs.c +--- linux-2.6.22-570/net/ipv6/exthdrs.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/exthdrs.c 2007-12-21 15:36:12.000000000 -0500 +@@ -42,7 +42,7 @@ + #include + #include + #include +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + #include + #endif + +@@ -90,6 +90,7 @@ + bad: + return -1; + } ++EXPORT_SYMBOL_GPL(ipv6_find_tlv); + + /* + * Parsing tlv encoded headers. +@@ -196,7 +197,7 @@ + Destination options header. 
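Throughout this patch, bare #ifdef CONFIG_IPV6_MIP6 tests become two-way tests because the option is being turned into a tristate. Kconfig defines CONFIG_FOO only for built-in (=y) builds and CONFIG_FOO_MODULE only for modular (=m) builds, so code that must survive both spellings has to check for either symbol:

	/* Kconfig semantics for a tristate option:
	 *   =y  ->  CONFIG_IPV6_MIP6 is defined
	 *   =m  ->  CONFIG_IPV6_MIP6_MODULE is defined
	 * so "enabled at all" reads:
	 */
	#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
	/* keep the MIPv6 hooks, whether built in or modular */
	#endif

Later kernels wrap this exact test as IS_ENABLED(CONFIG_IPV6_MIP6).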
+ *****************************/ + +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) + { + struct sk_buff *skb = *skbp; +@@ -270,7 +271,7 @@ + #endif + + static struct tlvtype_proc tlvprocdestopt_lst[] = { +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + { + .type = IPV6_TLV_HAO, + .func = ipv6_dest_hao, +@@ -283,7 +284,7 @@ + { + struct sk_buff *skb = *skbp; + struct inet6_skb_parm *opt = IP6CB(skb); +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + __u16 dstbuf; + #endif + struct dst_entry *dst; +@@ -298,7 +299,7 @@ + } + + opt->lastopt = opt->dst1 = skb_network_header_len(skb); +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + dstbuf = opt->dst1; + #endif + +@@ -308,7 +309,7 @@ + skb = *skbp; + skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; + opt = IP6CB(skb); +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + opt->nhoff = dstbuf; + #else + opt->nhoff = opt->dst1; +@@ -427,7 +428,7 @@ + looped_back: + if (hdr->segments_left == 0) { + switch (hdr->type) { +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPV6_SRCRT_TYPE_2: + /* Silently discard type 2 header unless it was + * processed by own +@@ -463,7 +464,7 @@ + return -1; + } + break; +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPV6_SRCRT_TYPE_2: + /* Silently discard invalid RTH type 2 */ + if (hdr->hdrlen != 2 || hdr->segments_left != 1) { +@@ -520,7 +521,7 @@ + addr += i - 1; + + switch (hdr->type) { +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPV6_SRCRT_TYPE_2: + if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, + (xfrm_address_t *)&ipv6_hdr(skb)->saddr, +diff -Nurb linux-2.6.22-570/net/ipv6/fib6_rules.c linux-2.6.22-591/net/ipv6/fib6_rules.c +--- linux-2.6.22-570/net/ipv6/fib6_rules.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/fib6_rules.c 2007-12-21 15:36:15.000000000 -0500 +@@ -244,7 +244,7 @@ + return -ENOBUFS; + } + +-static u32 fib6_rule_default_pref(void) ++static u32 fib6_rule_default_pref(struct fib_rules_ops *ops) + { + return 0x3FFF; + } +@@ -277,10 +277,10 @@ + list_add_tail(&local_rule.common.list, &fib6_rules); + list_add_tail(&main_rule.common.list, &fib6_rules); + +- fib_rules_register(&fib6_rules_ops); ++ fib_rules_register(&init_net, &fib6_rules_ops); + } + + void fib6_rules_cleanup(void) + { +- fib_rules_unregister(&fib6_rules_ops); ++ fib_rules_unregister(&init_net, &fib6_rules_ops); + } +diff -Nurb linux-2.6.22-570/net/ipv6/icmp.c linux-2.6.22-591/net/ipv6/icmp.c +--- linux-2.6.22-570/net/ipv6/icmp.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/icmp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -272,7 +272,7 @@ + return 0; + } + +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + static void mip6_addr_swap(struct sk_buff *skb) + { + struct ipv6hdr *iph = ipv6_hdr(skb); +@@ -377,6 +377,7 @@ + mip6_addr_swap(skb); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); + if (saddr) +@@ -495,6 +496,7 @@ + tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net 
= &init_net; + fl.proto = IPPROTO_ICMPV6; + ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + if (saddr) +diff -Nurb linux-2.6.22-570/net/ipv6/inet6_connection_sock.c linux-2.6.22-591/net/ipv6/inet6_connection_sock.c +--- linux-2.6.22-570/net/ipv6/inet6_connection_sock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/inet6_connection_sock.c 2007-12-21 15:36:15.000000000 -0500 +@@ -149,6 +149,7 @@ + struct in6_addr *final_p = NULL, final; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = sk->sk_protocol; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); +diff -Nurb linux-2.6.22-570/net/ipv6/inet6_hashtables.c linux-2.6.22-591/net/ipv6/inet6_hashtables.c +--- linux-2.6.22-570/net/ipv6/inet6_hashtables.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/inet6_hashtables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -61,7 +61,7 @@ + const __be16 sport, + const struct in6_addr *daddr, + const u16 hnum, +- const int dif) ++ const int dif, struct net *net) + { + struct sock *sk; + const struct hlist_node *node; +@@ -105,7 +105,7 @@ + + struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, +- const unsigned short hnum, const int dif) ++ const unsigned short hnum, const int dif, struct net *net) + { + struct sock *sk; + const struct hlist_node *node; +@@ -113,7 +113,7 @@ + int score, hiscore = 0; + + read_lock(&hashinfo->lhash_lock); +- sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { ++ sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) { + if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { + const struct ipv6_pinfo *np = inet6_sk(sk); + +@@ -152,12 +152,12 @@ + struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const __be16 sport, + const struct in6_addr *daddr, const __be16 dport, +- const int dif) ++ const int dif, struct net *net) + { + struct sock *sk; + + local_bh_disable(); +- sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); ++ sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net); + local_bh_enable(); + + return sk; +@@ -251,6 +251,7 @@ + int inet6_hash_connect(struct inet_timewait_death_row *death_row, + struct sock *sk) + { ++ struct net *net = sk->sk_net; + struct inet_hashinfo *hinfo = death_row->hashinfo; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; +@@ -258,8 +259,8 @@ + int ret; + + if (snum == 0) { +- const int low = sysctl_local_port_range[0]; +- const int high = sysctl_local_port_range[1]; ++ const int low = sk->sk_net->sysctl_local_port_range[0]; ++ const int high = sk->sk_net->sysctl_local_port_range[1]; + const int range = high - low; + int i, port; + static u32 hint; +@@ -270,7 +271,7 @@ + local_bh_disable(); + for (i = 1; i <= range; i++) { + port = low + (i + offset) % range; +- head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, +@@ -278,7 +279,7 @@ + * unique enough. 
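The ephemeral-port loop in inet6_hash_connect() above rewards a second look: with range = high - low, the candidate sequence low + (i + offset) % range for i = 1..range visits every port in the range exactly once, starting from a per-destination offset, so concurrent connects to different peers tend to probe disjoint parts of the range. In miniature (userspace sketch; the numbers are just a typical ip_local_port_range, and in_use() stands in for the bind-bucket check):

	#include <stdio.h>

	static int in_use(int port)
	{
		return port < 45000;	/* pretend everything below 45000 is taken */
	}

	int main(void)
	{
		const int low = 32768, high = 61000;
		const int range = high - low;
		const unsigned int offset = 12345;	/* stands in for hint + dest hash */

		for (int i = 1; i <= range; i++) {
			int port = low + (int)((i + offset) % (unsigned int)range);
			if (!in_use(port)) {
				printf("bound to %d after %d probes\n", port, i);
				return 0;
			}
		}
		printf("ephemeral port range exhausted\n");
		return 1;
	}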
+ */ + inet_bind_bucket_for_each(tb, node, &head->chain) { +- if (tb->port == port) { ++ if ((tb->port == port) && (tb->net == net)) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; +@@ -291,7 +292,7 @@ + } + + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, +- head, port); ++ head, net, port); + if (!tb) { + spin_unlock(&head->lock); + break; +@@ -326,7 +327,7 @@ + goto out; + } + +- head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)]; ++ head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + +diff -Nurb linux-2.6.22-570/net/ipv6/ip6_fib.c linux-2.6.22-591/net/ipv6/ip6_fib.c +--- linux-2.6.22-570/net/ipv6/ip6_fib.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/ip6_fib.c 2007-12-21 15:36:15.000000000 -0500 +@@ -361,6 +361,7 @@ + + static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct rt6_rtnl_dump_arg arg; +@@ -369,6 +370,9 @@ + struct hlist_node *node; + int res = 0; + ++ if (net != &init_net) ++ return 0; ++ + s_h = cb->args[0]; + s_e = cb->args[1]; + +@@ -1311,6 +1315,11 @@ + + static int fib6_clean_node(struct fib6_walker_t *w) + { ++ struct nl_info info = { ++ .nlh = NULL, ++ .pid = 0, ++ .net = &init_net, ++ }; + int res; + struct rt6_info *rt; + struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w; +@@ -1319,7 +1328,7 @@ + res = c->func(rt, c->arg); + if (res < 0) { + w->leaf = rt; +- res = fib6_del(rt, NULL); ++ res = fib6_del(rt, &info); + if (res) { + #if RT6_DEBUG >= 2 + printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); +diff -Nurb linux-2.6.22-570/net/ipv6/ip6_flowlabel.c linux-2.6.22-591/net/ipv6/ip6_flowlabel.c +--- linux-2.6.22-570/net/ipv6/ip6_flowlabel.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/ip6_flowlabel.c 2007-12-21 15:36:15.000000000 -0500 +@@ -22,6 +22,7 @@ + #include + + #include ++#include + + #include + #include +@@ -309,6 +310,7 @@ + + msg.msg_controllen = olen; + msg.msg_control = (void*)(fl->opt+1); ++ flowi.fl_net = &init_net; + flowi.oif = 0; + + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); +@@ -690,7 +692,7 @@ + void ip6_flowlabel_init(void) + { + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); ++ proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops); + #endif + } + +@@ -698,6 +700,6 @@ + { + del_timer(&ip6_fl_gc_timer); + #ifdef CONFIG_PROC_FS +- proc_net_remove("ip6_flowlabel"); ++ proc_net_remove(&init_net, "ip6_flowlabel"); + #endif + } +diff -Nurb linux-2.6.22-570/net/ipv6/ip6_input.c linux-2.6.22-591/net/ipv6/ip6_input.c +--- linux-2.6.22-570/net/ipv6/ip6_input.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/ip6_input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -61,6 +61,11 @@ + u32 pkt_len; + struct inet6_dev *idev; + ++ if (dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } ++ + if (skb->pkt_type == PACKET_OTHERHOST) { + kfree_skb(skb); + return 0; +diff -Nurb linux-2.6.22-570/net/ipv6/ip6_output.c linux-2.6.22-591/net/ipv6/ip6_output.c +--- linux-2.6.22-570/net/ipv6/ip6_output.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/ip6_output.c 2007-12-21 15:36:15.000000000 -0500 +@@ -423,7 +423,7 @@ + + /* XXX: idev->cnf.proxy_ndp? 
*/ + if (ipv6_devconf.proxy_ndp && +- pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { ++ pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { + int proxied = ip6_forward_proxy_check(skb); + if (proxied > 0) + return ip6_input(skb); +@@ -543,7 +543,7 @@ + found_rhdr = 1; + break; + case NEXTHDR_DEST: +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + break; + #endif +diff -Nurb linux-2.6.22-570/net/ipv6/ip6_tunnel.c linux-2.6.22-591/net/ipv6/ip6_tunnel.c +--- linux-2.6.22-570/net/ipv6/ip6_tunnel.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/ip6_tunnel.c 2007-12-21 15:36:15.000000000 -0500 +@@ -235,7 +235,7 @@ + int i; + for (i = 1; i < IP6_TNL_MAX; i++) { + sprintf(name, "ip6tnl%d", i); +- if (__dev_get_by_name(name) == NULL) ++ if (__dev_get_by_name(&init_net, name) == NULL) + break; + } + if (i == IP6_TNL_MAX) +@@ -651,7 +651,7 @@ + struct net_device *ldev = NULL; + + if (p->link) +- ldev = dev_get_by_index(p->link); ++ ldev = dev_get_by_index(&init_net, p->link); + + if ((ipv6_addr_is_multicast(&p->laddr) || + likely(ipv6_chk_addr(&p->laddr, ldev, 0))) && +@@ -787,7 +787,7 @@ + struct net_device *ldev = NULL; + + if (p->link) +- ldev = dev_get_by_index(p->link); ++ ldev = dev_get_by_index(&init_net, p->link); + + if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0))) + printk(KERN_WARNING +diff -Nurb linux-2.6.22-570/net/ipv6/ipcomp6.c linux-2.6.22-591/net/ipv6/ipcomp6.c +--- linux-2.6.22-570/net/ipv6/ipcomp6.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/ipcomp6.c 2007-12-21 15:36:12.000000000 -0500 +@@ -501,4 +501,4 @@ + MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); + MODULE_AUTHOR("Mitsuru KANDA "); + +- ++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP); +diff -Nurb linux-2.6.22-570/net/ipv6/ipv6_sockglue.c linux-2.6.22-591/net/ipv6/ipv6_sockglue.c +--- linux-2.6.22-570/net/ipv6/ipv6_sockglue.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/ipv6_sockglue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -123,7 +123,7 @@ + struct ipv6hdr *ipv6h; + struct inet6_protocol *ops; + +- if (!(features & NETIF_F_HW_CSUM)) ++ if (!(features & NETIF_F_V6_CSUM)) + features &= ~NETIF_F_SG; + + if (unlikely(skb_shinfo(skb)->gso_type & +@@ -417,7 +417,7 @@ + struct ipv6_rt_hdr *rthdr = opt->srcrt; + switch (rthdr->type) { + case IPV6_SRCRT_TYPE_0: +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPV6_SRCRT_TYPE_2: + #endif + break; +@@ -463,6 +463,7 @@ + struct flowi fl; + int junk; + ++ fl.fl_net = &init_net; + fl.fl6_flowlabel = 0; + fl.oif = sk->sk_bound_dev_if; + +@@ -547,7 +548,7 @@ + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) + goto e_inval; + +- if (__dev_get_by_index(val) == NULL) { ++ if (__dev_get_by_index(&init_net, val) == NULL) { + retv = -ENODEV; + break; + } +diff -Nurb linux-2.6.22-570/net/ipv6/mcast.c linux-2.6.22-591/net/ipv6/mcast.c +--- linux-2.6.22-570/net/ipv6/mcast.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/mcast.c 2007-12-21 15:36:15.000000000 -0500 +@@ -51,6 +51,7 @@ + + #include + #include ++#include + + #include + #include +@@ -214,7 +215,7 @@ + dst_release(&rt->u.dst); + } + } else +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(&init_net, ifindex); + + if (dev == NULL) { + sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); +@@ -265,7 +266,7 @@ + *lnk = mc_lst->next; + 
write_unlock_bh(&ipv6_sk_mc_lock); + +- if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) { ++ if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { + struct inet6_dev *idev = in6_dev_get(dev); + + (void) ip6_mc_leave_src(sk, mc_lst, idev); +@@ -300,7 +301,7 @@ + dst_release(&rt->u.dst); + } + } else +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(&init_net, ifindex); + + if (!dev) + return NULL; +@@ -331,7 +332,7 @@ + np->ipv6_mc_list = mc_lst->next; + write_unlock_bh(&ipv6_sk_mc_lock); + +- dev = dev_get_by_index(mc_lst->ifindex); ++ dev = dev_get_by_index(&init_net, mc_lst->ifindex); + if (dev) { + struct inet6_dev *idev = in6_dev_get(dev); + +@@ -2332,7 +2333,7 @@ + struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + + state->idev = NULL; +- for_each_netdev(state->dev) { ++ for_each_netdev(&init_net, state->dev) { + struct inet6_dev *idev; + idev = in6_dev_get(state->dev); + if (!idev) +@@ -2476,7 +2477,7 @@ + + state->idev = NULL; + state->im = NULL; +- for_each_netdev(state->dev) { ++ for_each_netdev(&init_net, state->dev) { + struct inet6_dev *idev; + idev = in6_dev_get(state->dev); + if (unlikely(idev == NULL)) +@@ -2658,8 +2659,8 @@ + np->hop_limit = 1; + + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops); +- proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); ++ proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); ++ proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + #endif + + return 0; +@@ -2671,7 +2672,7 @@ + igmp6_socket = NULL; /* for safety */ + + #ifdef CONFIG_PROC_FS +- proc_net_remove("mcfilter6"); +- proc_net_remove("igmp6"); ++ proc_net_remove(&init_net, "mcfilter6"); ++ proc_net_remove(&init_net, "igmp6"); + #endif + } +diff -Nurb linux-2.6.22-570/net/ipv6/mip6.c linux-2.6.22-591/net/ipv6/mip6.c +--- linux-2.6.22-570/net/ipv6/mip6.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/mip6.c 2007-12-21 15:36:12.000000000 -0500 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -86,7 +87,7 @@ + return len; + } + +-int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) ++static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) + { + struct ip6_mh *mh; + +@@ -471,7 +472,7 @@ + .remote_addr = mip6_xfrm_addr, + }; + +-int __init mip6_init(void) ++static int __init mip6_init(void) + { + printk(KERN_INFO "Mobile IPv6\n"); + +@@ -483,18 +484,35 @@ + printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); + goto mip6_rthdr_xfrm_fail; + } ++ if (rawv6_mh_filter_register(mip6_mh_filter) < 0) { ++ printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__); ++ goto mip6_rawv6_mh_fail; ++ } ++ ++ + return 0; + ++ mip6_rawv6_mh_fail: ++ xfrm_unregister_type(&mip6_rthdr_type, AF_INET6); + mip6_rthdr_xfrm_fail: + xfrm_unregister_type(&mip6_destopt_type, AF_INET6); + mip6_destopt_xfrm_fail: + return -EAGAIN; + } + +-void __exit mip6_fini(void) ++static void __exit mip6_fini(void) + { ++ if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0) ++ printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__); + if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); + if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); + } ++ ++module_init(mip6_init); ++module_exit(mip6_fini); ++ ++MODULE_LICENSE("GPL"); 
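What makes mip6 safely unloadable is the indirection that the raw.c hunk further down introduces: instead of calling mip6_mh_filter() directly, the Mobility Header demux samples an RCU-managed function pointer that this module publishes in mip6_init() and clears in mip6_fini(). The shape of the pattern, condensed (kernel-style sketch, not a complete file):

	/* shared hook, NULL while no provider is loaded */
	static int (*mh_filter)(struct sock *sk, struct sk_buff *skb);

	/* provider, at module_init(): publish the hook */
	rcu_assign_pointer(mh_filter, mip6_mh_filter);

	/* provider, at module_exit(): unpublish, then wait for
	 * in-flight readers before the module text may vanish */
	rcu_assign_pointer(mh_filter, NULL);
	synchronize_rcu();

	/* consumer fast path: sample once, tolerate NULL */
	int (*filter)(struct sock *, struct sk_buff *);
	filter = rcu_dereference(mh_filter);
	filtered = filter ? filter(sk, skb) : 0;

The MODULE_ALIAS_XFRM_TYPE() lines that follow are the other half of the modularization: assuming the mainline definition of that macro, each expands to MODULE_ALIAS("xfrm-type-<family>-<proto>") (e.g. "xfrm-type-10-60" for AF_INET6/XFRM_PROTO_DSTOPTS), which is what lets the xfrm core request_module() the right provider on demand.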
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS); ++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING); +diff -Nurb linux-2.6.22-570/net/ipv6/ndisc.c linux-2.6.22-591/net/ipv6/ndisc.c +--- linux-2.6.22-570/net/ipv6/ndisc.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/ndisc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -418,6 +418,7 @@ + int oif) + { + memset(fl, 0, sizeof(*fl)); ++ fl->fl_net = &init_net; + ipv6_addr_copy(&fl->fl6_src, saddr); + ipv6_addr_copy(&fl->fl6_dst, daddr); + fl->proto = IPPROTO_ICMPV6; +@@ -760,7 +761,7 @@ + if (ipv6_chk_acast_addr(dev, &msg->target) || + (idev->cnf.forwarding && + (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && +- (pneigh = pneigh_lookup(&nd_tbl, ++ (pneigh = pneigh_lookup(&nd_tbl, &init_net, + &msg->target, dev, 0)) != NULL)) { + if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && + skb->pkt_type != PACKET_HOST && +@@ -901,7 +902,7 @@ + */ + if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && + ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && +- pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { ++ pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { + /* XXX: idev->cnf.prixy_ndp */ + goto out; + } +@@ -1525,6 +1526,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch (event) { + case NETDEV_CHANGEADDR: + neigh_changeaddr(&nd_tbl, dev); +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c +--- linux-2.6.22-570/net/ipv6/netfilter/ip6_queue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6_queue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -546,6 +547,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + ipq_dev_drop(dev->ifindex); +@@ -565,7 +569,7 @@ + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_IP6_FW && n->pid) { + write_lock_bh(&queue_lock); +- if (n->pid == peer_pid) ++ if ((n->net == &init_net) && (n->pid == peer_pid)) + __ipq_reset(); + write_unlock_bh(&queue_lock); + } +@@ -657,14 +661,14 @@ + struct proc_dir_entry *proc; + + netlink_register_notifier(&ipq_nl_notifier); +- ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL, +- THIS_MODULE); ++ ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk, ++ NULL, THIS_MODULE); + if (ipqnl == NULL) { + printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); ++ proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); + if (proc) + proc->owner = THIS_MODULE; + else { +@@ -685,7 +689,7 @@ + cleanup_sysctl: + unregister_sysctl_table(ipq_sysctl_header); + unregister_netdevice_notifier(&ipq_dev_notifier); +- proc_net_remove(IPQ_PROC_FS_NAME); ++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME); + + cleanup_ipqnl: + sock_release(ipqnl->sk_socket); +@@ -705,7 +709,7 @@ + + unregister_sysctl_table(ipq_sysctl_header); + unregister_netdevice_notifier(&ipq_dev_notifier); +- proc_net_remove(IPQ_PROC_FS_NAME); ++ proc_net_remove(&init_net, IPQ_PROC_FS_NAME); + + sock_release(ipqnl->sk_socket); + mutex_lock(&ipqnl_mutex); +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c +--- 
linux-2.6.22-570/net/ipv6/netfilter/ip6_tables.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6_tables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -906,7 +906,7 @@ + int ret; + struct xt_table *t; + +- t = xt_find_table_lock(AF_INET6, entries->name); ++ t = xt_find_table_lock(&init_net, AF_INET6, entries->name); + if (t && !IS_ERR(t)) { + struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); +@@ -972,7 +972,7 @@ + + duprintf("ip_tables: Translated table\n"); + +- t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name), ++ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name), + "ip6table_%s", tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; +@@ -1073,7 +1073,7 @@ + goto free; + } + +- t = xt_find_table_lock(AF_INET6, tmp.name); ++ t = xt_find_table_lock(&init_net, AF_INET6, tmp.name); + if (!t || IS_ERR(t)) { + ret = t ? PTR_ERR(t) : -ENOENT; + goto free; +@@ -1109,6 +1109,9 @@ + { + int ret; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -1134,6 +1137,9 @@ + { + int ret; + ++ if (sk->sk_net != &init_net) ++ return -ENOPROTOOPT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + +@@ -1155,7 +1161,7 @@ + } + name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; + +- t = try_then_request_module(xt_find_table_lock(AF_INET6, name), ++ t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name), + "ip6table_%s", name); + if (t && !IS_ERR(t)) { + struct ip6t_getinfo info; +@@ -1259,7 +1265,7 @@ + return ret; + } + +- ret = xt_register_table(table, &bootstrap, newinfo); ++ ret = xt_register_table(&init_net, table, &bootstrap, newinfo); + if (ret != 0) { + xt_free_table_info(newinfo); + return ret; +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c +--- linux-2.6.22-570/net/ipv6/netfilter/ip6t_REJECT.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6t_REJECT.c 2007-12-21 15:36:15.000000000 -0500 +@@ -92,6 +92,7 @@ + } + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); +@@ -172,7 +173,7 @@ + send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) + { + if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) +- skb_in->dev = &loopback_dev; ++ skb_in->dev = &init_net.loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); + } +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c +--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_filter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_filter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -65,6 +65,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ip6t_do_table(pskb, hook, in, out, &packet_filter); + } + +@@ -75,6 +79,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + #if 0 + /* root is playing with raw sockets. 
*/ + if ((*pskb)->len < sizeof(struct iphdr) +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c +--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_mangle.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_mangle.c 2007-12-21 15:36:15.000000000 -0500 +@@ -79,6 +79,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); + } + +@@ -95,6 +99,10 @@ + u_int8_t hop_limit; + u_int32_t flowlabel, mark; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + #if 0 + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c +--- linux-2.6.22-570/net/ipv6/netfilter/ip6table_raw.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/ip6table_raw.c 2007-12-21 15:36:15.000000000 -0500 +@@ -57,6 +57,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return ip6t_do_table(pskb, hook, in, out, &packet_raw); + } + +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +--- linux-2.6.22-570/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2007-12-21 15:36:15.000000000 -0500 +@@ -167,6 +167,10 @@ + unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; + + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(*pskb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) +@@ -203,6 +207,10 @@ + { + struct sk_buff *reasm; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* Previously seen (loopback)? */ + if ((*pskb)->nfct) + return NF_ACCEPT; +@@ -231,6 +239,10 @@ + { + struct sk_buff *reasm = (*pskb)->nfct_reasm; + ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* This packet is fragmented and has reassembled packet. */ + if (reasm) { + /* Reassembled packet isn't parsed yet ? */ +@@ -256,6 +268,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (net_ratelimit()) +diff -Nurb linux-2.6.22-570/net/ipv6/netfilter.c linux-2.6.22-591/net/ipv6/netfilter.c +--- linux-2.6.22-570/net/ipv6/netfilter.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/netfilter.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,6 +14,7 @@ + struct ipv6hdr *iph = ipv6_hdr(skb); + struct dst_entry *dst; + struct flowi fl = { ++ .fl_net = &init_net, + .oif = skb->sk ? 
skb->sk->sk_bound_dev_if : 0, + .mark = skb->mark, + .nl_u = +diff -Nurb linux-2.6.22-570/net/ipv6/proc.c linux-2.6.22-591/net/ipv6/proc.c +--- linux-2.6.22-570/net/ipv6/proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + static struct proc_dir_entry *proc_net_devsnmp6; + +@@ -231,22 +232,22 @@ + { + int rc = 0; + +- if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + goto proc_snmp6_fail; + +- proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net); ++ proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); + if (!proc_net_devsnmp6) + goto proc_dev_snmp6_fail; + +- if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) + goto proc_sockstat6_fail; + out: + return rc; + + proc_sockstat6_fail: +- proc_net_remove("dev_snmp6"); ++ proc_net_remove(&init_net, "dev_snmp6"); + proc_dev_snmp6_fail: +- proc_net_remove("snmp6"); ++ proc_net_remove(&init_net, "snmp6"); + proc_snmp6_fail: + rc = -ENOMEM; + goto out; +@@ -254,8 +255,8 @@ + + void ipv6_misc_proc_exit(void) + { +- proc_net_remove("sockstat6"); +- proc_net_remove("dev_snmp6"); +- proc_net_remove("snmp6"); ++ proc_net_remove(&init_net, "sockstat6"); ++ proc_net_remove(&init_net, "dev_snmp6"); ++ proc_net_remove(&init_net, "snmp6"); + } + +diff -Nurb linux-2.6.22-570/net/ipv6/raw.c linux-2.6.22-591/net/ipv6/raw.c +--- linux-2.6.22-570/net/ipv6/raw.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/raw.c 2007-12-21 15:36:15.000000000 -0500 +@@ -49,7 +49,8 @@ + #include + #include + #include +-#ifdef CONFIG_IPV6_MIP6 ++#include ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + #include + #endif + +@@ -137,6 +138,28 @@ + return 0; + } + ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) ++static int (*mh_filter)(struct sock *sock, struct sk_buff *skb); ++ ++int rawv6_mh_filter_register(int (*filter)(struct sock *sock, ++ struct sk_buff *skb)) ++{ ++ rcu_assign_pointer(mh_filter, filter); ++ return 0; ++} ++EXPORT_SYMBOL(rawv6_mh_filter_register); ++ ++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock, ++ struct sk_buff *skb)) ++{ ++ rcu_assign_pointer(mh_filter, NULL); ++ synchronize_rcu(); ++ return 0; ++} ++EXPORT_SYMBOL(rawv6_mh_filter_unregister); ++ ++#endif ++ + /* + * demultiplex raw sockets. + * (should consider queueing the skb in the sock receive_queue +@@ -178,16 +201,22 @@ + case IPPROTO_ICMPV6: + filtered = icmpv6_filter(sk, skb); + break; +-#ifdef CONFIG_IPV6_MIP6 ++ ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPPROTO_MH: ++ { + /* XXX: To validate MH only once for each packet, + * this is placed here. It should be after checking + * xfrm policy, however it doesn't. The checking xfrm + * policy is placed in rawv6_rcv() because it is + * required for each socket. + */ +- filtered = mip6_mh_filter(sk, skb); ++ int (*filter)(struct sock *sock, struct sk_buff *skb); ++ ++ filter = rcu_dereference(mh_filter); ++ filtered = filter ? 
filter(sk, skb) : 0; + break; ++ } + #endif + default: + filtered = 0; +@@ -254,7 +283,7 @@ + if (!sk->sk_bound_dev_if) + goto out; + +- dev = dev_get_by_index(sk->sk_bound_dev_if); ++ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (!dev) { + err = -ENODEV; + goto out; +@@ -611,9 +640,7 @@ + struct iovec *iov; + u8 __user *type = NULL; + u8 __user *code = NULL; +-#ifdef CONFIG_IPV6_MIP6 + u8 len = 0; +-#endif + int probed = 0; + int i; + +@@ -646,7 +673,6 @@ + probed = 1; + } + break; +-#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (iov->iov_base && iov->iov_len < 1) + break; +@@ -660,7 +686,6 @@ + len += iov->iov_len; + + break; +-#endif + default: + probed = 1; + break; +@@ -704,6 +729,7 @@ + * Get and verify the address. + */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + + if (sin6) { + if (addr_len < SIN6_LEN_RFC2133) +@@ -1291,13 +1317,13 @@ + + int __init raw6_proc_init(void) + { +- if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops)) ++ if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops)) + return -ENOMEM; + return 0; + } + + void raw6_proc_exit(void) + { +- proc_net_remove("raw6"); ++ proc_net_remove(&init_net, "raw6"); + } + #endif /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/ipv6/reassembly.c linux-2.6.22-591/net/ipv6/reassembly.c +--- linux-2.6.22-570/net/ipv6/reassembly.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/reassembly.c 2007-12-21 15:36:15.000000000 -0500 +@@ -301,7 +301,7 @@ + + fq_kill(fq); + +- dev = dev_get_by_index(fq->iif); ++ dev = dev_get_by_index(&init_net, fq->iif); + if (!dev) + goto out; + +diff -Nurb linux-2.6.22-570/net/ipv6/route.c linux-2.6.22-591/net/ipv6/route.c +--- linux-2.6.22-570/net/ipv6/route.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -56,6 +56,7 @@ + #include + #include + #include ++#include + + #include + +@@ -137,7 +138,7 @@ + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, +- .dev = &loopback_dev, ++ .dev = NULL, + .obsolete = -1, + .error = -ENETUNREACH, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, +@@ -163,7 +164,7 @@ + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, +- .dev = &loopback_dev, ++ .dev = NULL, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, +@@ -183,7 +184,7 @@ + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, +- .dev = &loopback_dev, ++ .dev = NULL, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, +@@ -223,8 +224,8 @@ + struct rt6_info *rt = (struct rt6_info *)dst; + struct inet6_dev *idev = rt->rt6i_idev; + +- if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { +- struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); ++ if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) { ++ struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); + if (loopback_idev != NULL) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); +@@ -564,6 +565,7 @@ + int oif, int strict) + { + struct flowi fl = { ++ .fl_net = &init_net, + .oif = oif, + .nl_u = { + .ip6_u = { +@@ -611,7 +613,12 @@ + + int ip6_ins_rt(struct rt6_info *rt) + { +- return __ip6_ins_rt(rt, NULL); ++ struct nl_info info = { ++ .nlh = NULL, ++ .pid = 0, ++ .net = &init_net, ++ }; ++ return __ip6_ins_rt(rt, &info); + } + + static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, +@@ -742,6 +749,7 @@ + struct ipv6hdr *iph = ipv6_hdr(skb); + int flags = 
RT6_LOOKUP_F_HAS_SADDR; + struct flowi fl = { ++ .fl_net = &init_net, + .iif = skb->dev->ifindex, + .nl_u = { + .ip6_u = { +@@ -1129,7 +1137,7 @@ + #endif + if (cfg->fc_ifindex) { + err = -ENODEV; +- dev = dev_get_by_index(cfg->fc_ifindex); ++ dev = dev_get_by_index(&init_net, cfg->fc_ifindex); + if (!dev) + goto out; + idev = in6_dev_get(dev); +@@ -1187,12 +1195,12 @@ + if ((cfg->fc_flags & RTF_REJECT) || + (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { + /* hold loopback dev/idev if we haven't done so. */ +- if (dev != &loopback_dev) { ++ if (dev != &init_net.loopback_dev) { + if (dev) { + dev_put(dev); + in6_dev_put(idev); + } +- dev = &loopback_dev; ++ dev = &init_net.loopback_dev; + dev_hold(dev); + idev = in6_dev_get(dev); + if (!idev) { +@@ -1333,7 +1341,12 @@ + + int ip6_del_rt(struct rt6_info *rt) + { +- return __ip6_del_rt(rt, NULL); ++ struct nl_info info = { ++ .nlh = NULL, ++ .pid = 0, ++ .net = &init_net, ++ }; ++ return __ip6_del_rt(rt, &info); + } + + static int ip6_route_del(struct fib6_config *cfg) +@@ -1444,6 +1457,7 @@ + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct ip6rd_flowi rdfl = { + .fl = { ++ .fl_net = &init_net, + .oif = dev->ifindex, + .nl_u = { + .ip6_u = { +@@ -1896,13 +1910,13 @@ + if (rt == NULL) + return ERR_PTR(-ENOMEM); + +- dev_hold(&loopback_dev); ++ dev_hold(&init_net.loopback_dev); + in6_dev_hold(idev); + + rt->u.dst.flags = DST_HOST; + rt->u.dst.input = ip6_input; + rt->u.dst.output = ip6_output; +- rt->rt6i_dev = &loopback_dev; ++ rt->rt6i_dev = &init_net.loopback_dev; + rt->rt6i_idev = idev; + rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); +@@ -2033,6 +2047,7 @@ + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; ++ cfg->fc_nlinfo.net = skb->sk->sk_net; + + if (tb[RTA_GATEWAY]) { + nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); +@@ -2078,9 +2093,13 @@ + + static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib6_config cfg; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; +@@ -2090,9 +2109,13 @@ + + static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct fib6_config cfg; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; +@@ -2227,6 +2250,7 @@ + + static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) + { ++ struct net *net = in_skb->sk->sk_net; + struct nlattr *tb[RTA_MAX+1]; + struct rt6_info *rt; + struct sk_buff *skb; +@@ -2234,12 +2258,16 @@ + struct flowi fl; + int err, iif = 0; + ++ if (net != &init_net) ++ return -EINVAL; ++ + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; + + err = -EINVAL; + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + + if (tb[RTA_SRC]) { + if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) +@@ -2263,7 +2291,7 @@ + + if (iif) { + struct net_device *dev; +- dev = __dev_get_by_index(iif); ++ dev = __dev_get_by_index(&init_net, iif); + if (!dev) { + err = -ENODEV; + goto errout; +@@ -2293,7 +2321,7 @@ + goto errout; + } + +- err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); ++ err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + errout: + return err; + } +@@ -2301,17 
+2329,10 @@ + void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) + { + struct sk_buff *skb; +- u32 pid = 0, seq = 0; +- struct nlmsghdr *nlh = NULL; ++ u32 pid = info->pid, seq = info->nlh ? info->nlh->nlmsg_seq : 0; ++ struct nlmsghdr *nlh = info->nlh; + int err = -ENOBUFS; + +- if (info) { +- pid = info->pid; +- nlh = info->nlh; +- if (nlh) +- seq = nlh->nlmsg_seq; +- } +- + skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); + if (skb == NULL) + goto errout; +@@ -2323,10 +2344,10 @@ + kfree_skb(skb); + goto errout; + } +- err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); ++ err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); + errout: + if (err < 0) +- rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); ++ rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); + } + + /* +@@ -2558,13 +2579,19 @@ + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; + ++ /* Perform the initialization we can't perform at compile time */ ++ ip6_null_entry.u.dst.dev = &init_net.loopback_dev; ++#ifdef CONFIG_IPV6_MULTIPLE_TABLES ++ ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev; ++ ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev; ++#endif + fib6_init(); + #ifdef CONFIG_PROC_FS +- p = proc_net_create("ipv6_route", 0, rt6_proc_info); ++ p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); + if (p) + p->owner = THIS_MODULE; + +- proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); ++ proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); + #endif + #ifdef CONFIG_XFRM + xfrm6_init(); +@@ -2584,8 +2611,8 @@ + fib6_rules_cleanup(); + #endif + #ifdef CONFIG_PROC_FS +- proc_net_remove("ipv6_route"); +- proc_net_remove("rt6_stats"); ++ proc_net_remove(&init_net, "ipv6_route"); ++ proc_net_remove(&init_net, "rt6_stats"); + #endif + #ifdef CONFIG_XFRM + xfrm6_fini(); +diff -Nurb linux-2.6.22-570/net/ipv6/sit.c linux-2.6.22-591/net/ipv6/sit.c +--- linux-2.6.22-570/net/ipv6/sit.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/sit.c 2007-12-21 15:36:15.000000000 -0500 +@@ -167,7 +167,7 @@ + int i; + for (i=1; i<100; i++) { + sprintf(name, "sit%d", i); +- if (__dev_get_by_name(name) == NULL) ++ if (__dev_get_by_name(&init_net, name) == NULL) + break; + } + if (i==100) +@@ -283,6 +283,9 @@ + struct sk_buff *skb2; + struct rt6_info *rt6i; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (len < hlen + sizeof(struct ipv6hdr)) + return; + iph6 = (struct ipv6hdr*)(dp + hlen); +@@ -369,6 +372,10 @@ + struct iphdr *iph; + struct ip_tunnel *tunnel; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto out; + +@@ -474,7 +481,8 @@ + } + + { +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = &init_net, ++ .nl_u = { .ip4_u = + { .daddr = dst, + .saddr = tiph->saddr, + .tos = RT_TOS(tos) } }, +@@ -745,7 +753,8 @@ + memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); + + if (iph->daddr) { +- struct flowi fl = { .nl_u = { .ip4_u = ++ struct flowi fl = { .fl_net = &init_net, ++ .nl_u = { .ip4_u = + { .daddr = iph->daddr, + .saddr = iph->saddr, + .tos = RT_TOS(iph->tos) } }, +@@ -760,7 +769,7 @@ + } + + if (!tdev && tunnel->parms.link) +- tdev = __dev_get_by_index(tunnel->parms.link); ++ tdev = __dev_get_by_index(&init_net, tunnel->parms.link); + + if (tdev) { + dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); +diff -Nurb 
linux-2.6.22-570/net/ipv6/tcp_ipv6.c linux-2.6.22-591/net/ipv6/tcp_ipv6.c +--- linux-2.6.22-570/net/ipv6/tcp_ipv6.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/tcp_ipv6.c 2007-12-21 15:36:15.000000000 -0500 +@@ -143,6 +143,7 @@ + return(-EAFNOSUPPORT); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + + if (np->sndflow) { + fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; +@@ -330,6 +331,7 @@ + static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + int type, int code, int offset, __be32 info) + { ++ struct net *net = skb->dev->nd_net; + struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; + const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + struct ipv6_pinfo *np; +@@ -339,7 +341,7 @@ + __u32 seq; + + sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, +- th->source, skb->dev->ifindex); ++ th->source, skb->dev->ifindex, net); + + if (sk == NULL) { + ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); +@@ -388,6 +390,7 @@ + for now. + */ + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &np->daddr); + ipv6_addr_copy(&fl.fl6_src, &np->saddr); +@@ -481,6 +484,7 @@ + int err = -1; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); +@@ -1066,6 +1070,7 @@ + buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + +@@ -1167,6 +1172,7 @@ + buff->csum = csum_partial((char *)t1, tot_len, 0); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); + ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); + +@@ -1224,7 +1230,8 @@ + + nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, + th->source, &ipv6_hdr(skb)->daddr, +- ntohs(th->dest), inet6_iif(skb)); ++ ntohs(th->dest), inet6_iif(skb), ++ sk->sk_net); + + if (nsk) { + if (nsk->sk_state != TCP_TIME_WAIT) { +@@ -1414,6 +1421,7 @@ + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + if (opt && opt->srcrt) { +@@ -1700,6 +1708,7 @@ + static int tcp_v6_rcv(struct sk_buff **pskb) + { + struct sk_buff *skb = *pskb; ++ struct net *net = skb->dev->nd_net; + struct tcphdr *th; + struct sock *sk; + int ret; +@@ -1736,7 +1745,7 @@ + + sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, + &ipv6_hdr(skb)->daddr, ntohs(th->dest), +- inet6_iif(skb)); ++ inet6_iif(skb), net); + + if (!sk) + goto no_tcp_socket; +@@ -1816,7 +1825,8 @@ + + sk2 = inet6_lookup_listener(&tcp_hashinfo, + &ipv6_hdr(skb)->daddr, +- ntohs(th->dest), inet6_iif(skb)); ++ ntohs(th->dest), inet6_iif(skb), ++ net); + if (sk2 != NULL) { + struct inet_timewait_sock *tw = inet_twsk(sk); + inet_twsk_deschedule(tw, &tcp_death_row); +@@ -2121,12 +2131,12 @@ + + int __init tcp6_proc_init(void) + { +- return tcp_proc_register(&tcp6_seq_afinfo); ++ return tcp_proc_register(&init_net, &tcp6_seq_afinfo); + } + + void tcp6_proc_exit(void) + { +- tcp_proc_unregister(&tcp6_seq_afinfo); ++ tcp_proc_unregister(&init_net, &tcp6_seq_afinfo); + } + #endif + +diff -Nurb linux-2.6.22-570/net/ipv6/udp.c linux-2.6.22-591/net/ipv6/udp.c +--- linux-2.6.22-570/net/ipv6/udp.c 
2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/udp.c 2007-12-21 15:36:15.000000000 -0500 +@@ -657,6 +657,7 @@ + ulen += sizeof(struct udphdr); + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + + if (sin6) { + if (sin6->sin6_port == 0) +@@ -967,11 +968,11 @@ + + int __init udp6_proc_init(void) + { +- return udp_proc_register(&udp6_seq_afinfo); ++ return udp_proc_register(&init_net, &udp6_seq_afinfo); + } + + void udp6_proc_exit(void) { +- udp_proc_unregister(&udp6_seq_afinfo); ++ udp_proc_unregister(&init_net, &udp6_seq_afinfo); + } + #endif /* CONFIG_PROC_FS */ + +diff -Nurb linux-2.6.22-570/net/ipv6/udplite.c linux-2.6.22-591/net/ipv6/udplite.c +--- linux-2.6.22-570/net/ipv6/udplite.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/udplite.c 2007-12-21 15:36:15.000000000 -0500 +@@ -95,11 +95,11 @@ + + int __init udplite6_proc_init(void) + { +- return udp_proc_register(&udplite6_seq_afinfo); ++ return udp_proc_register(&init_net, &udplite6_seq_afinfo); + } + + void udplite6_proc_exit(void) + { +- udp_proc_unregister(&udplite6_seq_afinfo); ++ udp_proc_unregister(&init_net, &udplite6_seq_afinfo); + } + #endif +diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_policy.c linux-2.6.22-591/net/ipv6/xfrm6_policy.c +--- linux-2.6.22-570/net/ipv6/xfrm6_policy.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/ipv6/xfrm6_policy.c 2007-12-21 15:36:15.000000000 -0500 +@@ -18,7 +18,7 @@ + #include + #include + #include +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + #include + #endif + +@@ -40,6 +40,7 @@ + { + struct rt6_info *rt; + struct flowi fl_tunnel = { ++ .fl_net = &init_net, + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)&daddr->a6, +@@ -132,6 +133,7 @@ + struct rt6_info *rt0 = (struct rt6_info*)(*dst_p); + struct rt6_info *rt = rt0; + struct flowi fl_tunnel = { ++ .fl_net = &init_net, + .nl_u = { + .ip6_u = { + .saddr = fl->fl6_src, +@@ -278,6 +280,7 @@ + u8 nexthdr = nh[IP6CB(skb)->nhoff]; + + memset(fl, 0, sizeof(struct flowi)); ++ fl->fl_net = &init_net; + ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr); + ipv6_addr_copy(&fl->fl6_src, &hdr->saddr); + +@@ -318,7 +321,7 @@ + fl->proto = nexthdr; + return; + +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + case IPPROTO_MH: + if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) { + struct ip6_mh *mh; +@@ -375,7 +378,7 @@ + + xdst = (struct xfrm_dst *)dst; + if (xdst->u.rt6.rt6i_idev->dev == dev) { +- struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); ++ struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev); + BUG_ON(!loopback_idev); + + do { +diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_state.c linux-2.6.22-591/net/ipv6/xfrm6_state.c +--- linux-2.6.22-570/net/ipv6/xfrm6_state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/xfrm6_state.c 2007-12-21 15:36:12.000000000 -0500 +@@ -65,7 +65,7 @@ + goto end; + + /* Rule 2: select MIPv6 RO or inbound trigger */ +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || +@@ -130,7 +130,7 @@ + goto end; + + /* Rule 2: select MIPv6 RO or inbound trigger */ +-#ifdef CONFIG_IPV6_MIP6 ++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) + for (i = 0; i < n; i++) { + if (src[i] && + (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || +diff -Nurb 
linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c +--- linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipv6/xfrm6_tunnel.c 2007-12-21 15:36:12.000000000 -0500 +@@ -379,3 +379,4 @@ + module_init(xfrm6_tunnel_init); + module_exit(xfrm6_tunnel_fini); + MODULE_LICENSE("GPL"); ++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6); +diff -Nurb linux-2.6.22-570/net/ipx/af_ipx.c linux-2.6.22-591/net/ipx/af_ipx.c +--- linux-2.6.22-570/net/ipx/af_ipx.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipx/af_ipx.c 2007-12-21 15:36:15.000000000 -0500 +@@ -347,6 +347,9 @@ + struct net_device *dev = ptr; + struct ipx_interface *i, *tmp; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event != NETDEV_DOWN && event != NETDEV_UP) + goto out; + +@@ -986,7 +989,7 @@ + if (intrfc) + ipxitf_put(intrfc); + +- dev = dev_get_by_name(idef->ipx_device); ++ dev = dev_get_by_name(&init_net, idef->ipx_device); + rc = -ENODEV; + if (!dev) + goto out; +@@ -1094,7 +1097,7 @@ + if (!dlink_type) + goto out; + +- dev = __dev_get_by_name(idef->ipx_device); ++ dev = __dev_get_by_name(&init_net, idef->ipx_device); + rc = -ENODEV; + if (!dev) + goto out; +@@ -1189,7 +1192,7 @@ + if (copy_from_user(&ifr, arg, sizeof(ifr))) + break; + sipx = (struct sockaddr_ipx *)&ifr.ifr_addr; +- dev = __dev_get_by_name(ifr.ifr_name); ++ dev = __dev_get_by_name(&init_net, ifr.ifr_name); + rc = -ENODEV; + if (!dev) + break; +@@ -1360,11 +1363,14 @@ + .obj_size = sizeof(struct ipx_sock), + }; + +-static int ipx_create(struct socket *sock, int protocol) ++static int ipx_create(struct net *net, struct socket *sock, int protocol) + { + int rc = -ESOCKTNOSUPPORT; + struct sock *sk; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + /* + * SPX support is not anymore in the kernel sources. 
If you want to + * ressurrect it, completing it and making it understand shared skbs, +@@ -1375,7 +1381,7 @@ + goto out; + + rc = -ENOMEM; +- sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1); ++ sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1); + if (!sk) + goto out; + #ifdef IPX_REFCNT_DEBUG +@@ -1644,6 +1650,9 @@ + u16 ipx_pktsize; + int rc = 0; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + /* Not ours */ + if (skb->pkt_type == PACKET_OTHERHOST) + goto drop; +diff -Nurb linux-2.6.22-570/net/ipx/ipx_proc.c linux-2.6.22-591/net/ipx/ipx_proc.c +--- linux-2.6.22-570/net/ipx/ipx_proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/ipx/ipx_proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -353,7 +354,7 @@ + struct proc_dir_entry *p; + int rc = -ENOMEM; + +- ipx_proc_dir = proc_mkdir("ipx", proc_net); ++ ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net); + + if (!ipx_proc_dir) + goto out; +@@ -381,7 +382,7 @@ + out_route: + remove_proc_entry("interface", ipx_proc_dir); + out_interface: +- remove_proc_entry("ipx", proc_net); ++ remove_proc_entry("ipx", init_net.proc_net); + goto out; + } + +@@ -390,7 +391,7 @@ + remove_proc_entry("interface", ipx_proc_dir); + remove_proc_entry("route", ipx_proc_dir); + remove_proc_entry("socket", ipx_proc_dir); +- remove_proc_entry("ipx", proc_net); ++ remove_proc_entry("ipx", init_net.proc_net); + } + + #else /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/irda/af_irda.c linux-2.6.22-591/net/irda/af_irda.c +--- linux-2.6.22-570/net/irda/af_irda.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/irda/af_irda.c 2007-12-21 15:36:15.000000000 -0500 +@@ -60,7 +60,7 @@ + + #include + +-static int irda_create(struct socket *sock, int protocol); ++static int irda_create(struct net *net, struct socket *sock, int protocol); + + static const struct proto_ops irda_stream_ops; + static const struct proto_ops irda_seqpacket_ops; +@@ -831,7 +831,7 @@ + + IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + +- err = irda_create(newsock, sk->sk_protocol); ++ err = irda_create(sk->sk_net, newsock, sk->sk_protocol); + if (err) + return err; + +@@ -1057,13 +1057,16 @@ + * Create IrDA socket + * + */ +-static int irda_create(struct socket *sock, int protocol) ++static int irda_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct irda_sock *self; + + IRDA_DEBUG(2, "%s()\n", __FUNCTION__); + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + /* Check for valid socket type */ + switch (sock->type) { + case SOCK_STREAM: /* For TTP connections with SAR disabled */ +@@ -1075,7 +1078,7 @@ + } + + /* Allocate networking socket */ +- sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1); ++ sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1); + if (sk == NULL) + return -ENOMEM; + +diff -Nurb linux-2.6.22-570/net/irda/irias_object.c linux-2.6.22-591/net/irda/irias_object.c +--- linux-2.6.22-570/net/irda/irias_object.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/irda/irias_object.c 2007-12-21 15:36:12.000000000 -0500 +@@ -36,39 +36,6 @@ + */ + struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; + +-/* +- * Function strndup (str, max) +- * +- * My own kernel version of strndup! +- * +- * Faster, check boundary... 
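The memset()-then-assign pairs in the IPv6 hunks above all follow one rule this patch establishes for flow lookups: a zeroed struct flowi carries a NULL namespace pointer, so code that has not been converted to per-socket namespaces must pin the key to init_net by hand before routing. A minimal sketch of the idiom; fl_net is the field this very patch adds to struct flowi, and init_tcp_flow is an invented name:

    #include <linux/in.h>
    #include <linux/string.h>
    #include <net/flow.h>
    #include <net/net_namespace.h>

    static void init_tcp_flow(struct flowi *fl)
    {
            memset(fl, 0, sizeof(*fl));
            fl->fl_net = &init_net;    /* would stay NULL after memset() */
            fl->proto = IPPROTO_TCP;
    }

Presumably this is why every memset(&fl, 0, ...) site above gains the assignment: a NULL fl_net would send the route lookup into a namespace that does not exist.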
Jean II +- */ +-static char *strndup(char *str, size_t max) +-{ +- char *new_str; +- int len; +- +- /* Check string */ +- if (str == NULL) +- return NULL; +- /* Check length, truncate */ +- len = strlen(str); +- if(len > max) +- len = max; +- +- /* Allocate new string */ +- new_str = kmalloc(len + 1, GFP_ATOMIC); +- if (new_str == NULL) { +- IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); +- return NULL; +- } +- +- /* Copy and truncate */ +- memcpy(new_str, str, len); +- new_str[len] = '\0'; +- +- return new_str; +-} + + /* + * Function ias_new_object (name, id) +@@ -90,7 +57,7 @@ + } + + obj->magic = IAS_OBJECT_MAGIC; +- obj->name = strndup(name, IAS_MAX_CLASSNAME); ++ obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); + if (!obj->name) { + IRDA_WARNING("%s(), Unable to allocate name!\n", + __FUNCTION__); +@@ -360,7 +327,7 @@ + } + + attrib->magic = IAS_ATTRIB_MAGIC; +- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); ++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); + + /* Insert value */ + attrib->value = irias_new_integer_value(value); +@@ -404,7 +371,7 @@ + } + + attrib->magic = IAS_ATTRIB_MAGIC; +- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); ++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); + + attrib->value = irias_new_octseq_value( octets, len); + if (!attrib->name || !attrib->value) { +@@ -446,7 +413,7 @@ + } + + attrib->magic = IAS_ATTRIB_MAGIC; +- attrib->name = strndup(name, IAS_MAX_ATTRIBNAME); ++ attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); + + attrib->value = irias_new_string_value(value); + if (!attrib->name || !attrib->value) { +@@ -506,7 +473,7 @@ + + value->type = IAS_STRING; + value->charset = CS_ASCII; +- value->t.string = strndup(string, IAS_MAX_STRING); ++ value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); + if (!value->t.string) { + IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__); + kfree(value); +diff -Nurb linux-2.6.22-570/net/irda/irlap_frame.c linux-2.6.22-591/net/irda/irlap_frame.c +--- linux-2.6.22-570/net/irda/irlap_frame.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/irda/irlap_frame.c 2007-12-21 15:36:15.000000000 -0500 +@@ -1319,6 +1319,9 @@ + int command; + __u8 control; + ++ if (dev->nd_net != &init_net) ++ goto out; ++ + /* FIXME: should we get our own field? 
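The replacement above swaps IrDA's private strndup() for the kernel-wide kstrndup(), whose third argument makes the allocation context explicit instead of hard-coding GFP_ATOMIC inside the helper. A sketch of the contract; dup_name is an illustrative wrapper, not a function from the patch:

    #include <linux/slab.h>
    #include <linux/string.h>

    /* Same contract as the removed helper: copy at most max
     * bytes, always NUL-terminate, return NULL on allocation
     * failure or a NULL source. */
    static char *dup_name(const char *src, size_t max)
    {
            return kstrndup(src, max, GFP_ATOMIC);
    }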
*/ + self = (struct irlap_cb *) dev->atalk_ptr; + +diff -Nurb linux-2.6.22-570/net/irda/irproc.c linux-2.6.22-591/net/irda/irproc.c +--- linux-2.6.22-570/net/irda/irproc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/irda/irproc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -66,7 +67,7 @@ + int i; + struct proc_dir_entry *d; + +- proc_irda = proc_mkdir("irda", proc_net); ++ proc_irda = proc_mkdir("irda", init_net.proc_net); + if (proc_irda == NULL) + return; + proc_irda->owner = THIS_MODULE; +@@ -92,7 +93,7 @@ + for (i=0; i + #include + #include ++#include + + #include + +@@ -136,11 +137,14 @@ + .obj_size = sizeof(struct pfkey_sock), + }; + +-static int pfkey_create(struct socket *sock, int protocol) ++static int pfkey_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + int err; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (sock->type != SOCK_RAW) +@@ -149,7 +153,7 @@ + return -EPROTONOSUPPORT; + + err = -ENOMEM; +- sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); ++ sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1); + if (sk == NULL) + goto out; + +@@ -3781,7 +3785,7 @@ + static void __exit ipsec_pfkey_exit(void) + { + xfrm_unregister_km(&pfkeyv2_mgr); +- remove_proc_entry("net/pfkey", NULL); ++ remove_proc_entry("pfkey", init_net.proc_net); + sock_unregister(PF_KEY); + proto_unregister(&key_proto); + } +@@ -3798,7 +3802,7 @@ + goto out_unregister_key_proto; + #ifdef CONFIG_PROC_FS + err = -ENOMEM; +- if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL) ++ if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL) + goto out_sock_unregister; + #endif + err = xfrm_register_km(&pfkeyv2_mgr); +diff -Nurb linux-2.6.22-570/net/llc/af_llc.c linux-2.6.22-591/net/llc/af_llc.c +--- linux-2.6.22-570/net/llc/af_llc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/llc/af_llc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -150,14 +150,17 @@ + * socket type we have available. + * Returns 0 upon success, negative upon failure. 
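pfkey_create() above and llc_ui_create() just below share the boilerplate this patch adds to every not-yet-converted address family: reject sockets from any namespace other than init_net, and pass the namespace through the new first parameter of sk_alloc() so the sock records its owner. A condensed sketch; example_create and example_proto are stand-ins:

    #include <linux/errno.h>
    #include <linux/module.h>
    #include <linux/net.h>
    #include <net/net_namespace.h>
    #include <net/sock.h>

    static struct proto example_proto = {
            .name     = "EXAMPLE",
            .owner    = THIS_MODULE,
            .obj_size = sizeof(struct sock),
    };

    static int example_create(struct net *net, struct socket *sock, int protocol)
    {
            struct sock *sk;

            /* unconverted families exist only in the initial namespace */
            if (net != &init_net)
                    return -EAFNOSUPPORT;

            /* new signature: the sock remembers its namespace */
            sk = sk_alloc(net, PF_LLC, GFP_KERNEL, &example_proto, 1);
            if (!sk)
                    return -ENOMEM;
            sock_init_data(sock, sk);
            return 0;
    }

Returning -EAFNOSUPPORT rather than -EINVAL lets userspace in a child namespace see the family as simply absent instead of broken.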
+ */ +-static int llc_ui_create(struct socket *sock, int protocol) ++static int llc_ui_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + int rc = -ESOCKTNOSUPPORT; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) { + rc = -ENOMEM; +- sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto); ++ sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto); + if (sk) { + rc = 0; + llc_ui_sk_init(sock, sk); +@@ -249,7 +252,7 @@ + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out; + rc = -ENODEV; +- llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd); ++ llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!llc->dev) + goto out; + rc = -EUSERS; +@@ -300,7 +303,7 @@ + goto out; + rc = -ENODEV; + rtnl_lock(); +- llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac); ++ llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac); + rtnl_unlock(); + if (!llc->dev) + goto out; +diff -Nurb linux-2.6.22-570/net/llc/llc_conn.c linux-2.6.22-591/net/llc/llc_conn.c +--- linux-2.6.22-570/net/llc/llc_conn.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/llc/llc_conn.c 2007-12-21 15:36:15.000000000 -0500 +@@ -700,7 +700,7 @@ + struct llc_addr *saddr, + struct llc_addr *daddr) + { +- struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC, ++ struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC, + sk->sk_prot); + struct llc_sock *newllc, *llc = llc_sk(sk); + +@@ -867,9 +867,9 @@ + * Allocates a LLC sock and initializes it. Returns the new LLC sock + * or %NULL if there's no memory available for one + */ +-struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot) ++struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot) + { +- struct sock *sk = sk_alloc(family, priority, prot, 1); ++ struct sock *sk = sk_alloc(net, family, priority, prot, 1); + + if (!sk) + goto out; +diff -Nurb linux-2.6.22-570/net/llc/llc_core.c linux-2.6.22-591/net/llc/llc_core.c +--- linux-2.6.22-570/net/llc/llc_core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/llc/llc_core.c 2007-12-23 03:37:02.000000000 -0500 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include + + LIST_HEAD(llc_sap_list); +@@ -162,7 +163,8 @@ + { + struct net_device *dev; + +- dev = first_net_device(); ++ /* XXX sapan ++ dev = first_net_device(&init_net); + if (dev != NULL) + dev = next_net_device(dev); + +@@ -172,6 +174,7 @@ + memset(llc_station_mac_sa, 0, ETH_ALEN); + dev_add_pack(&llc_packet_type); + dev_add_pack(&llc_tr_packet_type); ++ */ + return 0; + } + +diff -Nurb linux-2.6.22-570/net/llc/llc_core.c.orig linux-2.6.22-591/net/llc/llc_core.c.orig +--- linux-2.6.22-570/net/llc/llc_core.c.orig 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/net/llc/llc_core.c.orig 2007-12-21 15:36:15.000000000 -0500 +@@ -0,0 +1,197 @@ ++/* ++ * llc_core.c - Minimum needed routines for sap handling and module init/exit ++ * ++ * Copyright (c) 1997 by Procom Technology, Inc. ++ * 2001-2003 by Arnaldo Carvalho de Melo ++ * ++ * This program can be redistributed or modified under the terms of the ++ * GNU General Public License as published by the Free Software Foundation. ++ * This program is distributed without any warranty or implied warranty ++ * of merchantability or fitness for a particular purpose. ++ * ++ * See the GNU General Public License for more details. 
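In llc_conn.c above, sockets spawned from an established connection take their namespace from the parent via sk->sk_net rather than hard-coding init_net, so children always land in the namespace where the listener lives. A one-function sketch under that assumption; clone_llc_sock is an invented name:

    #include <net/llc_conn.h>   /* assumed home of llc_sk_alloc() */
    #include <net/sock.h>

    static struct sock *clone_llc_sock(struct sock *parent)
    {
            /* namespace, family and proto all inherited */
            return llc_sk_alloc(parent->sk_net, parent->sk_family,
                                GFP_ATOMIC, parent->sk_prot);
    }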
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++LIST_HEAD(llc_sap_list); ++DEFINE_RWLOCK(llc_sap_list_lock); ++ ++unsigned char llc_station_mac_sa[ETH_ALEN]; ++ ++/** ++ * llc_sap_alloc - allocates and initializes sap. ++ * ++ * Allocates and initializes sap. ++ */ ++static struct llc_sap *llc_sap_alloc(void) ++{ ++ struct llc_sap *sap = kzalloc(sizeof(*sap), GFP_ATOMIC); ++ ++ if (sap) { ++ sap->state = LLC_SAP_STATE_ACTIVE; ++ memcpy(sap->laddr.mac, llc_station_mac_sa, ETH_ALEN); ++ rwlock_init(&sap->sk_list.lock); ++ atomic_set(&sap->refcnt, 1); ++ } ++ return sap; ++} ++ ++/** ++ * llc_add_sap - add sap to station list ++ * @sap: Address of the sap ++ * ++ * Adds a sap to the LLC's station sap list. ++ */ ++static void llc_add_sap(struct llc_sap *sap) ++{ ++ list_add_tail(&sap->node, &llc_sap_list); ++} ++ ++/** ++ * llc_del_sap - del sap from station list ++ * @sap: Address of the sap ++ * ++ * Removes a sap to the LLC's station sap list. ++ */ ++static void llc_del_sap(struct llc_sap *sap) ++{ ++ write_lock_bh(&llc_sap_list_lock); ++ list_del(&sap->node); ++ write_unlock_bh(&llc_sap_list_lock); ++} ++ ++static struct llc_sap *__llc_sap_find(unsigned char sap_value) ++{ ++ struct llc_sap* sap; ++ ++ list_for_each_entry(sap, &llc_sap_list, node) ++ if (sap->laddr.lsap == sap_value) ++ goto out; ++ sap = NULL; ++out: ++ return sap; ++} ++ ++/** ++ * llc_sap_find - searchs a SAP in station ++ * @sap_value: sap to be found ++ * ++ * Searchs for a sap in the sap list of the LLC's station upon the sap ID. ++ * If the sap is found it will be refcounted and the user will have to do ++ * a llc_sap_put after use. ++ * Returns the sap or %NULL if not found. ++ */ ++struct llc_sap *llc_sap_find(unsigned char sap_value) ++{ ++ struct llc_sap* sap; ++ ++ read_lock_bh(&llc_sap_list_lock); ++ sap = __llc_sap_find(sap_value); ++ if (sap) ++ llc_sap_hold(sap); ++ read_unlock_bh(&llc_sap_list_lock); ++ return sap; ++} ++ ++/** ++ * llc_sap_open - open interface to the upper layers. ++ * @lsap: SAP number. ++ * @func: rcv func for datalink protos ++ * ++ * Interface function to upper layer. Each one who wants to get a SAP ++ * (for example NetBEUI) should call this function. Returns the opened ++ * SAP for success, NULL for failure. ++ */ ++struct llc_sap *llc_sap_open(unsigned char lsap, ++ int (*func)(struct sk_buff *skb, ++ struct net_device *dev, ++ struct packet_type *pt, ++ struct net_device *orig_dev)) ++{ ++ struct llc_sap *sap = NULL; ++ ++ write_lock_bh(&llc_sap_list_lock); ++ if (__llc_sap_find(lsap)) /* SAP already exists */ ++ goto out; ++ sap = llc_sap_alloc(); ++ if (!sap) ++ goto out; ++ sap->laddr.lsap = lsap; ++ sap->rcv_func = func; ++ llc_add_sap(sap); ++out: ++ write_unlock_bh(&llc_sap_list_lock); ++ return sap; ++} ++ ++/** ++ * llc_sap_close - close interface for upper layers. ++ * @sap: SAP to be closed. ++ * ++ * Close interface function to upper layer. Each one who wants to ++ * close an open SAP (for example NetBEUI) should call this function. ++ * Removes this sap from the list of saps in the station and then ++ * frees the memory for this sap. 
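The comments above pin down a lookup discipline: llc_sap_find() takes a reference under the list lock, so every successful lookup must be balanced by llc_sap_put(), and only llc_sap_close() frees the SAP. A short caller sketch, with poke_sap as an invented name:

    #include <linux/errno.h>
    #include <net/llc.h>

    static int poke_sap(unsigned char lsap)
    {
            struct llc_sap *sap = llc_sap_find(lsap);

            if (!sap)
                    return -ENOENT;
            /* ... use the SAP; drop the reference when done ... */
            llc_sap_put(sap);
            return 0;
    }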
++ */ ++void llc_sap_close(struct llc_sap *sap) ++{ ++ WARN_ON(!hlist_empty(&sap->sk_list.list)); ++ llc_del_sap(sap); ++ kfree(sap); ++} ++ ++static struct packet_type llc_packet_type = { ++ .type = __constant_htons(ETH_P_802_2), ++ .func = llc_rcv, ++}; ++ ++static struct packet_type llc_tr_packet_type = { ++ .type = __constant_htons(ETH_P_TR_802_2), ++ .func = llc_rcv, ++}; ++ ++static int __init llc_init(void) ++{ ++ struct net_device *dev; ++ ++ dev = first_net_device(&init_net); ++ if (dev != NULL) ++ dev = next_net_device(dev); ++ ++ if (dev != NULL) ++ memcpy(llc_station_mac_sa, dev->dev_addr, ETH_ALEN); ++ else ++ memset(llc_station_mac_sa, 0, ETH_ALEN); ++ dev_add_pack(&llc_packet_type); ++ dev_add_pack(&llc_tr_packet_type); ++ return 0; ++} ++ ++static void __exit llc_exit(void) ++{ ++ dev_remove_pack(&llc_packet_type); ++ dev_remove_pack(&llc_tr_packet_type); ++} ++ ++module_init(llc_init); ++module_exit(llc_exit); ++ ++EXPORT_SYMBOL(llc_station_mac_sa); ++EXPORT_SYMBOL(llc_sap_list); ++EXPORT_SYMBOL(llc_sap_list_lock); ++EXPORT_SYMBOL(llc_sap_find); ++EXPORT_SYMBOL(llc_sap_open); ++EXPORT_SYMBOL(llc_sap_close); ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Procom 1997, Jay Schullist 2001, Arnaldo C. Melo 2001-2003"); ++MODULE_DESCRIPTION("LLC IEEE 802.2 core support"); +diff -Nurb linux-2.6.22-570/net/llc/llc_input.c linux-2.6.22-591/net/llc/llc_input.c +--- linux-2.6.22-570/net/llc/llc_input.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/llc/llc_input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -12,6 +12,7 @@ + * See the GNU General Public License for more details. + */ + #include ++#include + #include + #include + #include +@@ -145,6 +146,9 @@ + int (*rcv)(struct sk_buff *, struct net_device *, + struct packet_type *, struct net_device *); + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + /* + * When the interface is in promisc. mode, drop all the crap that it + * receives, do not try to analyse it. 
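The guard added to llc_rcv() above recurs throughout this patch (IPX, IrDA, NetRom, the nfnetlink_queue notifier): packets and netdev events from devices outside the initial namespace are simply ignored by protocols that have not been made namespace-aware. Sketched here as a notifier, with example_notifier as a placeholder:

    #include <linux/netdevice.h>
    #include <linux/notifier.h>
    #include <net/net_namespace.h>

    static int example_notifier(struct notifier_block *nb,
                                unsigned long event, void *ptr)
    {
            struct net_device *dev = ptr;

            /* foreign-namespace devices are invisible here */
            if (dev->nd_net != &init_net)
                    return NOTIFY_DONE;

            /* ... handle NETDEV_UP / NETDEV_DOWN as before ... */
            return NOTIFY_DONE;
    }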
+diff -Nurb linux-2.6.22-570/net/llc/llc_proc.c linux-2.6.22-591/net/llc/llc_proc.c +--- linux-2.6.22-570/net/llc/llc_proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/llc/llc_proc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -231,7 +232,7 @@ + int rc = -ENOMEM; + struct proc_dir_entry *p; + +- llc_proc_dir = proc_mkdir("llc", proc_net); ++ llc_proc_dir = proc_mkdir("llc", init_net.proc_net); + if (!llc_proc_dir) + goto out; + llc_proc_dir->owner = THIS_MODULE; +@@ -254,7 +255,7 @@ + out_core: + remove_proc_entry("socket", llc_proc_dir); + out_socket: +- remove_proc_entry("llc", proc_net); ++ remove_proc_entry("llc", init_net.proc_net); + goto out; + } + +@@ -262,5 +263,5 @@ + { + remove_proc_entry("socket", llc_proc_dir); + remove_proc_entry("core", llc_proc_dir); +- remove_proc_entry("llc", proc_net); ++ remove_proc_entry("llc", init_net.proc_net); + } +diff -Nurb linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c +--- linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/mac80211/ieee80211_ioctl.c 2007-12-21 15:36:12.000000000 -0500 +@@ -838,6 +838,29 @@ + } + + ++static int ieee80211_ioctl_giwrate(struct net_device *dev, ++ struct iw_request_info *info, ++ struct iw_param *rate, char *extra) ++{ ++ struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); ++ struct sta_info *sta; ++ struct ieee80211_sub_if_data *sdata; ++ ++ sdata = IEEE80211_DEV_TO_SUB_IF(dev); ++ if (sdata->type == IEEE80211_IF_TYPE_STA) ++ sta = sta_info_get(local, sdata->u.sta.bssid); ++ else ++ return -EOPNOTSUPP; ++ if (!sta) ++ return -ENODEV; ++ if (sta->txrate < local->oper_hw_mode->num_rates) ++ rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000; ++ else ++ rate->value = 0; ++ sta_info_put(sta); ++ return 0; ++} ++ + static int ieee80211_ioctl_siwrts(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rts, char *extra) +@@ -1779,7 +1802,7 @@ + (iw_handler) NULL, /* -- hole -- */ + (iw_handler) NULL, /* -- hole -- */ + (iw_handler) NULL, /* SIOCSIWRATE */ +- (iw_handler) NULL, /* SIOCGIWRATE */ ++ (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ + (iw_handler) ieee80211_ioctl_siwrts, /* SIOCSIWRTS */ + (iw_handler) ieee80211_ioctl_giwrts, /* SIOCGIWRTS */ + (iw_handler) ieee80211_ioctl_siwfrag, /* SIOCSIWFRAG */ +diff -Nurb linux-2.6.22-570/net/netfilter/core.c linux-2.6.22-591/net/netfilter/core.c +--- linux-2.6.22-570/net/netfilter/core.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/core.c 2007-12-21 15:36:15.000000000 -0500 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include "nf_internals.h" + +@@ -203,7 +204,9 @@ + return 0; + + /* Not exclusive use of packet? Must copy. 
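The netfilter hunk that follows relaxes the old rule that any shared or cloned skb must be copied before netfilter may write to it: skb_clone_writable() reports when the first writable_len bytes of a clone are private to it, so only genuinely shared data forces the expensive copy. A sketch of the resulting control flow; make_writable and its copy path are illustrative, not the function's real error handling:

    #include <linux/gfp.h>
    #include <linux/skbuff.h>

    static int make_writable(struct sk_buff **pskb, unsigned int writable_len)
    {
            struct sk_buff *nskb;

            /* a clone is fine when our writable region is not
             * shared with the other clone */
            if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
                    goto copy_skb;
            if (skb_shared(*pskb))
                    goto copy_skb;
            return pskb_may_pull(*pskb, writable_len);

    copy_skb:
            nskb = skb_copy(*pskb, GFP_ATOMIC);
            if (!nskb)
                    return 0;
            kfree_skb(*pskb);
            *pskb = nskb;
            return 1;
    }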
*/ +- if (skb_shared(*pskb) || skb_cloned(*pskb)) ++ if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) ++ goto copy_skb; ++ if (skb_shared(*pskb)) + goto copy_skb; + + return pskb_may_pull(*pskb, writable_len); +@@ -278,8 +281,28 @@ + #endif /* CONFIG_NF_CONNTRACK */ + + #ifdef CONFIG_PROC_FS +-struct proc_dir_entry *proc_net_netfilter; +-EXPORT_SYMBOL(proc_net_netfilter); ++static int netfilter_proc_init(struct net * net) ++{ ++ int error = -ENOMEM; ++ net->proc_net_netfilter = proc_mkdir("netfilter", net->proc_net); ++ ++ if (net->proc_net_netfilter) { ++ net->proc_net_netfilter->data = net; ++ error = 0; ++ } ++ return error; ++} ++ ++static void netfilter_proc_exit(struct net *net) ++{ ++ remove_proc_entry("netfilter", net->proc_net); ++} ++ ++static struct pernet_operations netfilter_proc_ops = { ++ .init = netfilter_proc_init, ++ .exit = netfilter_proc_exit, ++}; ++ + #endif + + void __init netfilter_init(void) +@@ -291,8 +314,7 @@ + } + + #ifdef CONFIG_PROC_FS +- proc_net_netfilter = proc_mkdir("netfilter", proc_net); +- if (!proc_net_netfilter) ++ if (register_pernet_subsys(&netfilter_proc_ops) < 0) + panic("cannot create netfilter proc entry"); + #endif + +diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c +--- linux-2.6.22-570/net/netfilter/nf_conntrack_h323_main.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nf_conntrack_h323_main.c 2007-12-21 15:36:15.000000000 -0500 +@@ -724,6 +724,8 @@ + + memset(&fl1, 0, sizeof(fl1)); + memset(&fl2, 0, sizeof(fl2)); ++ fl1.fl_net = &init_net; ++ fl2.fl_net = &init_net; + + switch (family) { + case AF_INET: { +diff -Nurb linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c +--- linux-2.6.22-570/net/netfilter/nf_conntrack_standalone.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nf_conntrack_standalone.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_SYSCTL + #include + #endif +@@ -419,14 +420,14 @@ + return ret; + + #ifdef CONFIG_PROC_FS +- proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); ++ proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); + if (!proc) goto cleanup_init; + +- proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, ++ proc_exp = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, + &exp_file_ops); + if (!proc_exp) goto cleanup_proc; + +- proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); ++ proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); + if (!proc_stat) + goto cleanup_proc_exp; + +@@ -447,11 +448,11 @@ + cleanup_proc_stat: + #endif + #ifdef CONFIG_PROC_FS +- remove_proc_entry("nf_conntrack", proc_net_stat); ++ remove_proc_entry("nf_conntrack", init_net.proc_net_stat); + cleanup_proc_exp: +- proc_net_remove("nf_conntrack_expect"); ++ proc_net_remove(&init_net, "nf_conntrack_expect"); + cleanup_proc: +- proc_net_remove("nf_conntrack"); ++ proc_net_remove(&init_net, "nf_conntrack"); + cleanup_init: + #endif /* CNFIG_PROC_FS */ + nf_conntrack_cleanup(); +@@ -464,9 +465,9 @@ + unregister_sysctl_table(nf_ct_sysctl_header); + #endif + #ifdef CONFIG_PROC_FS +- remove_proc_entry("nf_conntrack", proc_net_stat); +- proc_net_remove("nf_conntrack_expect"); +- proc_net_remove("nf_conntrack"); ++ remove_proc_entry("nf_conntrack", init_net.proc_net_stat); ++ 
proc_net_remove(&init_net, "nf_conntrack_expect"); ++ proc_net_remove(&init_net, "nf_conntrack"); + #endif /* CNFIG_PROC_FS */ + nf_conntrack_cleanup(); + } +diff -Nurb linux-2.6.22-570/net/netfilter/nf_log.c linux-2.6.22-591/net/netfilter/nf_log.c +--- linux-2.6.22-570/net/netfilter/nf_log.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nf_log.c 2007-12-21 15:36:15.000000000 -0500 +@@ -168,7 +168,8 @@ + #ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; + +- pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); ++ pde = create_proc_entry("nf_log", S_IRUGO, ++ init_net.proc_net_netfilter); + if (!pde) + return -1; + +diff -Nurb linux-2.6.22-570/net/netfilter/nf_queue.c linux-2.6.22-591/net/netfilter/nf_queue.c +--- linux-2.6.22-570/net/netfilter/nf_queue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nf_queue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -346,7 +346,7 @@ + #ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; + +- pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); ++ pde = create_proc_entry("nf_queue", S_IRUGO, init_net.proc_net_netfilter); + if (!pde) + return -1; + pde->proc_fops = &nfqueue_file_ops; +diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink.c linux-2.6.22-591/net/netfilter/nfnetlink.c +--- linux-2.6.22-570/net/netfilter/nfnetlink.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nfnetlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -264,7 +264,7 @@ + { + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + +- nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, ++ nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, NULL, THIS_MODULE); + if (!nfnl) { + printk(KERN_ERR "cannot initialize nfnetlink!\n"); +diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_log.c linux-2.6.22-591/net/netfilter/nfnetlink_log.c +--- linux-2.6.22-570/net/netfilter/nfnetlink_log.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nfnetlink_log.c 2007-12-21 15:36:15.000000000 -0500 +@@ -705,7 +705,8 @@ + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + UDEBUG("node = %p\n", inst); +- if (n->pid == inst->peer_pid) ++ if ((n->net == &init_net) && ++ (n->pid == inst->peer_pid)) + __instance_destroy(inst); + } + } +@@ -1023,7 +1024,7 @@ + + #ifdef CONFIG_PROC_FS + proc_nful = create_proc_entry("nfnetlink_log", 0440, +- proc_net_netfilter); ++ init_net.proc_net_netfilter); + if (!proc_nful) + goto cleanup_subsys; + proc_nful->proc_fops = &nful_file_ops; +@@ -1043,7 +1044,7 @@ + { + nf_log_unregister(&nfulnl_logger); + #ifdef CONFIG_PROC_FS +- remove_proc_entry("nfnetlink_log", proc_net_netfilter); ++ remove_proc_entry("nfnetlink_log", init_net.proc_net_netfilter); + #endif + nfnetlink_subsys_unregister(&nfulnl_subsys); + netlink_unregister_notifier(&nfulnl_rtnl_notifier); +diff -Nurb linux-2.6.22-570/net/netfilter/nfnetlink_queue.c linux-2.6.22-591/net/netfilter/nfnetlink_queue.c +--- linux-2.6.22-570/net/netfilter/nfnetlink_queue.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/nfnetlink_queue.c 2007-12-21 15:36:15.000000000 -0500 +@@ -734,6 +734,9 @@ + { + struct net_device *dev = ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); +@@ -762,7 +765,8 @@ + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, 
hlist) { +- if (n->pid == inst->peer_pid) ++ if ((n->net == &init_net) && ++ (n->pid == inst->peer_pid)) + __instance_destroy(inst); + } + } +@@ -1106,7 +1110,7 @@ + + #ifdef CONFIG_PROC_FS + proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, +- proc_net_netfilter); ++ init_net.proc_net_netfilter); + if (!proc_nfqueue) + goto cleanup_subsys; + proc_nfqueue->proc_fops = &nfqnl_file_ops; +@@ -1129,7 +1133,7 @@ + nf_unregister_queue_handlers(&nfqh); + unregister_netdevice_notifier(&nfqnl_dev_notifier); + #ifdef CONFIG_PROC_FS +- remove_proc_entry("nfnetlink_queue", proc_net_netfilter); ++ remove_proc_entry("nfnetlink_queue", init_net.proc_net_netfilter); + #endif + nfnetlink_subsys_unregister(&nfqnl_subsys); + netlink_unregister_notifier(&nfqnl_rtnl_notifier); +diff -Nurb linux-2.6.22-570/net/netfilter/x_tables.c linux-2.6.22-591/net/netfilter/x_tables.c +--- linux-2.6.22-570/net/netfilter/x_tables.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/x_tables.c 2007-12-21 15:36:15.000000000 -0500 +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -37,11 +38,16 @@ + struct mutex mutex; + struct list_head match; + struct list_head target; +- struct list_head tables; + struct mutex compat_mutex; + }; + +-static struct xt_af *xt; ++ ++struct xt_af_pernet { ++ struct list_head tables; ++}; ++ ++static struct xt_af * xt; ++ + + #ifdef DEBUG_IP_FIREWALL_USER + #define duprintf(format, args...) printk(format , ## args) +@@ -286,9 +292,9 @@ + return 1; + } + if (target == 1) +- have_rev = target_revfn(af, name, revision, &best); ++ have_rev = target_revfn( af, name, revision, &best); + else +- have_rev = match_revfn(af, name, revision, &best); ++ have_rev = match_revfn( af, name, revision, &best); + mutex_unlock(&xt[af].mutex); + + /* Nothing at all? Return 0 to try loading module. */ +@@ -533,14 +539,14 @@ + EXPORT_SYMBOL(xt_free_table_info); + + /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ +-struct xt_table *xt_find_table_lock(int af, const char *name) ++struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) + { + struct xt_table *t; + + if (mutex_lock_interruptible(&xt[af].mutex) != 0) + return ERR_PTR(-EINTR); + +- list_for_each_entry(t, &xt[af].tables, list) ++ list_for_each_entry(t, &net->xtn[af].tables, list) + if (strcmp(t->name, name) == 0 && try_module_get(t->me)) + return t; + mutex_unlock(&xt[af].mutex); +@@ -596,7 +602,7 @@ + } + EXPORT_SYMBOL_GPL(xt_replace_table); + +-int xt_register_table(struct xt_table *table, ++int xt_register_table(struct net *net, struct xt_table *table, + struct xt_table_info *bootstrap, + struct xt_table_info *newinfo) + { +@@ -609,7 +615,7 @@ + return ret; + + /* Don't autoload: we'd eat our tail... */ +- list_for_each_entry(t, &xt[table->af].tables, list) { ++ list_for_each_entry(t, &net->xtn[table->af].tables, list) { + if (strcmp(t->name, table->name) == 0) { + ret = -EEXIST; + goto unlock; +@@ -628,7 +634,7 @@ + /* save number of initial entries */ + private->initial_entries = private->number; + +- list_add(&table->list, &xt[table->af].tables); ++ list_add(&table->list, &net->xtn[table->af].tables); + + ret = 0; + unlock: +@@ -666,7 +672,7 @@ + return pos ? 
NULL : head; + } + +-static struct list_head *type2list(u_int16_t af, u_int16_t type) ++static struct list_head *type2list(struct net *net, u_int16_t af, u_int16_t type) + { + struct list_head *list; + +@@ -678,7 +684,7 @@ + list = &xt[af].match; + break; + case TABLE: +- list = &xt[af].tables; ++ list = &net->xtn[af].tables; + break; + default: + list = NULL; +@@ -691,6 +697,7 @@ + static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos) + { + struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private; ++ struct net *net = PDE_NET(pde); + u_int16_t af = (unsigned long)pde->data & 0xffff; + u_int16_t type = (unsigned long)pde->data >> 16; + struct list_head *list; +@@ -698,7 +705,7 @@ + if (af >= NPROTO) + return NULL; + +- list = type2list(af, type); ++ list = type2list(net, af, type); + if (!list) + return NULL; + +@@ -711,6 +718,7 @@ + static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { + struct proc_dir_entry *pde = seq->private; ++ struct net *net = PDE_NET(pde); + u_int16_t af = (unsigned long)pde->data & 0xffff; + u_int16_t type = (unsigned long)pde->data >> 16; + struct list_head *list; +@@ -718,7 +726,7 @@ + if (af >= NPROTO) + return NULL; + +- list = type2list(af, type); ++ list = type2list(net, af, type); + if (!list) + return NULL; + +@@ -759,6 +767,7 @@ + if (!ret) { + struct seq_file *seq = file->private_data; + struct proc_dir_entry *pde = PDE(inode); ++ get_net(PROC_NET(inode)); + + seq->private = pde; + } +@@ -766,12 +775,18 @@ + return ret; + } + ++static int xt_tgt_release(struct inode *inode, struct file *file) ++{ ++ put_net(PROC_NET(inode)); ++ return seq_release(inode, file); ++} ++ + static const struct file_operations xt_file_ops = { + .owner = THIS_MODULE, + .open = xt_tgt_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release, ++ .release = xt_tgt_release, + }; + + #define FORMAT_TABLES "_tables_names" +@@ -794,7 +809,7 @@ + #ifdef CONFIG_PROC_FS + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_TABLES, sizeof(buf)); +- proc = proc_net_fops_create(buf, 0440, &xt_file_ops); ++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); + if (!proc) + goto out; + proc->data = (void *) ((unsigned long) af | (TABLE << 16)); +@@ -802,14 +817,14 @@ + + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_MATCHES, sizeof(buf)); +- proc = proc_net_fops_create(buf, 0440, &xt_file_ops); ++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); + if (!proc) + goto out_remove_tables; + proc->data = (void *) ((unsigned long) af | (MATCH << 16)); + + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_TARGETS, sizeof(buf)); +- proc = proc_net_fops_create(buf, 0440, &xt_file_ops); ++ proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops); + if (!proc) + goto out_remove_matches; + proc->data = (void *) ((unsigned long) af | (TARGET << 16)); +@@ -821,12 +836,12 @@ + out_remove_matches: + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_MATCHES, sizeof(buf)); +- proc_net_remove(buf); ++ proc_net_remove(&init_net, buf); + + out_remove_tables: + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_TABLES, sizeof(buf)); +- proc_net_remove(buf); ++ proc_net_remove(&init_net, buf); + out: + return -1; + #endif +@@ -840,19 +855,42 @@ + + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_TABLES, sizeof(buf)); +- proc_net_remove(buf); ++ proc_net_remove(&init_net, buf); + + strlcpy(buf, xt_prefix[af], sizeof(buf)); + 
strlcat(buf, FORMAT_TARGETS, sizeof(buf)); +- proc_net_remove(buf); ++ proc_net_remove(&init_net, buf); + + strlcpy(buf, xt_prefix[af], sizeof(buf)); + strlcat(buf, FORMAT_MATCHES, sizeof(buf)); +- proc_net_remove(buf); ++ proc_net_remove(&init_net, buf); + #endif /*CONFIG_PROC_FS*/ + } + EXPORT_SYMBOL_GPL(xt_proto_fini); + ++static int xt_net_init(struct net *net) ++{ ++ int i; ++ ++ net->xtn = kmalloc(sizeof(struct xt_af_pernet) * NPROTO, GFP_KERNEL); ++ if (!net->xtn) ++ return -ENOMEM; ++ ++ for (i = 0; i < NPROTO; i++) { ++ INIT_LIST_HEAD(&net->xtn[i].tables); ++ } ++ return 0; ++} ++ ++static void xt_net_exit(struct net *net) ++{ ++ kfree(net->xtn); ++} ++ ++static struct pernet_operations xt_net_ops = { ++ .init = xt_net_init, ++ .exit = xt_net_exit, ++}; + + static int __init xt_init(void) + { +@@ -869,13 +907,13 @@ + #endif + INIT_LIST_HEAD(&xt[i].target); + INIT_LIST_HEAD(&xt[i].match); +- INIT_LIST_HEAD(&xt[i].tables); + } +- return 0; ++ return register_pernet_subsys(&xt_net_ops); + } + + static void __exit xt_fini(void) + { ++ unregister_pernet_subsys(&xt_net_ops); + kfree(xt); + } + +diff -Nurb linux-2.6.22-570/net/netfilter/xt_MARK.c linux-2.6.22-591/net/netfilter/xt_MARK.c +--- linux-2.6.22-570/net/netfilter/xt_MARK.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/netfilter/xt_MARK.c 2007-12-21 15:36:15.000000000 -0500 +@@ -131,7 +131,7 @@ + if ((*pskb)->sk) + connection_sk = (*pskb)->sk; + else { +- connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif); ++ connection_sk = inet_lookup(&tcp_hashinfo, src_ip, src_port, ip, port, dif,(*pskb)->sk->sk_net); + } + + if (connection_sk) { +diff -Nurb linux-2.6.22-570/net/netfilter/xt_hashlimit.c linux-2.6.22-591/net/netfilter/xt_hashlimit.c +--- linux-2.6.22-570/net/netfilter/xt_hashlimit.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netfilter/xt_hashlimit.c 2007-12-21 15:36:15.000000000 -0500 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -736,13 +737,13 @@ + printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n"); + goto err2; + } +- hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net); ++ hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net); + if (!hashlimit_procdir4) { + printk(KERN_ERR "xt_hashlimit: unable to create proc dir " + "entry\n"); + goto err3; + } +- hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net); ++ hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net); + if (!hashlimit_procdir6) { + printk(KERN_ERR "xt_hashlimit: unable to create proc dir " + "entry\n"); +@@ -750,7 +751,7 @@ + } + return 0; + err4: +- remove_proc_entry("ipt_hashlimit", proc_net); ++ remove_proc_entry("ipt_hashlimit", init_net.proc_net); + err3: + kmem_cache_destroy(hashlimit_cachep); + err2: +@@ -762,8 +763,8 @@ + + static void __exit xt_hashlimit_fini(void) + { +- remove_proc_entry("ipt_hashlimit", proc_net); +- remove_proc_entry("ip6t_hashlimit", proc_net); ++ remove_proc_entry("ipt_hashlimit", init_net.proc_net); ++ remove_proc_entry("ip6t_hashlimit", init_net.proc_net); + kmem_cache_destroy(hashlimit_cachep); + xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit)); + } +diff -Nurb linux-2.6.22-570/net/netlink/af_netlink.c linux-2.6.22-591/net/netlink/af_netlink.c +--- linux-2.6.22-570/net/netlink/af_netlink.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/netlink/af_netlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -63,6 +63,7 @@ + #include + #include + #include 
++#include + + #define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8) + +@@ -212,7 +213,7 @@ + wake_up(&nl_table_wait); + } + +-static __inline__ struct sock *netlink_lookup(int protocol, u32 pid) ++static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) + { + struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct hlist_head *head; +@@ -222,7 +223,7 @@ + read_lock(&nl_table_lock); + head = nl_pid_hashfn(hash, pid); + sk_for_each(sk, node, head) { +- if (nlk_sk(sk)->pid == pid) { ++ if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) { + sock_hold(sk); + goto found; + } +@@ -327,7 +328,7 @@ + * makes sure updates are visible before bind or setsockopt return. */ + } + +-static int netlink_insert(struct sock *sk, u32 pid) ++static int netlink_insert(struct sock *sk, struct net *net, u32 pid) + { + struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct hlist_head *head; +@@ -340,7 +341,7 @@ + head = nl_pid_hashfn(hash, pid); + len = 0; + sk_for_each(osk, node, head) { +- if (nlk_sk(osk)->pid == pid) ++ if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid)) + break; + len++; + } +@@ -383,15 +384,15 @@ + .obj_size = sizeof(struct netlink_sock), + }; + +-static int __netlink_create(struct socket *sock, struct mutex *cb_mutex, +- int protocol) ++static int __netlink_create(struct net *net, struct socket *sock, ++ struct mutex *cb_mutex, int protocol) + { + struct sock *sk; + struct netlink_sock *nlk; + + sock->ops = &netlink_ops; + +- sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); ++ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); + if (!sk) + return -ENOMEM; + +@@ -411,7 +412,7 @@ + return 0; + } + +-static int netlink_create(struct socket *sock, int protocol) ++static int netlink_create(struct net *net, struct socket *sock, int protocol) + { + struct module *module = NULL; + struct mutex *cb_mutex; +@@ -440,7 +441,7 @@ + cb_mutex = nl_table[protocol].cb_mutex; + netlink_unlock_table(); + +- if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0) ++ if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0) + goto out_module; + + nlk = nlk_sk(sock->sk); +@@ -477,6 +478,7 @@ + + if (nlk->pid && !nlk->subscriptions) { + struct netlink_notify n = { ++ .net = sk->sk_net, + .protocol = sk->sk_protocol, + .pid = nlk->pid, + }; +@@ -505,6 +507,7 @@ + static int netlink_autobind(struct socket *sock) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct hlist_head *head; + struct sock *osk; +@@ -518,6 +521,8 @@ + netlink_table_grab(); + head = nl_pid_hashfn(hash, pid); + sk_for_each(osk, node, head) { ++ if ((osk->sk_net != net)) ++ continue; + if (nlk_sk(osk)->pid == pid) { + /* Bind collision, search negative pid values. */ + pid = rover--; +@@ -529,7 +534,7 @@ + } + netlink_table_ungrab(); + +- err = netlink_insert(sk, pid); ++ err = netlink_insert(sk, net, pid); + if (err == -EADDRINUSE) + goto retry; + +@@ -583,6 +588,7 @@ + static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct netlink_sock *nlk = nlk_sk(sk); + struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; + int err; +@@ -606,7 +612,7 @@ + return -EINVAL; + } else { + err = nladdr->nl_pid ? 
+- netlink_insert(sk, nladdr->nl_pid) : ++ netlink_insert(sk, net, nladdr->nl_pid) : + netlink_autobind(sock); + if (err) + return err; +@@ -690,10 +696,12 @@ + static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) + { + int protocol = ssk->sk_protocol; ++ struct net *net; + struct sock *sock; + struct netlink_sock *nlk; + +- sock = netlink_lookup(protocol, pid); ++ net = ssk->sk_net; ++ sock = netlink_lookup(net, protocol, pid); + if (!sock) + return ERR_PTR(-ECONNREFUSED); + +@@ -866,6 +874,7 @@ + + struct netlink_broadcast_data { + struct sock *exclude_sk; ++ struct net *net; + u32 pid; + u32 group; + int failure; +@@ -888,6 +897,9 @@ + !test_bit(p->group - 1, nlk->groups)) + goto out; + ++ if ((sk->sk_net != p->net)) ++ goto out; ++ + if (p->failure) { + netlink_overrun(sk); + goto out; +@@ -926,6 +938,7 @@ + int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, + u32 group, gfp_t allocation) + { ++ struct net *net = ssk->sk_net; + struct netlink_broadcast_data info; + struct hlist_node *node; + struct sock *sk; +@@ -933,6 +946,7 @@ + skb = netlink_trim(skb, allocation); + + info.exclude_sk = ssk; ++ info.net = net; + info.pid = pid; + info.group = group; + info.failure = 0; +@@ -981,6 +995,9 @@ + if (sk == p->exclude_sk) + goto out; + ++ if (sk->sk_net != p->exclude_sk->sk_net) ++ goto out; ++ + if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + !test_bit(p->group - 1, nlk->groups)) + goto out; +@@ -1276,7 +1293,7 @@ + */ + + struct sock * +-netlink_kernel_create(int unit, unsigned int groups, ++netlink_kernel_create(struct net *net, int unit, unsigned int groups, + void (*input)(struct sock *sk, int len), + struct mutex *cb_mutex, struct module *module) + { +@@ -1293,7 +1310,7 @@ + if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) + return NULL; + +- if (__netlink_create(sock, cb_mutex, unit) < 0) ++ if (__netlink_create(net, sock, cb_mutex, unit) < 0) + goto out_sock_release; + + if (groups < 32) +@@ -1308,18 +1325,20 @@ + if (input) + nlk_sk(sk)->data_ready = input; + +- if (netlink_insert(sk, 0)) ++ if (netlink_insert(sk, net, 0)) + goto out_sock_release; + + nlk = nlk_sk(sk); + nlk->flags |= NETLINK_KERNEL_SOCKET; + + netlink_table_grab(); ++ if (!nl_table[unit].registered) { + nl_table[unit].groups = groups; + nl_table[unit].listeners = listeners; + nl_table[unit].cb_mutex = cb_mutex; + nl_table[unit].module = module; + nl_table[unit].registered = 1; ++ } + netlink_table_ungrab(); + + return sk; +@@ -1420,7 +1439,7 @@ + atomic_inc(&skb->users); + cb->skb = skb; + +- sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid); ++ sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid); + if (sk == NULL) { + netlink_destroy_callback(cb); + return -ECONNREFUSED; +@@ -1462,7 +1481,8 @@ + if (!skb) { + struct sock *sk; + +- sk = netlink_lookup(in_skb->sk->sk_protocol, ++ sk = netlink_lookup(in_skb->sk->sk_net, ++ in_skb->sk->sk_protocol, + NETLINK_CB(in_skb).pid); + if (sk) { + sk->sk_err = ENOBUFS; +@@ -1613,6 +1633,7 @@ + + #ifdef CONFIG_PROC_FS + struct nl_seq_iter { ++ struct net *net; + int link; + int hash_idx; + }; +@@ -1630,6 +1651,8 @@ + + for (j = 0; j <= hash->mask; j++) { + sk_for_each(s, node, &hash->table[j]) { ++ if (iter->net != s->sk_net) ++ continue; + if (off == pos) { + iter->link = i; + iter->hash_idx = j; +@@ -1659,11 +1682,14 @@ + if (v == SEQ_START_TOKEN) + return netlink_seq_socket_idx(seq, 0); + +- s = sk_next(v); ++ iter = seq->private; ++ s = v; ++ do { ++ s = sk_next(s); ++ } while (s && 
(iter->net != s->sk_net)); + if (s) + return s; + +- iter = seq->private; + i = iter->link; + j = iter->hash_idx + 1; + +@@ -1672,6 +1698,8 @@ + + for (; j <= hash->mask; j++) { + s = sk_head(&hash->table[j]); ++ while (s && (iter->net != s->sk_net)) ++ s = sk_next(s); + if (s) { + iter->link = i; + iter->hash_idx = j; +@@ -1742,15 +1770,24 @@ + + seq = file->private_data; + seq->private = iter; ++ iter->net = get_net(PROC_NET(inode)); + return 0; + } + ++static int netlink_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct nl_seq_iter *iter = seq->private; ++ put_net(iter->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations netlink_seq_fops = { + .owner = THIS_MODULE, + .open = netlink_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = netlink_seq_release, + }; + + #endif +@@ -1792,6 +1829,27 @@ + .owner = THIS_MODULE, /* for consistency 8) */ + }; + ++static int netlink_net_init(struct net *net) ++{ ++#ifdef CONFIG_PROC_FS ++ if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops)) ++ return -ENOMEM; ++#endif ++ return 0; ++} ++ ++static void netlink_net_exit(struct net *net) ++{ ++#ifdef CONFIG_PROC_FS ++ proc_net_remove(net, "netlink"); ++#endif ++} ++ ++static struct pernet_operations netlink_net_ops = { ++ .init = netlink_net_init, ++ .exit = netlink_net_exit, ++}; ++ + static int __init netlink_proto_init(void) + { + struct sk_buff *dummy_skb; +@@ -1837,9 +1895,7 @@ + } + + sock_register(&netlink_family_ops); +-#ifdef CONFIG_PROC_FS +- proc_net_fops_create("netlink", 0, &netlink_seq_fops); +-#endif ++ register_pernet_subsys(&netlink_net_ops); + /* The netlink device handler may be needed early. 
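netlink_net_init()/netlink_net_exit() above are one instance of the pernet_operations pattern this patch applies across netlink, netfilter and packet sockets: the init hook runs once per namespace (including init_net at registration time), the exit hook tears the same state down, and registering the ops replaces the old one-shot proc setup in module init. A skeleton of the pattern, with every example_* name invented:

    #include <linux/errno.h>
    #include <linux/fs.h>
    #include <linux/init.h>
    #include <linux/proc_fs.h>
    #include <net/net_namespace.h>

    static const struct file_operations example_seq_fops;  /* stand-in */

    static int example_net_init(struct net *net)
    {
            if (!proc_net_fops_create(net, "example", 0, &example_seq_fops))
                    return -ENOMEM;
            return 0;
    }

    static void example_net_exit(struct net *net)
    {
            proc_net_remove(net, "example");
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
    };

    static int __init example_init(void)
    {
            /* replaces the old proc_net_fops_create() call in
             * module init; runs for every namespace created later */
            return register_pernet_subsys(&example_net_ops);
    }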
*/ + rtnetlink_init(); + out: +diff -Nurb linux-2.6.22-570/net/netlink/attr.c linux-2.6.22-591/net/netlink/attr.c +--- linux-2.6.22-570/net/netlink/attr.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netlink/attr.c 2007-12-21 15:36:12.000000000 -0500 +@@ -72,6 +72,17 @@ + return -ERANGE; + break; + ++ case NLA_NESTED_COMPAT: ++ if (attrlen < pt->len) ++ return -ERANGE; ++ if (attrlen < NLA_ALIGN(pt->len)) ++ break; ++ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) ++ return -ERANGE; ++ nla = nla_data(nla) + NLA_ALIGN(pt->len); ++ if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) ++ return -ERANGE; ++ break; + default: + if (pt->len) + minlen = pt->len; +diff -Nurb linux-2.6.22-570/net/netlink/genetlink.c linux-2.6.22-591/net/netlink/genetlink.c +--- linux-2.6.22-570/net/netlink/genetlink.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netlink/genetlink.c 2007-12-21 15:36:15.000000000 -0500 +@@ -557,8 +557,9 @@ + goto errout_register; + + netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); +- genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID, +- genl_rcv, NULL, THIS_MODULE); ++ genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, ++ GENL_MAX_ID, genl_rcv, NULL, ++ THIS_MODULE); + if (genl_sock == NULL) + panic("GENL: Cannot initialize generic netlink\n"); + +diff -Nurb linux-2.6.22-570/net/netrom/af_netrom.c linux-2.6.22-591/net/netrom/af_netrom.c +--- linux-2.6.22-570/net/netrom/af_netrom.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netrom/af_netrom.c 2007-12-21 15:36:15.000000000 -0500 +@@ -41,6 +41,7 @@ + #include + #include + #include ++#include + #include + + static int nr_ndevs = 4; +@@ -105,6 +106,9 @@ + { + struct net_device *dev = (struct net_device *)ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + +@@ -408,15 +412,18 @@ + .obj_size = sizeof(struct nr_sock), + }; + +-static int nr_create(struct socket *sock, int protocol) ++static int nr_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct nr_sock *nr; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (sock->type != SOCK_SEQPACKET || protocol != 0) + return -ESOCKTNOSUPPORT; + +- if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) ++ if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL) + return -ENOMEM; + + nr = nr_sk(sk); +@@ -458,7 +465,7 @@ + if (osk->sk_type != SOCK_SEQPACKET) + return NULL; + +- if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) ++ if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL) + return NULL; + + nr = nr_sk(sk); +@@ -1447,9 +1454,9 @@ + + nr_loopback_init(); + +- proc_net_fops_create("nr", S_IRUGO, &nr_info_fops); +- proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops); +- proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops); ++ proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops); ++ proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops); ++ proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops); + out: + return rc; + fail: +@@ -1477,9 +1484,9 @@ + { + int i; + +- proc_net_remove("nr"); +- proc_net_remove("nr_neigh"); +- proc_net_remove("nr_nodes"); ++ proc_net_remove(&init_net, "nr"); ++ proc_net_remove(&init_net, "nr_neigh"); ++ proc_net_remove(&init_net, "nr_nodes"); + nr_loopback_clear(); + + nr_rt_free(); +diff -Nurb linux-2.6.22-570/net/netrom/nr_route.c 
linux-2.6.22-591/net/netrom/nr_route.c +--- linux-2.6.22-570/net/netrom/nr_route.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/netrom/nr_route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -580,7 +580,7 @@ + { + struct net_device *dev; + +- if ((dev = dev_get_by_name(devname)) == NULL) ++ if ((dev = dev_get_by_name(&init_net, devname)) == NULL) + return NULL; + + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) +@@ -598,7 +598,7 @@ + struct net_device *dev, *first = NULL; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM) + if (first == NULL || strncmp(dev->name, first->name, 3) < 0) + first = dev; +@@ -618,7 +618,7 @@ + struct net_device *dev; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) { + dev_hold(dev); + goto out; +diff -Nurb linux-2.6.22-570/net/packet/af_packet.c linux-2.6.22-591/net/packet/af_packet.c +--- linux-2.6.22-570/net/packet/af_packet.c 2007-12-21 15:36:03.000000000 -0500 ++++ linux-2.6.22-591/net/packet/af_packet.c 2007-12-21 15:36:15.000000000 -0500 +@@ -65,6 +65,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -135,10 +136,6 @@ + packet classifier depends on it. + */ + +-/* List of all packet sockets. */ +-static HLIST_HEAD(packet_sklist); +-static DEFINE_RWLOCK(packet_sklist_lock); +- + static atomic_t packet_socks_nr; + + +@@ -273,6 +270,9 @@ + if (skb->pkt_type == PACKET_LOOPBACK) + goto out; + ++ if (dev->nd_net != sk->sk_net) ++ goto out; ++ + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + goto oom; + +@@ -344,7 +344,7 @@ + */ + + saddr->spkt_device[13] = 0; +- dev = dev_get_by_name(saddr->spkt_device); ++ dev = dev_get_by_name(sk->sk_net, saddr->spkt_device); + err = -ENODEV; + if (dev == NULL) + goto out_unlock; +@@ -462,6 +462,9 @@ + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (dev->nd_net != sk->sk_net) ++ goto drop; ++ + skb->dev = dev; + + if (dev->hard_header) { +@@ -578,6 +581,9 @@ + sk = pt->af_packet_priv; + po = pkt_sk(sk); + ++ if (dev->nd_net != sk->sk_net) ++ goto drop; ++ + if (dev->hard_header) { + if (sk->sk_type != SOCK_DGRAM) + skb_push(skb, skb->data - skb_mac_header(skb)); +@@ -738,7 +744,7 @@ + } + + +- dev = dev_get_by_index(ifindex); ++ dev = dev_get_by_index(sk->sk_net, ifindex); + err = -ENXIO; + if (dev == NULL) + goto out_unlock; +@@ -811,15 +817,17 @@ + { + struct sock *sk = sock->sk; + struct packet_sock *po; ++ struct net *net; + + if (!sk) + return 0; + ++ net = sk->sk_net; + po = pkt_sk(sk); + +- write_lock_bh(&packet_sklist_lock); ++ write_lock_bh(&net->packet_sklist_lock); + sk_del_node_init(sk); +- write_unlock_bh(&packet_sklist_lock); ++ write_unlock_bh(&net->packet_sklist_lock); + + /* + * Unhook packet receive handler. +@@ -933,7 +941,7 @@ + return -EINVAL; + strlcpy(name,uaddr->sa_data,sizeof(name)); + +- dev = dev_get_by_name(name); ++ dev = dev_get_by_name(sk->sk_net, name); + if (dev) { + err = packet_do_bind(sk, dev, pkt_sk(sk)->num); + dev_put(dev); +@@ -960,7 +968,7 @@ + + if (sll->sll_ifindex) { + err = -ENODEV; +- dev = dev_get_by_index(sll->sll_ifindex); ++ dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex); + if (dev == NULL) + goto out; + } +@@ -982,7 +990,7 @@ + * Create a packet of type SOCK_PACKET. 
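From here on, every device lookup names its namespace explicitly: converted code such as af_packet below passes the socket's own sk->sk_net, while still-global protocols like ROSE and NetRom pin &init_net. A sketch of the converted form; lookup_dev is illustrative:

    #include <linux/netdevice.h>
    #include <net/net_namespace.h>
    #include <net/sock.h>

    static struct net_device *lookup_dev(struct sock *sk, const char *name)
    {
            struct net_device *dev = dev_get_by_name(sk->sk_net, name);

            /* dev_get_by_name() holds a reference; callers must
             * dev_put() when finished, as the hunks above do */
            return dev;
    }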
+ */ + +-static int packet_create(struct socket *sock, int protocol) ++static int packet_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct packet_sock *po; +@@ -998,7 +1006,7 @@ + sock->state = SS_UNCONNECTED; + + err = -ENOBUFS; +- sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1); ++ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1); + if (sk == NULL) + goto out; + +@@ -1034,9 +1042,9 @@ + po->running = 1; + } + +- write_lock_bh(&packet_sklist_lock); +- sk_add_node(sk, &packet_sklist); +- write_unlock_bh(&packet_sklist_lock); ++ write_lock_bh(&net->packet_sklist_lock); ++ sk_add_node(sk, &net->packet_sklist); ++ write_unlock_bh(&net->packet_sklist_lock); + return(0); + out: + return err; +@@ -1154,7 +1162,7 @@ + return -EOPNOTSUPP; + + uaddr->sa_family = AF_PACKET; +- dev = dev_get_by_index(pkt_sk(sk)->ifindex); ++ dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex); + if (dev) { + strlcpy(uaddr->sa_data, dev->name, 15); + dev_put(dev); +@@ -1179,7 +1187,7 @@ + sll->sll_family = AF_PACKET; + sll->sll_ifindex = po->ifindex; + sll->sll_protocol = po->num; +- dev = dev_get_by_index(po->ifindex); ++ dev = dev_get_by_index(sk->sk_net, po->ifindex); + if (dev) { + sll->sll_hatype = dev->type; + sll->sll_halen = dev->addr_len; +@@ -1231,7 +1239,7 @@ + rtnl_lock(); + + err = -ENODEV; +- dev = __dev_get_by_index(mreq->mr_ifindex); ++ dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex); + if (!dev) + goto done; + +@@ -1285,7 +1293,7 @@ + if (--ml->count == 0) { + struct net_device *dev; + *mlp = ml->next; +- dev = dev_get_by_index(ml->ifindex); ++ dev = dev_get_by_index(sk->sk_net, ml->ifindex); + if (dev) { + packet_dev_mc(dev, ml, -1); + dev_put(dev); +@@ -1313,7 +1321,7 @@ + struct net_device *dev; + + po->mclist = ml->next; +- if ((dev = dev_get_by_index(ml->ifindex)) != NULL) { ++ if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) { + packet_dev_mc(dev, ml, -1); + dev_put(dev); + } +@@ -1469,9 +1477,10 @@ + struct sock *sk; + struct hlist_node *node; + struct net_device *dev = data; ++ struct net *net = dev->nd_net; + +- read_lock(&packet_sklist_lock); +- sk_for_each(sk, node, &packet_sklist) { ++ read_lock(&net->packet_sklist_lock); ++ sk_for_each(sk, node, &net->packet_sklist) { + struct packet_sock *po = pkt_sk(sk); + + switch (msg) { +@@ -1510,7 +1519,7 @@ + break; + } + } +- read_unlock(&packet_sklist_lock); ++ read_unlock(&net->packet_sklist_lock); + return NOTIFY_DONE; + } + +@@ -1878,12 +1887,12 @@ + }; + + #ifdef CONFIG_PROC_FS +-static inline struct sock *packet_seq_idx(loff_t off) ++static inline struct sock *packet_seq_idx(struct net *net, loff_t off) + { + struct sock *s; + struct hlist_node *node; + +- sk_for_each(s, node, &packet_sklist) { ++ sk_for_each(s, node, &net->packet_sklist) { + if (!off--) + return s; + } +@@ -1892,21 +1901,24 @@ + + static void *packet_seq_start(struct seq_file *seq, loff_t *pos) + { +- read_lock(&packet_sklist_lock); +- return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN; ++ struct net *net = seq->private; ++ read_lock(&net->packet_sklist_lock); ++ return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; + } + + static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ struct net *net = seq->private; + ++*pos; + return (v == SEQ_START_TOKEN) +- ? sk_head(&packet_sklist) ++ ? 
sk_head(&net->packet_sklist) + : sk_next((struct sock*)v) ; + } + + static void packet_seq_stop(struct seq_file *seq, void *v) + { +- read_unlock(&packet_sklist_lock); ++ struct net *net = seq->private; ++ read_unlock(&net->packet_sklist_lock); + } + + static int packet_seq_show(struct seq_file *seq, void *v) +@@ -1942,7 +1954,22 @@ + + static int packet_seq_open(struct inode *inode, struct file *file) + { +- return seq_open(file, &packet_seq_ops); ++ struct seq_file *seq; ++ int res; ++ res = seq_open(file, &packet_seq_ops); ++ if (!res) { ++ seq = file->private_data; ++ seq->private = get_net(PROC_NET(inode)); ++ } ++ return res; ++} ++ ++static int packet_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq= file->private_data; ++ struct net *net = seq->private; ++ put_net(net); ++ return seq_release(inode, file); + } + + static const struct file_operations packet_seq_fops = { +@@ -1950,15 +1977,37 @@ + .open = packet_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release, ++ .release = packet_seq_release, + }; + + #endif + ++static int packet_net_init(struct net *net) ++{ ++ rwlock_init(&net->packet_sklist_lock); ++ INIT_HLIST_HEAD(&net->packet_sklist); ++ ++ if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++static void packet_net_exit(struct net *net) ++{ ++ proc_net_remove(net, "packet"); ++} ++ ++static struct pernet_operations packet_net_ops = { ++ .init = packet_net_init, ++ .exit = packet_net_exit, ++}; ++ ++ + static void __exit packet_exit(void) + { +- proc_net_remove("packet"); + unregister_netdevice_notifier(&packet_netdev_notifier); ++ unregister_pernet_subsys(&packet_net_ops); + sock_unregister(PF_PACKET); + proto_unregister(&packet_proto); + } +@@ -1971,8 +2020,8 @@ + goto out; + + sock_register(&packet_family_ops); ++ register_pernet_subsys(&packet_net_ops); + register_netdevice_notifier(&packet_netdev_notifier); +- proc_net_fops_create("packet", 0, &packet_seq_fops); + out: + return rc; + } +diff -Nurb linux-2.6.22-570/net/rose/af_rose.c linux-2.6.22-591/net/rose/af_rose.c +--- linux-2.6.22-570/net/rose/af_rose.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/rose/af_rose.c 2007-12-21 15:36:15.000000000 -0500 +@@ -45,6 +45,7 @@ + #include + #include + #include ++#include + + static int rose_ndevs = 10; + +@@ -196,6 +197,9 @@ + { + struct net_device *dev = (struct net_device *)ptr; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (event != NETDEV_DOWN) + return NOTIFY_DONE; + +@@ -498,15 +502,18 @@ + .obj_size = sizeof(struct rose_sock), + }; + +-static int rose_create(struct socket *sock, int protocol) ++static int rose_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct rose_sock *rose; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (sock->type != SOCK_SEQPACKET || protocol != 0) + return -ESOCKTNOSUPPORT; + +- if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) ++ if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + return -ENOMEM; + + rose = rose_sk(sk); +@@ -544,7 +551,7 @@ + if (osk->sk_type != SOCK_SEQPACKET) + return NULL; + +- if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) ++ if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL) + return NULL; + + rose = rose_sk(sk); +@@ -1576,10 +1583,10 @@ + + rose_add_loopback_neigh(); + +- proc_net_fops_create("rose", S_IRUGO, &rose_info_fops); +- 
proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops); +- proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops); +- proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops); ++ proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops); ++ proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops); ++ proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops); ++ proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops); + out: + return rc; + fail: +@@ -1606,10 +1613,10 @@ + { + int i; + +- proc_net_remove("rose"); +- proc_net_remove("rose_neigh"); +- proc_net_remove("rose_nodes"); +- proc_net_remove("rose_routes"); ++ proc_net_remove(&init_net, "rose"); ++ proc_net_remove(&init_net, "rose_neigh"); ++ proc_net_remove(&init_net, "rose_nodes"); ++ proc_net_remove(&init_net, "rose_routes"); + rose_loopback_clear(); + + rose_rt_free(); +diff -Nurb linux-2.6.22-570/net/rose/rose_route.c linux-2.6.22-591/net/rose/rose_route.c +--- linux-2.6.22-570/net/rose/rose_route.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/rose/rose_route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -583,7 +583,7 @@ + { + struct net_device *dev; + +- if ((dev = dev_get_by_name(devname)) == NULL) ++ if ((dev = dev_get_by_name(&init_net, devname)) == NULL) + return NULL; + + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25) +@@ -601,7 +601,7 @@ + struct net_device *dev, *first = NULL; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE) + if (first == NULL || strncmp(dev->name, first->name, 3) < 0) + first = dev; +@@ -619,7 +619,7 @@ + struct net_device *dev; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) { + dev_hold(dev); + goto out; +@@ -636,7 +636,7 @@ + struct net_device *dev; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) + goto out; + } +diff -Nurb linux-2.6.22-570/net/rxrpc/af_rxrpc.c linux-2.6.22-591/net/rxrpc/af_rxrpc.c +--- linux-2.6.22-570/net/rxrpc/af_rxrpc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/rxrpc/af_rxrpc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include "ar-internal.h" +@@ -605,13 +606,16 @@ + /* + * create an RxRPC socket + */ +-static int rxrpc_create(struct socket *sock, int protocol) ++static int rxrpc_create(struct net *net, struct socket *sock, int protocol) + { + struct rxrpc_sock *rx; + struct sock *sk; + + _enter("%p,%d", sock, protocol); + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + /* we support transport protocol UDP only */ + if (protocol != PF_INET) + return -EPROTONOSUPPORT; +@@ -622,7 +626,7 @@ + sock->ops = &rxrpc_rpc_ops; + sock->state = SS_UNCONNECTED; + +- sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); ++ sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1); + if (!sk) + return -ENOMEM; + +@@ -829,8 +833,8 @@ + } + + #ifdef CONFIG_PROC_FS +- proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops); +- proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops); ++ proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops); ++ 
proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops); + #endif + return 0; + +@@ -868,8 +872,8 @@ + + _debug("flush scheduled work"); + flush_workqueue(rxrpc_workqueue); +- proc_net_remove("rxrpc_conns"); +- proc_net_remove("rxrpc_calls"); ++ proc_net_remove(&init_net, "rxrpc_conns"); ++ proc_net_remove(&init_net, "rxrpc_calls"); + destroy_workqueue(rxrpc_workqueue); + kmem_cache_destroy(rxrpc_call_jar); + _leave(""); +diff -Nurb linux-2.6.22-570/net/sched/act_api.c linux-2.6.22-591/net/sched/act_api.c +--- linux-2.6.22-570/net/sched/act_api.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sched/act_api.c 2007-12-21 15:36:15.000000000 -0500 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -675,7 +676,7 @@ + return -EINVAL; + } + +- return rtnl_unicast(skb, pid); ++ return rtnl_unicast(skb, &init_net, pid); + } + + static struct tc_action * +@@ -796,7 +797,7 @@ + nlh->nlmsg_flags |= NLM_F_ROOT; + module_put(a->ops->owner); + kfree(a); +- err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); ++ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + if (err > 0) + return 0; + +@@ -859,7 +860,7 @@ + + /* now do the delete */ + tcf_action_destroy(head, 0); +- ret = rtnetlink_send(skb, pid, RTNLGRP_TC, ++ ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, + n->nlmsg_flags&NLM_F_ECHO); + if (ret > 0) + return 0; +@@ -903,7 +904,7 @@ + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + NETLINK_CB(skb).dst_group = RTNLGRP_TC; + +- err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); ++ err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); + if (err > 0) + err = 0; + return err; +@@ -941,10 +942,14 @@ + + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct rtattr **tca = arg; + u32 pid = skb ? 
NETLINK_CB(skb).pid : 0; + int ret = 0, ovr = 0; + ++ if (net != &init_net) ++ return -EINVAL; ++ + if (tca[TCA_ACT_TAB-1] == NULL) { + printk("tc_ctl_action: received NO action attribs\n"); + return -EINVAL; +@@ -1014,6 +1019,7 @@ + static int + tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + struct rtattr *x; +@@ -1023,6 +1029,9 @@ + struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh); + struct rtattr *kind = find_dump_kind(cb->nlh); + ++ if (net != &init_net) ++ return 0; ++ + if (kind == NULL) { + printk("tc_dump_action: action bad kind\n"); + return 0; +diff -Nurb linux-2.6.22-570/net/sched/act_mirred.c linux-2.6.22-591/net/sched/act_mirred.c +--- linux-2.6.22-570/net/sched/act_mirred.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sched/act_mirred.c 2007-12-21 15:36:15.000000000 -0500 +@@ -85,7 +85,7 @@ + parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]); + + if (parm->ifindex) { +- dev = __dev_get_by_index(parm->ifindex); ++ dev = __dev_get_by_index(&init_net, parm->ifindex); + if (dev == NULL) + return -ENODEV; + switch (dev->type) { +diff -Nurb linux-2.6.22-570/net/sched/cls_api.c linux-2.6.22-591/net/sched/cls_api.c +--- linux-2.6.22-570/net/sched/cls_api.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sched/cls_api.c 2007-12-21 15:36:15.000000000 -0500 +@@ -129,6 +129,7 @@ + + static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct rtattr **tca; + struct tcmsg *t; + u32 protocol; +@@ -145,6 +146,9 @@ + unsigned long fh; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + replay: + tca = arg; + t = NLMSG_DATA(n); +@@ -164,7 +168,7 @@ + /* Find head of filter chain. 
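/*
 * [editor's sketch - not part of trellis.patch]
 * tc_ctl_action()/tc_dump_action() above are not namespace-aware yet,
 * so the patch makes them refuse requests from any net other than
 * init_net rather than silently acting on the wrong namespace. A
 * stand-alone model of that transitional gate; the error value mirrors
 * the hunks, everything else is invented for illustration.
 */
#include <errno.h>
#include <stdio.h>

struct net { const char *name; };

static struct net init_net = { "init_net" };

/* A request handler that only supports the initial namespace. */
static int tc_ctl(struct net *net)
{
	if (net != &init_net)
		return -EINVAL;           /* same early-out the patch adds */
	return 0;                         /* ... real work would go here */
}

int main(void)
{
	struct net container = { "container" };

	printf("init_net: %d\n", tc_ctl(&init_net));    /* 0 */
	printf("container: %d\n", tc_ctl(&container));  /* -EINVAL */
	return 0;
}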
*/ + + /* Find link */ +- if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL) ++ if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL) + return -ENODEV; + + /* Find qdisc */ +@@ -365,7 +369,7 @@ + return -EINVAL; + } + +- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); ++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + } + + struct tcf_dump_args +@@ -385,6 +389,7 @@ + + static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int t; + int s_t; + struct net_device *dev; +@@ -395,9 +400,12 @@ + struct Qdisc_class_ops *cops; + struct tcf_dump_args arg; + ++ if (net != &init_net) ++ return 0; ++ + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + return skb->len; +- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) ++ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + return skb->len; + + if (!tcm->tcm_parent) +diff -Nurb linux-2.6.22-570/net/sched/em_meta.c linux-2.6.22-591/net/sched/em_meta.c +--- linux-2.6.22-570/net/sched/em_meta.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sched/em_meta.c 2007-12-21 15:36:15.000000000 -0500 +@@ -291,7 +291,7 @@ + } else { + struct net_device *dev; + +- dev = dev_get_by_index(skb->sk->sk_bound_dev_if); ++ dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if); + *err = var_dev(dev, dst); + if (dev) + dev_put(dev); +diff -Nurb linux-2.6.22-570/net/sched/sch_api.c linux-2.6.22-591/net/sched/sch_api.c +--- linux-2.6.22-570/net/sched/sch_api.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/sched/sch_api.c 2007-12-21 15:36:15.000000000 -0500 +@@ -35,6 +35,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -609,6 +610,7 @@ + + static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct tcmsg *tcm = NLMSG_DATA(n); + struct rtattr **tca = arg; + struct net_device *dev; +@@ -617,7 +619,10 @@ + struct Qdisc *p = NULL; + int err; + +- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) ++ if (net != &init_net) ++ return -EINVAL; ++ ++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + return -ENODEV; + + if (clid) { +@@ -670,6 +675,7 @@ + + static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct tcmsg *tcm; + struct rtattr **tca; + struct net_device *dev; +@@ -677,6 +683,9 @@ + struct Qdisc *q, *p; + int err; + ++ if (net != &init_net) ++ return -EINVAL; ++ + replay: + /* Reinit, just in case something touches this. 
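/*
 * [editor's sketch - not part of trellis.patch]
 * The em_meta and sch_api hunks above convert every dev_get_by_index /
 * __dev_get_by_index call to take the namespace to search, so lookups
 * are scoped and two namespaces may legitimately reuse an ifindex. A
 * toy model of a (net, ifindex) keyed lookup follows; the flat array
 * stands in for the kernel's per-net device list and is purely
 * illustrative.
 */
#include <stddef.h>
#include <stdio.h>

struct net { int id; };

struct net_device {
	struct net *nd_net;               /* owning namespace, as above */
	int ifindex;
	char name[16];
};

static struct net init_net = { 0 }, other_net = { 1 };

static struct net_device devices[] = {
	{ &init_net,  1, "lo"   },
	{ &other_net, 1, "lo"   },        /* same ifindex, other netns */
	{ &init_net,  2, "eth0" },
};

static struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	for (size_t i = 0; i < sizeof(devices) / sizeof(devices[0]); i++)
		if (devices[i].nd_net == net && devices[i].ifindex == ifindex)
			return &devices[i];
	return NULL;             /* the handlers above map this to -ENODEV */
}

int main(void)
{
	/* Both namespaces own an ifindex 1, and each sees only its own. */
	printf("%s\n", dev_get_by_index(&init_net, 1)->name);
	printf("%p\n", (void *)dev_get_by_index(&other_net, 2)); /* NULL */
	return 0;
}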
*/ + tcm = NLMSG_DATA(n); +@@ -684,7 +693,7 @@ + clid = tcm->tcm_parent; + q = p = NULL; + +- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) ++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + return -ENODEV; + + if (clid) { +@@ -873,7 +882,7 @@ + } + + if (skb->len) +- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); ++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + + err_out: + kfree_skb(skb); +@@ -882,16 +891,20 @@ + + static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int idx, q_idx; + int s_idx, s_q_idx; + struct net_device *dev; + struct Qdisc *q; + ++ if (net != &init_net) ++ return 0; ++ + s_idx = cb->args[0]; + s_q_idx = q_idx = cb->args[1]; + read_lock(&dev_base_lock); + idx = 0; +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + if (idx < s_idx) + goto cont; + if (idx > s_idx) +@@ -930,6 +943,7 @@ + + static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) + { ++ struct net *net = skb->sk->sk_net; + struct tcmsg *tcm = NLMSG_DATA(n); + struct rtattr **tca = arg; + struct net_device *dev; +@@ -942,7 +956,10 @@ + u32 qid = TC_H_MAJ(clid); + int err; + +- if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL) ++ if (net != &init_net) ++ return -EINVAL; ++ ++ if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + return -ENODEV; + + /* +@@ -1096,7 +1113,7 @@ + return -EINVAL; + } + +- return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); ++ return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + } + + struct qdisc_dump_args +@@ -1116,6 +1133,7 @@ + + static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) + { ++ struct net *net = skb->sk->sk_net; + int t; + int s_t; + struct net_device *dev; +@@ -1123,9 +1141,12 @@ + struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); + struct qdisc_dump_args arg; + ++ if (net != &init_net) ++ return 0; ++ + if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) + return 0; +- if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL) ++ if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) + return 0; + + s_t = cb->args[0]; +@@ -1252,7 +1273,7 @@ + { + register_qdisc(&pfifo_qdisc_ops); + register_qdisc(&bfifo_qdisc_ops); +- proc_net_fops_create("psched", 0, &psched_fops); ++ proc_net_fops_create(&init_net, "psched", 0, &psched_fops); + + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL); +diff -Nurb linux-2.6.22-570/net/sched/sch_generic.c linux-2.6.22-591/net/sched/sch_generic.c +--- linux-2.6.22-570/net/sched/sch_generic.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sched/sch_generic.c 2007-12-21 15:36:12.000000000 -0500 +@@ -59,122 +59,143 @@ + spin_unlock_bh(&dev->queue_lock); + } + +-/* +- dev->queue_lock serializes queue accesses for this device +- AND dev->qdisc pointer itself. ++static inline int qdisc_qlen(struct Qdisc *q) ++{ ++ return q->q.qlen; ++} + +- netif_tx_lock serializes accesses to device driver. ++static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, ++ struct Qdisc *q) ++{ ++ if (unlikely(skb->next)) ++ dev->gso_skb = skb; ++ else ++ q->ops->requeue(skb, q); + +- dev->queue_lock and netif_tx_lock are mutually exclusive, +- if one is grabbed, another must be free. 
+- */ ++ netif_schedule(dev); ++ return 0; ++} +
++static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
++ struct Qdisc *q)
++{
++ struct sk_buff *skb;
+
+-/* Kick device. ++ if ((skb = dev->gso_skb))
++ dev->gso_skb = NULL;
++ else
++ skb = q->dequeue(q);
+
+- Returns: 0 - queue is empty or throttled.
+- >0 - queue is not empty. ++ return skb;
++}
+
+- NOTE: Called under dev->queue_lock with locally disabled BH.
+-*/
++static inline int handle_dev_cpu_collision(struct sk_buff *skb,
++ struct net_device *dev,
++ struct Qdisc *q)
++{
++ int ret;
+
++ if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
++ /*
++ * Same CPU holding the lock. It may be a transient
++ * configuration error, when hard_start_xmit() recurses. We
++ * detect it by checking xmit owner and drop the packet when
++ * deadloop is detected. Return OK to try the next skb.
++ */
++ kfree_skb(skb);
++ if (net_ratelimit())
++ printk(KERN_WARNING "Dead loop on netdevice %s, "
++ "fix it urgently!\n", dev->name);
++ ret = qdisc_qlen(q);
++ } else {
++ /*
++ * Another CPU is holding the lock, requeue & delay xmits for
++ * some time.
++ */
++ __get_cpu_var(netdev_rx_stat).cpu_collision++;
++ ret = dev_requeue_skb(skb, dev, q);
++ }
++
++ return ret;
++}
++
++/*
++ * NOTE: Called under dev->queue_lock with locally disabled BH.
++ *
++ * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
++ * device at a time. dev->queue_lock serializes queue accesses for
++ * this device AND dev->qdisc pointer itself.
++ *
++ * netif_tx_lock serializes accesses to device driver.
++ *
++ * dev->queue_lock and netif_tx_lock are mutually exclusive,
++ * if one is grabbed, another must be free.
++ *
++ * Note that this procedure can be called by a watchdog timer.
++ *
++ * Returns to the caller:
++ * 0 - queue is empty or throttled.
++ * >0 - queue is not empty.
++ *
++ */
+ static inline int qdisc_restart(struct net_device *dev)
+ {
+ struct Qdisc *q = dev->qdisc;
+ struct sk_buff *skb;
++ unsigned lockless;
++ int ret;
+
+ /* Dequeue packet */
+- if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
+- unsigned nolock = (dev->features & NETIF_F_LLTX);
+-
+- dev->gso_skb = NULL;
++ if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
++ return 0;
+
+ /*
+- * When the driver has LLTX set it does its own locking
+- * in start_xmit. No need to add additional overhead by
+- * locking again. These checks are worth it because
+- * even uncongested locks can be quite expensive.
+- * The driver can do trylock like here too, in case
+- * of lock congestion it should return -1 and the packet
+- * will be requeued.
+- */
+- if (!nolock) {
+- if (!netif_tx_trylock(dev)) {
+- collision:
+- /* So, someone grabbed the driver. */
+-
+- /* It may be transient configuration error,
+- when hard_start_xmit() recurses. We detect
+- it by checking xmit owner and drop the
+- packet when deadloop is detected. ++ * When the driver has LLTX set, it does its own locking in
++ * start_xmit. These checks are worth it because even uncongested
++ * locks can be quite expensive. The driver can do a trylock, as
++ * is being done here; in case of lock contention it should return
++ * NETDEV_TX_LOCKED and the packet will be requeued.
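/*
 * [editor's sketch - not part of trellis.patch]
 * The qdisc_restart() rewrite above funnels every outcome into one
 * return contract (0 = queue empty or throttled, >0 = remaining queue
 * length) and treats NETDEV_TX_LOCKED like a lost trylock by
 * requeueing via handle_dev_cpu_collision(). A user-space simulation
 * of that dispatch; the NETDEV_TX_* statuses follow their kernel
 * meaning, but the queue itself is a made-up counter.
 */
#include <stdio.h>

enum { NETDEV_TX_OK, NETDEV_TX_BUSY, NETDEV_TX_LOCKED };

static int qlen;                              /* packets still queued */

static int requeue(void) { return ++qlen; }   /* skb goes back, qlen > 0 */

static int restart_once(int driver_status)
{
	if (qlen == 0)
		return 0;                     /* nothing to send */
	qlen--;                               /* dequeue one skb */

	switch (driver_status) {
	case NETDEV_TX_OK:
		return qlen;                  /* sent; report what is left */
	case NETDEV_TX_LOCKED:                /* another CPU holds tx lock */
		return requeue();
	default:                              /* NETDEV_TX_BUSY and friends */
		return requeue();
	}
}

int main(void)
{
	qlen = 2;
	printf("%d\n", restart_once(NETDEV_TX_OK));     /* 1: one left */
	printf("%d\n", restart_once(NETDEV_TX_LOCKED)); /* 1: requeued */
	printf("%d\n", restart_once(NETDEV_TX_OK));     /* 0: drained */
	printf("%d\n", restart_once(NETDEV_TX_OK));     /* 0: was empty */
	return 0;
}
/*
 * The point of the refactor: callers such as __qdisc_run() can loop on
 * "while (restart_once(...))" without the goto maze the old code used.
 */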
+ */ +- if (dev->xmit_lock_owner == smp_processor_id()) {
+- kfree_skb(skb);
+- if (net_ratelimit())
+- printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+- goto out;
+- }
+- __get_cpu_var(netdev_rx_stat).cpu_collision++;
+- goto requeue;
+- }
++ lockless = (dev->features & NETIF_F_LLTX);
++
++ if (!lockless && !netif_tx_trylock(dev)) {
++ /* Another CPU grabbed the driver tx lock */
++ return handle_dev_cpu_collision(skb, dev, q);
+ }
+
+- {
+ /* And release queue */
+ spin_unlock(&dev->queue_lock);
+
+- if (!netif_queue_stopped(dev)) {
+- int ret;
+-
+ ret = dev_hard_start_xmit(skb, dev);
+- if (ret == NETDEV_TX_OK) {
+- if (!nolock) {
+- netif_tx_unlock(dev);
+- }
+- spin_lock(&dev->queue_lock);
+- q = dev->qdisc;
+- goto out;
+- }
+- if (ret == NETDEV_TX_LOCKED && nolock) {
+- spin_lock(&dev->queue_lock);
+- q = dev->qdisc;
+- goto collision;
+- }
+- }
+
+- /* NETDEV_TX_BUSY - we need to requeue */
+- /* Release the driver */
+- if (!nolock) { ++ if (!lockless)
+ netif_tx_unlock(dev);
+- } ++
+ spin_lock(&dev->queue_lock);
+ q = dev->qdisc;
+- }
+
+- /* Device kicked us out :(
+- This is possible in three cases: ++ switch (ret) { ++ case NETDEV_TX_OK: ++ /* Driver sent out skb successfully */ ++ ret = qdisc_qlen(q); ++ break;
+
+- 0. driver is locked
+- 1. fastroute is enabled
+- 2. device cannot determine busy state
+- before start of transmission (f.e. dialout)
+- 3. device is buggy (ppp)
+- */ ++ case NETDEV_TX_LOCKED: ++ /* Driver trylock failed */ ++ ret = handle_dev_cpu_collision(skb, dev, q); ++ break;
+
+-requeue:
+- if (unlikely(q == &noop_qdisc))
+- kfree_skb(skb);
+- else if (skb->next)
+- dev->gso_skb = skb;
+- else
+- q->ops->requeue(skb, q);
+- netif_schedule(dev); ++ default: ++ /* Driver returned NETDEV_TX_BUSY - requeue skb */ ++ if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit())) ++ printk(KERN_WARNING "BUG %s code %d qlen %d\n", ++ dev->name, ret, q->q.qlen); ++ ++ ret = dev_requeue_skb(skb, dev, q); ++ break;
+ }
+- return 0;
+
+-out:
+- BUG_ON((int) q->q.qlen < 0);
+- return q->q.qlen; ++ return ret;
+ }
+
+ void __qdisc_run(struct net_device *dev)
+diff -Nurb linux-2.6.22-570/net/sched/sch_ingress.c linux-2.6.22-591/net/sched/sch_ingress.c
+--- linux-2.6.22-570/net/sched/sch_ingress.c 2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/net/sched/sch_ingress.c 2007-12-21 15:36:15.000000000 -0500
+@@ -243,6 +243,10 @@
+ struct net_device *dev = skb->dev;
+ int fwres=NF_ACCEPT;
+
++ /* Only filter packets in the initial network namespace */
++ if ((indev?indev:outdev)->nd_net != &init_net)
++ return NF_ACCEPT;
++
+ DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
+ skb->sk ? "(owned)" : "(unowned)",
+ skb->dev ?
(*pskb)->dev->name : "(no dev)", +diff -Nurb linux-2.6.22-570/net/sctp/input.c linux-2.6.22-591/net/sctp/input.c +--- linux-2.6.22-570/net/sctp/input.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sctp/input.c 2007-12-21 15:36:15.000000000 -0500 +@@ -126,6 +126,10 @@ + int family; + struct sctp_af *af; + ++ if (skb->dev->nd_net != &init_net) { ++ kfree_skb(skb); ++ return 0; ++ } + if (skb->pkt_type!=PACKET_HOST) + goto discard_it; + +@@ -509,6 +513,9 @@ + sk_buff_data_t saveip, savesctp; + int err; + ++ if (skb->dev->nd_net != &init_net) ++ return; ++ + if (skb->len < ihlen + 8) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; +diff -Nurb linux-2.6.22-570/net/sctp/ipv6.c linux-2.6.22-591/net/sctp/ipv6.c +--- linux-2.6.22-570/net/sctp/ipv6.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/sctp/ipv6.c 2007-12-21 15:36:15.000000000 -0500 +@@ -189,6 +189,7 @@ + + memset(&fl, 0, sizeof(fl)); + ++ fl.fl_net = &init_net; + fl.proto = sk->sk_protocol; + + /* Fill in the dest address from the route entry passed with the skb +@@ -230,6 +231,7 @@ + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); ++ fl.fl_net = &init_net; + ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr); + if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) + fl.oif = daddr->v6.sin6_scope_id; +@@ -619,7 +621,7 @@ + struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct sctp6_sock *newsctp6sk; + +- newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1); ++ newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1); + if (!newsk) + goto out; + +@@ -664,7 +666,7 @@ + newinet->mc_index = 0; + newinet->mc_list = NULL; + +- if (ipv4_config.no_pmtu_disc) ++ if (init_net.sysctl_ipv4_no_pmtu_disc) + newinet->pmtudisc = IP_PMTUDISC_DONT; + else + newinet->pmtudisc = IP_PMTUDISC_WANT; +@@ -841,7 +843,7 @@ + if (type & IPV6_ADDR_LINKLOCAL) { + if (!addr->v6.sin6_scope_id) + return 0; +- dev = dev_get_by_index(addr->v6.sin6_scope_id); ++ dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); + if (!dev) + return 0; + if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { +@@ -872,7 +874,7 @@ + if (type & IPV6_ADDR_LINKLOCAL) { + if (!addr->v6.sin6_scope_id) + return 0; +- dev = dev_get_by_index(addr->v6.sin6_scope_id); ++ dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id); + if (!dev) + return 0; + if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) { +diff -Nurb linux-2.6.22-570/net/sctp/protocol.c linux-2.6.22-591/net/sctp/protocol.c +--- linux-2.6.22-570/net/sctp/protocol.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sctp/protocol.c 2007-12-21 15:36:15.000000000 -0500 +@@ -59,6 +59,7 @@ + #include + #include + #include ++#include + + /* Global data structures. */ + struct sctp_globals sctp_globals __read_mostly; +@@ -93,7 +94,7 @@ + { + if (!proc_net_sctp) { + struct proc_dir_entry *ent; +- ent = proc_mkdir("net/sctp", NULL); ++ ent = proc_mkdir("sctp", init_net.proc_net); + if (ent) { + ent->owner = THIS_MODULE; + proc_net_sctp = ent; +@@ -126,7 +127,7 @@ + + if (proc_net_sctp) { + proc_net_sctp = NULL; +- remove_proc_entry("net/sctp", NULL); ++ remove_proc_entry("sctp", init_net.proc_net); + } + } + +@@ -170,7 +171,7 @@ + struct sctp_af *af; + + read_lock(&dev_base_lock); +- for_each_netdev(dev) { ++ for_each_netdev(&init_net, dev) { + __list_for_each(pos, &sctp_address_families) { + af = list_entry(pos, struct sctp_af, list); + af->copy_addrlist(&sctp_local_addr_list, dev); +@@ -354,13 +355,13 @@ + /* Should this be available for binding? 
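/*
 * [editor's sketch - not part of trellis.patch]
 * The surrounding SCTP hunks replace file-scope globals such as
 * ipv4_config.no_pmtu_disc and sysctl_ip_nonlocal_bind with fields on
 * struct net, so each namespace can tune them independently. A minimal
 * model of that move; the field name follows the hunks, the policy
 * check is condensed for illustration.
 */
#include <stdio.h>

struct net {
	int sysctl_ip_nonlocal_bind;      /* was a single global knob */
};

/* May this (possibly non-local) address be bound in this namespace? */
static int addr_available(struct net *net, int addr_is_local)
{
	return addr_is_local || net->sysctl_ip_nonlocal_bind;
}

int main(void)
{
	struct net init_net  = { 0 };     /* default: local addrs only */
	struct net container = { 1 };     /* tuned independently */

	printf("%d\n", addr_available(&init_net, 0));   /* 0: refused */
	printf("%d\n", addr_available(&container, 0));  /* 1: allowed */
	return 0;
}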
*/ + static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) + { +- int ret = inet_addr_type(addr->v4.sin_addr.s_addr); ++ int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr); + + + if (addr->v4.sin_addr.s_addr != INADDR_ANY && + ret != RTN_LOCAL && + !sp->inet.freebind && +- !sysctl_ip_nonlocal_bind) ++ !init_net.sysctl_ip_nonlocal_bind) + return 0; + + return 1; +@@ -423,6 +424,7 @@ + union sctp_addr dst_saddr; + + memset(&fl, 0x0, sizeof(struct flowi)); ++ fl.fl_net = &init_net; + fl.fl4_dst = daddr->v4.sin_addr.s_addr; + fl.proto = IPPROTO_SCTP; + if (asoc) { +@@ -539,7 +541,7 @@ + { + struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet; +- struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1); ++ struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1); + + if (!newsk) + goto out; +@@ -1122,7 +1124,7 @@ + } + + spin_lock_init(&sctp_port_alloc_lock); +- sctp_port_rover = sysctl_local_port_range[0] - 1; ++ sctp_port_rover = init_net.sysctl_local_port_range[0] - 1; + + printk(KERN_INFO "SCTP: Hash tables configured " + "(established %d bind %d)\n", +diff -Nurb linux-2.6.22-570/net/sctp/socket.c linux-2.6.22-591/net/sctp/socket.c +--- linux-2.6.22-570/net/sctp/socket.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sctp/socket.c 2007-12-21 15:36:15.000000000 -0500 +@@ -5021,8 +5021,8 @@ + * already in the hash table; if not, we use that; if + * it is, we try next. + */ +- int low = sysctl_local_port_range[0]; +- int high = sysctl_local_port_range[1]; ++ int low = sk->sk_net->sysctl_local_port_range[0]; ++ int high = sk->sk_net->sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + int rover; + int index; +diff -Nurb linux-2.6.22-570/net/socket.c linux-2.6.22-591/net/socket.c +--- linux-2.6.22-570/net/socket.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/socket.c 2007-12-21 15:36:15.000000000 -0500 +@@ -84,6 +84,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -821,9 +822,9 @@ + */ + + static DEFINE_MUTEX(br_ioctl_mutex); +-static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; ++static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; + +-void brioctl_set(int (*hook) (unsigned int, void __user *)) ++void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) + { + mutex_lock(&br_ioctl_mutex); + br_ioctl_hook = hook; +@@ -833,9 +834,9 @@ + EXPORT_SYMBOL(brioctl_set); + + static DEFINE_MUTEX(vlan_ioctl_mutex); +-static int (*vlan_ioctl_hook) (void __user *arg); ++static int (*vlan_ioctl_hook) (struct net *, void __user *arg); + +-void vlan_ioctl_set(int (*hook) (void __user *)) ++void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) + { + mutex_lock(&vlan_ioctl_mutex); + vlan_ioctl_hook = hook; +@@ -864,16 +865,20 @@ + static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) + { + struct socket *sock; ++ struct sock *sk; + void __user *argp = (void __user *)arg; + int pid, err; ++ struct net *net; + + sock = file->private_data; ++ sk = sock->sk; ++ net = sk->sk_net; + if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { +- err = dev_ioctl(cmd, argp); ++ err = dev_ioctl(net, cmd, argp); + } else + #ifdef CONFIG_WIRELESS_EXT + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { +- err = dev_ioctl(cmd, argp); ++ err = dev_ioctl(net, cmd, argp); + } else + #endif /* CONFIG_WIRELESS_EXT */ + switch (cmd) { +@@ -899,7 +904,7 @@ + + mutex_lock(&br_ioctl_mutex); 
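/*
 * [editor's sketch - not part of trellis.patch]
 * brioctl_set()/vlan_ioctl_set() above widen the hook signature so the
 * bridge and VLAN modules are told which namespace issued the ioctl. A
 * stand-alone model of publishing a context-carrying hook under a
 * mutex, as sock_ioctl() does; the hook body is invented for
 * illustration.
 */
#include <pthread.h>
#include <stdio.h>

struct net { int id; };

static pthread_mutex_t br_ioctl_mutex = PTHREAD_MUTEX_INITIALIZER;
static int (*br_ioctl_hook)(struct net *net, unsigned int cmd);

/* Module registration: publish (or clear) the hook under the mutex. */
static void brioctl_set(int (*hook)(struct net *, unsigned int))
{
	pthread_mutex_lock(&br_ioctl_mutex);
	br_ioctl_hook = hook;
	pthread_mutex_unlock(&br_ioctl_mutex);
}

static int my_bridge_ioctl(struct net *net, unsigned int cmd)
{
	printf("bridge ioctl %u from net %d\n", cmd, net->id);
	return 0;
}

int main(void)
{
	struct net init_net = { 0 };
	int err = -1;                     /* stands in for -ENOPKG */

	brioctl_set(my_bridge_ioctl);
	pthread_mutex_lock(&br_ioctl_mutex);
	if (br_ioctl_hook)                /* same guarded call as above */
		err = br_ioctl_hook(&init_net, 42);
	pthread_mutex_unlock(&br_ioctl_mutex);
	return err;
}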
+ if (br_ioctl_hook) +- err = br_ioctl_hook(cmd, argp); ++ err = br_ioctl_hook(net, cmd, argp); + mutex_unlock(&br_ioctl_mutex); + break; + case SIOCGIFVLAN: +@@ -910,7 +915,7 @@ + + mutex_lock(&vlan_ioctl_mutex); + if (vlan_ioctl_hook) +- err = vlan_ioctl_hook(argp); ++ err = vlan_ioctl_hook(net, argp); + mutex_unlock(&vlan_ioctl_mutex); + break; + case SIOCADDDLCI: +@@ -933,7 +938,7 @@ + * to the NIC driver. + */ + if (err == -ENOIOCTLCMD) +- err = dev_ioctl(cmd, argp); ++ err = dev_ioctl(net, cmd, argp); + break; + } + return err; +@@ -1102,7 +1107,7 @@ + return 0; + } + +-static int __sock_create(int family, int type, int protocol, ++static int __sock_create(struct net *net, int family, int type, int protocol, + struct socket **res, int kern) + { + int err; +@@ -1185,7 +1190,7 @@ + /* Now protected by module ref count */ + rcu_read_unlock(); + +- err = pf->create(sock, protocol); ++ err = pf->create(net, sock, protocol); + if (err < 0) + goto out_module_put; + +@@ -1224,12 +1229,12 @@ + + int sock_create(int family, int type, int protocol, struct socket **res) + { +- return __sock_create(family, type, protocol, res, 0); ++ return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); + } + + int sock_create_kern(int family, int type, int protocol, struct socket **res) + { +- return __sock_create(family, type, protocol, res, 1); ++ return __sock_create(&init_net, family, type, protocol, res, 1); + } + + asmlinkage long sys_socket(int family, int type, int protocol) +@@ -1389,8 +1394,6 @@ + * ready for listening. + */ + +-int sysctl_somaxconn __read_mostly = SOMAXCONN; +- + asmlinkage long sys_listen(int fd, int backlog) + { + struct socket *sock; +@@ -1398,8 +1401,9 @@ + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (sock) { +- if ((unsigned)backlog > sysctl_somaxconn) +- backlog = sysctl_somaxconn; ++ struct net *net = sock->sk->sk_net; ++ if ((unsigned)backlog > net->sysctl_somaxconn) ++ backlog = net->sysctl_somaxconn; + + err = security_socket_listen(sock, backlog); + if (!err) +@@ -2189,6 +2193,16 @@ + printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); + } + ++static int sock_pernet_init(struct net *net) ++{ ++ net->sysctl_somaxconn = SOMAXCONN; ++ return 0; ++} ++ ++static struct pernet_operations sock_net_ops = { ++ .init = sock_pernet_init, ++}; ++ + static int __init sock_init(void) + { + /* +@@ -2217,6 +2231,8 @@ + netfilter_init(); + #endif + ++ register_pernet_subsys(&sock_net_ops); ++ + return 0; + } + +diff -Nurb linux-2.6.22-570/net/socket.c.orig linux-2.6.22-591/net/socket.c.orig +--- linux-2.6.22-570/net/socket.c.orig 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/socket.c.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,2344 +0,0 @@ +-/* +- * NET An implementation of the SOCKET network access protocol. +- * +- * Version: @(#)socket.c 1.1.93 18/02/95 +- * +- * Authors: Orest Zborowski, +- * Ross Biro +- * Fred N. van Kempen, +- * +- * Fixes: +- * Anonymous : NOTSOCK/BADF cleanup. Error fix in +- * shutdown() +- * Alan Cox : verify_area() fixes +- * Alan Cox : Removed DDI +- * Jonathan Kamens : SOCK_DGRAM reconnect bug +- * Alan Cox : Moved a load of checks to the very +- * top level. +- * Alan Cox : Move address structures to/from user +- * mode above the protocol layers. +- * Rob Janssen : Allow 0 length sends. +- * Alan Cox : Asynchronous I/O support (cribbed from the +- * tty drivers). 
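/*
 * [editor's sketch - not part of trellis.patch]
 * sock_pernet_init()/sock_net_ops above seed every new namespace with
 * its own sysctl_somaxconn, and register_pernet_subsys() arranges for
 * each subsystem's init/exit to run as namespaces come and go. A toy
 * model of that lifecycle, assuming a fixed-size ops table; the real
 * kernel code also replays init for namespaces that already exist.
 */
#include <stdio.h>

#define SOMAXCONN 128

struct net { int sysctl_somaxconn; };

struct pernet_operations {
	int  (*init)(struct net *net);
	void (*exit)(struct net *net);
};

static struct pernet_operations *subsys[8];
static int nsubsys;

static void register_pernet_subsys(struct pernet_operations *ops)
{
	subsys[nsubsys++] = ops;          /* bounded table, demo only */
}

static int setup_net(struct net *net)     /* namespace creation path */
{
	for (int i = 0; i < nsubsys; i++)
		if (subsys[i]->init && subsys[i]->init(net))
			return -1;        /* unwind via ->exit in reality */
	return 0;
}

static int sock_pernet_init(struct net *net)
{
	net->sysctl_somaxconn = SOMAXCONN;  /* as in the hunk above */
	return 0;
}

static struct pernet_operations sock_net_ops = { sock_pernet_init, NULL };

int main(void)
{
	struct net new_ns = { 0 };

	register_pernet_subsys(&sock_net_ops);
	setup_net(&new_ns);
	printf("somaxconn = %d\n", new_ns.sysctl_somaxconn); /* 128 */
	return 0;
}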
+- * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style) +- * Jeff Uphoff : Made max number of sockets command-line +- * configurable. +- * Matti Aarnio : Made the number of sockets dynamic, +- * to be allocated when needed, and mr. +- * Uphoff's max is used as max to be +- * allowed to allocate. +- * Linus : Argh. removed all the socket allocation +- * altogether: it's in the inode now. +- * Alan Cox : Made sock_alloc()/sock_release() public +- * for NetROM and future kernel nfsd type +- * stuff. +- * Alan Cox : sendmsg/recvmsg basics. +- * Tom Dyas : Export net symbols. +- * Marcin Dalecki : Fixed problems with CONFIG_NET="n". +- * Alan Cox : Added thread locking to sys_* calls +- * for sockets. May have errors at the +- * moment. +- * Kevin Buhr : Fixed the dumb errors in the above. +- * Andi Kleen : Some small cleanups, optimizations, +- * and fixed a copy_from_user() bug. +- * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) +- * Tigran Aivazian : Made listen(2) backlog sanity checks +- * protocol-independent +- * +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version +- * 2 of the License, or (at your option) any later version. +- * +- * +- * This module is effectively the top level interface to the BSD socket +- * paradigm. +- * +- * Based upon Swansea University Computer Society NET3.039 +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +- +-#include +- +-#include +-#include +- +-static int sock_no_open(struct inode *irrelevant, struct file *dontcare); +-static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, +- unsigned long nr_segs, loff_t pos); +-static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, +- unsigned long nr_segs, loff_t pos); +-static int sock_mmap(struct file *file, struct vm_area_struct *vma); +- +-static int sock_close(struct inode *inode, struct file *file); +-static unsigned int sock_poll(struct file *file, +- struct poll_table_struct *wait); +-static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +-#ifdef CONFIG_COMPAT +-static long compat_sock_ioctl(struct file *file, +- unsigned int cmd, unsigned long arg); +-#endif +-static int sock_fasync(int fd, struct file *filp, int on); +-static ssize_t sock_sendpage(struct file *file, struct page *page, +- int offset, size_t size, loff_t *ppos, int more); +- +-/* +- * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear +- * in the operation structures but are done directly via the socketcall() multiplexor. +- */ +- +-static const struct file_operations socket_file_ops = { +- .owner = THIS_MODULE, +- .llseek = no_llseek, +- .aio_read = sock_aio_read, +- .aio_write = sock_aio_write, +- .poll = sock_poll, +- .unlocked_ioctl = sock_ioctl, +-#ifdef CONFIG_COMPAT +- .compat_ioctl = compat_sock_ioctl, +-#endif +- .mmap = sock_mmap, +- .open = sock_no_open, /* special open code to disallow open via /proc */ +- .release = sock_close, +- .fasync = sock_fasync, +- .sendpage = sock_sendpage, +- .splice_write = generic_splice_sendpage, +-}; +- +-/* +- * The protocol list. 
Each protocol is registered in here. +- */ +- +-static DEFINE_SPINLOCK(net_family_lock); +-static const struct net_proto_family *net_families[NPROTO] __read_mostly; +- +-/* +- * Statistics counters of the socket lists +- */ +- +-static DEFINE_PER_CPU(int, sockets_in_use) = 0; +- +-/* +- * Support routines. +- * Move socket addresses back and forth across the kernel/user +- * divide and look after the messy bits. +- */ +- +-#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - +- 16 for IP, 16 for IPX, +- 24 for IPv6, +- about 80 for AX.25 +- must be at least one bigger than +- the AF_UNIX size (see net/unix/af_unix.c +- :unix_mkname()). +- */ +- +-/** +- * move_addr_to_kernel - copy a socket address into kernel space +- * @uaddr: Address in user space +- * @kaddr: Address in kernel space +- * @ulen: Length in user space +- * +- * The address is copied into kernel space. If the provided address is +- * too long an error code of -EINVAL is returned. If the copy gives +- * invalid addresses -EFAULT is returned. On a success 0 is returned. +- */ +- +-int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) +-{ +- if (ulen < 0 || ulen > MAX_SOCK_ADDR) +- return -EINVAL; +- if (ulen == 0) +- return 0; +- if (copy_from_user(kaddr, uaddr, ulen)) +- return -EFAULT; +- return audit_sockaddr(ulen, kaddr); +-} +- +-/** +- * move_addr_to_user - copy an address to user space +- * @kaddr: kernel space address +- * @klen: length of address in kernel +- * @uaddr: user space address +- * @ulen: pointer to user length field +- * +- * The value pointed to by ulen on entry is the buffer length available. +- * This is overwritten with the buffer space used. -EINVAL is returned +- * if an overlong buffer is specified or a negative buffer size. -EFAULT +- * is returned if either the buffer or the length field are not +- * accessible. +- * After copying the data up to the limit the user specifies, the true +- * length of the data is written over the length limit the user +- * specified. Zero is returned for a success. +- */ +- +-int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, +- int __user *ulen) +-{ +- int err; +- int len; +- +- err = get_user(len, ulen); +- if (err) +- return err; +- if (len > klen) +- len = klen; +- if (len < 0 || len > MAX_SOCK_ADDR) +- return -EINVAL; +- if (len) { +- if (audit_sockaddr(klen, kaddr)) +- return -ENOMEM; +- if (copy_to_user(uaddr, kaddr, len)) +- return -EFAULT; +- } +- /* +- * "fromlen shall refer to the value before truncation.." 
+- * 1003.1g +- */ +- return __put_user(klen, ulen); +-} +- +-#define SOCKFS_MAGIC 0x534F434B +- +-static struct kmem_cache *sock_inode_cachep __read_mostly; +- +-static struct inode *sock_alloc_inode(struct super_block *sb) +-{ +- struct socket_alloc *ei; +- +- ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); +- if (!ei) +- return NULL; +- init_waitqueue_head(&ei->socket.wait); +- +- ei->socket.fasync_list = NULL; +- ei->socket.state = SS_UNCONNECTED; +- ei->socket.flags = 0; +- ei->socket.ops = NULL; +- ei->socket.sk = NULL; +- ei->socket.file = NULL; +- +- return &ei->vfs_inode; +-} +- +-static void sock_destroy_inode(struct inode *inode) +-{ +- kmem_cache_free(sock_inode_cachep, +- container_of(inode, struct socket_alloc, vfs_inode)); +-} +- +-static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) +-{ +- struct socket_alloc *ei = (struct socket_alloc *)foo; +- +- inode_init_once(&ei->vfs_inode); +-} +- +-static int init_inodecache(void) +-{ +- sock_inode_cachep = kmem_cache_create("sock_inode_cache", +- sizeof(struct socket_alloc), +- 0, +- (SLAB_HWCACHE_ALIGN | +- SLAB_RECLAIM_ACCOUNT | +- SLAB_MEM_SPREAD), +- init_once, +- NULL); +- if (sock_inode_cachep == NULL) +- return -ENOMEM; +- return 0; +-} +- +-static struct super_operations sockfs_ops = { +- .alloc_inode = sock_alloc_inode, +- .destroy_inode =sock_destroy_inode, +- .statfs = simple_statfs, +-}; +- +-static int sockfs_get_sb(struct file_system_type *fs_type, +- int flags, const char *dev_name, void *data, +- struct vfsmount *mnt) +-{ +- return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, +- mnt); +-} +- +-static struct vfsmount *sock_mnt __read_mostly; +- +-static struct file_system_type sock_fs_type = { +- .name = "sockfs", +- .get_sb = sockfs_get_sb, +- .kill_sb = kill_anon_super, +-}; +- +-static int sockfs_delete_dentry(struct dentry *dentry) +-{ +- /* +- * At creation time, we pretended this dentry was hashed +- * (by clearing DCACHE_UNHASHED bit in d_flags) +- * At delete time, we restore the truth : not hashed. +- * (so that dput() can proceed correctly) +- */ +- dentry->d_flags |= DCACHE_UNHASHED; +- return 0; +-} +- +-/* +- * sockfs_dname() is called from d_path(). +- */ +-static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) +-{ +- return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]", +- dentry->d_inode->i_ino); +-} +- +-static struct dentry_operations sockfs_dentry_operations = { +- .d_delete = sockfs_delete_dentry, +- .d_dname = sockfs_dname, +-}; +- +-/* +- * Obtains the first available file descriptor and sets it up for use. +- * +- * These functions create file structures and maps them to fd space +- * of the current process. On success it returns file descriptor +- * and file struct implicitly stored in sock->file. +- * Note that another thread may close file descriptor before we return +- * from this function. We use the fact that now we do not refer +- * to socket after mapping. If one day we will need it, this +- * function will increment ref. count on file by 1. +- * +- * In any case returned fd MAY BE not valid! +- * This race condition is unavoidable +- * with shared fd spaces, we cannot solve it inside kernel, +- * but we take care of internal coherence yet. 
+- */ +- +-static int sock_alloc_fd(struct file **filep) +-{ +- int fd; +- +- fd = get_unused_fd(); +- if (likely(fd >= 0)) { +- struct file *file = get_empty_filp(); +- +- *filep = file; +- if (unlikely(!file)) { +- put_unused_fd(fd); +- return -ENFILE; +- } +- } else +- *filep = NULL; +- return fd; +-} +- +-static int sock_attach_fd(struct socket *sock, struct file *file) +-{ +- struct qstr name = { .name = "" }; +- +- file->f_path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); +- if (unlikely(!file->f_path.dentry)) +- return -ENOMEM; +- +- file->f_path.dentry->d_op = &sockfs_dentry_operations; +- /* +- * We dont want to push this dentry into global dentry hash table. +- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED +- * This permits a working /proc/$pid/fd/XXX on sockets +- */ +- file->f_path.dentry->d_flags &= ~DCACHE_UNHASHED; +- d_instantiate(file->f_path.dentry, SOCK_INODE(sock)); +- file->f_path.mnt = mntget(sock_mnt); +- file->f_mapping = file->f_path.dentry->d_inode->i_mapping; +- +- sock->file = file; +- file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops; +- file->f_mode = FMODE_READ | FMODE_WRITE; +- file->f_flags = O_RDWR; +- file->f_pos = 0; +- file->private_data = sock; +- +- return 0; +-} +- +-int sock_map_fd(struct socket *sock) +-{ +- struct file *newfile; +- int fd = sock_alloc_fd(&newfile); +- +- if (likely(fd >= 0)) { +- int err = sock_attach_fd(sock, newfile); +- +- if (unlikely(err < 0)) { +- put_filp(newfile); +- put_unused_fd(fd); +- return err; +- } +- fd_install(fd, newfile); +- } +- return fd; +-} +- +-static struct socket *sock_from_file(struct file *file, int *err) +-{ +- if (file->f_op == &socket_file_ops) +- return file->private_data; /* set in sock_map_fd */ +- +- *err = -ENOTSOCK; +- return NULL; +-} +- +-/** +- * sockfd_lookup - Go from a file number to its socket slot +- * @fd: file handle +- * @err: pointer to an error code return +- * +- * The file handle passed in is locked and the socket it is bound +- * too is returned. If an error occurs the err pointer is overwritten +- * with a negative errno code and NULL is returned. The function checks +- * for both invalid handles and passing a handle which is not a socket. +- * +- * On a success the socket object pointer is returned. +- */ +- +-struct socket *sockfd_lookup(int fd, int *err) +-{ +- struct file *file; +- struct socket *sock; +- +- file = fget(fd); +- if (!file) { +- *err = -EBADF; +- return NULL; +- } +- +- sock = sock_from_file(file, err); +- if (!sock) +- fput(file); +- return sock; +-} +- +-static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) +-{ +- struct file *file; +- struct socket *sock; +- +- *err = -EBADF; +- file = fget_light(fd, fput_needed); +- if (file) { +- sock = sock_from_file(file, err); +- if (sock) +- return sock; +- fput_light(file, *fput_needed); +- } +- return NULL; +-} +- +-/** +- * sock_alloc - allocate a socket +- * +- * Allocate a new inode and socket object. The two are bound together +- * and initialised. The socket is then returned. If we are out of inodes +- * NULL is returned. 
+- */ +- +-static struct socket *sock_alloc(void) +-{ +- struct inode *inode; +- struct socket *sock; +- +- inode = new_inode(sock_mnt->mnt_sb); +- if (!inode) +- return NULL; +- +- sock = SOCKET_I(inode); +- +- inode->i_mode = S_IFSOCK | S_IRWXUGO; +- inode->i_uid = current->fsuid; +- inode->i_gid = current->fsgid; +- +- get_cpu_var(sockets_in_use)++; +- put_cpu_var(sockets_in_use); +- return sock; +-} +- +-/* +- * In theory you can't get an open on this inode, but /proc provides +- * a back door. Remember to keep it shut otherwise you'll let the +- * creepy crawlies in. +- */ +- +-static int sock_no_open(struct inode *irrelevant, struct file *dontcare) +-{ +- return -ENXIO; +-} +- +-const struct file_operations bad_sock_fops = { +- .owner = THIS_MODULE, +- .open = sock_no_open, +-}; +- +-/** +- * sock_release - close a socket +- * @sock: socket to close +- * +- * The socket is released from the protocol stack if it has a release +- * callback, and the inode is then released if the socket is bound to +- * an inode not a file. +- */ +- +-void sock_release(struct socket *sock) +-{ +- if (sock->ops) { +- struct module *owner = sock->ops->owner; +- +- sock->ops->release(sock); +- sock->ops = NULL; +- module_put(owner); +- } +- +- if (sock->fasync_list) +- printk(KERN_ERR "sock_release: fasync list not empty!\n"); +- +- get_cpu_var(sockets_in_use)--; +- put_cpu_var(sockets_in_use); +- if (!sock->file) { +- iput(SOCK_INODE(sock)); +- return; +- } +- sock->file = NULL; +-} +- +-static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +- struct msghdr *msg, size_t size) +-{ +- struct sock_iocb *si = kiocb_to_siocb(iocb); +- int err; +- +- si->sock = sock; +- si->scm = NULL; +- si->msg = msg; +- si->size = size; +- +- err = security_socket_sendmsg(sock, msg, size); +- if (err) +- return err; +- +- return sock->ops->sendmsg(iocb, sock, msg, size); +-} +- +-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) +-{ +- struct kiocb iocb; +- struct sock_iocb siocb; +- int ret; +- +- init_sync_kiocb(&iocb, NULL); +- iocb.private = &siocb; +- ret = __sock_sendmsg(&iocb, sock, msg, size); +- if (-EIOCBQUEUED == ret) +- ret = wait_on_sync_kiocb(&iocb); +- return ret; +-} +- +-int kernel_sendmsg(struct socket *sock, struct msghdr *msg, +- struct kvec *vec, size_t num, size_t size) +-{ +- mm_segment_t oldfs = get_fs(); +- int result; +- +- set_fs(KERNEL_DS); +- /* +- * the following is safe, since for compiler definitions of kvec and +- * iovec are identical, yielding the same in-core layout and alignment +- */ +- msg->msg_iov = (struct iovec *)vec; +- msg->msg_iovlen = num; +- result = sock_sendmsg(sock, msg, size); +- set_fs(oldfs); +- return result; +-} +- +-/* +- * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) +- */ +-void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, +- struct sk_buff *skb) +-{ +- ktime_t kt = skb->tstamp; +- +- if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { +- struct timeval tv; +- /* Race occurred between timestamp enabling and packet +- receiving. Fill in the current time for now. */ +- if (kt.tv64 == 0) +- kt = ktime_get_real(); +- skb->tstamp = kt; +- tv = ktime_to_timeval(kt); +- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); +- } else { +- struct timespec ts; +- /* Race occurred between timestamp enabling and packet +- receiving. Fill in the current time for now. 
*/ +- if (kt.tv64 == 0) +- kt = ktime_get_real(); +- skb->tstamp = kt; +- ts = ktime_to_timespec(kt); +- put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); +- } +-} +- +-EXPORT_SYMBOL_GPL(__sock_recv_timestamp); +- +-static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +- struct msghdr *msg, size_t size, int flags) +-{ +- int err; +- struct sock_iocb *si = kiocb_to_siocb(iocb); +- +- si->sock = sock; +- si->scm = NULL; +- si->msg = msg; +- si->size = size; +- si->flags = flags; +- +- err = security_socket_recvmsg(sock, msg, size, flags); +- if (err) +- return err; +- +- return sock->ops->recvmsg(iocb, sock, msg, size, flags); +-} +- +-int sock_recvmsg(struct socket *sock, struct msghdr *msg, +- size_t size, int flags) +-{ +- struct kiocb iocb; +- struct sock_iocb siocb; +- int ret; +- +- init_sync_kiocb(&iocb, NULL); +- iocb.private = &siocb; +- ret = __sock_recvmsg(&iocb, sock, msg, size, flags); +- if (-EIOCBQUEUED == ret) +- ret = wait_on_sync_kiocb(&iocb); +- return ret; +-} +- +-int kernel_recvmsg(struct socket *sock, struct msghdr *msg, +- struct kvec *vec, size_t num, size_t size, int flags) +-{ +- mm_segment_t oldfs = get_fs(); +- int result; +- +- set_fs(KERNEL_DS); +- /* +- * the following is safe, since for compiler definitions of kvec and +- * iovec are identical, yielding the same in-core layout and alignment +- */ +- msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; +- result = sock_recvmsg(sock, msg, size, flags); +- set_fs(oldfs); +- return result; +-} +- +-static void sock_aio_dtor(struct kiocb *iocb) +-{ +- kfree(iocb->private); +-} +- +-static ssize_t sock_sendpage(struct file *file, struct page *page, +- int offset, size_t size, loff_t *ppos, int more) +-{ +- struct socket *sock; +- int flags; +- +- sock = file->private_data; +- +- flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; +- if (more) +- flags |= MSG_MORE; +- +- return sock->ops->sendpage(sock, page, offset, size, flags); +-} +- +-static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, +- struct sock_iocb *siocb) +-{ +- if (!is_sync_kiocb(iocb)) { +- siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); +- if (!siocb) +- return NULL; +- iocb->ki_dtor = sock_aio_dtor; +- } +- +- siocb->kiocb = iocb; +- iocb->private = siocb; +- return siocb; +-} +- +-static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, +- struct file *file, const struct iovec *iov, +- unsigned long nr_segs) +-{ +- struct socket *sock = file->private_data; +- size_t size = 0; +- int i; +- +- for (i = 0; i < nr_segs; i++) +- size += iov[i].iov_len; +- +- msg->msg_name = NULL; +- msg->msg_namelen = 0; +- msg->msg_control = NULL; +- msg->msg_controllen = 0; +- msg->msg_iov = (struct iovec *)iov; +- msg->msg_iovlen = nr_segs; +- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; +- +- return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags); +-} +- +-static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, +- unsigned long nr_segs, loff_t pos) +-{ +- struct sock_iocb siocb, *x; +- +- if (pos != 0) +- return -ESPIPE; +- +- if (iocb->ki_left == 0) /* Match SYS5 behaviour */ +- return 0; +- +- +- x = alloc_sock_iocb(iocb, &siocb); +- if (!x) +- return -ENOMEM; +- return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); +-} +- +-static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, +- struct file *file, const struct iovec *iov, +- unsigned long nr_segs) +-{ +- struct socket *sock = file->private_data; +- size_t size = 0; +- int i; +- +- for (i = 0; i < nr_segs; i++) +- size += iov[i].iov_len; +- +- msg->msg_name = NULL; +- msg->msg_namelen = 0; +- msg->msg_control = NULL; +- msg->msg_controllen = 0; +- msg->msg_iov = (struct iovec *)iov; +- msg->msg_iovlen = nr_segs; +- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; +- if (sock->type == SOCK_SEQPACKET) +- msg->msg_flags |= MSG_EOR; +- +- return __sock_sendmsg(iocb, sock, msg, size); +-} +- +-static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, +- unsigned long nr_segs, loff_t pos) +-{ +- struct sock_iocb siocb, *x; +- +- if (pos != 0) +- return -ESPIPE; +- +- x = alloc_sock_iocb(iocb, &siocb); +- if (!x) +- return -ENOMEM; +- +- return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); +-} +- +-/* +- * Atomic setting of ioctl hooks to avoid race +- * with module unload. +- */ +- +-static DEFINE_MUTEX(br_ioctl_mutex); +-static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +- +-void brioctl_set(int (*hook) (unsigned int, void __user *)) +-{ +- mutex_lock(&br_ioctl_mutex); +- br_ioctl_hook = hook; +- mutex_unlock(&br_ioctl_mutex); +-} +- +-EXPORT_SYMBOL(brioctl_set); +- +-static DEFINE_MUTEX(vlan_ioctl_mutex); +-static int (*vlan_ioctl_hook) (void __user *arg); +- +-void vlan_ioctl_set(int (*hook) (void __user *)) +-{ +- mutex_lock(&vlan_ioctl_mutex); +- vlan_ioctl_hook = hook; +- mutex_unlock(&vlan_ioctl_mutex); +-} +- +-EXPORT_SYMBOL(vlan_ioctl_set); +- +-static DEFINE_MUTEX(dlci_ioctl_mutex); +-static int (*dlci_ioctl_hook) (unsigned int, void __user *); +- +-void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) +-{ +- mutex_lock(&dlci_ioctl_mutex); +- dlci_ioctl_hook = hook; +- mutex_unlock(&dlci_ioctl_mutex); +-} +- +-EXPORT_SYMBOL(dlci_ioctl_set); +- +-/* +- * With an ioctl, arg may well be a user mode pointer, but we don't know +- * what to do with it - that's up to the protocol still. 
+- */ +- +-static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) +-{ +- struct socket *sock; +- void __user *argp = (void __user *)arg; +- int pid, err; +- +- sock = file->private_data; +- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { +- err = dev_ioctl(cmd, argp); +- } else +-#ifdef CONFIG_WIRELESS_EXT +- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { +- err = dev_ioctl(cmd, argp); +- } else +-#endif /* CONFIG_WIRELESS_EXT */ +- switch (cmd) { +- case FIOSETOWN: +- case SIOCSPGRP: +- err = -EFAULT; +- if (get_user(pid, (int __user *)argp)) +- break; +- err = f_setown(sock->file, pid, 1); +- break; +- case FIOGETOWN: +- case SIOCGPGRP: +- err = put_user(f_getown(sock->file), +- (int __user *)argp); +- break; +- case SIOCGIFBR: +- case SIOCSIFBR: +- case SIOCBRADDBR: +- case SIOCBRDELBR: +- err = -ENOPKG; +- if (!br_ioctl_hook) +- request_module("bridge"); +- +- mutex_lock(&br_ioctl_mutex); +- if (br_ioctl_hook) +- err = br_ioctl_hook(cmd, argp); +- mutex_unlock(&br_ioctl_mutex); +- break; +- case SIOCGIFVLAN: +- case SIOCSIFVLAN: +- err = -ENOPKG; +- if (!vlan_ioctl_hook) +- request_module("8021q"); +- +- mutex_lock(&vlan_ioctl_mutex); +- if (vlan_ioctl_hook) +- err = vlan_ioctl_hook(argp); +- mutex_unlock(&vlan_ioctl_mutex); +- break; +- case SIOCADDDLCI: +- case SIOCDELDLCI: +- err = -ENOPKG; +- if (!dlci_ioctl_hook) +- request_module("dlci"); +- +- if (dlci_ioctl_hook) { +- mutex_lock(&dlci_ioctl_mutex); +- err = dlci_ioctl_hook(cmd, argp); +- mutex_unlock(&dlci_ioctl_mutex); +- } +- break; +- default: +- err = sock->ops->ioctl(sock, cmd, arg); +- +- /* +- * If this ioctl is unknown try to hand it down +- * to the NIC driver. +- */ +- if (err == -ENOIOCTLCMD) +- err = dev_ioctl(cmd, argp); +- break; +- } +- return err; +-} +- +-int sock_create_lite(int family, int type, int protocol, struct socket **res) +-{ +- int err; +- struct socket *sock = NULL; +- +- err = security_socket_create(family, type, protocol, 1); +- if (err) +- goto out; +- +- sock = sock_alloc(); +- if (!sock) { +- err = -ENOMEM; +- goto out; +- } +- +- sock->type = type; +- err = security_socket_post_create(sock, family, type, protocol, 1); +- if (err) +- goto out_release; +- +-out: +- *res = sock; +- return err; +-out_release: +- sock_release(sock); +- sock = NULL; +- goto out; +-} +- +-/* No kernel lock held - perfect */ +-static unsigned int sock_poll(struct file *file, poll_table *wait) +-{ +- struct socket *sock; +- +- /* +- * We can't return errors to poll, so it's either yes or no. +- */ +- sock = file->private_data; +- return sock->ops->poll(file, sock, wait); +-} +- +-static int sock_mmap(struct file *file, struct vm_area_struct *vma) +-{ +- struct socket *sock = file->private_data; +- +- return sock->ops->mmap(file, sock, vma); +-} +- +-static int sock_close(struct inode *inode, struct file *filp) +-{ +- /* +- * It was possible the inode is NULL we were +- * closing an unfinished socket. +- */ +- +- if (!inode) { +- printk(KERN_DEBUG "sock_close: NULL inode\n"); +- return 0; +- } +- sock_fasync(-1, filp, 0); +- sock_release(SOCKET_I(inode)); +- return 0; +-} +- +-/* +- * Update the socket async list +- * +- * Fasync_list locking strategy. +- * +- * 1. fasync_list is modified only under process context socket lock +- * i.e. under semaphore. +- * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) +- * or under socket lock. +- * 3. 
fasync_list can be used from softirq context, so that +- * modification under socket lock have to be enhanced with +- * write_lock_bh(&sk->sk_callback_lock). +- * --ANK (990710) +- */ +- +-static int sock_fasync(int fd, struct file *filp, int on) +-{ +- struct fasync_struct *fa, *fna = NULL, **prev; +- struct socket *sock; +- struct sock *sk; +- +- if (on) { +- fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); +- if (fna == NULL) +- return -ENOMEM; +- } +- +- sock = filp->private_data; +- +- sk = sock->sk; +- if (sk == NULL) { +- kfree(fna); +- return -EINVAL; +- } +- +- lock_sock(sk); +- +- prev = &(sock->fasync_list); +- +- for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) +- if (fa->fa_file == filp) +- break; +- +- if (on) { +- if (fa != NULL) { +- write_lock_bh(&sk->sk_callback_lock); +- fa->fa_fd = fd; +- write_unlock_bh(&sk->sk_callback_lock); +- +- kfree(fna); +- goto out; +- } +- fna->fa_file = filp; +- fna->fa_fd = fd; +- fna->magic = FASYNC_MAGIC; +- fna->fa_next = sock->fasync_list; +- write_lock_bh(&sk->sk_callback_lock); +- sock->fasync_list = fna; +- write_unlock_bh(&sk->sk_callback_lock); +- } else { +- if (fa != NULL) { +- write_lock_bh(&sk->sk_callback_lock); +- *prev = fa->fa_next; +- write_unlock_bh(&sk->sk_callback_lock); +- kfree(fa); +- } +- } +- +-out: +- release_sock(sock->sk); +- return 0; +-} +- +-/* This function may be called only under socket lock or callback_lock */ +- +-int sock_wake_async(struct socket *sock, int how, int band) +-{ +- if (!sock || !sock->fasync_list) +- return -1; +- switch (how) { +- case 1: +- +- if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) +- break; +- goto call_kill; +- case 2: +- if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) +- break; +- /* fall through */ +- case 0: +-call_kill: +- __kill_fasync(sock->fasync_list, SIGIO, band); +- break; +- case 3: +- __kill_fasync(sock->fasync_list, SIGURG, band); +- } +- return 0; +-} +- +-static int __sock_create(int family, int type, int protocol, +- struct socket **res, int kern) +-{ +- int err; +- struct socket *sock; +- const struct net_proto_family *pf; +- +- /* +- * Check protocol is in range +- */ +- if (family < 0 || family >= NPROTO) +- return -EAFNOSUPPORT; +- if (type < 0 || type >= SOCK_MAX) +- return -EINVAL; +- +- /* Compatibility. +- +- This uglymoron is moved from INET layer to here to avoid +- deadlock in module load. +- */ +- if (family == PF_INET && type == SOCK_PACKET) { +- static int warned; +- if (!warned) { +- warned = 1; +- printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", +- current->comm); +- } +- family = PF_PACKET; +- } +- +- err = security_socket_create(family, type, protocol, kern); +- if (err) +- return err; +- +- /* +- * Allocate the socket and allow the family to set things up. if +- * the protocol is 0, the family is instructed to select an appropriate +- * default. +- */ +- sock = sock_alloc(); +- if (!sock) { +- if (net_ratelimit()) +- printk(KERN_WARNING "socket: no more sockets\n"); +- return -ENFILE; /* Not exactly a match, but its the +- closest posix thing */ +- } +- +- sock->type = type; +- +-#if defined(CONFIG_KMOD) +- /* Attempt to load a protocol module if the find failed. +- * +- * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user +- * requested real, full-featured networking support upon configuration. +- * Otherwise module support will break! 
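
[Illustrative aside, not part of the patch.] The comment above is about module autoloading: when socket(2) names a family that is not yet registered, request_module("net-pf-%d") (issued just below) asks modprobe for it, and a protocol module makes itself findable by declaring the matching alias. A hedged 2.6.22-style sketch of the registering side; the PF value, the foo_* names and the trivial ->create are made up for illustration:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/net.h>

    static int foo_create(struct socket *sock, int protocol)
    {
            return -EAFNOSUPPORT;   /* a real family would install sock->ops here */
    }

    static struct net_proto_family foo_family = {
            .family = 27,           /* pretend PF_FOO == 27; real code uses its PF_ constant */
            .create = foo_create,
            .owner  = THIS_MODULE,
    };

    static int __init foo_init(void)
    {
            return sock_register(&foo_family);
    }

    static void __exit foo_exit(void)
    {
            sock_unregister(foo_family.family);
    }

    module_init(foo_init);
    module_exit(foo_exit);
    MODULE_LICENSE("GPL");
    MODULE_ALIAS_NETPROTO(27);      /* lets modprobe resolve "net-pf-27" to this module */
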
+- */ +- if (net_families[family] == NULL) +- request_module("net-pf-%d", family); +-#endif +- +- rcu_read_lock(); +- pf = rcu_dereference(net_families[family]); +- err = -EAFNOSUPPORT; +- if (!pf) +- goto out_release; +- +- /* +- * We will call the ->create function, that possibly is in a loadable +- * module, so we have to bump that loadable module refcnt first. +- */ +- if (!try_module_get(pf->owner)) +- goto out_release; +- +- /* Now protected by module ref count */ +- rcu_read_unlock(); +- +- err = pf->create(sock, protocol); +- if (err < 0) +- goto out_module_put; +- +- /* +- * Now to bump the refcnt of the [loadable] module that owns this +- * socket at sock_release time we decrement its refcnt. +- */ +- if (!try_module_get(sock->ops->owner)) +- goto out_module_busy; +- +- /* +- * Now that we're done with the ->create function, the [loadable] +- * module can have its refcnt decremented +- */ +- module_put(pf->owner); +- err = security_socket_post_create(sock, family, type, protocol, kern); +- if (err) +- goto out_sock_release; +- *res = sock; +- +- return 0; +- +-out_module_busy: +- err = -EAFNOSUPPORT; +-out_module_put: +- sock->ops = NULL; +- module_put(pf->owner); +-out_sock_release: +- sock_release(sock); +- return err; +- +-out_release: +- rcu_read_unlock(); +- goto out_sock_release; +-} +- +-int sock_create(int family, int type, int protocol, struct socket **res) +-{ +- return __sock_create(family, type, protocol, res, 0); +-} +- +-int sock_create_kern(int family, int type, int protocol, struct socket **res) +-{ +- return __sock_create(family, type, protocol, res, 1); +-} +- +-asmlinkage long sys_socket(int family, int type, int protocol) +-{ +- int retval; +- struct socket *sock; +- +- retval = sock_create(family, type, protocol, &sock); +- if (retval < 0) +- goto out; +- +- retval = sock_map_fd(sock); +- if (retval < 0) +- goto out_release; +- +-out: +- /* It may be already another descriptor 8) Not kernel problem. */ +- return retval; +- +-out_release: +- sock_release(sock); +- return retval; +-} +- +-/* +- * Create a pair of connected sockets. +- */ +- +-asmlinkage long sys_socketpair(int family, int type, int protocol, +- int __user *usockvec) +-{ +- struct socket *sock1, *sock2; +- int fd1, fd2, err; +- struct file *newfile1, *newfile2; +- +- /* +- * Obtain the first socket and check if the underlying protocol +- * supports the socketpair call. +- */ +- +- err = sock_create(family, type, protocol, &sock1); +- if (err < 0) +- goto out; +- +- err = sock_create(family, type, protocol, &sock2); +- if (err < 0) +- goto out_release_1; +- +- err = sock1->ops->socketpair(sock1, sock2); +- if (err < 0) +- goto out_release_both; +- +- fd1 = sock_alloc_fd(&newfile1); +- if (unlikely(fd1 < 0)) { +- err = fd1; +- goto out_release_both; +- } +- +- fd2 = sock_alloc_fd(&newfile2); +- if (unlikely(fd2 < 0)) { +- err = fd2; +- put_filp(newfile1); +- put_unused_fd(fd1); +- goto out_release_both; +- } +- +- err = sock_attach_fd(sock1, newfile1); +- if (unlikely(err < 0)) { +- goto out_fd2; +- } +- +- err = sock_attach_fd(sock2, newfile2); +- if (unlikely(err < 0)) { +- fput(newfile1); +- goto out_fd1; +- } +- +- err = audit_fd_pair(fd1, fd2); +- if (err < 0) { +- fput(newfile1); +- fput(newfile2); +- goto out_fd; +- } +- +- fd_install(fd1, newfile1); +- fd_install(fd2, newfile2); +- /* fd1 and fd2 may be already another descriptors. +- * Not kernel problem. 
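
[Illustrative aside, not part of the patch.] The module reference counting in __sock_create() above is subtle enough to be worth restating. Nothing new here, just the same calls annotated in order:

    /*
     * 1. try_module_get(pf->owner)        pin the family module so it is
     *                                     safe to drop rcu_read_lock()
     * 2. pf->create(sock, protocol)       the family fills in sock->ops
     * 3. try_module_get(sock->ops->owner) pin the ops owner; this is the
     *                                     reference sock_release() drops
     * 4. module_put(pf->owner)            the family reference is no longer
     *                                     needed: the ops reference now
     *                                     keeps the code resident
     */
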
+- */ +- +- err = put_user(fd1, &usockvec[0]); +- if (!err) +- err = put_user(fd2, &usockvec[1]); +- if (!err) +- return 0; +- +- sys_close(fd2); +- sys_close(fd1); +- return err; +- +-out_release_both: +- sock_release(sock2); +-out_release_1: +- sock_release(sock1); +-out: +- return err; +- +-out_fd2: +- put_filp(newfile1); +- sock_release(sock1); +-out_fd1: +- put_filp(newfile2); +- sock_release(sock2); +-out_fd: +- put_unused_fd(fd1); +- put_unused_fd(fd2); +- goto out; +-} +- +-/* +- * Bind a name to a socket. Nothing much to do here since it's +- * the protocol's responsibility to handle the local address. +- * +- * We move the socket address to kernel space before we call +- * the protocol layer (having also checked the address is ok). +- */ +- +-asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) +-{ +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- int err, fput_needed; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock) { +- err = move_addr_to_kernel(umyaddr, addrlen, address); +- if (err >= 0) { +- err = security_socket_bind(sock, +- (struct sockaddr *)address, +- addrlen); +- if (!err) +- err = sock->ops->bind(sock, +- (struct sockaddr *) +- address, addrlen); +- } +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* +- * Perform a listen. Basically, we allow the protocol to do anything +- * necessary for a listen, and if that works, we mark the socket as +- * ready for listening. +- */ +- +-int sysctl_somaxconn __read_mostly = SOMAXCONN; +- +-asmlinkage long sys_listen(int fd, int backlog) +-{ +- struct socket *sock; +- int err, fput_needed; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock) { +- if ((unsigned)backlog > sysctl_somaxconn) +- backlog = sysctl_somaxconn; +- +- err = security_socket_listen(sock, backlog); +- if (!err) +- err = sock->ops->listen(sock, backlog); +- +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* +- * For accept, we attempt to create a new socket, set up the link +- * with the client, wake up the client, then return the new +- * connected fd. We collect the address of the connector in kernel +- * space and move it to user at the very end. This is unclean because +- * we open the socket then return an error. +- * +- * 1003.1g adds the ability to recvmsg() to query connection pending +- * status to recvmsg. We need to add that support in a way thats +- * clean when we restucture accept also. +- */ +- +-asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, +- int __user *upeer_addrlen) +-{ +- struct socket *sock, *newsock; +- struct file *newfile; +- int err, len, newfd, fput_needed; +- char address[MAX_SOCK_ADDR]; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (!sock) +- goto out; +- +- err = -ENFILE; +- if (!(newsock = sock_alloc())) +- goto out_put; +- +- newsock->type = sock->type; +- newsock->ops = sock->ops; +- +- /* +- * We don't need try_module_get here, as the listening socket (sock) +- * has the protocol module (sock->ops->owner) held. 
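
[Illustrative aside, not part of the patch.] Note the silent clamp in sys_listen() above: a backlog larger than sysctl_somaxconn (net.core.somaxconn, SOMAXCONN == 128 by default) is reduced without any error returned to the caller. A small userspace demonstration:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            struct sockaddr_in sa;
            int fd = socket(AF_INET, SOCK_STREAM, 0);

            if (fd < 0)
                    return 1;
            memset(&sa, 0, sizeof(sa));
            sa.sin_family = AF_INET;
            sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
            sa.sin_port = 0;                        /* let the kernel pick a port */
            if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0)
                    perror("bind");
            /* Succeeds, but the effective backlog is min(4096, somaxconn). */
            if (listen(fd, 4096) < 0)
                    perror("listen");
            close(fd);
            return 0;
    }
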
+- */ +- __module_get(newsock->ops->owner); +- +- newfd = sock_alloc_fd(&newfile); +- if (unlikely(newfd < 0)) { +- err = newfd; +- sock_release(newsock); +- goto out_put; +- } +- +- err = sock_attach_fd(newsock, newfile); +- if (err < 0) +- goto out_fd_simple; +- +- err = security_socket_accept(sock, newsock); +- if (err) +- goto out_fd; +- +- err = sock->ops->accept(sock, newsock, sock->file->f_flags); +- if (err < 0) +- goto out_fd; +- +- if (upeer_sockaddr) { +- if (newsock->ops->getname(newsock, (struct sockaddr *)address, +- &len, 2) < 0) { +- err = -ECONNABORTED; +- goto out_fd; +- } +- err = move_addr_to_user(address, len, upeer_sockaddr, +- upeer_addrlen); +- if (err < 0) +- goto out_fd; +- } +- +- /* File flags are not inherited via accept() unlike another OSes. */ +- +- fd_install(newfd, newfile); +- err = newfd; +- +- security_socket_post_accept(sock, newsock); +- +-out_put: +- fput_light(sock->file, fput_needed); +-out: +- return err; +-out_fd_simple: +- sock_release(newsock); +- put_filp(newfile); +- put_unused_fd(newfd); +- goto out_put; +-out_fd: +- fput(newfile); +- put_unused_fd(newfd); +- goto out_put; +-} +- +-/* +- * Attempt to connect to a socket with the server address. The address +- * is in user space so we verify it is OK and move it to kernel space. +- * +- * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to +- * break bindings +- * +- * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and +- * other SEQPACKET protocols that take time to connect() as it doesn't +- * include the -EINPROGRESS status for such sockets. +- */ +- +-asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, +- int addrlen) +-{ +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- int err, fput_needed; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (!sock) +- goto out; +- err = move_addr_to_kernel(uservaddr, addrlen, address); +- if (err < 0) +- goto out_put; +- +- err = +- security_socket_connect(sock, (struct sockaddr *)address, addrlen); +- if (err) +- goto out_put; +- +- err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, +- sock->file->f_flags); +-out_put: +- fput_light(sock->file, fput_needed); +-out: +- return err; +-} +- +-/* +- * Get the local address ('name') of a socket object. Move the obtained +- * name to user space. +- */ +- +-asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, +- int __user *usockaddr_len) +-{ +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- int len, err, fput_needed; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (!sock) +- goto out; +- +- err = security_socket_getsockname(sock); +- if (err) +- goto out_put; +- +- err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0); +- if (err) +- goto out_put; +- err = move_addr_to_user(address, len, usockaddr, usockaddr_len); +- +-out_put: +- fput_light(sock->file, fput_needed); +-out: +- return err; +-} +- +-/* +- * Get the remote address ('name') of a socket object. Move the obtained +- * name to user space. 
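
[Illustrative aside, not part of the patch.] The NOTE in sys_connect() above about -EINPROGRESS is the contract that nonblocking callers build on: connect() returns immediately, completion is signalled by writability, and the final status is read back via SO_ERROR. The canonical userspace pattern, sketched with error handling trimmed and the revents check elided for brevity:

    #include <errno.h>
    #include <fcntl.h>
    #include <poll.h>
    #include <sys/socket.h>

    /* Returns 0 once connected, -1 on failure/timeout; 'sa' is assumed filled in. */
    int connect_nonblock(int fd, const struct sockaddr *sa, socklen_t len)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLOUT };
            int err;
            socklen_t elen = sizeof(err);

            fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | O_NONBLOCK);
            if (connect(fd, sa, len) == 0)
                    return 0;               /* connected at once (e.g. loopback) */
            if (errno != EINPROGRESS)
                    return -1;              /* immediate, real failure */
            if (poll(&pfd, 1, 5000) <= 0)
                    return -1;              /* timeout or poll error */
            /* Writable now; fetch the deferred connect(2) status. */
            if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &elen) < 0 || err)
                    return -1;
            return 0;
    }
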
+- */ +- +-asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, +- int __user *usockaddr_len) +-{ +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- int len, err, fput_needed; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock != NULL) { +- err = security_socket_getpeername(sock); +- if (err) { +- fput_light(sock->file, fput_needed); +- return err; +- } +- +- err = +- sock->ops->getname(sock, (struct sockaddr *)address, &len, +- 1); +- if (!err) +- err = move_addr_to_user(address, len, usockaddr, +- usockaddr_len); +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* +- * Send a datagram to a given address. We move the address into kernel +- * space and check the user space data area is readable before invoking +- * the protocol. +- */ +- +-asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, +- unsigned flags, struct sockaddr __user *addr, +- int addr_len) +-{ +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- int err; +- struct msghdr msg; +- struct iovec iov; +- int fput_needed; +- struct file *sock_file; +- +- sock_file = fget_light(fd, &fput_needed); +- err = -EBADF; +- if (!sock_file) +- goto out; +- +- sock = sock_from_file(sock_file, &err); +- if (!sock) +- goto out_put; +- iov.iov_base = buff; +- iov.iov_len = len; +- msg.msg_name = NULL; +- msg.msg_iov = &iov; +- msg.msg_iovlen = 1; +- msg.msg_control = NULL; +- msg.msg_controllen = 0; +- msg.msg_namelen = 0; +- if (addr) { +- err = move_addr_to_kernel(addr, addr_len, address); +- if (err < 0) +- goto out_put; +- msg.msg_name = address; +- msg.msg_namelen = addr_len; +- } +- if (sock->file->f_flags & O_NONBLOCK) +- flags |= MSG_DONTWAIT; +- msg.msg_flags = flags; +- err = sock_sendmsg(sock, &msg, len); +- +-out_put: +- fput_light(sock_file, fput_needed); +-out: +- return err; +-} +- +-/* +- * Send a datagram down a socket. +- */ +- +-asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +-{ +- return sys_sendto(fd, buff, len, flags, NULL, 0); +-} +- +-/* +- * Receive a frame from the socket and optionally record the address of the +- * sender. We verify the buffers are writable and if needed move the +- * sender address from kernel to user space. +- */ +- +-asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, +- unsigned flags, struct sockaddr __user *addr, +- int __user *addr_len) +-{ +- struct socket *sock; +- struct iovec iov; +- struct msghdr msg; +- char address[MAX_SOCK_ADDR]; +- int err, err2; +- struct file *sock_file; +- int fput_needed; +- +- sock_file = fget_light(fd, &fput_needed); +- err = -EBADF; +- if (!sock_file) +- goto out; +- +- sock = sock_from_file(sock_file, &err); +- if (!sock) +- goto out_put; +- +- msg.msg_control = NULL; +- msg.msg_controllen = 0; +- msg.msg_iovlen = 1; +- msg.msg_iov = &iov; +- iov.iov_len = size; +- iov.iov_base = ubuf; +- msg.msg_name = address; +- msg.msg_namelen = MAX_SOCK_ADDR; +- if (sock->file->f_flags & O_NONBLOCK) +- flags |= MSG_DONTWAIT; +- err = sock_recvmsg(sock, &msg, size, flags); +- +- if (err >= 0 && addr != NULL) { +- err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); +- if (err2 < 0) +- err = err2; +- } +-out_put: +- fput_light(sock_file, fput_needed); +-out: +- return err; +-} +- +-/* +- * Receive a datagram from a socket. +- */ +- +-asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, +- unsigned flags) +-{ +- return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); +-} +- +-/* +- * Set a socket option. 
Because we don't know the option lengths we have +- * to pass the user mode parameter for the protocols to sort out. +- */ +- +-asmlinkage long sys_setsockopt(int fd, int level, int optname, +- char __user *optval, int optlen) +-{ +- int err, fput_needed; +- struct socket *sock; +- +- if (optlen < 0) +- return -EINVAL; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock != NULL) { +- err = security_socket_setsockopt(sock, level, optname); +- if (err) +- goto out_put; +- +- if (level == SOL_SOCKET) +- err = +- sock_setsockopt(sock, level, optname, optval, +- optlen); +- else +- err = +- sock->ops->setsockopt(sock, level, optname, optval, +- optlen); +-out_put: +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* +- * Get a socket option. Because we don't know the option lengths we have +- * to pass a user mode parameter for the protocols to sort out. +- */ +- +-asmlinkage long sys_getsockopt(int fd, int level, int optname, +- char __user *optval, int __user *optlen) +-{ +- int err, fput_needed; +- struct socket *sock; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock != NULL) { +- err = security_socket_getsockopt(sock, level, optname); +- if (err) +- goto out_put; +- +- if (level == SOL_SOCKET) +- err = +- sock_getsockopt(sock, level, optname, optval, +- optlen); +- else +- err = +- sock->ops->getsockopt(sock, level, optname, optval, +- optlen); +-out_put: +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* +- * Shutdown a socket. +- */ +- +-asmlinkage long sys_shutdown(int fd, int how) +-{ +- int err, fput_needed; +- struct socket *sock; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (sock != NULL) { +- err = security_socket_shutdown(sock, how); +- if (!err) +- err = sock->ops->shutdown(sock, how); +- fput_light(sock->file, fput_needed); +- } +- return err; +-} +- +-/* A couple of helpful macros for getting the address of the 32/64 bit +- * fields which are the same type (int / unsigned) on our platforms. +- */ +-#define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) +-#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) +-#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) +- +-/* +- * BSD sendmsg interface +- */ +- +-asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +-{ +- struct compat_msghdr __user *msg_compat = +- (struct compat_msghdr __user *)msg; +- struct socket *sock; +- char address[MAX_SOCK_ADDR]; +- struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; +- unsigned char ctl[sizeof(struct cmsghdr) + 20] +- __attribute__ ((aligned(sizeof(__kernel_size_t)))); +- /* 20 is size of ipv6_pktinfo */ +- unsigned char *ctl_buf = ctl; +- struct msghdr msg_sys; +- int err, ctl_len, iov_size, total_len; +- int fput_needed; +- +- err = -EFAULT; +- if (MSG_CMSG_COMPAT & flags) { +- if (get_compat_msghdr(&msg_sys, msg_compat)) +- return -EFAULT; +- } +- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (!sock) +- goto out; +- +- /* do not move before msg_sys is valid */ +- err = -EMSGSIZE; +- if (msg_sys.msg_iovlen > UIO_MAXIOV) +- goto out_put; +- +- /* Check whether to allocate the iovec area */ +- err = -ENOMEM; +- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); +- if (msg_sys.msg_iovlen > UIO_FASTIOV) { +- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); +- if (!iov) +- goto out_put; +- } +- +- /* This will also move the address data into kernel space */ +- if (MSG_CMSG_COMPAT & flags) { +- err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); +- } else +- err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); +- if (err < 0) +- goto out_freeiov; +- total_len = err; +- +- err = -ENOBUFS; +- +- if (msg_sys.msg_controllen > INT_MAX) +- goto out_freeiov; +- ctl_len = msg_sys.msg_controllen; +- if ((MSG_CMSG_COMPAT & flags) && ctl_len) { +- err = +- cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, +- sizeof(ctl)); +- if (err) +- goto out_freeiov; +- ctl_buf = msg_sys.msg_control; +- ctl_len = msg_sys.msg_controllen; +- } else if (ctl_len) { +- if (ctl_len > sizeof(ctl)) { +- ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); +- if (ctl_buf == NULL) +- goto out_freeiov; +- } +- err = -EFAULT; +- /* +- * Careful! Before this, msg_sys.msg_control contains a user pointer. +- * Afterwards, it will be a kernel pointer. Thus the compiler-assisted +- * checking falls down on this. 
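
[Illustrative aside, not part of the patch.] The control-buffer copy-in above is the kernel half of cmsg handling; the best-known userspace counterpart is SCM_RIGHTS descriptor passing, where the CMSG_* macros lay out exactly the cmsghdr that sys_sendmsg() parses. A minimal example, assuming an already-connected AF_UNIX socket:

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>

    /* Send one file descriptor over an AF_UNIX stream socket. */
    int send_fd(int sock, int fd_to_pass)
    {
            char dummy = '*';
            struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };
            union {                         /* correctly aligned cmsg buffer */
                    struct cmsghdr hdr;
                    char buf[CMSG_SPACE(sizeof(int))];
            } u;
            struct msghdr msg;
            struct cmsghdr *cmsg;

            memset(&msg, 0, sizeof(msg));
            msg.msg_iov = &iov;
            msg.msg_iovlen = 1;
            msg.msg_control = u.buf;
            msg.msg_controllen = sizeof(u.buf);

            cmsg = CMSG_FIRSTHDR(&msg);
            cmsg->cmsg_level = SOL_SOCKET;
            cmsg->cmsg_type = SCM_RIGHTS;
            cmsg->cmsg_len = CMSG_LEN(sizeof(int));
            memcpy(CMSG_DATA(cmsg), &fd_to_pass, sizeof(int));

            return sendmsg(sock, &msg, 0) == 1 ? 0 : -1;
    }
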
+- */ +- if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, +- ctl_len)) +- goto out_freectl; +- msg_sys.msg_control = ctl_buf; +- } +- msg_sys.msg_flags = flags; +- +- if (sock->file->f_flags & O_NONBLOCK) +- msg_sys.msg_flags |= MSG_DONTWAIT; +- err = sock_sendmsg(sock, &msg_sys, total_len); +- +-out_freectl: +- if (ctl_buf != ctl) +- sock_kfree_s(sock->sk, ctl_buf, ctl_len); +-out_freeiov: +- if (iov != iovstack) +- sock_kfree_s(sock->sk, iov, iov_size); +-out_put: +- fput_light(sock->file, fput_needed); +-out: +- return err; +-} +- +-/* +- * BSD recvmsg interface +- */ +- +-asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, +- unsigned int flags) +-{ +- struct compat_msghdr __user *msg_compat = +- (struct compat_msghdr __user *)msg; +- struct socket *sock; +- struct iovec iovstack[UIO_FASTIOV]; +- struct iovec *iov = iovstack; +- struct msghdr msg_sys; +- unsigned long cmsg_ptr; +- int err, iov_size, total_len, len; +- int fput_needed; +- +- /* kernel mode address */ +- char addr[MAX_SOCK_ADDR]; +- +- /* user mode address pointers */ +- struct sockaddr __user *uaddr; +- int __user *uaddr_len; +- +- if (MSG_CMSG_COMPAT & flags) { +- if (get_compat_msghdr(&msg_sys, msg_compat)) +- return -EFAULT; +- } +- else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) +- return -EFAULT; +- +- sock = sockfd_lookup_light(fd, &err, &fput_needed); +- if (!sock) +- goto out; +- +- err = -EMSGSIZE; +- if (msg_sys.msg_iovlen > UIO_MAXIOV) +- goto out_put; +- +- /* Check whether to allocate the iovec area */ +- err = -ENOMEM; +- iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); +- if (msg_sys.msg_iovlen > UIO_FASTIOV) { +- iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); +- if (!iov) +- goto out_put; +- } +- +- /* +- * Save the user-mode address (verify_iovec will change the +- * kernel msghdr to use the kernel address space) +- */ +- +- uaddr = (void __user *)msg_sys.msg_name; +- uaddr_len = COMPAT_NAMELEN(msg); +- if (MSG_CMSG_COMPAT & flags) { +- err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); +- } else +- err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); +- if (err < 0) +- goto out_freeiov; +- total_len = err; +- +- cmsg_ptr = (unsigned long)msg_sys.msg_control; +- msg_sys.msg_flags = 0; +- if (MSG_CMSG_COMPAT & flags) +- msg_sys.msg_flags = MSG_CMSG_COMPAT; +- +- if (sock->file->f_flags & O_NONBLOCK) +- flags |= MSG_DONTWAIT; +- err = sock_recvmsg(sock, &msg_sys, total_len, flags); +- if (err < 0) +- goto out_freeiov; +- len = err; +- +- if (uaddr != NULL) { +- err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, +- uaddr_len); +- if (err < 0) +- goto out_freeiov; +- } +- err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), +- COMPAT_FLAGS(msg)); +- if (err) +- goto out_freeiov; +- if (MSG_CMSG_COMPAT & flags) +- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, +- &msg_compat->msg_controllen); +- else +- err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, +- &msg->msg_controllen); +- if (err) +- goto out_freeiov; +- err = len; +- +-out_freeiov: +- if (iov != iovstack) +- sock_kfree_s(sock->sk, iov, iov_size); +-out_put: +- fput_light(sock->file, fput_needed); +-out: +- return err; +-} +- +-#ifdef __ARCH_WANT_SYS_SOCKETCALL +- +-/* Argument list sizes for sys_socketcall */ +-#define AL(x) ((x) * sizeof(unsigned long)) +-static const unsigned char nargs[18]={ +- AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), +- AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), +- AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) +-}; +- +-#undef AL +- +-/* +- * 
System call vectors. +- * +- * Argument checking cleaned up. Saved 20% in size. +- * This function doesn't need to set the kernel lock because +- * it is set by the callees. +- */ +- +-asmlinkage long sys_socketcall(int call, unsigned long __user *args) +-{ +- unsigned long a[6]; +- unsigned long a0, a1; +- int err; +- +- if (call < 1 || call > SYS_RECVMSG) +- return -EINVAL; +- +- /* copy_from_user should be SMP safe. */ +- if (copy_from_user(a, args, nargs[call])) +- return -EFAULT; +- +- err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); +- if (err) +- return err; +- +- a0 = a[0]; +- a1 = a[1]; +- +- switch (call) { +- case SYS_SOCKET: +- err = sys_socket(a0, a1, a[2]); +- break; +- case SYS_BIND: +- err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); +- break; +- case SYS_CONNECT: +- err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); +- break; +- case SYS_LISTEN: +- err = sys_listen(a0, a1); +- break; +- case SYS_ACCEPT: +- err = +- sys_accept(a0, (struct sockaddr __user *)a1, +- (int __user *)a[2]); +- break; +- case SYS_GETSOCKNAME: +- err = +- sys_getsockname(a0, (struct sockaddr __user *)a1, +- (int __user *)a[2]); +- break; +- case SYS_GETPEERNAME: +- err = +- sys_getpeername(a0, (struct sockaddr __user *)a1, +- (int __user *)a[2]); +- break; +- case SYS_SOCKETPAIR: +- err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); +- break; +- case SYS_SEND: +- err = sys_send(a0, (void __user *)a1, a[2], a[3]); +- break; +- case SYS_SENDTO: +- err = sys_sendto(a0, (void __user *)a1, a[2], a[3], +- (struct sockaddr __user *)a[4], a[5]); +- break; +- case SYS_RECV: +- err = sys_recv(a0, (void __user *)a1, a[2], a[3]); +- break; +- case SYS_RECVFROM: +- err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], +- (struct sockaddr __user *)a[4], +- (int __user *)a[5]); +- break; +- case SYS_SHUTDOWN: +- err = sys_shutdown(a0, a1); +- break; +- case SYS_SETSOCKOPT: +- err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); +- break; +- case SYS_GETSOCKOPT: +- err = +- sys_getsockopt(a0, a1, a[2], (char __user *)a[3], +- (int __user *)a[4]); +- break; +- case SYS_SENDMSG: +- err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); +- break; +- case SYS_RECVMSG: +- err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); +- break; +- default: +- err = -EINVAL; +- break; +- } +- return err; +-} +- +-#endif /* __ARCH_WANT_SYS_SOCKETCALL */ +- +-/** +- * sock_register - add a socket protocol handler +- * @ops: description of protocol +- * +- * This function is called by a protocol handler that wants to +- * advertise its address family, and have it linked into the +- * socket interface. The value ops->family coresponds to the +- * socket system call protocol family. +- */ +-int sock_register(const struct net_proto_family *ops) +-{ +- int err; +- +- if (ops->family >= NPROTO) { +- printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, +- NPROTO); +- return -ENOBUFS; +- } +- +- spin_lock(&net_family_lock); +- if (net_families[ops->family]) +- err = -EEXIST; +- else { +- net_families[ops->family] = ops; +- err = 0; +- } +- spin_unlock(&net_family_lock); +- +- printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); +- return err; +-} +- +-/** +- * sock_unregister - remove a protocol handler +- * @family: protocol family to remove +- * +- * This function is called by a protocol handler that wants to +- * remove its address family, and have it unlinked from the +- * new socket creation. 
+- * +- * If protocol handler is a module, then it can use module reference +- * counts to protect against new references. If protocol handler is not +- * a module then it needs to provide its own protection in +- * the ops->create routine. +- */ +-void sock_unregister(int family) +-{ +- BUG_ON(family < 0 || family >= NPROTO); +- +- spin_lock(&net_family_lock); +- net_families[family] = NULL; +- spin_unlock(&net_family_lock); +- +- synchronize_rcu(); +- +- printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); +-} +- +-static int __init sock_init(void) +-{ +- /* +- * Initialize sock SLAB cache. +- */ +- +- sk_init(); +- +- /* +- * Initialize skbuff SLAB cache +- */ +- skb_init(); +- +- /* +- * Initialize the protocols module. +- */ +- +- init_inodecache(); +- register_filesystem(&sock_fs_type); +- sock_mnt = kern_mount(&sock_fs_type); +- +- /* The real protocol initialization is performed in later initcalls. +- */ +- +-#ifdef CONFIG_NETFILTER +- netfilter_init(); +-#endif +- +- return 0; +-} +- +-core_initcall(sock_init); /* early initcall */ +- +-#ifdef CONFIG_PROC_FS +-void socket_seq_show(struct seq_file *seq) +-{ +- int cpu; +- int counter = 0; +- +- for_each_possible_cpu(cpu) +- counter += per_cpu(sockets_in_use, cpu); +- +- /* It can be negative, by the way. 8) */ +- if (counter < 0) +- counter = 0; +- +- seq_printf(seq, "sockets: used %d\n", counter); +-} +-#endif /* CONFIG_PROC_FS */ +- +-#ifdef CONFIG_COMPAT +-static long compat_sock_ioctl(struct file *file, unsigned cmd, +- unsigned long arg) +-{ +- struct socket *sock = file->private_data; +- int ret = -ENOIOCTLCMD; +- +- if (sock->ops->compat_ioctl) +- ret = sock->ops->compat_ioctl(sock, cmd, arg); +- +- return ret; +-} +-#endif +- +-int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) +-{ +- return sock->ops->bind(sock, addr, addrlen); +-} +- +-int kernel_listen(struct socket *sock, int backlog) +-{ +- return sock->ops->listen(sock, backlog); +-} +- +-int kernel_accept(struct socket *sock, struct socket **newsock, int flags) +-{ +- struct sock *sk = sock->sk; +- int err; +- +- err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, +- newsock); +- if (err < 0) +- goto done; +- +- err = sock->ops->accept(sock, *newsock, flags); +- if (err < 0) { +- sock_release(*newsock); +- goto done; +- } +- +- (*newsock)->ops = sock->ops; +- +-done: +- return err; +-} +- +-int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, +- int flags) +-{ +- return sock->ops->connect(sock, addr, addrlen, flags); +-} +- +-int kernel_getsockname(struct socket *sock, struct sockaddr *addr, +- int *addrlen) +-{ +- return sock->ops->getname(sock, addr, addrlen, 0); +-} +- +-int kernel_getpeername(struct socket *sock, struct sockaddr *addr, +- int *addrlen) +-{ +- return sock->ops->getname(sock, addr, addrlen, 1); +-} +- +-int kernel_getsockopt(struct socket *sock, int level, int optname, +- char *optval, int *optlen) +-{ +- mm_segment_t oldfs = get_fs(); +- int err; +- +- set_fs(KERNEL_DS); +- if (level == SOL_SOCKET) +- err = sock_getsockopt(sock, level, optname, optval, optlen); +- else +- err = sock->ops->getsockopt(sock, level, optname, optval, +- optlen); +- set_fs(oldfs); +- return err; +-} +- +-int kernel_setsockopt(struct socket *sock, int level, int optname, +- char *optval, int optlen) +-{ +- mm_segment_t oldfs = get_fs(); +- int err; +- +- set_fs(KERNEL_DS); +- if (level == SOL_SOCKET) +- err = sock_setsockopt(sock, level, optname, optval, optlen); +- else +- err = 
sock->ops->setsockopt(sock, level, optname, optval, +- optlen); +- set_fs(oldfs); +- return err; +-} +- +-int kernel_sendpage(struct socket *sock, struct page *page, int offset, +- size_t size, int flags) +-{ +- if (sock->ops->sendpage) +- return sock->ops->sendpage(sock, page, offset, size, flags); +- +- return sock_no_sendpage(sock, page, offset, size, flags); +-} +- +-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) +-{ +- mm_segment_t oldfs = get_fs(); +- int err; +- +- set_fs(KERNEL_DS); +- err = sock->ops->ioctl(sock, cmd, arg); +- set_fs(oldfs); +- +- return err; +-} +- +-/* ABI emulation layers need these two */ +-EXPORT_SYMBOL(move_addr_to_kernel); +-EXPORT_SYMBOL(move_addr_to_user); +-EXPORT_SYMBOL(sock_create); +-EXPORT_SYMBOL(sock_create_kern); +-EXPORT_SYMBOL(sock_create_lite); +-EXPORT_SYMBOL(sock_map_fd); +-EXPORT_SYMBOL(sock_recvmsg); +-EXPORT_SYMBOL(sock_register); +-EXPORT_SYMBOL(sock_release); +-EXPORT_SYMBOL(sock_sendmsg); +-EXPORT_SYMBOL(sock_unregister); +-EXPORT_SYMBOL(sock_wake_async); +-EXPORT_SYMBOL(sockfd_lookup); +-EXPORT_SYMBOL(kernel_sendmsg); +-EXPORT_SYMBOL(kernel_recvmsg); +-EXPORT_SYMBOL(kernel_bind); +-EXPORT_SYMBOL(kernel_listen); +-EXPORT_SYMBOL(kernel_accept); +-EXPORT_SYMBOL(kernel_connect); +-EXPORT_SYMBOL(kernel_getsockname); +-EXPORT_SYMBOL(kernel_getpeername); +-EXPORT_SYMBOL(kernel_getsockopt); +-EXPORT_SYMBOL(kernel_setsockopt); +-EXPORT_SYMBOL(kernel_sendpage); +-EXPORT_SYMBOL(kernel_sock_ioctl); +diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-591/net/sunrpc/auth.c +--- linux-2.6.22-570/net/sunrpc/auth.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/sunrpc/auth.c 2007-12-21 15:36:12.000000000 -0500 +@@ -19,12 +19,16 @@ + # define RPCDBG_FACILITY RPCDBG_AUTH + #endif + +-static struct rpc_authops * auth_flavors[RPC_AUTH_MAXFLAVOR] = { ++static DEFINE_SPINLOCK(rpc_authflavor_lock); ++static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = { + &authnull_ops, /* AUTH_NULL */ + &authunix_ops, /* AUTH_UNIX */ + NULL, /* others can be loadable modules */ + }; + ++static LIST_HEAD(cred_unused); ++static unsigned long number_cred_unused; ++ + static u32 + pseudoflavor_to_flavor(u32 flavor) { + if (flavor >= RPC_AUTH_MAXFLAVOR) +@@ -33,55 +37,67 @@ + } + + int +-rpcauth_register(struct rpc_authops *ops) ++rpcauth_register(const struct rpc_authops *ops) + { + rpc_authflavor_t flavor; ++ int ret = -EPERM; + + if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) + return -EINVAL; +- if (auth_flavors[flavor] != NULL) +- return -EPERM; /* what else? */ ++ spin_lock(&rpc_authflavor_lock); ++ if (auth_flavors[flavor] == NULL) { + auth_flavors[flavor] = ops; +- return 0; ++ ret = 0; ++ } ++ spin_unlock(&rpc_authflavor_lock); ++ return ret; + } + + int +-rpcauth_unregister(struct rpc_authops *ops) ++rpcauth_unregister(const struct rpc_authops *ops) + { + rpc_authflavor_t flavor; ++ int ret = -EPERM; + + if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR) + return -EINVAL; +- if (auth_flavors[flavor] != ops) +- return -EPERM; /* what else? 
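
[Illustrative aside, not part of the patch.] The rewrite of rpcauth_register() just above exists because the old unlocked version had a classic check-then-act race. Spelled out as commentary:

    /*
     *   CPU0                                  CPU1
     *   if (auth_flavors[f] == NULL)          if (auth_flavors[f] == NULL)
     *       auth_flavors[f] = ops0;               auth_flavors[f] = ops1;
     *
     * Both registrations "succeed" and one ops table is silently lost.
     * Holding rpc_authflavor_lock across the test and the store makes the
     * register/unregister pair atomic; the same lock also lets
     * rpcauth_create() take a module reference before using the table.
     */
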
*/ ++ spin_lock(&rpc_authflavor_lock); ++ if (auth_flavors[flavor] == ops) { + auth_flavors[flavor] = NULL; +- return 0; ++ ret = 0; ++ } ++ spin_unlock(&rpc_authflavor_lock); ++ return ret; + } + + struct rpc_auth * + rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) + { + struct rpc_auth *auth; +- struct rpc_authops *ops; ++ const struct rpc_authops *ops; + u32 flavor = pseudoflavor_to_flavor(pseudoflavor); + + auth = ERR_PTR(-EINVAL); + if (flavor >= RPC_AUTH_MAXFLAVOR) + goto out; + +- /* FIXME - auth_flavors[] really needs an rw lock, +- * and module refcounting. */ + #ifdef CONFIG_KMOD + if ((ops = auth_flavors[flavor]) == NULL) + request_module("rpc-auth-%u", flavor); + #endif +- if ((ops = auth_flavors[flavor]) == NULL) ++ spin_lock(&rpc_authflavor_lock); ++ ops = auth_flavors[flavor]; ++ if (ops == NULL || !try_module_get(ops->owner)) { ++ spin_unlock(&rpc_authflavor_lock); + goto out; ++ } ++ spin_unlock(&rpc_authflavor_lock); + auth = ops->create(clnt, pseudoflavor); ++ module_put(ops->owner); + if (IS_ERR(auth)) + return auth; + if (clnt->cl_auth) +- rpcauth_destroy(clnt->cl_auth); ++ rpcauth_release(clnt->cl_auth); + clnt->cl_auth = auth; + + out: +@@ -89,7 +105,7 @@ + } + + void +-rpcauth_destroy(struct rpc_auth *auth) ++rpcauth_release(struct rpc_auth *auth) + { + if (!atomic_dec_and_test(&auth->au_count)) + return; +@@ -98,11 +114,31 @@ + + static DEFINE_SPINLOCK(rpc_credcache_lock); + ++static void ++rpcauth_unhash_cred_locked(struct rpc_cred *cred) ++{ ++ hlist_del_rcu(&cred->cr_hash); ++ smp_mb__before_clear_bit(); ++ clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); ++} ++ ++static void ++rpcauth_unhash_cred(struct rpc_cred *cred) ++{ ++ spinlock_t *cache_lock; ++ ++ cache_lock = &cred->cr_auth->au_credcache->lock; ++ spin_lock(cache_lock); ++ if (atomic_read(&cred->cr_count) == 0) ++ rpcauth_unhash_cred_locked(cred); ++ spin_unlock(cache_lock); ++} ++ + /* + * Initialize RPC credential cache + */ + int +-rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) ++rpcauth_init_credcache(struct rpc_auth *auth) + { + struct rpc_cred_cache *new; + int i; +@@ -112,8 +148,7 @@ + return -ENOMEM; + for (i = 0; i < RPC_CREDCACHE_NR; i++) + INIT_HLIST_HEAD(&new->hashtable[i]); +- new->expire = expire; +- new->nextgc = jiffies + (expire >> 1); ++ spin_lock_init(&new->lock); + auth->au_credcache = new; + return 0; + } +@@ -122,13 +157,13 @@ + * Destroy a list of credentials + */ + static inline +-void rpcauth_destroy_credlist(struct hlist_head *head) ++void rpcauth_destroy_credlist(struct list_head *head) + { + struct rpc_cred *cred; + +- while (!hlist_empty(head)) { +- cred = hlist_entry(head->first, struct rpc_cred, cr_hash); +- hlist_del_init(&cred->cr_hash); ++ while (!list_empty(head)) { ++ cred = list_entry(head->next, struct rpc_cred, cr_lru); ++ list_del_init(&cred->cr_lru); + put_rpccred(cred); + } + } +@@ -138,58 +173,95 @@ + * that are not referenced. 
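
[Illustrative aside, not part of the patch.] rpcauth_unhash_cred() above re-tests cr_count after taking the cache lock; that recheck is what makes the lock-free lookups safe against teardown. The idiom in isolation, as a runnable userspace sketch in which the struct and field names are hypothetical:

    #include <pthread.h>
    #include <stdatomic.h>

    struct obj {
            atomic_int count;
            struct obj *next, **pprev;      /* stand-in for hlist linkage */
    };

    struct cache {
            pthread_mutex_t lock;
    };

    static void unhash(struct obj *o)
    {
            if (o->pprev) {
                    *o->pprev = o->next;    /* unlink from the hash chain */
                    o->pprev = NULL;
            }
    }

    /* Called when a put has seen the count fall to zero. */
    void release_slowpath(struct cache *c, struct obj *o)
    {
            pthread_mutex_lock(&c->lock);
            /* A concurrent lookup may have re-referenced the object since
             * the caller's check; only unhash if it is still unused. */
            if (atomic_load(&o->count) == 0)
                    unhash(o);
            pthread_mutex_unlock(&c->lock);
    }
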
+ */ + void +-rpcauth_free_credcache(struct rpc_auth *auth) ++rpcauth_clear_credcache(struct rpc_cred_cache *cache) + { +- struct rpc_cred_cache *cache = auth->au_credcache; +- HLIST_HEAD(free); +- struct hlist_node *pos, *next; ++ LIST_HEAD(free); ++ struct hlist_head *head; + struct rpc_cred *cred; + int i; + + spin_lock(&rpc_credcache_lock); ++ spin_lock(&cache->lock); + for (i = 0; i < RPC_CREDCACHE_NR; i++) { +- hlist_for_each_safe(pos, next, &cache->hashtable[i]) { +- cred = hlist_entry(pos, struct rpc_cred, cr_hash); +- __hlist_del(&cred->cr_hash); +- hlist_add_head(&cred->cr_hash, &free); ++ head = &cache->hashtable[i]; ++ while (!hlist_empty(head)) { ++ cred = hlist_entry(head->first, struct rpc_cred, cr_hash); ++ get_rpccred(cred); ++ if (!list_empty(&cred->cr_lru)) { ++ list_del(&cred->cr_lru); ++ number_cred_unused--; + } ++ list_add_tail(&cred->cr_lru, &free); ++ rpcauth_unhash_cred_locked(cred); + } ++ } ++ spin_unlock(&cache->lock); + spin_unlock(&rpc_credcache_lock); + rpcauth_destroy_credlist(&free); + } + +-static void +-rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) ++/* ++ * Destroy the RPC credential cache ++ */ ++void ++rpcauth_destroy_credcache(struct rpc_auth *auth) + { +- if (atomic_read(&cred->cr_count) != 1) +- return; +- if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; +- if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { +- __hlist_del(&cred->cr_hash); +- hlist_add_head(&cred->cr_hash, free); ++ struct rpc_cred_cache *cache = auth->au_credcache; ++ ++ if (cache) { ++ auth->au_credcache = NULL; ++ rpcauth_clear_credcache(cache); ++ kfree(cache); + } + } + + /* + * Remove stale credentials. Avoid sleeping inside the loop. + */ +-static void +-rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) ++static int ++rpcauth_prune_expired(struct list_head *free, int nr_to_scan) + { +- struct rpc_cred_cache *cache = auth->au_credcache; +- struct hlist_node *pos, *next; ++ spinlock_t *cache_lock; + struct rpc_cred *cred; +- int i; + +- dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); +- for (i = 0; i < RPC_CREDCACHE_NR; i++) { +- hlist_for_each_safe(pos, next, &cache->hashtable[i]) { +- cred = hlist_entry(pos, struct rpc_cred, cr_hash); +- rpcauth_prune_expired(auth, cred, free); ++ while(!list_empty(&cred_unused)) { ++ cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); ++ list_del_init(&cred->cr_lru); ++ number_cred_unused--; ++ if (atomic_read(&cred->cr_count) != 0) ++ continue; ++ cache_lock = &cred->cr_auth->au_credcache->lock; ++ spin_lock(cache_lock); ++ if (atomic_read(&cred->cr_count) == 0) { ++ get_rpccred(cred); ++ list_add_tail(&cred->cr_lru, free); ++ rpcauth_unhash_cred_locked(cred); ++ nr_to_scan --; + } ++ spin_unlock(cache_lock); ++ if (nr_to_scan == 0) ++ break; + } +- cache->nextgc = jiffies + cache->expire; ++ return nr_to_scan; ++} ++ ++/* ++ * Run memory cache shrinker. 
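
[Illustrative aside, not part of the patch.] The function that follows implements the 2.6.22-era shrinker contract: the VM calls it with a batch size (nr_to_scan == 0 meaning "just report"), and the return value is the remaining population scaled by vfs_cache_pressure. The shape of such a callback, reduced to a hedged userspace sketch where the my_* names and the gfp_t stand-in are made up:

    typedef unsigned gfp_t;                 /* stand-in for the kernel type */

    static long my_cache_unused;            /* cf. number_cred_unused */
    static int my_cache_pressure = 100;     /* cf. sysctl_vfs_cache_pressure */

    static void prune(int nr) { /* free up to nr unused entries */ }

    int my_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
    {
            if (nr_to_scan)
                    prune(nr_to_scan);
            /* Report what is left, scaled by the pressure knob, exactly as
             * rpcauth_cache_shrinker() below computes its return value. */
            return (my_cache_unused / 100) * my_cache_pressure;
    }
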
++ */ ++static int ++rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) ++{ ++ LIST_HEAD(free); ++ int res; ++ ++ if (list_empty(&cred_unused)) ++ return 0; ++ spin_lock(&rpc_credcache_lock); ++ nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); ++ res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; ++ spin_unlock(&rpc_credcache_lock); ++ rpcauth_destroy_credlist(&free); ++ return res; + } + + /* +@@ -199,53 +271,56 @@ + rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, + int flags) + { ++ LIST_HEAD(free); + struct rpc_cred_cache *cache = auth->au_credcache; +- HLIST_HEAD(free); +- struct hlist_node *pos, *next; +- struct rpc_cred *new = NULL, +- *cred = NULL; ++ struct hlist_node *pos; ++ struct rpc_cred *cred = NULL, ++ *entry, *new; + int nr = 0; + + if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) + nr = acred->uid & RPC_CREDCACHE_MASK; +-retry: +- spin_lock(&rpc_credcache_lock); +- if (time_before(cache->nextgc, jiffies)) +- rpcauth_gc_credcache(auth, &free); +- hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { +- struct rpc_cred *entry; +- entry = hlist_entry(pos, struct rpc_cred, cr_hash); +- if (entry->cr_ops->crmatch(acred, entry, flags)) { +- hlist_del(&entry->cr_hash); +- cred = entry; +- break; +- } +- rpcauth_prune_expired(auth, entry, &free); +- } +- if (new) { +- if (cred) +- hlist_add_head(&new->cr_hash, &free); +- else +- cred = new; ++ ++ rcu_read_lock(); ++ hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) { ++ if (!entry->cr_ops->crmatch(acred, entry, flags)) ++ continue; ++ spin_lock(&cache->lock); ++ if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { ++ spin_unlock(&cache->lock); ++ continue; + } +- if (cred) { +- hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); +- get_rpccred(cred); ++ cred = get_rpccred(entry); ++ spin_unlock(&cache->lock); ++ break; + } +- spin_unlock(&rpc_credcache_lock); ++ rcu_read_unlock(); + +- rpcauth_destroy_credlist(&free); ++ if (cred != NULL) ++ goto found; + +- if (!cred) { + new = auth->au_ops->crcreate(auth, acred, flags); +- if (!IS_ERR(new)) { +-#ifdef RPC_DEBUG +- new->cr_magic = RPCAUTH_CRED_MAGIC; +-#endif +- goto retry; +- } else ++ if (IS_ERR(new)) { ++ cred = new; ++ goto out; ++ } ++ ++ spin_lock(&cache->lock); ++ hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) { ++ if (!entry->cr_ops->crmatch(acred, entry, flags)) ++ continue; ++ cred = get_rpccred(entry); ++ break; ++ } ++ if (cred == NULL) { + cred = new; +- } else if ((cred->cr_flags & RPCAUTH_CRED_NEW) ++ set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); ++ hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]); ++ } else ++ list_add_tail(&new->cr_lru, &free); ++ spin_unlock(&cache->lock); ++found: ++ if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) + && cred->cr_ops->cr_init != NULL + && !(flags & RPCAUTH_LOOKUP_NEW)) { + int res = cred->cr_ops->cr_init(auth, cred); +@@ -254,8 +329,9 @@ + cred = ERR_PTR(res); + } + } +- +- return (struct rpc_cred *) cred; ++ rpcauth_destroy_credlist(&free); ++out: ++ return cred; + } + + struct rpc_cred * +@@ -277,6 +353,23 @@ + return ret; + } + ++void ++rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, ++ struct rpc_auth *auth, const struct rpc_credops *ops) ++{ ++ INIT_HLIST_NODE(&cred->cr_hash); ++ INIT_LIST_HEAD(&cred->cr_lru); ++ atomic_set(&cred->cr_count, 1); ++ cred->cr_auth = auth; ++ cred->cr_ops = ops; ++ cred->cr_expire = jiffies; ++#ifdef RPC_DEBUG ++ cred->cr_magic = RPCAUTH_CRED_MAGIC; ++#endif ++ cred->cr_uid = 
acred->uid; ++} ++EXPORT_SYMBOL(rpcauth_init_cred); ++ + struct rpc_cred * + rpcauth_bindcred(struct rpc_task *task) + { +@@ -317,9 +410,31 @@ + void + put_rpccred(struct rpc_cred *cred) + { +- cred->cr_expire = jiffies; ++ /* Fast path for unhashed credentials */ ++ if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) ++ goto need_lock; ++ + if (!atomic_dec_and_test(&cred->cr_count)) + return; ++ goto out_destroy; ++need_lock: ++ if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) ++ return; ++ if (!list_empty(&cred->cr_lru)) { ++ number_cred_unused--; ++ list_del_init(&cred->cr_lru); ++ } ++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) ++ rpcauth_unhash_cred(cred); ++ else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { ++ cred->cr_expire = jiffies; ++ list_add_tail(&cred->cr_lru, &cred_unused); ++ number_cred_unused++; ++ spin_unlock(&rpc_credcache_lock); ++ return; ++ } ++ spin_unlock(&rpc_credcache_lock); ++out_destroy: + cred->cr_ops->crdestroy(cred); + } + +@@ -404,17 +519,34 @@ + void + rpcauth_invalcred(struct rpc_task *task) + { ++ struct rpc_cred *cred = task->tk_msg.rpc_cred; ++ + dprintk("RPC: %5u invalidating %s cred %p\n", +- task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred); +- spin_lock(&rpc_credcache_lock); +- if (task->tk_msg.rpc_cred) +- task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; +- spin_unlock(&rpc_credcache_lock); ++ task->tk_pid, task->tk_auth->au_ops->au_name, cred); ++ if (cred) ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + } + + int + rpcauth_uptodatecred(struct rpc_task *task) + { +- return !(task->tk_msg.rpc_cred) || +- (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); ++ struct rpc_cred *cred = task->tk_msg.rpc_cred; ++ ++ return cred == NULL || ++ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0; ++} ++ ++ ++static struct shrinker *rpc_cred_shrinker; ++ ++void __init rpcauth_init_module(void) ++{ ++ rpc_init_authunix(); ++ rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker); ++} ++ ++void __exit rpcauth_remove_module(void) ++{ ++ if (rpc_cred_shrinker != NULL) ++ remove_shrinker(rpc_cred_shrinker); + } +diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c +--- linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/auth_gss/auth_gss.c 2007-12-21 15:36:12.000000000 -0500 +@@ -54,9 +54,9 @@ + #include + #include + +-static struct rpc_authops authgss_ops; ++static const struct rpc_authops authgss_ops; + +-static struct rpc_credops gss_credops; ++static const struct rpc_credops gss_credops; + + #ifdef RPC_DEBUG + # define RPCDBG_FACILITY RPCDBG_AUTH +@@ -64,7 +64,6 @@ + + #define NFS_NGROUPS 16 + +-#define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? 
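
[Illustrative aside, not part of the patch.] Backing up to put_rpccred() in the auth.c hunk above: the slow path hinges on atomic_dec_and_lock(), whose semantics, in outline, are worth spelling out:

    /*
     * atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock):
     *
     *   - if the count would NOT reach zero, it is decremented without
     *     ever touching the lock and the call returns false;
     *   - if it WOULD reach zero, the lock is taken first, the decrement
     *     happens under it, and the call returns true with the lock held,
     *     so no lookup can resurrect the cred while it is torn down.
     *
     * That keeps the common put cheap while still making the LRU and
     * unhash bookkeeping above race-free.
     */
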
*/ + #define GSS_CRED_SLACK 1024 /* XXX: unused */ + /* length of a krb5 verifier (48), plus data added before arguments when + * using integrity (two 4-byte integers): */ +@@ -85,10 +84,8 @@ + struct rpc_auth rpc_auth; + struct gss_api_mech *mech; + enum rpc_gss_svc service; +- struct list_head upcalls; + struct rpc_clnt *client; + struct dentry *dentry; +- spinlock_t lock; + }; + + static void gss_destroy_ctx(struct gss_cl_ctx *); +@@ -116,8 +113,8 @@ + write_lock(&gss_ctx_lock); + old = gss_cred->gc_ctx; + gss_cred->gc_ctx = ctx; +- cred->cr_flags |= RPCAUTH_CRED_UPTODATE; +- cred->cr_flags &= ~RPCAUTH_CRED_NEW; ++ set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); ++ clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); + write_unlock(&gss_ctx_lock); + if (old) + gss_put_ctx(old); +@@ -130,7 +127,7 @@ + int res = 0; + + read_lock(&gss_ctx_lock); +- if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) ++ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx) + res = 1; + read_unlock(&gss_ctx_lock); + return res; +@@ -269,10 +266,10 @@ + } + + static struct gss_upcall_msg * +-__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) ++__gss_find_upcall(struct rpc_inode *rpci, uid_t uid) + { + struct gss_upcall_msg *pos; +- list_for_each_entry(pos, &gss_auth->upcalls, list) { ++ list_for_each_entry(pos, &rpci->in_downcall, list) { + if (pos->uid != uid) + continue; + atomic_inc(&pos->count); +@@ -290,24 +287,24 @@ + static inline struct gss_upcall_msg * + gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) + { ++ struct inode *inode = gss_auth->dentry->d_inode; ++ struct rpc_inode *rpci = RPC_I(inode); + struct gss_upcall_msg *old; + +- spin_lock(&gss_auth->lock); +- old = __gss_find_upcall(gss_auth, gss_msg->uid); ++ spin_lock(&inode->i_lock); ++ old = __gss_find_upcall(rpci, gss_msg->uid); + if (old == NULL) { + atomic_inc(&gss_msg->count); +- list_add(&gss_msg->list, &gss_auth->upcalls); ++ list_add(&gss_msg->list, &rpci->in_downcall); + } else + gss_msg = old; +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + return gss_msg; + } + + static void + __gss_unhash_msg(struct gss_upcall_msg *gss_msg) + { +- if (list_empty(&gss_msg->list)) +- return; + list_del_init(&gss_msg->list); + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + wake_up_all(&gss_msg->waitqueue); +@@ -318,10 +315,14 @@ + gss_unhash_msg(struct gss_upcall_msg *gss_msg) + { + struct gss_auth *gss_auth = gss_msg->auth; ++ struct inode *inode = gss_auth->dentry->d_inode; + +- spin_lock(&gss_auth->lock); ++ if (list_empty(&gss_msg->list)) ++ return; ++ spin_lock(&inode->i_lock); ++ if (!list_empty(&gss_msg->list)) + __gss_unhash_msg(gss_msg); +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + } + + static void +@@ -330,16 +331,16 @@ + struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, + struct gss_cred, gc_base); + struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; ++ struct inode *inode = gss_msg->auth->dentry->d_inode; + +- BUG_ON(gss_msg == NULL); + if (gss_msg->ctx) + gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); + else + task->tk_status = gss_msg->msg.errno; +- spin_lock(&gss_msg->auth->lock); ++ spin_lock(&inode->i_lock); + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); +- spin_unlock(&gss_msg->auth->lock); ++ spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); + } + +@@ -386,11 +387,12 @@ + gss_refresh_upcall(struct rpc_task *task) + { + 
struct rpc_cred *cred = task->tk_msg.rpc_cred; +- struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth, ++ struct gss_auth *gss_auth = container_of(cred->cr_auth, + struct gss_auth, rpc_auth); + struct gss_cred *gss_cred = container_of(cred, + struct gss_cred, gc_base); + struct gss_upcall_msg *gss_msg; ++ struct inode *inode = gss_auth->dentry->d_inode; + int err = 0; + + dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, +@@ -400,7 +402,7 @@ + err = PTR_ERR(gss_msg); + goto out; + } +- spin_lock(&gss_auth->lock); ++ spin_lock(&inode->i_lock); + if (gss_cred->gc_upcall != NULL) + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); + else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { +@@ -411,7 +413,7 @@ + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + } else + err = gss_msg->msg.errno; +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); + out: + dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", +@@ -422,6 +424,7 @@ + static inline int + gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) + { ++ struct inode *inode = gss_auth->dentry->d_inode; + struct rpc_cred *cred = &gss_cred->gc_base; + struct gss_upcall_msg *gss_msg; + DEFINE_WAIT(wait); +@@ -435,12 +438,12 @@ + } + for (;;) { + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); +- spin_lock(&gss_auth->lock); ++ spin_lock(&inode->i_lock); + if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + break; + } +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + if (signalled()) { + err = -ERESTARTSYS; + goto out_intr; +@@ -489,12 +492,11 @@ + const void *p, *end; + void *buf; + struct rpc_clnt *clnt; +- struct gss_auth *gss_auth; +- struct rpc_cred *cred; + struct gss_upcall_msg *gss_msg; ++ struct inode *inode = filp->f_path.dentry->d_inode; + struct gss_cl_ctx *ctx; + uid_t uid; +- int err = -EFBIG; ++ ssize_t err = -EFBIG; + + if (mlen > MSG_BUF_MAXSIZE) + goto out; +@@ -503,7 +505,7 @@ + if (!buf) + goto out; + +- clnt = RPC_I(filp->f_path.dentry->d_inode)->private; ++ clnt = RPC_I(inode)->private; + err = -EFAULT; + if (copy_from_user(buf, src, mlen)) + goto err; +@@ -519,43 +521,38 @@ + ctx = gss_alloc_context(); + if (ctx == NULL) + goto err; +- err = 0; +- gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); +- p = gss_fill_context(p, end, ctx, gss_auth->mech); ++ ++ err = -ENOENT; ++ /* Find a matching upcall */ ++ spin_lock(&inode->i_lock); ++ gss_msg = __gss_find_upcall(RPC_I(inode), uid); ++ if (gss_msg == NULL) { ++ spin_unlock(&inode->i_lock); ++ goto err_put_ctx; ++ } ++ list_del_init(&gss_msg->list); ++ spin_unlock(&inode->i_lock); ++ ++ p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); + if (IS_ERR(p)) { + err = PTR_ERR(p); +- if (err != -EACCES) +- goto err_put_ctx; ++ gss_msg->msg.errno = (err == -EACCES) ? 
-EACCES : -EAGAIN; ++ goto err_release_msg; + } +- spin_lock(&gss_auth->lock); +- gss_msg = __gss_find_upcall(gss_auth, uid); +- if (gss_msg) { +- if (err == 0 && gss_msg->ctx == NULL) + gss_msg->ctx = gss_get_ctx(ctx); +- gss_msg->msg.errno = err; ++ err = mlen; ++ ++err_release_msg: ++ spin_lock(&inode->i_lock); + __gss_unhash_msg(gss_msg); +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); +- } else { +- struct auth_cred acred = { .uid = uid }; +- spin_unlock(&gss_auth->lock); +- cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW); +- if (IS_ERR(cred)) { +- err = PTR_ERR(cred); +- goto err_put_ctx; +- } +- gss_cred_set_ctx(cred, gss_get_ctx(ctx)); +- } +- gss_put_ctx(ctx); +- kfree(buf); +- dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); +- return mlen; + err_put_ctx: + gss_put_ctx(ctx); + err: + kfree(buf); + out: +- dprintk("RPC: gss_pipe_downcall returning %d\n", err); ++ dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); + return err; + } + +@@ -563,27 +560,21 @@ + gss_pipe_release(struct inode *inode) + { + struct rpc_inode *rpci = RPC_I(inode); +- struct rpc_clnt *clnt; +- struct rpc_auth *auth; +- struct gss_auth *gss_auth; +- +- clnt = rpci->private; +- auth = clnt->cl_auth; +- gss_auth = container_of(auth, struct gss_auth, rpc_auth); +- spin_lock(&gss_auth->lock); +- while (!list_empty(&gss_auth->upcalls)) { + struct gss_upcall_msg *gss_msg; + +- gss_msg = list_entry(gss_auth->upcalls.next, ++ spin_lock(&inode->i_lock); ++ while (!list_empty(&rpci->in_downcall)) { ++ ++ gss_msg = list_entry(rpci->in_downcall.next, + struct gss_upcall_msg, list); + gss_msg->msg.errno = -EPIPE; + atomic_inc(&gss_msg->count); + __gss_unhash_msg(gss_msg); +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + gss_release_msg(gss_msg); +- spin_lock(&gss_auth->lock); ++ spin_lock(&inode->i_lock); + } +- spin_unlock(&gss_auth->lock); ++ spin_unlock(&inode->i_lock); + } + + static void +@@ -637,8 +628,6 @@ + gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); + if (gss_auth->service == 0) + goto err_put_mech; +- INIT_LIST_HEAD(&gss_auth->upcalls); +- spin_lock_init(&gss_auth->lock); + auth = &gss_auth->rpc_auth; + auth->au_cslack = GSS_CRED_SLACK >> 2; + auth->au_rslack = GSS_VERF_SLACK >> 2; +@@ -646,10 +635,6 @@ + auth->au_flavor = flavor; + atomic_set(&auth->au_count, 1); + +- err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE); +- if (err) +- goto err_put_mech; +- + gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name, + clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); + if (IS_ERR(gss_auth->dentry)) { +@@ -657,7 +642,13 @@ + goto err_put_mech; + } + ++ err = rpcauth_init_credcache(auth); ++ if (err) ++ goto err_unlink_pipe; ++ + return auth; ++err_unlink_pipe: ++ rpc_unlink(gss_auth->dentry); + err_put_mech: + gss_mech_put(gss_auth->mech); + err_free: +@@ -675,12 +666,13 @@ + dprintk("RPC: destroying GSS authenticator %p flavor %d\n", + auth, auth->au_flavor); + ++ rpcauth_destroy_credcache(auth); ++ + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + rpc_unlink(gss_auth->dentry); + gss_auth->dentry = NULL; + gss_mech_put(gss_auth->mech); + +- rpcauth_free_credcache(auth); + kfree(gss_auth); + module_put(THIS_MODULE); + } +@@ -701,17 +693,27 @@ + } + + static void +-gss_destroy_cred(struct rpc_cred *rc) ++gss_free_cred(struct gss_cred *cred) + { +- struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); +- +- dprintk("RPC: gss_destroy_cred 
\n"); +- ++ dprintk("RPC: gss_free_cred %p\n", cred); + if (cred->gc_ctx) + gss_put_ctx(cred->gc_ctx); + kfree(cred); + } + ++static void ++gss_free_cred_callback(struct rcu_head *head) ++{ ++ struct gss_cred *cred = container_of(head, struct gss_cred, gc_base.cr_rcu); ++ gss_free_cred(cred); ++} ++ ++static void ++gss_destroy_cred(struct rpc_cred *rc) ++{ ++ call_rcu(&rc->cr_rcu, gss_free_cred_callback); ++} ++ + /* + * Lookup RPCSEC_GSS cred for the current process + */ +@@ -734,15 +736,12 @@ + if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL))) + goto out_err; + +- atomic_set(&cred->gc_count, 1); +- cred->gc_uid = acred->uid; ++ rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); + /* + * Note: in order to force a call to call_refresh(), we deliberately + * fail to flag the credential as RPCAUTH_CRED_UPTODATE. + */ +- cred->gc_flags = 0; +- cred->gc_base.cr_ops = &gss_credops; +- cred->gc_base.cr_flags = RPCAUTH_CRED_NEW; ++ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; + cred->gc_service = gss_auth->service; + return &cred->gc_base; + +@@ -774,7 +773,7 @@ + * we don't really care if the credential has expired or not, + * since the caller should be prepared to reinitialise it. + */ +- if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW)) ++ if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) + goto out; + /* Don't match with creds that have expired. */ + if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) +@@ -830,7 +829,7 @@ + mic.data = (u8 *)(p + 1); + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) { +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + } else if (maj_stat != 0) { + printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); + goto out_put_ctx; +@@ -883,7 +882,7 @@ + + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (maj_stat) + goto out_bad; + /* We leave it to unwrap to calculate au_rslack. For now we just +@@ -937,7 +936,7 @@ + maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); + status = -EIO; /* XXX? 
*/ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + else if (maj_stat) + return status; + q = xdr_encode_opaque(p, NULL, mic.len); +@@ -1036,7 +1035,7 @@ + /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was + * done anyway, so it's safe to put the request on the wire: */ + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + else if (maj_stat) + return status; + +@@ -1123,7 +1122,7 @@ + + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (maj_stat != GSS_S_COMPLETE) + return status; + return 0; +@@ -1148,7 +1147,7 @@ + + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); + if (maj_stat == GSS_S_CONTEXT_EXPIRED) +- cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; ++ clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); + if (maj_stat != GSS_S_COMPLETE) + return status; + if (ntohl(*(*p)++) != rqstp->rq_seqno) +@@ -1199,7 +1198,7 @@ + return status; + } + +-static struct rpc_authops authgss_ops = { ++static const struct rpc_authops authgss_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_GSS, + #ifdef RPC_DEBUG +@@ -1211,7 +1210,7 @@ + .crcreate = gss_create_cred + }; + +-static struct rpc_credops gss_credops = { ++static const struct rpc_credops gss_credops = { + .cr_name = "AUTH_GSS", + .crdestroy = gss_destroy_cred, + .cr_init = gss_cred_init, +diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c +--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_krb5_mech.c 2007-12-21 15:36:12.000000000 -0500 +@@ -201,7 +201,7 @@ + kfree(kctx); + } + +-static struct gss_api_ops gss_kerberos_ops = { ++static const struct gss_api_ops gss_kerberos_ops = { + .gss_import_sec_context = gss_import_sec_context_kerberos, + .gss_get_mic = gss_get_mic_kerberos, + .gss_verify_mic = gss_verify_mic_kerberos, +diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c +--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/auth_gss/gss_spkm3_mech.c 2007-12-21 15:36:12.000000000 -0500 +@@ -202,7 +202,7 @@ + return err; + } + +-static struct gss_api_ops gss_spkm3_ops = { ++static const struct gss_api_ops gss_spkm3_ops = { + .gss_import_sec_context = gss_import_sec_context_spkm3, + .gss_get_mic = gss_get_mic_spkm3, + .gss_verify_mic = gss_verify_mic_spkm3, +diff -Nurb linux-2.6.22-570/net/sunrpc/auth_null.c linux-2.6.22-591/net/sunrpc/auth_null.c +--- linux-2.6.22-570/net/sunrpc/auth_null.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/auth_null.c 2007-12-21 15:36:12.000000000 -0500 +@@ -76,7 +76,7 @@ + static int + nul_refresh(struct rpc_task *task) + { +- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; ++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + return 0; + } + +@@ -101,7 +101,7 @@ + return p; + } + +-struct rpc_authops authnull_ops = { ++const struct rpc_authops authnull_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_NULL, + #ifdef RPC_DEBUG +@@ -122,7 +122,7 @@ + }; + + static +-struct rpc_credops null_credops = { 
++const struct rpc_credops null_credops = { + .cr_name = "AUTH_NULL", + .crdestroy = nul_destroy_cred, + .crmatch = nul_match, +@@ -133,9 +133,11 @@ + + static + struct rpc_cred null_cred = { ++ .cr_lru = LIST_HEAD_INIT(null_cred.cr_lru), ++ .cr_auth = &null_auth, + .cr_ops = &null_credops, + .cr_count = ATOMIC_INIT(1), +- .cr_flags = RPCAUTH_CRED_UPTODATE, ++ .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, + #ifdef RPC_DEBUG + .cr_magic = RPCAUTH_CRED_MAGIC, + #endif +diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c linux-2.6.22-591/net/sunrpc/auth_unix.c +--- linux-2.6.22-570/net/sunrpc/auth_unix.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/sunrpc/auth_unix.c 2007-12-23 02:13:00.000000000 -0500 +@@ -22,11 +22,6 @@ + gid_t uc_gids[NFS_NGROUPS]; + }; + #define uc_uid uc_base.cr_uid +-#define uc_count uc_base.cr_count +-#define uc_flags uc_base.cr_flags +-#define uc_expire uc_base.cr_expire +- +-#define UNX_CRED_EXPIRE (60 * HZ) + + #define UNX_WRITESLACK (21 + (UNX_MAXNODENAME >> 2)) + +@@ -36,15 +31,14 @@ + + static struct rpc_auth unix_auth; + static struct rpc_cred_cache unix_cred_cache; +-static struct rpc_credops unix_credops; ++static const struct rpc_credops unix_credops; + + static struct rpc_auth * + unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) + { + dprintk("RPC: creating UNIX authenticator for client %p\n", + clnt); +- if (atomic_inc_return(&unix_auth.au_count) == 0) +- unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); ++ atomic_inc(&unix_auth.au_count); + return &unix_auth; + } + +@@ -52,7 +46,7 @@ + unx_destroy(struct rpc_auth *auth) + { + dprintk("RPC: destroying UNIX authenticator %p\n", auth); +- rpcauth_free_credcache(auth); ++ rpcauth_clear_credcache(auth->au_credcache); + } + + /* +@@ -76,8 +70,8 @@ + if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + return ERR_PTR(-ENOMEM); + +- atomic_set(&cred->uc_count, 1); +- cred->uc_flags = RPCAUTH_CRED_UPTODATE; ++ rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops); ++ cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; + if (flags & RPCAUTH_LOOKUP_ROOTCREDS) { + cred->uc_uid = 0; + cred->uc_gid = 0; +@@ -88,7 +82,6 @@ + if (groups > NFS_NGROUPS) + groups = NFS_NGROUPS; + +- cred->uc_uid = acred->uid; + cred->uc_gid = acred->gid; + cred->uc_tag = acred->tag; + for (i = 0; i < groups; i++) +@@ -96,17 +89,31 @@ + if (i < NFS_NGROUPS) + cred->uc_gids[i] = NOGROUP; + } +- cred->uc_base.cr_ops = &unix_credops; + +- return (struct rpc_cred *) cred; ++ return &cred->uc_base; + } + + static void +-unx_destroy_cred(struct rpc_cred *cred) ++ unx_free_cred(struct unx_cred *cred) + { ++ dprintk("RPC: unx_free_cred %p\n", cred); + kfree(cred); + } + ++static void ++unx_free_cred_callback(struct rcu_head *head) ++{ ++ struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu); ++ unx_free_cred(cred); ++} ++ ++static void ++unx_destroy_cred(struct rpc_cred *cred) ++{ ++ call_rcu(&cred->cr_rcu, unx_free_cred_callback); ++} ++ ++ + /* + * Match credentials against current process creds. 
+ * The root_override argument takes care of cases where the caller may +@@ -115,7 +122,7 @@ + static int + unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) + { +- struct unx_cred *cred = (struct unx_cred *) rcred; ++ struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); + int i; + + if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) { +@@ -147,7 +154,7 @@ + unx_marshal(struct rpc_task *task, __be32 *p) + { + struct rpc_clnt *clnt = task->tk_client; +- struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; ++ struct unx_cred *cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base); + __be32 *base, *hold; + int i, tag; + +@@ -159,7 +166,6 @@ + * Copy the UTS nodename captured when the client was created. + */ + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); +- tag = task->tk_client->cl_tag; + + *p++ = htonl((u32) TAGINO_UID(tag, + cred->uc_uid, cred->uc_tag)); +@@ -183,7 +189,7 @@ + static int + unx_refresh(struct rpc_task *task) + { +- task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE; ++ set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags); + return 0; + } + +@@ -212,7 +218,12 @@ + return p; + } + +-struct rpc_authops authunix_ops = { ++void __init rpc_init_authunix(void) ++{ ++ spin_lock_init(&unix_cred_cache.lock); ++} ++ ++const struct rpc_authops authunix_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_UNIX, +#ifdef RPC_DEBUG +@@ -226,7 +237,6 @@ + + static + struct rpc_cred_cache unix_cred_cache = { +- .expire = UNX_CRED_EXPIRE, + }; + + static +@@ -240,7 +250,7 @@ + }; + + static +-struct rpc_credops unix_credops = { ++const struct rpc_credops unix_credops = { + .cr_name = "AUTH_UNIX", + .crdestroy = unx_destroy_cred, + .crmatch = unx_match,
+diff -Nurb linux-2.6.22-570/net/sunrpc/clnt.c linux-2.6.22-591/net/sunrpc/clnt.c +--- linux-2.6.22-570/net/sunrpc/clnt.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/sunrpc/clnt.c 2007-12-21 15:36:12.000000000 -0500 +@@ -45,6 +45,12 @@ + dprintk("RPC: %5u %s (status %d)\n", t->tk_pid, \ + __FUNCTION__, t->tk_status) + ++/* ++ * All RPC clients are linked into this list ++ */ ++static LIST_HEAD(all_clients); ++static DEFINE_SPINLOCK(rpc_client_lock); ++ + static DECLARE_WAIT_QUEUE_HEAD(destroy_wait); + + +@@ -67,6 +73,21 @@ + static __be32 * call_header(struct rpc_task *task); + static __be32 * call_verify(struct rpc_task *task); + ++static int rpc_ping(struct rpc_clnt *clnt, int flags); ++ ++static void rpc_register_client(struct rpc_clnt *clnt) ++{ ++ spin_lock(&rpc_client_lock); ++ list_add(&clnt->cl_clients, &all_clients); ++ spin_unlock(&rpc_client_lock); ++} ++ ++static void rpc_unregister_client(struct rpc_clnt *clnt) ++{ ++ spin_lock(&rpc_client_lock); ++ list_del(&clnt->cl_clients); ++ spin_unlock(&rpc_client_lock); ++} + + static int + rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) +@@ -112,6 +133,9 @@ + dprintk("RPC: creating %s client for %s (xprt %p)\n", + program->name, servname, xprt); + ++ err = rpciod_up(); ++ if (err) ++ goto out_no_rpciod; + err = -EINVAL; + if (!xprt) + goto out_no_xprt; +@@ -122,8 +146,6 @@ + clnt = kzalloc(sizeof(*clnt), GFP_KERNEL); + if (!clnt) + goto out_err; +- atomic_set(&clnt->cl_users, 0); +- atomic_set(&clnt->cl_count, 1); + clnt->cl_parent = clnt; + + clnt->cl_server = clnt->cl_inline_name; +@@ -149,6 +171,8 @@ + if (clnt->cl_metrics == NULL) + goto out_no_stats; + clnt->cl_program = program; ++ INIT_LIST_HEAD(&clnt->cl_tasks); ++ spin_lock_init(&clnt->cl_lock); + + if (!xprt_bound(clnt->cl_xprt)) + clnt->cl_autobind = 1; +@@ -156,6 +180,8 @@ + clnt->cl_rtt = &clnt->cl_rtt_default; + rpc_init_rtt(&clnt->cl_rtt_default,
xprt->timeout.to_initval); + ++ kref_init(&clnt->cl_kref); ++ + err = rpc_setup_pipedir(clnt, program->pipe_dir_name); + if (err < 0) + goto out_no_path; +@@ -173,6 +199,7 @@ + if (clnt->cl_nodelen > UNX_MAXNODENAME) + clnt->cl_nodelen = UNX_MAXNODENAME; + memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); ++ rpc_register_client(clnt); + return clnt; + + out_no_auth: +@@ -189,6 +216,8 @@ + out_err: + xprt_put(xprt); + out_no_xprt: ++ rpciod_down(); ++out_no_rpciod: + return ERR_PTR(err); + } + +@@ -246,8 +275,6 @@ + clnt->cl_intr = 1; + if (args->flags & RPC_CLNT_CREATE_AUTOBIND) + clnt->cl_autobind = 1; +- if (args->flags & RPC_CLNT_CREATE_ONESHOT) +- clnt->cl_oneshot = 1; + if (args->flags & RPC_CLNT_CREATE_DISCRTRY) + clnt->cl_discrtry = 1; + /* TODO: handle RPC_CLNT_CREATE_TAGGED +@@ -271,24 +298,25 @@ + new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); + if (!new) + goto out_no_clnt; +- atomic_set(&new->cl_count, 1); +- atomic_set(&new->cl_users, 0); ++ new->cl_parent = clnt; ++ /* Turn off autobind on clones */ ++ new->cl_autobind = 0; ++ INIT_LIST_HEAD(&new->cl_tasks); ++ spin_lock_init(&new->cl_lock); ++ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); + new->cl_metrics = rpc_alloc_iostats(clnt); + if (new->cl_metrics == NULL) + goto out_no_stats; ++ kref_init(&new->cl_kref); + err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); + if (err != 0) + goto out_no_path; +- new->cl_parent = clnt; +- atomic_inc(&clnt->cl_count); +- new->cl_xprt = xprt_get(clnt->cl_xprt); +- /* Turn off autobind on clones */ +- new->cl_autobind = 0; +- new->cl_oneshot = 0; +- new->cl_dead = 0; +- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); + if (new->cl_auth) + atomic_inc(&new->cl_auth->au_count); ++ xprt_get(clnt->cl_xprt); ++ kref_get(&clnt->cl_kref); ++ rpc_register_client(new); ++ rpciod_up(); + return new; + out_no_path: + rpc_free_iostats(new->cl_metrics); +@@ -301,52 +329,34 @@ + + /* + * Properly shut down an RPC client, terminating all outstanding +- * requests. Note that we must be certain that cl_oneshot and +- * cl_dead are cleared, or else the client would be destroyed +- * when the last task releases it. ++ * requests. 
+ */ +-int +-rpc_shutdown_client(struct rpc_clnt *clnt) ++void rpc_shutdown_client(struct rpc_clnt *clnt) + { +- dprintk("RPC: shutting down %s client for %s, tasks=%d\n", +- clnt->cl_protname, clnt->cl_server, +- atomic_read(&clnt->cl_users)); +- +- while (atomic_read(&clnt->cl_users) > 0) { +- /* Don't let rpc_release_client destroy us */ +- clnt->cl_oneshot = 0; +- clnt->cl_dead = 0; ++ dprintk("RPC: shutting down %s client for %s\n", ++ clnt->cl_protname, clnt->cl_server); ++ ++ while (!list_empty(&clnt->cl_tasks)) { + rpc_killall_tasks(clnt); + wait_event_timeout(destroy_wait, +- !atomic_read(&clnt->cl_users), 1*HZ); +- } +- +- if (atomic_read(&clnt->cl_users) < 0) { +- printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n", +- clnt, atomic_read(&clnt->cl_users)); +-#ifdef RPC_DEBUG +- rpc_show_tasks(); +-#endif +- BUG(); ++ list_empty(&clnt->cl_tasks), 1*HZ); + } + +- return rpc_destroy_client(clnt); ++ rpc_release_client(clnt); + } + + /* +- * Delete an RPC client ++ * Free an RPC client + */ +-int +-rpc_destroy_client(struct rpc_clnt *clnt) ++static void ++rpc_free_client(struct kref *kref) + { +- if (!atomic_dec_and_test(&clnt->cl_count)) +- return 1; +- BUG_ON(atomic_read(&clnt->cl_users) != 0); ++ struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); + + dprintk("RPC: destroying %s client for %s\n", + clnt->cl_protname, clnt->cl_server); + if (clnt->cl_auth) { +- rpcauth_destroy(clnt->cl_auth); ++ rpcauth_release(clnt->cl_auth); + clnt->cl_auth = NULL; + } + if (!IS_ERR(clnt->cl_dentry)) { +@@ -354,33 +364,31 @@ + rpc_put_mount(); + } + if (clnt->cl_parent != clnt) { +- rpc_destroy_client(clnt->cl_parent); ++ rpc_release_client(clnt->cl_parent); + goto out_free; + } + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); + out_free: ++ rpc_unregister_client(clnt); + rpc_free_iostats(clnt->cl_metrics); + clnt->cl_metrics = NULL; + xprt_put(clnt->cl_xprt); ++ rpciod_down(); + kfree(clnt); +- return 0; + } + + /* +- * Release an RPC client ++ * Release reference to the RPC client + */ + void + rpc_release_client(struct rpc_clnt *clnt) + { +- dprintk("RPC: rpc_release_client(%p, %d)\n", +- clnt, atomic_read(&clnt->cl_users)); ++ dprintk("RPC: rpc_release_client(%p)\n", clnt); + +- if (!atomic_dec_and_test(&clnt->cl_users)) +- return; ++ if (list_empty(&clnt->cl_tasks)) + wake_up(&destroy_wait); +- if (clnt->cl_oneshot || clnt->cl_dead) +- rpc_destroy_client(clnt); ++ kref_put(&clnt->cl_kref, rpc_free_client); + } + + /** +@@ -471,82 +479,96 @@ + rpc_restore_sigmask(oldset); + } + +-/* +- * New rpc_call implementation +- */ +-int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) ++static ++struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt, ++ struct rpc_message *msg, ++ int flags, ++ const struct rpc_call_ops *ops, ++ void *data) + { +- struct rpc_task *task; ++ struct rpc_task *task, *ret; + sigset_t oldset; +- int status; +- +- /* If this client is slain all further I/O fails */ +- if (clnt->cl_dead) +- return -EIO; +- +- BUG_ON(flags & RPC_TASK_ASYNC); + +- task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL); +- if (task == NULL) +- return -ENOMEM; ++ task = rpc_new_task(clnt, flags, ops, data); ++ if (task == NULL) { ++ rpc_release_calldata(ops, data); ++ return ERR_PTR(-ENOMEM); ++ } + +- /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */ ++ /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */ + rpc_task_sigmask(task, &oldset); +- +- /* Set up the call info struct and execute the task */ 
++ if (msg != NULL) { + rpc_call_setup(task, msg, 0); +- if (task->tk_status == 0) { ++ if (task->tk_status != 0) { ++ ret = ERR_PTR(task->tk_status); ++ rpc_put_task(task); ++ goto out; ++ } ++ } + atomic_inc(&task->tk_count); + rpc_execute(task); +- } ++ ret = task; ++out: ++ rpc_restore_sigmask(&oldset); ++ return ret; ++} ++ ++/** ++ * rpc_call_sync - Perform a synchronous RPC call ++ * @clnt: pointer to RPC client ++ * @msg: RPC call parameters ++ * @flags: RPC call flags ++ */ ++int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags) ++{ ++ struct rpc_task *task; ++ int status; ++ ++ BUG_ON(flags & RPC_TASK_ASYNC); ++ ++ task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL); ++ if (IS_ERR(task)) ++ return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); +- rpc_restore_sigmask(&oldset); + return status; + } + +-/* +- * New rpc_call implementation ++/** ++ * rpc_call_async - Perform an asynchronous RPC call ++ * @clnt: pointer to RPC client ++ * @msg: RPC call parameters ++ * @flags: RPC call flags ++ * @ops: RPC call ops ++ * @data: user call data + */ + int + rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags, + const struct rpc_call_ops *tk_ops, void *data) + { + struct rpc_task *task; +- sigset_t oldset; +- int status; + +- /* If this client is slain all further I/O fails */ +- status = -EIO; +- if (clnt->cl_dead) +- goto out_release; +- +- flags |= RPC_TASK_ASYNC; +- +- /* Create/initialize a new RPC task */ +- status = -ENOMEM; +- if (!(task = rpc_new_task(clnt, flags, tk_ops, data))) +- goto out_release; +- +- /* Mask signals on GSS_AUTH upcalls */ +- rpc_task_sigmask(task, &oldset); +- +- rpc_call_setup(task, msg, 0); +- +- /* Set up the call info struct and execute the task */ +- status = task->tk_status; +- if (status == 0) +- rpc_execute(task); +- else ++ task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data); ++ if (IS_ERR(task)) ++ return PTR_ERR(task); + rpc_put_task(task); +- +- rpc_restore_sigmask(&oldset); +- return status; +-out_release: +- rpc_release_calldata(tk_ops, data); +- return status; ++ return 0; + } + ++/** ++ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it ++ * @clnt: pointer to RPC client ++ * @flags: RPC flags ++ * @ops: RPC call ops ++ * @data: user call data ++ */ ++struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, ++ const struct rpc_call_ops *tk_ops, ++ void *data) ++{ ++ return rpc_do_run_task(clnt, NULL, flags, tk_ops, data); ++} ++EXPORT_SYMBOL(rpc_run_task); + + void + rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags) +@@ -1424,7 +1446,7 @@ + .p_decode = rpcproc_decode_null, + }; + +-int rpc_ping(struct rpc_clnt *clnt, int flags) ++static int rpc_ping(struct rpc_clnt *clnt, int flags) + { + struct rpc_message msg = { + .rpc_proc = &rpcproc_null, +@@ -1435,3 +1457,51 @@ + put_rpccred(msg.rpc_cred); + return err; + } ++ ++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags) ++{ ++ struct rpc_message msg = { ++ .rpc_proc = &rpcproc_null, ++ .rpc_cred = cred, ++ }; ++ return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL); ++} ++EXPORT_SYMBOL(rpc_call_null); ++ ++#ifdef RPC_DEBUG ++void rpc_show_tasks(void) ++{ ++ struct rpc_clnt *clnt; ++ struct rpc_task *t; ++ ++ spin_lock(&rpc_client_lock); ++ if (list_empty(&all_clients)) ++ goto out; ++ printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " ++ "-rpcwait -action- ---ops--\n"); ++ 
list_for_each_entry(clnt, &all_clients, cl_clients) { ++ if (list_empty(&clnt->cl_tasks)) ++ continue; ++ spin_lock(&clnt->cl_lock); ++ list_for_each_entry(t, &clnt->cl_tasks, tk_task) { ++ const char *rpc_waitq = "none"; ++ ++ if (RPC_IS_QUEUED(t)) ++ rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); ++ ++ printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", ++ t->tk_pid, ++ (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), ++ t->tk_flags, t->tk_status, ++ t->tk_client, ++ (t->tk_client ? t->tk_client->cl_prog : 0), ++ t->tk_rqstp, t->tk_timeout, ++ rpc_waitq, ++ t->tk_action, t->tk_ops); ++ } ++ spin_unlock(&clnt->cl_lock); ++ } ++out: ++ spin_unlock(&rpc_client_lock); ++} ++#endif +diff -Nurb linux-2.6.22-570/net/sunrpc/rpc_pipe.c linux-2.6.22-591/net/sunrpc/rpc_pipe.c +--- linux-2.6.22-570/net/sunrpc/rpc_pipe.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/rpc_pipe.c 2007-12-21 15:36:12.000000000 -0500 +@@ -14,7 +14,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include +@@ -344,7 +344,7 @@ + mutex_lock(&inode->i_mutex); + clnt = RPC_I(inode)->private; + if (clnt) { +- atomic_inc(&clnt->cl_users); ++ kref_get(&clnt->cl_kref); + m->private = clnt; + } else { + single_release(inode, file); +@@ -448,6 +448,15 @@ + simple_release_fs(&rpc_mount, &rpc_mount_count); + } + ++static int rpc_delete_dentry(struct dentry *dentry) ++{ ++ return 1; ++} ++ ++static struct dentry_operations rpc_dentry_operations = { ++ .d_delete = rpc_delete_dentry, ++}; ++ + static int + rpc_lookup_parent(char *path, struct nameidata *nd) + { +@@ -506,7 +515,7 @@ + * FIXME: This probably has races. + */ + static void +-rpc_depopulate(struct dentry *parent) ++rpc_depopulate(struct dentry *parent, int start, int eof) + { + struct inode *dir = parent->d_inode; + struct list_head *pos, *next; +@@ -518,6 +527,10 @@ + spin_lock(&dcache_lock); + list_for_each_safe(pos, next, &parent->d_subdirs) { + dentry = list_entry(pos, struct dentry, d_u.d_child); ++ if (!dentry->d_inode || ++ dentry->d_inode->i_ino < start || ++ dentry->d_inode->i_ino >= eof) ++ continue; + spin_lock(&dentry->d_lock); + if (!d_unhashed(dentry)) { + dget_locked(dentry); +@@ -533,11 +546,11 @@ + if (n) { + do { + dentry = dvec[--n]; +- if (dentry->d_inode) { +- rpc_close_pipes(dentry->d_inode); ++ if (S_ISREG(dentry->d_inode->i_mode)) + simple_unlink(dir, dentry); +- } +- inode_dir_notify(dir, DN_DELETE); ++ else if (S_ISDIR(dentry->d_inode->i_mode)) ++ simple_rmdir(dir, dentry); ++ d_delete(dentry); + dput(dentry); + } while (n); + goto repeat; +@@ -560,6 +573,7 @@ + dentry = d_alloc_name(parent, files[i].name); + if (!dentry) + goto out_bad; ++ dentry->d_op = &rpc_dentry_operations; + mode = files[i].mode; + inode = rpc_get_inode(dir->i_sb, mode); + if (!inode) { +@@ -574,6 +588,7 @@ + if (S_ISDIR(mode)) + inc_nlink(dir); + d_add(dentry, inode); ++ fsnotify_create(dir, dentry); + } + mutex_unlock(&dir->i_mutex); + return 0; +@@ -595,7 +610,7 @@ + inode->i_ino = iunique(dir->i_sb, 100); + d_instantiate(dentry, inode); + inc_nlink(dir); +- inode_dir_notify(dir, DN_CREATE); ++ fsnotify_mkdir(dir, dentry); + return 0; + out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", +@@ -607,21 +622,14 @@ + __rpc_rmdir(struct inode *dir, struct dentry *dentry) + { + int error; +- +- shrink_dcache_parent(dentry); +- if (d_unhashed(dentry)) +- return 0; +- if ((error = simple_rmdir(dir, dentry)) != 0) ++ error = simple_rmdir(dir, dentry); ++ if (!error) ++ d_delete(dentry); + 
return error; +- if (!error) { +- inode_dir_notify(dir, DN_DELETE); +- d_drop(dentry); +- } +- return 0; + } + + static struct dentry * +-rpc_lookup_create(struct dentry *parent, const char *name, int len) ++rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) + { + struct inode *dir = parent->d_inode; + struct dentry *dentry; +@@ -630,7 +638,9 @@ + dentry = lookup_one_len(name, parent, len); + if (IS_ERR(dentry)) + goto out_err; +- if (dentry->d_inode) { ++ if (!dentry->d_inode) ++ dentry->d_op = &rpc_dentry_operations; ++ else if (exclusive) { + dput(dentry); + dentry = ERR_PTR(-EEXIST); + goto out_err; +@@ -649,7 +659,7 @@ + + if ((error = rpc_lookup_parent(path, nd)) != 0) + return ERR_PTR(error); +- dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len); ++ dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1); + if (IS_ERR(dentry)) + rpc_release_path(nd); + return dentry; +@@ -681,7 +691,7 @@ + rpc_release_path(&nd); + return dentry; + err_depopulate: +- rpc_depopulate(dentry); ++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); + __rpc_rmdir(dir, dentry); + err_dput: + dput(dentry); +@@ -701,7 +711,7 @@ + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); +- rpc_depopulate(dentry); ++ rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); + error = __rpc_rmdir(dir, dentry); + dput(dentry); + mutex_unlock(&dir->i_mutex); +@@ -716,10 +726,21 @@ + struct inode *dir, *inode; + struct rpc_inode *rpci; + +- dentry = rpc_lookup_create(parent, name, strlen(name)); ++ dentry = rpc_lookup_create(parent, name, strlen(name), 0); + if (IS_ERR(dentry)) + return dentry; + dir = parent->d_inode; ++ if (dentry->d_inode) { ++ rpci = RPC_I(dentry->d_inode); ++ if (rpci->private != private || ++ rpci->ops != ops || ++ rpci->flags != flags) { ++ dput (dentry); ++ dentry = ERR_PTR(-EBUSY); ++ } ++ rpci->nkern_readwriters++; ++ goto out; ++ } + inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); + if (!inode) + goto err_dput; +@@ -730,7 +751,8 @@ + rpci->private = private; + rpci->flags = flags; + rpci->ops = ops; +- inode_dir_notify(dir, DN_CREATE); ++ rpci->nkern_readwriters = 1; ++ fsnotify_create(dir, dentry); + dget(dentry); + out: + mutex_unlock(&dir->i_mutex); +@@ -754,13 +776,11 @@ + parent = dget_parent(dentry); + dir = parent->d_inode; + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); +- if (!d_unhashed(dentry)) { +- d_drop(dentry); +- if (dentry->d_inode) { ++ if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); +- } +- inode_dir_notify(dir, DN_DELETE); ++ if (!error) ++ d_delete(dentry); + } + dput(dentry); + mutex_unlock(&dir->i_mutex); +@@ -833,6 +853,7 @@ + rpci->nreaders = 0; + rpci->nwriters = 0; + INIT_LIST_HEAD(&rpci->in_upcall); ++ INIT_LIST_HEAD(&rpci->in_downcall); + INIT_LIST_HEAD(&rpci->pipe); + rpci->pipelen = 0; + init_waitqueue_head(&rpci->waitq); +diff -Nurb linux-2.6.22-570/net/sunrpc/rpcb_clnt.c linux-2.6.22-591/net/sunrpc/rpcb_clnt.c +--- linux-2.6.22-570/net/sunrpc/rpcb_clnt.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/rpcb_clnt.c 2007-12-21 15:36:12.000000000 -0500 +@@ -184,8 +184,7 @@ + .program = &rpcb_program, + .version = version, + .authflavor = RPC_AUTH_UNIX, +- .flags = (RPC_CLNT_CREATE_ONESHOT | +- RPC_CLNT_CREATE_NOPING), ++ .flags = RPC_CLNT_CREATE_NOPING, + }; + + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); +@@ 
-238,6 +237,7 @@ + + error = rpc_call_sync(rpcb_clnt, &msg, 0); + ++ rpc_shutdown_client(rpcb_clnt); + if (error < 0) + printk(KERN_WARNING "RPC: failed to contact local rpcbind " + "server (errno %d).\n", -error); +@@ -286,6 +286,7 @@ + return PTR_ERR(rpcb_clnt); + + status = rpc_call_sync(rpcb_clnt, &msg, 0); ++ rpc_shutdown_client(rpcb_clnt); + + if (status >= 0) { + if (map.r_port != 0) +@@ -379,6 +380,7 @@ + } + + child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); ++ rpc_release_client(rpcb_clnt); + if (IS_ERR(child)) { + status = -EIO; + dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n", +diff -Nurb linux-2.6.22-570/net/sunrpc/sched.c linux-2.6.22-591/net/sunrpc/sched.c +--- linux-2.6.22-570/net/sunrpc/sched.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/sched.c 2007-12-21 15:36:12.000000000 -0500 +@@ -25,7 +25,6 @@ + #ifdef RPC_DEBUG + #define RPCDBG_FACILITY RPCDBG_SCHED + #define RPC_TASK_MAGIC_ID 0xf00baa +-static int rpc_task_id; + #endif + + /* +@@ -40,7 +39,6 @@ + static mempool_t *rpc_buffer_mempool __read_mostly; + + static void __rpc_default_timer(struct rpc_task *task); +-static void rpciod_killall(void); + static void rpc_async_schedule(struct work_struct *); + static void rpc_release_task(struct rpc_task *task); + +@@ -50,23 +48,13 @@ + static RPC_WAITQ(delay_queue, "delayq"); + + /* +- * All RPC tasks are linked into this list +- */ +-static LIST_HEAD(all_tasks); +- +-/* + * rpciod-related stuff + */ + static DEFINE_MUTEX(rpciod_mutex); +-static unsigned int rpciod_users; ++static atomic_t rpciod_users = ATOMIC_INIT(0); + struct workqueue_struct *rpciod_workqueue; + + /* +- * Spinlock for other critical sections of code. +- */ +-static DEFINE_SPINLOCK(rpc_sched_lock); +- +-/* + * Disable the timer for a given RPC task. Should be called with + * queue->lock and bh_disabled in order to avoid races within + * rpc_run_timer(). +@@ -267,18 +255,33 @@ + return 0; + } + ++#ifdef RPC_DEBUG ++static void rpc_task_set_debuginfo(struct rpc_task *task) ++{ ++ static atomic_t rpc_pid; ++ ++ task->tk_magic = RPC_TASK_MAGIC_ID; ++ task->tk_pid = atomic_inc_return(&rpc_pid); ++} ++#else ++static inline void rpc_task_set_debuginfo(struct rpc_task *task) ++{ ++} ++#endif ++ + static void rpc_set_active(struct rpc_task *task) + { ++ struct rpc_clnt *clnt; + if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0) + return; +- spin_lock(&rpc_sched_lock); +-#ifdef RPC_DEBUG +- task->tk_magic = RPC_TASK_MAGIC_ID; +- task->tk_pid = rpc_task_id++; +-#endif ++ rpc_task_set_debuginfo(task); + /* Add to global list of all tasks */ +- list_add_tail(&task->tk_task, &all_tasks); +- spin_unlock(&rpc_sched_lock); ++ clnt = task->tk_client; ++ if (clnt != NULL) { ++ spin_lock(&clnt->cl_lock); ++ list_add_tail(&task->tk_task, &clnt->cl_tasks); ++ spin_unlock(&clnt->cl_lock); ++ } + } + + /* +@@ -818,6 +821,7 @@ + if (tk_ops->rpc_call_prepare != NULL) + task->tk_action = rpc_prepare_task; + task->tk_calldata = calldata; ++ INIT_LIST_HEAD(&task->tk_task); + + /* Initialize retry counters */ + task->tk_garb_retry = 2; +@@ -830,7 +834,7 @@ + task->tk_workqueue = rpciod_workqueue; + + if (clnt) { +- atomic_inc(&clnt->cl_users); ++ kref_get(&clnt->cl_kref); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + if (!clnt->cl_intr) +@@ -860,9 +864,7 @@ + } + + /* +- * Create a new task for the specified client. We have to +- * clean up after an allocation failure, as the client may +- * have specified "oneshot". 
++ * Create a new task for the specified client. + */ + struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata) + { +@@ -870,7 +872,7 @@ + + task = rpc_alloc_task(); + if (!task) +- goto cleanup; ++ goto out; + + rpc_init_task(task, clnt, flags, tk_ops, calldata); + +@@ -878,16 +880,6 @@ + task->tk_flags |= RPC_TASK_DYNAMIC; + out: + return task; +- +-cleanup: +- /* Check whether to release the client */ +- if (clnt) { +- printk("rpc_new_task: failed, users=%d, oneshot=%d\n", +- atomic_read(&clnt->cl_users), clnt->cl_oneshot); +- atomic_inc(&clnt->cl_users); /* pretend we were used ... */ +- rpc_release_client(clnt); +- } +- goto out; + } + + +@@ -920,11 +912,13 @@ + #endif + dprintk("RPC: %5u release task\n", task->tk_pid); + +- /* Remove from global task list */ +- spin_lock(&rpc_sched_lock); ++ if (!list_empty(&task->tk_task)) { ++ struct rpc_clnt *clnt = task->tk_client; ++ /* Remove from client task list */ ++ spin_lock(&clnt->cl_lock); + list_del(&task->tk_task); +- spin_unlock(&rpc_sched_lock); +- ++ spin_unlock(&clnt->cl_lock); ++ } + BUG_ON (RPC_IS_QUEUED(task)); + + /* Synchronously delete any running timer */ +@@ -939,29 +933,6 @@ + rpc_put_task(task); + } + +-/** +- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it +- * @clnt: pointer to RPC client +- * @flags: RPC flags +- * @ops: RPC call ops +- * @data: user call data +- */ +-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags, +- const struct rpc_call_ops *ops, +- void *data) +-{ +- struct rpc_task *task; +- task = rpc_new_task(clnt, flags, ops, data); +- if (task == NULL) { +- rpc_release_calldata(ops, data); +- return ERR_PTR(-ENOMEM); +- } +- atomic_inc(&task->tk_count); +- rpc_execute(task); +- return task; +-} +-EXPORT_SYMBOL(rpc_run_task); +- + /* + * Kill all tasks for the given client. + * XXX: kill their descendants as well? +@@ -969,44 +940,25 @@ + void rpc_killall_tasks(struct rpc_clnt *clnt) + { + struct rpc_task *rovr; +- struct list_head *le; + +- dprintk("RPC: killing all tasks for client %p\n", clnt); + ++ if (list_empty(&clnt->cl_tasks)) ++ return; ++ dprintk("RPC: killing all tasks for client %p\n", clnt); + /* + * Spin lock all_tasks to prevent changes... + */ +- spin_lock(&rpc_sched_lock); +- alltask_for_each(rovr, le, &all_tasks) { ++ spin_lock(&clnt->cl_lock); ++ list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) { + if (! RPC_IS_ACTIVATED(rovr)) + continue; +- if (!clnt || rovr->tk_client == clnt) { ++ if (!(rovr->tk_flags & RPC_TASK_KILLED)) { + rovr->tk_flags |= RPC_TASK_KILLED; + rpc_exit(rovr, -EIO); + rpc_wake_up_task(rovr); + } + } +- spin_unlock(&rpc_sched_lock); +-} +- +-static void rpciod_killall(void) +-{ +- unsigned long flags; +- +- while (!list_empty(&all_tasks)) { +- clear_thread_flag(TIF_SIGPENDING); +- rpc_killall_tasks(NULL); +- flush_workqueue(rpciod_workqueue); +- if (!list_empty(&all_tasks)) { +- dprintk("RPC: rpciod_killall: waiting for tasks " +- "to exit\n"); +- yield(); +- } +- } +- +- spin_lock_irqsave(&current->sighand->siglock, flags); +- recalc_sigpending(); +- spin_unlock_irqrestore(&current->sighand->siglock, flags); ++ spin_unlock(&clnt->cl_lock); + } + + /* +@@ -1018,28 +970,27 @@ + struct workqueue_struct *wq; + int error = 0; + ++ if (atomic_inc_not_zero(&rpciod_users)) ++ return 0; ++ + mutex_lock(&rpciod_mutex); +- dprintk("RPC: rpciod_up: users %u\n", rpciod_users); +- rpciod_users++; +- if (rpciod_workqueue) +- goto out; +- /* +- * If there's no pid, we should be the first user.
+- */ +- if (rpciod_users > 1) +- printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users); ++ ++ /* Guard against races with rpciod_down() */ ++ if (rpciod_workqueue != NULL) ++ goto out_ok; + /* + * Create the rpciod thread and wait for it to start. + */ ++ dprintk("RPC: creating workqueue rpciod\n"); + error = -ENOMEM; + wq = create_workqueue("rpciod"); +- if (wq == NULL) { +- printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); +- rpciod_users--; ++ if (wq == NULL) + goto out; +- } ++ + rpciod_workqueue = wq; + error = 0; ++out_ok: ++ atomic_inc(&rpciod_users); + out: + mutex_unlock(&rpciod_mutex); + return error; +@@ -1048,58 +999,18 @@ + void + rpciod_down(void) + { +- mutex_lock(&rpciod_mutex); +- dprintk("RPC: rpciod_down sema %u\n", rpciod_users); +- if (rpciod_users) { +- if (--rpciod_users) +- goto out; +- } else +- printk(KERN_WARNING "rpciod_down: no users??\n"); ++ if (!atomic_dec_and_test(&rpciod_users)) ++ return; + +- if (!rpciod_workqueue) { +- dprintk("RPC: rpciod_down: Nothing to do!\n"); +- goto out; +- } +- rpciod_killall(); ++ mutex_lock(&rpciod_mutex); ++ dprintk("RPC: destroying workqueue rpciod\n"); + ++ if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) { + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; +- out: +- mutex_unlock(&rpciod_mutex); +-} +- +-#ifdef RPC_DEBUG +-void rpc_show_tasks(void) +-{ +- struct list_head *le; +- struct rpc_task *t; +- +- spin_lock(&rpc_sched_lock); +- if (list_empty(&all_tasks)) { +- spin_unlock(&rpc_sched_lock); +- return; +- } +- printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " +- "-rpcwait -action- ---ops--\n"); +- alltask_for_each(t, le, &all_tasks) { +- const char *rpc_waitq = "none"; +- +- if (RPC_IS_QUEUED(t)) +- rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); +- +- printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", +- t->tk_pid, +- (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), +- t->tk_flags, t->tk_status, +- t->tk_client, +- (t->tk_client ? 
t->tk_client->cl_prog : 0), +- t->tk_rqstp, t->tk_timeout, +- rpc_waitq, +- t->tk_action, t->tk_ops); + } +- spin_unlock(&rpc_sched_lock); ++ mutex_unlock(&rpciod_mutex); + } +-#endif + + void + rpc_destroy_mempool(void) +diff -Nurb linux-2.6.22-570/net/sunrpc/stats.c linux-2.6.22-591/net/sunrpc/stats.c +--- linux-2.6.22-570/net/sunrpc/stats.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/stats.c 2007-12-21 15:36:15.000000000 -0500 +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #define RPCDBG_FACILITY RPCDBG_MISC + +@@ -265,7 +266,7 @@ + dprintk("RPC: registering /proc/net/rpc\n"); + if (!proc_net_rpc) { + struct proc_dir_entry *ent; +- ent = proc_mkdir("rpc", proc_net); ++ ent = proc_mkdir("rpc", init_net.proc_net); + if (ent) { + ent->owner = THIS_MODULE; + proc_net_rpc = ent; +@@ -279,7 +280,7 @@ + dprintk("RPC: unregistering /proc/net/rpc\n"); + if (proc_net_rpc) { + proc_net_rpc = NULL; +- remove_proc_entry("net/rpc", NULL); ++ remove_proc_entry("rpc", init_net.proc_net); + } + } + +diff -Nurb linux-2.6.22-570/net/sunrpc/sunrpc_syms.c linux-2.6.22-591/net/sunrpc/sunrpc_syms.c +--- linux-2.6.22-570/net/sunrpc/sunrpc_syms.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/sunrpc_syms.c 2007-12-21 15:36:12.000000000 -0500 +@@ -28,15 +28,11 @@ + EXPORT_SYMBOL(rpc_sleep_on); + EXPORT_SYMBOL(rpc_wake_up_next); + EXPORT_SYMBOL(rpc_wake_up_task); +-EXPORT_SYMBOL(rpciod_down); +-EXPORT_SYMBOL(rpciod_up); +-EXPORT_SYMBOL(rpc_new_task); + EXPORT_SYMBOL(rpc_wake_up_status); + + /* RPC client functions */ + EXPORT_SYMBOL(rpc_clone_client); + EXPORT_SYMBOL(rpc_bind_new_program); +-EXPORT_SYMBOL(rpc_destroy_client); + EXPORT_SYMBOL(rpc_shutdown_client); + EXPORT_SYMBOL(rpc_killall_tasks); + EXPORT_SYMBOL(rpc_call_sync); +@@ -61,7 +57,7 @@ + EXPORT_SYMBOL(rpcauth_create); + EXPORT_SYMBOL(rpcauth_lookupcred); + EXPORT_SYMBOL(rpcauth_lookup_credcache); +-EXPORT_SYMBOL(rpcauth_free_credcache); ++EXPORT_SYMBOL(rpcauth_destroy_credcache); + EXPORT_SYMBOL(rpcauth_init_credcache); + EXPORT_SYMBOL(put_rpccred); + +@@ -156,6 +152,7 @@ + cache_register(&ip_map_cache); + cache_register(&unix_gid_cache); + init_socket_xprt(); ++ rpcauth_init_module(); + out: + return err; + } +@@ -163,6 +160,7 @@ + static void __exit + cleanup_sunrpc(void) + { ++ rpcauth_remove_module(); + cleanup_socket_xprt(); + unregister_rpc_pipefs(); + rpc_destroy_mempool(); +diff -Nurb linux-2.6.22-570/net/sunrpc/xprt.c linux-2.6.22-591/net/sunrpc/xprt.c +--- linux-2.6.22-570/net/sunrpc/xprt.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/xprt.c 2007-12-21 15:36:12.000000000 -0500 +@@ -127,7 +127,7 @@ + clear_bit(XPRT_LOCKED, &xprt->state); + smp_mb__after_clear_bit(); + } else +- schedule_work(&xprt->task_cleanup); ++ queue_work(rpciod_workqueue, &xprt->task_cleanup); + } + + /* +@@ -515,7 +515,7 @@ + if (xprt_connecting(xprt)) + xprt_release_write(xprt, NULL); + else +- schedule_work(&xprt->task_cleanup); ++ queue_work(rpciod_workqueue, &xprt->task_cleanup); + return; + out_abort: + spin_unlock(&xprt->transport_lock); +diff -Nurb linux-2.6.22-570/net/sunrpc/xprtsock.c linux-2.6.22-591/net/sunrpc/xprtsock.c +--- linux-2.6.22-570/net/sunrpc/xprtsock.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sunrpc/xprtsock.c 2007-12-21 15:36:12.000000000 -0500 +@@ -653,8 +653,7 @@ + + dprintk("RPC: xs_destroy xprt %p\n", xprt); + +- cancel_delayed_work(&transport->connect_worker); +- flush_scheduled_work(); ++ 
cancel_rearming_delayed_work(&transport->connect_worker); + + xprt_disconnect(xprt); + xs_close(xprt); +@@ -1001,7 +1000,7 @@ + /* Try to schedule an autoclose RPC calls */ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) +- schedule_work(&xprt->task_cleanup); ++ queue_work(rpciod_workqueue, &xprt->task_cleanup); + default: + xprt_disconnect(xprt); + } +@@ -1410,18 +1409,16 @@ + dprintk("RPC: xs_connect delayed xprt %p for %lu " + "seconds\n", + xprt, xprt->reestablish_timeout / HZ); +- schedule_delayed_work(&transport->connect_worker, ++ queue_delayed_work(rpciod_workqueue, ++ &transport->connect_worker, + xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) + xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; + } else { + dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); +- schedule_delayed_work(&transport->connect_worker, 0); +- +- /* flush_scheduled_work can sleep... */ +- if (!RPC_IS_ASYNC(task)) +- flush_scheduled_work(); ++ queue_delayed_work(rpciod_workqueue, ++ &transport->connect_worker, 0); + } + } + +diff -Nurb linux-2.6.22-570/net/sysctl_net.c linux-2.6.22-591/net/sysctl_net.c +--- linux-2.6.22-570/net/sysctl_net.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/sysctl_net.c 2007-12-21 15:36:15.000000000 -0500 +@@ -54,3 +54,31 @@ + #endif + { 0 }, + }; ++ ++struct ctl_table multi_net_table[] = { ++ { ++ .ctl_name = NET_CORE, ++ .procname = "core", ++ .mode = 0555, ++ .child = multi_core_table, ++ }, ++#ifdef CONFIG_INET ++ { ++ .ctl_name = NET_IPV4, ++ .procname = "ipv4", ++ .mode = 0555, ++ .child = multi_ipv4_table, ++ }, ++#endif ++ {}, ++}; ++ ++struct ctl_table net_root_table[] = { ++ { ++ .ctl_name = CTL_NET, ++ .procname = "net", ++ .mode = 0555, ++ .child = multi_net_table, ++ }, ++ {}, ++}; +diff -Nurb linux-2.6.22-570/net/tipc/eth_media.c linux-2.6.22-591/net/tipc/eth_media.c +--- linux-2.6.22-570/net/tipc/eth_media.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/tipc/eth_media.c 2007-12-21 15:36:15.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/tipc/eth_media.c: Ethernet bearer support for TIPC + * +- * Copyright (c) 2001-2006, Ericsson AB +- * Copyright (c) 2005-2006, Wind River Systems ++ * Copyright (c) 2001-2007, Ericsson AB ++ * Copyright (c) 2005-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #define MAX_ETH_BEARERS 2 + #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI +@@ -87,6 +88,9 @@ + /** + * recv_msg - handle incoming TIPC message from an Ethernet interface + * ++ * Accept only packets explicitly sent to this node, or broadcast packets; ++ * ignores packets sent using Ethernet multicast, and traffic sent to other ++ * nodes (which can happen if interface is running in promiscuous mode). 
+ * Routine truncates any Ethernet padding/CRC appended to the message, + * and ensures message size matches actual length + */ +@@ -97,10 +101,13 @@ + struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; + u32 size; + ++ if (dev->nd_net != &init_net) { ++ kfree_skb(buf); ++ return 0; ++ } ++ + if (likely(eb_ptr->bearer)) { +- if (likely(!dev->promiscuity) || +- !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) || +- !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) { ++ if (likely(buf->pkt_type <= PACKET_BROADCAST)) { + size = msg_size((struct tipc_msg *)buf->data); + skb_trim(buf, size); + if (likely(buf->len == size)) { +@@ -128,7 +135,7 @@ + + /* Find device with specified name */ + +- for_each_netdev(pdev){ ++ for_each_netdev(&init_net, pdev){ + if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { + dev = pdev; + break; +@@ -191,6 +198,9 @@ + struct eth_bearer *eb_ptr = ð_bearers[0]; + struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + while ((eb_ptr->dev != dev)) { + if (++eb_ptr == stop) + return NOTIFY_DONE; /* couldn't find device */ +diff -Nurb linux-2.6.22-570/net/tipc/link.c linux-2.6.22-591/net/tipc/link.c +--- linux-2.6.22-570/net/tipc/link.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/tipc/link.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/tipc/link.c: TIPC link code + * +- * Copyright (c) 1996-2006, Ericsson AB +- * Copyright (c) 2004-2006, Wind River Systems ++ * Copyright (c) 1996-2007, Ericsson AB ++ * Copyright (c) 2004-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -1260,7 +1260,7 @@ + * (Must not hold any locks while building message.) 
+ */ + +- res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt, ++ res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt, + !sender->user_port, &buf); + + read_lock_bh(&tipc_net_lock); +@@ -1271,7 +1271,7 @@ + if (likely(l_ptr)) { + if (likely(buf)) { + res = link_send_buf_fast(l_ptr, buf, +- &sender->max_pkt); ++ &sender->publ.max_pkt); + if (unlikely(res < 0)) + buf_discard(buf); + exit: +@@ -1299,12 +1299,12 @@ + * then re-try fast path or fragment the message + */ + +- sender->max_pkt = link_max_pkt(l_ptr); ++ sender->publ.max_pkt = link_max_pkt(l_ptr); + tipc_node_unlock(node); + read_unlock_bh(&tipc_net_lock); + + +- if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt) ++ if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt) + goto again; + + return link_send_sections_long(sender, msg_sect, +@@ -1357,7 +1357,7 @@ + + again: + fragm_no = 1; +- max_pkt = sender->max_pkt - INT_H_SIZE; ++ max_pkt = sender->publ.max_pkt - INT_H_SIZE; + /* leave room for tunnel header in case of link changeover */ + fragm_sz = max_pkt - INT_H_SIZE; + /* leave room for fragmentation header in each fragment */ +@@ -1463,7 +1463,7 @@ + goto reject; + } + if (link_max_pkt(l_ptr) < max_pkt) { +- sender->max_pkt = link_max_pkt(l_ptr); ++ sender->publ.max_pkt = link_max_pkt(l_ptr); + tipc_node_unlock(node); + for (; buf_chain; buf_chain = buf) { + buf = buf_chain->next; +diff -Nurb linux-2.6.22-570/net/tipc/port.c linux-2.6.22-591/net/tipc/port.c +--- linux-2.6.22-570/net/tipc/port.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/tipc/port.c 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/tipc/port.c: TIPC port code + * +- * Copyright (c) 1992-2006, Ericsson AB +- * Copyright (c) 2004-2005, Wind River Systems ++ * Copyright (c) 1992-2007, Ericsson AB ++ * Copyright (c) 2004-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -239,6 +239,8 @@ + } + + tipc_port_lock(ref); ++ p_ptr->publ.usr_handle = usr_handle; ++ p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; + p_ptr->publ.ref = ref; + msg = &p_ptr->publ.phdr; + msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); +@@ -248,11 +250,9 @@ + msg_set_importance(msg,importance); + p_ptr->last_in_seqno = 41; + p_ptr->sent = 1; +- p_ptr->publ.usr_handle = usr_handle; + INIT_LIST_HEAD(&p_ptr->wait_list); + INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); + p_ptr->congested_link = NULL; +- p_ptr->max_pkt = MAX_PKT_DEFAULT; + p_ptr->dispatcher = dispatcher; + p_ptr->wakeup = wakeup; + p_ptr->user_port = NULL; +@@ -1243,7 +1243,7 @@ + res = TIPC_OK; + exit: + tipc_port_unlock(p_ptr); +- p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref); ++ p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref); + return res; + } + +diff -Nurb linux-2.6.22-570/net/tipc/port.h linux-2.6.22-591/net/tipc/port.h +--- linux-2.6.22-570/net/tipc/port.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/tipc/port.h 2007-12-21 15:36:12.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/tipc/port.h: Include file for TIPC port code + * +- * Copyright (c) 1994-2006, Ericsson AB +- * Copyright (c) 2004-2005, Wind River Systems ++ * Copyright (c) 1994-2007, Ericsson AB ++ * Copyright (c) 2004-2007, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without +@@ -81,7 +81,6 @@ + * @acked: + * @publications: list of publications for port + * @pub_count: total # of publications port has made during its lifetime +- * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @probing_state: + * @probing_interval: + * @last_in_seqno: +@@ -102,7 +101,6 @@ + u32 acked; + struct list_head publications; + u32 pub_count; +- u32 max_pkt; + u32 probing_state; + u32 probing_interval; + u32 last_in_seqno; +diff -Nurb linux-2.6.22-570/net/tipc/socket.c linux-2.6.22-591/net/tipc/socket.c +--- linux-2.6.22-570/net/tipc/socket.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/tipc/socket.c 2007-12-21 15:36:15.000000000 -0500 +@@ -1,8 +1,8 @@ + /* + * net/tipc/socket.c: TIPC socket API + * +- * Copyright (c) 2001-2006, Ericsson AB +- * Copyright (c) 2004-2006, Wind River Systems ++ * Copyright (c) 2001-2007, Ericsson AB ++ * Copyright (c) 2004-2007, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without +@@ -162,13 +162,16 @@ + * + * Returns 0 on success, errno otherwise + */ +-static int tipc_create(struct socket *sock, int protocol) ++static int tipc_create(struct net *net, struct socket *sock, int protocol) + { + struct tipc_sock *tsock; + struct tipc_port *port; + struct sock *sk; + u32 ref; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (unlikely(protocol != 0)) + return -EPROTONOSUPPORT; + +@@ -198,7 +201,7 @@ + return -EPROTOTYPE; + } + +- sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1); ++ sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1); + if (!sk) { + tipc_deleteport(ref); + return -ENOMEM; +@@ -607,23 +610,24 @@ + static int send_stream(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) + { ++ struct tipc_port *tport; + struct msghdr my_msg; + struct iovec my_iov; + struct iovec *curr_iov; + int curr_iovlen; + char __user *curr_start; ++ u32 hdr_size; + int curr_left; + int bytes_to_send; + int bytes_sent; + int res; + +- if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE)) +- return send_packet(iocb, sock, m, total_len); +- +- /* Can only send large data streams if already connected */ ++ /* Handle special cases where there is no connection */ + + if (unlikely(sock->state != SS_CONNECTED)) { +- if (sock->state == SS_DISCONNECTING) ++ if (sock->state == SS_UNCONNECTED) ++ return send_packet(iocb, sock, m, total_len); ++ else if (sock->state == SS_DISCONNECTING) + return -EPIPE; + else + return -ENOTCONN; +@@ -648,17 +652,25 @@ + my_msg.msg_name = NULL; + bytes_sent = 0; + ++ tport = tipc_sk(sock->sk)->p; ++ hdr_size = msg_hdr_sz(&tport->phdr); ++ + while (curr_iovlen--) { + curr_start = curr_iov->iov_base; + curr_left = curr_iov->iov_len; + + while (curr_left) { +- bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE) +- ? curr_left : TIPC_MAX_USER_MSG_SIZE; ++ bytes_to_send = tport->max_pkt - hdr_size; ++ if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE) ++ bytes_to_send = TIPC_MAX_USER_MSG_SIZE; ++ if (curr_left < bytes_to_send) ++ bytes_to_send = curr_left; + my_iov.iov_base = curr_start; + my_iov.iov_len = bytes_to_send; + if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) { +- return bytes_sent ? 
bytes_sent : res; ++ if (bytes_sent != 0) ++ res = bytes_sent; ++ return res; + } + curr_left -= bytes_to_send; + curr_start += bytes_to_send; +@@ -1363,7 +1375,7 @@ + } + buf = skb_peek(&sock->sk->sk_receive_queue); + +- res = tipc_create(newsock, 0); ++ res = tipc_create(sock->sk->sk_net, newsock, 0); + if (!res) { + struct tipc_sock *new_tsock = tipc_sk(newsock->sk); + struct tipc_portid id; +@@ -1600,33 +1612,6 @@ + } + + /** +- * Placeholders for non-implemented functionality +- * +- * Returns error code (POSIX-compliant where defined) +- */ +- +-static int ioctl(struct socket *s, u32 cmd, unsigned long arg) +-{ +- return -EINVAL; +-} +- +-static int no_mmap(struct file *file, struct socket *sock, +- struct vm_area_struct *vma) +-{ +- return -EINVAL; +-} +-static ssize_t no_sendpage(struct socket *sock, struct page *page, +- int offset, size_t size, int flags) +-{ +- return -EINVAL; +-} +- +-static int no_skpair(struct socket *s1, struct socket *s2) +-{ +- return -EOPNOTSUPP; +-} +- +-/** + * Protocol switches for the various types of TIPC sockets + */ + +@@ -1636,19 +1621,19 @@ + .release = release, + .bind = bind, + .connect = connect, +- .socketpair = no_skpair, ++ .socketpair = sock_no_socketpair, + .accept = accept, + .getname = get_name, + .poll = poll, +- .ioctl = ioctl, ++ .ioctl = sock_no_ioctl, + .listen = listen, + .shutdown = shutdown, + .setsockopt = setsockopt, + .getsockopt = getsockopt, + .sendmsg = send_msg, + .recvmsg = recv_msg, +- .mmap = no_mmap, +- .sendpage = no_sendpage ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage + }; + + static struct proto_ops packet_ops = { +@@ -1657,19 +1642,19 @@ + .release = release, + .bind = bind, + .connect = connect, +- .socketpair = no_skpair, ++ .socketpair = sock_no_socketpair, + .accept = accept, + .getname = get_name, + .poll = poll, +- .ioctl = ioctl, ++ .ioctl = sock_no_ioctl, + .listen = listen, + .shutdown = shutdown, + .setsockopt = setsockopt, + .getsockopt = getsockopt, + .sendmsg = send_packet, + .recvmsg = recv_msg, +- .mmap = no_mmap, +- .sendpage = no_sendpage ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage + }; + + static struct proto_ops stream_ops = { +@@ -1678,19 +1663,19 @@ + .release = release, + .bind = bind, + .connect = connect, +- .socketpair = no_skpair, ++ .socketpair = sock_no_socketpair, + .accept = accept, + .getname = get_name, + .poll = poll, +- .ioctl = ioctl, ++ .ioctl = sock_no_ioctl, + .listen = listen, + .shutdown = shutdown, + .setsockopt = setsockopt, + .getsockopt = getsockopt, + .sendmsg = send_stream, + .recvmsg = recv_stream, +- .mmap = no_mmap, +- .sendpage = no_sendpage ++ .mmap = sock_no_mmap, ++ .sendpage = sock_no_sendpage + }; + + static struct net_proto_family tipc_family_ops = { +diff -Nurb linux-2.6.22-570/net/unix/af_unix.c linux-2.6.22-591/net/unix/af_unix.c +--- linux-2.6.22-570/net/unix/af_unix.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/unix/af_unix.c 2007-12-21 15:36:15.000000000 -0500 +@@ -117,8 +117,8 @@ + #include + #include + #include ++#include + +-int sysctl_unix_max_dgram_qlen __read_mostly = 10; + + struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; + DEFINE_SPINLOCK(unix_table_lock); +@@ -245,7 +245,8 @@ + spin_unlock(&unix_table_lock); + } + +-static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname, ++static struct sock *__unix_find_socket_byname(struct net *net, ++ struct sockaddr_un *sunname, + int len, int type, unsigned hash) + { + struct sock *s; +@@ -254,7 +255,7 @@ + sk_for_each(s, node, 
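The reworked send_stream above no longer special-cases small messages: each pass clamps the chunk to the MTU hint now exported through the public tipc_port, to the protocol ceiling, and to what is left in the current iovec; the local no_mmap/no_skpair placeholders are likewise dropped in favour of the generic sock_no_* stubs from net/core/sock.c. A minimal sketch of the clamping, with a hypothetical helper name:

static size_t tipc_stream_chunk(u32 max_pkt, u32 hdr_size, size_t curr_left)
{
        size_t n = max_pkt - hdr_size;          /* payload room per packet */

        if (n > TIPC_MAX_USER_MSG_SIZE)         /* protocol upper bound */
                n = TIPC_MAX_USER_MSG_SIZE;
        if (n > curr_left)                      /* don't run past the iovec */
                n = curr_left;
        return n;
}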
&unix_socket_table[hash ^ type]) { + struct unix_sock *u = unix_sk(s); + +- if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT)) ++ if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT) || (s->sk_net != net)) + continue; + if (u->addr->len == len && + !memcmp(u->addr->name, sunname, len)) +@@ -265,21 +266,22 @@ + return s; + } + +-static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname, ++static inline struct sock *unix_find_socket_byname(struct net *net, ++ struct sockaddr_un *sunname, + int len, int type, + unsigned hash) + { + struct sock *s; + + spin_lock(&unix_table_lock); +- s = __unix_find_socket_byname(sunname, len, type, hash); ++ s = __unix_find_socket_byname(net, sunname, len, type, hash); + if (s) + sock_hold(s); + spin_unlock(&unix_table_lock); + return s; + } + +-static struct sock *unix_find_socket_byinode(struct inode *i) ++static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) + { + struct sock *s; + struct hlist_node *node; +@@ -289,6 +291,9 @@ + &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { + struct dentry *dentry = unix_sk(s)->dentry; + ++ if (s->sk_net != net) ++ continue; ++ + if(dentry && dentry->d_inode == i) + { + sock_hold(s); +@@ -571,7 +576,7 @@ + */ + static struct lock_class_key af_unix_sk_receive_queue_lock_key; + +-static struct sock * unix_create1(struct socket *sock) ++static struct sock * unix_create1(struct net *net, struct socket *sock) + { + struct sock *sk = NULL; + struct unix_sock *u; +@@ -579,7 +584,7 @@ + if (atomic_read(&unix_nr_socks) >= 2*get_max_files()) + goto out; + +- sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1); ++ sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1); + if (!sk) + goto out; + +@@ -590,7 +595,7 @@ + &af_unix_sk_receive_queue_lock_key); + + sk->sk_write_space = unix_write_space; +- sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen; ++ sk->sk_max_ack_backlog = net->sysctl_unix_max_dgram_qlen; + sk->sk_destruct = unix_sock_destructor; + u = unix_sk(sk); + u->dentry = NULL; +@@ -604,7 +609,7 @@ + return sk; + } + +-static int unix_create(struct socket *sock, int protocol) ++static int unix_create(struct net *net, struct socket *sock, int protocol) + { + if (protocol && protocol != PF_UNIX) + return -EPROTONOSUPPORT; +@@ -631,7 +636,7 @@ + return -ESOCKTNOSUPPORT; + } + +- return unix_create1(sock) ? 0 : -ENOMEM; ++ return unix_create1(net, sock) ? 0 : -ENOMEM; + } + + static int unix_release(struct socket *sock) +@@ -649,6 +654,7 @@ + static int unix_autobind(struct socket *sock) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct unix_sock *u = unix_sk(sk); + static u32 ordernum = 1; + struct unix_address * addr; +@@ -675,7 +681,7 @@ + spin_lock(&unix_table_lock); + ordernum = (ordernum+1)&0xFFFFF; + +- if (__unix_find_socket_byname(addr->name, addr->len, sock->type, ++ if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type, + addr->hash)) { + spin_unlock(&unix_table_lock); + /* Sanity yield. It is unusual case, but yet... 
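Both AF_UNIX lookups now take the target namespace and skip sockets that live in another one. The filter, reduced to its core (illustrative only, not a function in this patch):

static struct sock *first_in_ns(struct net *net, struct hlist_head *head)
{
        struct sock *s;
        struct hlist_node *node;

        sk_for_each(s, node, head) {
                if (s->sk_net != net)           /* belongs to another netns */
                        continue;
                return s;                       /* first hit in this netns */
        }
        return NULL;
}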
*/ +@@ -695,7 +701,8 @@ + return err; + } + +-static struct sock *unix_find_other(struct sockaddr_un *sunname, int len, ++static struct sock *unix_find_other(struct net *net, ++ struct sockaddr_un *sunname, int len, + int type, unsigned hash, int *error) + { + struct sock *u; +@@ -713,7 +720,7 @@ + err = -ECONNREFUSED; + if (!S_ISSOCK(nd.dentry->d_inode->i_mode)) + goto put_fail; +- u=unix_find_socket_byinode(nd.dentry->d_inode); ++ u=unix_find_socket_byinode(net, nd.dentry->d_inode); + if (!u) + goto put_fail; + +@@ -729,7 +736,7 @@ + } + } else { + err = -ECONNREFUSED; +- u=unix_find_socket_byname(sunname, len, type, hash); ++ u=unix_find_socket_byname(net, sunname, len, type, hash); + if (u) { + struct dentry *dentry; + dentry = unix_sk(u)->dentry; +@@ -751,6 +758,7 @@ + static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct unix_sock *u = unix_sk(sk); + struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; + struct dentry * dentry = NULL; +@@ -825,7 +833,7 @@ + + if (!sunaddr->sun_path[0]) { + err = -EADDRINUSE; +- if (__unix_find_socket_byname(sunaddr, addr_len, ++ if (__unix_find_socket_byname(net, sunaddr, addr_len, + sk->sk_type, hash)) { + unix_release_addr(addr); + goto out_unlock; +@@ -891,6 +899,7 @@ + int alen, int flags) + { + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr; + struct sock *other; + unsigned hash; +@@ -907,7 +916,7 @@ + goto out; + + restart: +- other=unix_find_other(sunaddr, alen, sock->type, hash, &err); ++ other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err); + if (!other) + goto out; + +@@ -987,6 +996,7 @@ + { + struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr; + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct unix_sock *u = unix_sk(sk), *newu, *otheru; + struct sock *newsk = NULL; + struct sock *other = NULL; +@@ -1015,7 +1025,7 @@ + err = -ENOMEM; + + /* create new sock for complete connection */ +- newsk = unix_create1(NULL); ++ newsk = unix_create1(sk->sk_net, NULL); + if (newsk == NULL) + goto out; + +@@ -1026,7 +1036,7 @@ + + restart: + /* Find listening sock. 
*/ +- other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err); ++ other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err); + if (!other) + goto out; + +@@ -1305,6 +1315,7 @@ + { + struct sock_iocb *siocb = kiocb_to_siocb(kiocb); + struct sock *sk = sock->sk; ++ struct net *net = sk->sk_net; + struct unix_sock *u = unix_sk(sk); + struct sockaddr_un *sunaddr=msg->msg_name; + struct sock *other = NULL; +@@ -1368,7 +1379,7 @@ + if (sunaddr == NULL) + goto out_free; + +- other = unix_find_other(sunaddr, namelen, sk->sk_type, ++ other = unix_find_other(net, sunaddr, namelen, sk->sk_type, + hash, &err); + if (other==NULL) + goto out_free; +@@ -1974,12 +1985,18 @@ + + + #ifdef CONFIG_PROC_FS +-static struct sock *unix_seq_idx(int *iter, loff_t pos) ++struct unix_iter_state { ++ struct net *net; ++ int i; ++}; ++static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos) + { + loff_t off = 0; + struct sock *s; + +- for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) { ++ for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) { ++ if (s->sk_net != iter->net) ++ continue; + if (off == pos) + return s; + ++off; +@@ -1990,17 +2007,24 @@ + + static void *unix_seq_start(struct seq_file *seq, loff_t *pos) + { ++ struct unix_iter_state *iter = seq->private; + spin_lock(&unix_table_lock); +- return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1); ++ return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1); + } + + static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) + { ++ struct unix_iter_state *iter = seq->private; ++ struct sock *sk = v; + ++*pos; + + if (v == (void *)1) +- return first_unix_socket(seq->private); +- return next_unix_socket(seq->private, v); ++ sk = first_unix_socket(&iter->i); ++ else ++ sk = next_unix_socket(&iter->i, sk); ++ while (sk && (sk->sk_net != iter->net)) ++ sk = next_unix_socket(&iter->i, sk); ++ return sk; + } + + static void unix_seq_stop(struct seq_file *seq, void *v) +@@ -2064,7 +2088,7 @@ + { + struct seq_file *seq; + int rc = -ENOMEM; +- int *iter = kmalloc(sizeof(int), GFP_KERNEL); ++ struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL); + + if (!iter) + goto out; +@@ -2075,7 +2099,8 @@ + + seq = file->private_data; + seq->private = iter; +- *iter = 0; ++ iter->net = get_net(PROC_NET(inode)); ++ iter->i = 0; + out: + return rc; + out_kfree: +@@ -2083,12 +2108,20 @@ + goto out; + } + ++static int unix_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct unix_iter_state *iter = seq->private; ++ put_net(iter->net); ++ return seq_release_private(inode, file); ++} ++ + static const struct file_operations unix_seq_fops = { + .owner = THIS_MODULE, + .open = unix_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release_private, ++ .release = unix_seq_release, + }; + + #endif +@@ -2099,6 +2132,33 @@ + .owner = THIS_MODULE, + }; + ++ ++static int unix_net_init(struct net *net) ++{ ++ int error = -ENOMEM; ++ ++ net->sysctl_unix_max_dgram_qlen = 10; ++#ifdef CONFIG_PROC_FS ++ if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) ++ goto out; ++#endif ++ unix_sysctl_register(net); ++ error = 0; ++out: ++ return 0; ++} ++ ++static void unix_net_exit(struct net *net) ++{ ++ unix_sysctl_unregister(net); ++ proc_net_remove(net, "unix"); ++} ++ ++static struct pernet_operations unix_net_ops = { ++ .init = unix_net_init, ++ .exit = unix_net_exit, ++}; ++ + static int __init 
af_unix_init(void) + { + int rc = -1; +@@ -2114,10 +2174,7 @@ + } + + sock_register(&unix_family_ops); +-#ifdef CONFIG_PROC_FS +- proc_net_fops_create("unix", 0, &unix_seq_fops); +-#endif +- unix_sysctl_register(); ++ register_pernet_subsys(&unix_net_ops); + out: + return rc; + } +@@ -2125,9 +2182,8 @@ + static void __exit af_unix_exit(void) + { + sock_unregister(PF_UNIX); +- unix_sysctl_unregister(); +- proc_net_remove("unix"); + proto_unregister(&unix_proto); ++ unregister_pernet_subsys(&unix_net_ops); + } + + module_init(af_unix_init); +diff -Nurb linux-2.6.22-570/net/unix/sysctl_net_unix.c linux-2.6.22-591/net/unix/sysctl_net_unix.c +--- linux-2.6.22-570/net/unix/sysctl_net_unix.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/unix/sysctl_net_unix.c 2007-12-21 15:36:15.000000000 -0500 +@@ -14,47 +14,71 @@ + + #include + +-static ctl_table unix_table[] = { ++static struct unix_sysctl_table { ++ struct ctl_table_header *sysctl_header; ++ struct ctl_table unix_table[2]; ++ struct ctl_table unix_net_table[2]; ++ struct ctl_table unix_root_table[2]; ++} unix_sysctl = { ++ .unix_table = { + { + .ctl_name = NET_UNIX_MAX_DGRAM_QLEN, + .procname = "max_dgram_qlen", +- .data = &sysctl_unix_max_dgram_qlen, ++ .data = &init_net.sysctl_unix_max_dgram_qlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +- { .ctl_name = 0 } +-}; +- +-static ctl_table unix_net_table[] = { ++ {} ++ }, ++ .unix_net_table = { + { + .ctl_name = NET_UNIX, + .procname = "unix", + .mode = 0555, +- .child = unix_table ++ .child = unix_sysctl.unix_table + }, +- { .ctl_name = 0 } +-}; +- +-static ctl_table unix_root_table[] = { ++ {} ++ }, ++ .unix_root_table = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, +- .child = unix_net_table ++ .child = unix_sysctl.unix_net_table + }, +- { .ctl_name = 0 } ++ {} ++ } + }; + +-static struct ctl_table_header * unix_sysctl_header; +- +-void unix_sysctl_register(void) ++void unix_sysctl_register(struct net *net) + { +- unix_sysctl_header = register_sysctl_table(unix_root_table); ++ struct unix_sysctl_table *table; ++ int i; ++ ++ table = kmemdup(&unix_sysctl, sizeof(*table), GFP_KERNEL); ++ if (!table) ++ return; ++ for (i = 0; i < ARRAY_SIZE(table->unix_table) - 1; i++) ++ table->unix_table[i].data += (char *)net - (char *)&init_net; ++ ++ table->unix_net_table[0].child = table->unix_table; ++ table->unix_root_table[0].child = table->unix_net_table; ++ ++ table->sysctl_header = ++ register_net_sysctl_table(net, table->unix_root_table); ++ if (!table->sysctl_header) { ++ kfree(table); ++ return; ++ } ++ net->unix_sysctl = table; + } + +-void unix_sysctl_unregister(void) ++void unix_sysctl_unregister(struct net *net) + { +- unregister_sysctl_table(unix_sysctl_header); ++ struct unix_sysctl_table *table = net->unix_sysctl; ++ if (table) ++ unregister_net_sysctl_table(table->sysctl_header); ++ kfree(table); + } + +diff -Nurb linux-2.6.22-570/net/wanrouter/wanproc.c linux-2.6.22-591/net/wanrouter/wanproc.c +--- linux-2.6.22-570/net/wanrouter/wanproc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/wanrouter/wanproc.c 2007-12-21 15:36:15.000000000 -0500 +@@ -28,6 +28,7 @@ + #include /* WAN router API definitions */ + #include + #include ++#include + + #include + +@@ -287,7 +288,7 @@ + int __init wanrouter_proc_init(void) + { + struct proc_dir_entry *p; +- proc_router = proc_mkdir(ROUTER_NAME, proc_net); ++ proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net); + if (!proc_router) + goto fail; + +@@ -303,7 +304,7 
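unix_net_ops above is the pernet_operations shape this series uses throughout: .init seeds per-namespace defaults, proc entries and sysctls, .exit tears them down. (As written, unix_net_init returns 0 even when proc_net_fops_create fails and error holds -ENOMEM, so that failure is silently swallowed.) A minimal sketch of the pattern, names hypothetical:

static int foo_net_init(struct net *net)
{
        /* per-namespace defaults, /proc/net entries, sysctls */
        return 0;
}

static void foo_net_exit(struct net *net)
{
        /* undo everything foo_net_init set up for this netns */
}

static struct pernet_operations foo_net_ops = {
        .init = foo_net_init,
        .exit = foo_net_exit,
};

/* module init:  register_pernet_subsys(&foo_net_ops);   */
/* module exit:  unregister_pernet_subsys(&foo_net_ops); */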
@@ + fail_stat: + remove_proc_entry("config", proc_router); + fail_config: +- remove_proc_entry(ROUTER_NAME, proc_net); ++ remove_proc_entry(ROUTER_NAME, init_net.proc_net); + fail: + return -ENOMEM; + } +@@ -316,7 +317,7 @@ + { + remove_proc_entry("config", proc_router); + remove_proc_entry("status", proc_router); +- remove_proc_entry(ROUTER_NAME, proc_net); ++ remove_proc_entry(ROUTER_NAME, init_net.proc_net); + } + + /* +diff -Nurb linux-2.6.22-570/net/wireless/wext.c linux-2.6.22-591/net/wireless/wext.c +--- linux-2.6.22-570/net/wireless/wext.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/wireless/wext.c 2007-12-21 15:36:15.000000000 -0500 +@@ -95,6 +95,7 @@ + #include + + #include /* Pretty obvious */ ++#include + #include /* New driver API */ + #include + #include +@@ -672,7 +673,22 @@ + + static int wireless_seq_open(struct inode *inode, struct file *file) + { +- return seq_open(file, &wireless_seq_ops); ++ struct seq_file *seq; ++ int res; ++ res = seq_open(file, &wireless_seq_ops); ++ if (!res) { ++ seq = file->private_data; ++ seq->private = get_net(PROC_NET(inode)); ++ } ++ return res; ++} ++ ++static int wireless_seq_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *seq = file->private_data; ++ struct net *net = seq->private; ++ put_net(net); ++ return seq_release(inode, file); + } + + static const struct file_operations wireless_seq_fops = { +@@ -680,17 +696,22 @@ + .open = wireless_seq_open, + .read = seq_read, + .llseek = seq_lseek, +- .release = seq_release, ++ .release = wireless_seq_release, + }; + +-int __init wext_proc_init(void) ++int wext_proc_init(struct net *net) + { + /* Create /proc/net/wireless entry */ +- if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) ++ if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops)) + return -ENOMEM; + + return 0; + } ++ ++void wext_proc_exit(struct net *net) ++{ ++ proc_net_remove(net, "wireless"); ++} + #endif /* CONFIG_PROC_FS */ + + /************************** IOCTL SUPPORT **************************/ +@@ -1010,7 +1031,7 @@ + * Main IOCTl dispatcher. + * Check the type of IOCTL and call the appropriate wrapper... + */ +-static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd) ++static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd) + { + struct net_device *dev; + iw_handler handler; +@@ -1019,7 +1040,7 @@ + * The copy_to/from_user() of ifr is also dealt with in there */ + + /* Make sure the device exist */ +- if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) ++ if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL) + return -ENODEV; + + /* A bunch of special cases, then the generic case... 
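The per-net sysctl registration above rests on a pointer-rebasing trick: the template table's .data fields point into init_net, and each kmemdup'd copy shifts them by the byte distance between the new namespace and init_net, so one static template serves every namespace. Spelled out for one entry (illustrative):

/* template: .data = &init_net.sysctl_unix_max_dgram_qlen */
ptrdiff_t off = (char *)net - (char *)&init_net;

table->unix_table[0].data = (char *)table->unix_table[0].data + off;
/* now: .data == &net->sysctl_unix_max_dgram_qlen */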
+@@ -1053,7 +1074,7 @@ + } + + /* entry point from dev ioctl */ +-int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd, ++int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd, + void __user *arg) + { + int ret; +@@ -1065,9 +1086,9 @@ + && !capable(CAP_NET_ADMIN)) + return -EPERM; + +- dev_load(ifr->ifr_name); ++ dev_load(net, ifr->ifr_name); + rtnl_lock(); +- ret = wireless_process_ioctl(ifr, cmd); ++ ret = wireless_process_ioctl(net, ifr, cmd); + rtnl_unlock(); + if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq))) + return -EFAULT; +@@ -1111,8 +1132,13 @@ + { + struct sk_buff *skb; + +- while ((skb = skb_dequeue(&wireless_nlevent_queue))) +- rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); ++ while ((skb = skb_dequeue(&wireless_nlevent_queue))) { ++ struct net_device *dev = skb->dev; ++ struct net *net = dev->nd_net; ++ skb->dev = NULL; ++ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); ++ dev_put(dev); ++ } + } + + static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); +@@ -1173,6 +1199,9 @@ + kfree_skb(skb); + return; + } ++ /* Remember the device until we are in process context */ ++ dev_hold(dev); ++ skb->dev = dev; + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + skb_queue_tail(&wireless_nlevent_queue, skb); + tasklet_schedule(&wireless_nlevent_tasklet); +diff -Nurb linux-2.6.22-570/net/x25/af_x25.c linux-2.6.22-591/net/x25/af_x25.c +--- linux-2.6.22-570/net/x25/af_x25.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/net/x25/af_x25.c 2007-12-21 15:36:15.000000000 -0500 +@@ -191,6 +191,9 @@ + struct net_device *dev = ptr; + struct x25_neigh *nb; + ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + if (dev->type == ARPHRD_X25 + #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE) + || dev->type == ARPHRD_ETHER +@@ -466,10 +469,10 @@ + .obj_size = sizeof(struct x25_sock), + }; + +-static struct sock *x25_alloc_socket(void) ++static struct sock *x25_alloc_socket(struct net *net) + { + struct x25_sock *x25; +- struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1); ++ struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1); + + if (!sk) + goto out; +@@ -485,17 +488,20 @@ + return sk; + } + +-static int x25_create(struct socket *sock, int protocol) ++static int x25_create(struct net *net, struct socket *sock, int protocol) + { + struct sock *sk; + struct x25_sock *x25; + int rc = -ESOCKTNOSUPPORT; + ++ if (net != &init_net) ++ return -EAFNOSUPPORT; ++ + if (sock->type != SOCK_SEQPACKET || protocol) + goto out; + + rc = -ENOMEM; +- if ((sk = x25_alloc_socket()) == NULL) ++ if ((sk = x25_alloc_socket(net)) == NULL) + goto out; + + x25 = x25_sk(sk); +@@ -546,7 +552,7 @@ + if (osk->sk_type != SOCK_SEQPACKET) + goto out; + +- if ((sk = x25_alloc_socket()) == NULL) ++ if ((sk = x25_alloc_socket(osk->sk_net)) == NULL) + goto out; + + x25 = x25_sk(sk); +diff -Nurb linux-2.6.22-570/net/x25/x25_dev.c linux-2.6.22-591/net/x25/x25_dev.c +--- linux-2.6.22-570/net/x25/x25_dev.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/x25/x25_dev.c 2007-12-21 15:36:15.000000000 -0500 +@@ -95,6 +95,9 @@ + struct sk_buff *nskb; + struct x25_neigh *nb; + ++ if (dev->nd_net != &init_net) ++ goto drop; ++ + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + goto drop; +diff -Nurb linux-2.6.22-570/net/x25/x25_proc.c linux-2.6.22-591/net/x25/x25_proc.c +--- linux-2.6.22-570/net/x25/x25_proc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/x25/x25_proc.c 2007-12-21 
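The wireless event queue runs its tasklet without knowing which namespace a message came from, so the enqueue side pins the originating device and the tasklet recovers the namespace before dropping the reference. The pairing, as the two hunks above apply it:

/* enqueue (may be atomic context): remember the device */
dev_hold(dev);
skb->dev = dev;
skb_queue_tail(&wireless_nlevent_queue, skb);

/* tasklet (process context): recover the netns, notify, release */
struct net_device *dev = skb->dev;
struct net *net = dev->nd_net;

skb->dev = NULL;
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
dev_put(dev);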
15:36:15.000000000 -0500 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -301,7 +302,7 @@ + struct proc_dir_entry *p; + int rc = -ENOMEM; + +- x25_proc_dir = proc_mkdir("x25", proc_net); ++ x25_proc_dir = proc_mkdir("x25", init_net.proc_net); + if (!x25_proc_dir) + goto out; + +@@ -328,7 +329,7 @@ + out_socket: + remove_proc_entry("route", x25_proc_dir); + out_route: +- remove_proc_entry("x25", proc_net); ++ remove_proc_entry("x25", init_net.proc_net); + goto out; + } + +@@ -337,7 +338,7 @@ + remove_proc_entry("forward", x25_proc_dir); + remove_proc_entry("route", x25_proc_dir); + remove_proc_entry("socket", x25_proc_dir); +- remove_proc_entry("x25", proc_net); ++ remove_proc_entry("x25", init_net.proc_net); + } + + #else /* CONFIG_PROC_FS */ +diff -Nurb linux-2.6.22-570/net/x25/x25_route.c linux-2.6.22-591/net/x25/x25_route.c +--- linux-2.6.22-570/net/x25/x25_route.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/x25/x25_route.c 2007-12-21 15:36:15.000000000 -0500 +@@ -129,7 +129,7 @@ + */ + struct net_device *x25_dev_get(char *devname) + { +- struct net_device *dev = dev_get_by_name(devname); ++ struct net_device *dev = dev_get_by_name(&init_net, devname); + + if (dev && + (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25 +diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_policy.c linux-2.6.22-591/net/xfrm/xfrm_policy.c +--- linux-2.6.22-570/net/xfrm/xfrm_policy.c 2007-12-21 15:35:57.000000000 -0500 ++++ linux-2.6.22-591/net/xfrm/xfrm_policy.c 2007-12-21 15:36:15.000000000 -0500 +@@ -30,8 +30,6 @@ + + #include "xfrm_hash.h" + +-int sysctl_xfrm_larval_drop __read_mostly; +- + DEFINE_MUTEX(xfrm_cfg_mutex); + EXPORT_SYMBOL(xfrm_cfg_mutex); + +@@ -1570,7 +1568,7 @@ + + if (unlikely(nx<0)) { + err = nx; +- if (err == -EAGAIN && sysctl_xfrm_larval_drop) { ++ if (err == -EAGAIN && init_net.sysctl_xfrm_larval_drop) { + /* EREMOTE tells the caller to generate + * a one-shot blackhole route. + */ +@@ -1954,8 +1952,8 @@ + void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) + { + while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { +- dst->dev = &loopback_dev; +- dev_hold(&loopback_dev); ++ dst->dev = &init_net.loopback_dev; ++ dev_hold(dst->dev); + dev_put(dev); + } + } +@@ -2357,6 +2355,11 @@ + + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) + { ++ struct net_device *dev = ptr; ++ ++ if (dev->nd_net != &init_net) ++ return NOTIFY_DONE; ++ + switch (event) { + case NETDEV_DOWN: + xfrm_flush_bundles(); +diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_state.c linux-2.6.22-591/net/xfrm/xfrm_state.c +--- linux-2.6.22-570/net/xfrm/xfrm_state.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/xfrm/xfrm_state.c 2007-12-21 15:36:15.000000000 -0500 +@@ -28,14 +28,6 @@ + struct sock *xfrm_nl; + EXPORT_SYMBOL(xfrm_nl); + +-u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME; +-EXPORT_SYMBOL(sysctl_xfrm_aevent_etime); +- +-u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE; +-EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); +- +-u32 sysctl_xfrm_acq_expires __read_mostly = 30; +- + /* Each xfrm_state may be linked to two tables: + + 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. 
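xfrm_dev_event above, like the x25 handlers before it, gates on the initial namespace: protocols that have not yet been converted simply ignore devices and packets from any other netns. The canonical guard (names hypothetical):

static int foo_device_event(struct notifier_block *this,
                            unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;

        if (dev->nd_net != &init_net)   /* protocol is init_net-only */
                return NOTIFY_DONE;

        /* ... handle the event exactly as before ... */
        return NOTIFY_DONE;
}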
(input,ctl) +@@ -665,8 +657,8 @@ + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); + } +- x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; +- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; ++ x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; ++ x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; + add_timer(&x->timer); + xfrm_state_num++; + xfrm_hash_grow_check(x->bydst.next != NULL); +@@ -815,9 +807,9 @@ + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; +- x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires; ++ x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires; + xfrm_state_hold(x); +- x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; ++ x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ; + add_timer(&x->timer); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(daddr, saddr, family); +@@ -1775,6 +1767,19 @@ + + EXPORT_SYMBOL(xfrm_init_state); + ++ ++static int xfrm_state_pernet_init(struct net *net) ++{ ++ net->sysctl_xfrm_aevent_etime = XFRM_AE_ETIME; ++ net->sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE; ++ net->sysctl_xfrm_acq_expires = 30; ++ return 0; ++} ++ ++static struct pernet_operations xfrm_state_net_ops = { ++ .init = xfrm_state_pernet_init, ++}; ++ + void __init xfrm_state_init(void) + { + unsigned int sz; +@@ -1789,5 +1794,7 @@ + xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + + INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task); ++ ++ register_pernet_subsys(&xfrm_state_net_ops); + } + +diff -Nurb linux-2.6.22-570/net/xfrm/xfrm_user.c linux-2.6.22-591/net/xfrm/xfrm_user.c +--- linux-2.6.22-570/net/xfrm/xfrm_user.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/net/xfrm/xfrm_user.c 2007-12-21 15:36:15.000000000 -0500 +@@ -374,7 +374,8 @@ + return err; + } + +-static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, ++static struct xfrm_state *xfrm_state_construct(struct net *net, ++ struct xfrm_usersa_info *p, + struct rtattr **xfrma, + int *errp) + { +@@ -410,9 +411,9 @@ + goto error; + + x->km.seq = p->seq; +- x->replay_maxdiff = sysctl_xfrm_aevent_rseqth; ++ x->replay_maxdiff = net->sysctl_xfrm_aevent_rseqth; + /* sysctl_xfrm_aevent_etime is in 100ms units */ +- x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; ++ x->replay_maxage = (net->sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M; + x->preplay.bitmap = 0; + x->preplay.seq = x->replay.seq+x->replay_maxdiff; + x->preplay.oseq = x->replay.oseq +x->replay_maxdiff; +@@ -436,6 +437,7 @@ + static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, + struct rtattr **xfrma) + { ++ struct net *net = skb->sk->sk_net; + struct xfrm_usersa_info *p = NLMSG_DATA(nlh); + struct xfrm_state *x; + int err; +@@ -445,7 +447,7 @@ + if (err) + return err; + +- x = xfrm_state_construct(p, xfrma, &err); ++ x = xfrm_state_construct(net, p, xfrma, &err); + if (!x) + return err; + +@@ -2559,7 +2561,7 @@ + + printk(KERN_INFO "Initializing XFRM netlink socket\n"); + +- nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, ++ nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX, + xfrm_netlink_rcv, NULL, THIS_MODULE); + if (nlsk == NULL) + return -ENOMEM; +diff -Nurb linux-2.6.22-570/rej linux-2.6.22-591/rej +--- linux-2.6.22-570/rej 1969-12-31 19:00:00.000000000 -0500 ++++ linux-2.6.22-591/rej 2007-12-21 15:36:12.000000000 -0500 +@@ -0,0 +1,28 @@ ++vi -o 
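The aevent knobs keep their old defaults, only now per namespace; sysctl_xfrm_aevent_etime stays in 100 ms units, so converting to jiffies divides by XFRM_AE_ETH_M. Assuming the stock 2.6.22 values of 10 for both XFRM_AE_ETIME and XFRM_AE_ETH_M, the default replay timeout works out to one second:

/* etime is in 100 ms units; convert to jiffies */
x->replay_maxage = (net->sysctl_xfrm_aevent_etime * HZ) / XFRM_AE_ETH_M;
/* default: (10 * HZ) / 10 == HZ, i.e. one second */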
./drivers/dma/ioatdma.c ./drivers/dma/ioatdma.c.rej ++vi -o ./fs/nfs/super.c ./fs/nfs/super.c.rej ++vi -o ./fs/ocfs2/aops.c ./fs/ocfs2/aops.c.rej ++vi -o ./fs/ocfs2/file.c ./fs/ocfs2/file.c.rej ++vi -o ./fs/ocfs2/super.c ./fs/ocfs2/super.c.rej ++vi -o ./fs/proc/base.c ./fs/proc/base.c.rej ++vi -o ./fs/sysfs/file.c ./fs/sysfs/file.c.rej ++vi -o ./fs/sync.c ./fs/sync.c.rej ++vi -o ./include/acpi/processor.h ./include/acpi/processor.h.rej ++vi -o ./include/linux/sunrpc/clnt.h ./include/linux/sunrpc/clnt.h.rej ++vi -o ./include/linux/syscalls.h ./include/linux/syscalls.h.rej ++vi -o ./include/linux/nfs_mount.h ./include/linux/nfs_mount.h.rej ++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej ++vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej ++vi -o ./include/linux/fs.h ./include/linux/fs.h.rej ++vi -o ./kernel/timer.c ./kernel/timer.c.rej ++vi -o ./kernel/fork.c ./kernel/fork.c.rej ++vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej ++vi -o ./kernel/sys.c ./kernel/sys.c.rej ++vi -o ./kernel/user.c ./kernel/user.c.rej ++vi -o ./kernel/utsname.c ./kernel/utsname.c.rej ++vi -o ./kernel/sched.c ./kernel/sched.c.rej ++vi -o ./kernel/container.c ./kernel/container.c.rej ++vi -o ./mm/memory.c ./mm/memory.c.rej ++vi -o ./mm/hugetlb.c ./mm/hugetlb.c.rej ++vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej ++vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej ++vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej +diff -Nurb linux-2.6.22-570/scripts/Makefile.build.orig linux-2.6.22-591/scripts/Makefile.build.orig +--- linux-2.6.22-570/scripts/Makefile.build.orig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/scripts/Makefile.build.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,348 +0,0 @@ +-# ========================================================================== +-# Building +-# ========================================================================== +- +-src := $(obj) +- +-PHONY := __build +-__build: +- +-# Read .config if it exist, otherwise ignore +--include include/config/auto.conf +- +-include scripts/Kbuild.include +- +-# The filename Kbuild has precedence over Makefile +-kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) +-include $(if $(wildcard $(kbuild-dir)/Kbuild), $(kbuild-dir)/Kbuild, $(kbuild-dir)/Makefile) +- +-include scripts/Makefile.lib +- +-ifdef host-progs +-ifneq ($(hostprogs-y),$(host-progs)) +-$(warning kbuild: $(obj)/Makefile - Usage of host-progs is deprecated. Please replace with hostprogs-y!) +-hostprogs-y += $(host-progs) +-endif +-endif +- +-# Do not include host rules unles needed +-ifneq ($(hostprogs-y)$(hostprogs-m),) +-include scripts/Makefile.host +-endif +- +-ifneq ($(KBUILD_SRC),) +-# Create output directory if not already present +-_dummy := $(shell [ -d $(obj) ] || mkdir -p $(obj)) +- +-# Create directories for object files if directory does not exist +-# Needed when obj-y := dir/file.o syntax is used +-_dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d))) +-endif +- +- +-ifdef EXTRA_TARGETS +-$(warning kbuild: $(obj)/Makefile - Usage of EXTRA_TARGETS is obsolete in 2.6. Please fix!) +-endif +- +-ifdef build-targets +-$(warning kbuild: $(obj)/Makefile - Usage of build-targets is obsolete in 2.6. Please fix!) +-endif +- +-ifdef export-objs +-$(warning kbuild: $(obj)/Makefile - Usage of export-objs is obsolete in 2.6. Please fix!) +-endif +- +-ifdef O_TARGET +-$(warning kbuild: $(obj)/Makefile - Usage of O_TARGET := $(O_TARGET) is obsolete in 2.6. Please fix!) 
+-endif +- +-ifdef L_TARGET +-$(error kbuild: $(obj)/Makefile - Use of L_TARGET is replaced by lib-y in 2.6. Please fix!) +-endif +- +-ifdef list-multi +-$(warning kbuild: $(obj)/Makefile - list-multi := $(list-multi) is obsolete in 2.6. Please fix!) +-endif +- +-ifndef obj +-$(warning kbuild: Makefile.build is included improperly) +-endif +- +-# =========================================================================== +- +-ifneq ($(strip $(lib-y) $(lib-m) $(lib-n) $(lib-)),) +-lib-target := $(obj)/lib.a +-endif +- +-ifneq ($(strip $(obj-y) $(obj-m) $(obj-n) $(obj-) $(lib-target)),) +-builtin-target := $(obj)/built-in.o +-endif +- +-# We keep a list of all modules in $(MODVERDIR) +- +-__build: $(if $(KBUILD_BUILTIN),$(builtin-target) $(lib-target) $(extra-y)) \ +- $(if $(KBUILD_MODULES),$(obj-m)) \ +- $(subdir-ym) $(always) +- @: +- +-# Linus' kernel sanity checking tool +-ifneq ($(KBUILD_CHECKSRC),0) +- ifeq ($(KBUILD_CHECKSRC),2) +- quiet_cmd_force_checksrc = CHECK $< +- cmd_force_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; +- else +- quiet_cmd_checksrc = CHECK $< +- cmd_checksrc = $(CHECK) $(CHECKFLAGS) $(c_flags) $< ; +- endif +-endif +- +- +-# Compile C sources (.c) +-# --------------------------------------------------------------------------- +- +-# Default is built-in, unless we know otherwise +-modkern_cflags := $(CFLAGS_KERNEL) +-quiet_modtag := $(empty) $(empty) +- +-$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) +- +-$(real-objs-m) : quiet_modtag := [M] +-$(real-objs-m:.o=.i) : quiet_modtag := [M] +-$(real-objs-m:.o=.s) : quiet_modtag := [M] +-$(real-objs-m:.o=.lst): quiet_modtag := [M] +- +-$(obj-m) : quiet_modtag := [M] +- +-# Default for not multi-part modules +-modname = $(basetarget) +- +-$(multi-objs-m) : modname = $(modname-multi) +-$(multi-objs-m:.o=.i) : modname = $(modname-multi) +-$(multi-objs-m:.o=.s) : modname = $(modname-multi) +-$(multi-objs-m:.o=.lst) : modname = $(modname-multi) +-$(multi-objs-y) : modname = $(modname-multi) +-$(multi-objs-y:.o=.i) : modname = $(modname-multi) +-$(multi-objs-y:.o=.s) : modname = $(modname-multi) +-$(multi-objs-y:.o=.lst) : modname = $(modname-multi) +- +-quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ +-cmd_cc_s_c = $(CC) $(c_flags) -fverbose-asm -S -o $@ $< +- +-$(obj)/%.s: $(src)/%.c FORCE +- $(call if_changed_dep,cc_s_c) +- +-quiet_cmd_cc_i_c = CPP $(quiet_modtag) $@ +-cmd_cc_i_c = $(CPP) $(c_flags) -o $@ $< +- +-$(obj)/%.i: $(src)/%.c FORCE +- $(call if_changed_dep,cc_i_c) +- +-quiet_cmd_cc_symtypes_c = SYM $(quiet_modtag) $@ +-cmd_cc_symtypes_c = \ +- $(CPP) -D__GENKSYMS__ $(c_flags) $< \ +- | $(GENKSYMS) -T $@ >/dev/null; \ +- test -s $@ || rm -f $@ +- +-$(obj)/%.symtypes : $(src)/%.c FORCE +- $(call if_changed_dep,cc_symtypes_c) +- +-# C (.c) files +-# The C file is compiled and updated dependency information is generated. +-# (See cmd_cc_o_c + relevant part of rule_cc_o_c) +- +-quiet_cmd_cc_o_c = CC $(quiet_modtag) $@ +- +-ifndef CONFIG_MODVERSIONS +-cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< +- +-else +-# When module versioning is enabled the following steps are executed: +-# o compile a .tmp_.o from .c +-# o if .tmp_.o doesn't contain a __ksymtab version, i.e. does +-# not export symbols, we just rename .tmp_.o to .o and +-# are done. 
+-# o otherwise, we calculate symbol versions using the good old +-# genksyms on the preprocessed source and postprocess them in a way +-# that they are usable as a linker script +-# o generate .o from .tmp_.o using the linker to +-# replace the unresolved symbols __crc_exported_symbol with +-# the actual value of the checksum generated by genksyms +- +-cmd_cc_o_c = $(CC) $(c_flags) -c -o $(@D)/.tmp_$(@F) $< +-cmd_modversions = \ +- if $(OBJDUMP) -h $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ +- $(CPP) -D__GENKSYMS__ $(c_flags) $< \ +- | $(GENKSYMS) $(if $(KBUILD_SYMTYPES), \ +- -T $(@D)/$(@F:.o=.symtypes)) -a $(ARCH) \ +- > $(@D)/.tmp_$(@F:.o=.ver); \ +- \ +- $(LD) $(LDFLAGS) -r -o $@ $(@D)/.tmp_$(@F) \ +- -T $(@D)/.tmp_$(@F:.o=.ver); \ +- rm -f $(@D)/.tmp_$(@F) $(@D)/.tmp_$(@F:.o=.ver); \ +- else \ +- mv -f $(@D)/.tmp_$(@F) $@; \ +- fi; +-endif +- +-define rule_cc_o_c +- $(call echo-cmd,checksrc) $(cmd_checksrc) \ +- $(call echo-cmd,cc_o_c) $(cmd_cc_o_c); \ +- $(cmd_modversions) \ +- scripts/basic/fixdep $(depfile) $@ '$(call make-cmd,cc_o_c)' > \ +- $(dot-target).tmp; \ +- rm -f $(depfile); \ +- mv -f $(dot-target).tmp $(dot-target).cmd +-endef +- +-# Built-in and composite module parts +-$(obj)/%.o: $(src)/%.c FORCE +- $(call cmd,force_checksrc) +- $(call if_changed_rule,cc_o_c) +- +-# Single-part modules are special since we need to mark them in $(MODVERDIR) +- +-$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE +- $(call cmd,force_checksrc) +- $(call if_changed_rule,cc_o_c) +- @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) +- +-quiet_cmd_cc_lst_c = MKLST $@ +- cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \ +- $(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \ +- System.map $(OBJDUMP) > $@ +- +-$(obj)/%.lst: $(src)/%.c FORCE +- $(call if_changed_dep,cc_lst_c) +- +-# Compile assembler sources (.S) +-# --------------------------------------------------------------------------- +- +-modkern_aflags := $(AFLAGS_KERNEL) +- +-$(real-objs-m) : modkern_aflags := $(AFLAGS_MODULE) +-$(real-objs-m:.o=.s): modkern_aflags := $(AFLAGS_MODULE) +- +-quiet_cmd_as_s_S = CPP $(quiet_modtag) $@ +-cmd_as_s_S = $(CPP) $(a_flags) -o $@ $< +- +-$(obj)/%.s: $(src)/%.S FORCE +- $(call if_changed_dep,as_s_S) +- +-quiet_cmd_as_o_S = AS $(quiet_modtag) $@ +-cmd_as_o_S = $(CC) $(a_flags) -c -o $@ $< +- +-$(obj)/%.o: $(src)/%.S FORCE +- $(call if_changed_dep,as_o_S) +- +-targets += $(real-objs-y) $(real-objs-m) $(lib-y) +-targets += $(extra-y) $(MAKECMDGOALS) $(always) +- +-# Linker scripts preprocessor (.lds.S -> .lds) +-# --------------------------------------------------------------------------- +-quiet_cmd_cpp_lds_S = LDS $@ +- cmd_cpp_lds_S = $(CPP) $(cpp_flags) -D__ASSEMBLY__ -o $@ $< +- +-$(obj)/%.lds: $(src)/%.lds.S FORCE +- $(call if_changed_dep,cpp_lds_S) +- +-# Build the compiled-in targets +-# --------------------------------------------------------------------------- +- +-# To build objects in subdirs, we need to descend into the directories +-$(sort $(subdir-obj-y)): $(subdir-ym) ; +- +-# +-# Rule to compile a set of .o files into one .o file +-# +-ifdef builtin-target +-quiet_cmd_link_o_target = LD $@ +-# If the list of objects to link is empty, just create an empty built-in.o +-cmd_link_o_target = $(if $(strip $(obj-y)),\ +- $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^),\ +- rm -f $@; $(AR) rcs $@) +- +-$(builtin-target): $(obj-y) FORCE +- $(call if_changed,link_o_target) +- +-targets += $(builtin-target) +-endif # builtin-target +- +-# +-# Rule to compile a set of .o files into 
one .a file +-# +-ifdef lib-target +-quiet_cmd_link_l_target = AR $@ +-cmd_link_l_target = rm -f $@; $(AR) $(EXTRA_ARFLAGS) rcs $@ $(lib-y) +- +-$(lib-target): $(lib-y) FORCE +- $(call if_changed,link_l_target) +- +-targets += $(lib-target) +-endif +- +-# +-# Rule to link composite objects +-# +-# Composite objects are specified in kbuild makefile as follows: +-# -objs := +-# or +-# -y := +-link_multi_deps = \ +-$(filter $(addprefix $(obj)/, \ +-$($(subst $(obj)/,,$(@:.o=-objs))) \ +-$($(subst $(obj)/,,$(@:.o=-y)))), $^) +- +-quiet_cmd_link_multi-y = LD $@ +-cmd_link_multi-y = $(LD) $(ld_flags) -r -o $@ $(link_multi_deps) +- +-quiet_cmd_link_multi-m = LD [M] $@ +-cmd_link_multi-m = $(LD) $(ld_flags) $(LDFLAGS_MODULE) -o $@ $(link_multi_deps) +- +-# We would rather have a list of rules like +-# foo.o: $(foo-objs) +-# but that's not so easy, so we rather make all composite objects depend +-# on the set of all their parts +-$(multi-used-y) : %.o: $(multi-objs-y) FORCE +- $(call if_changed,link_multi-y) +- +-$(multi-used-m) : %.o: $(multi-objs-m) FORCE +- $(call if_changed,link_multi-m) +- @{ echo $(@:.o=.ko); echo $(link_multi_deps); } > $(MODVERDIR)/$(@F:.o=.mod) +- +-targets += $(multi-used-y) $(multi-used-m) +- +- +-# Descending +-# --------------------------------------------------------------------------- +- +-PHONY += $(subdir-ym) +-$(subdir-ym): +- $(Q)$(MAKE) $(build)=$@ +- +-# Add FORCE to the prequisites of a target to force it to be always rebuilt. +-# --------------------------------------------------------------------------- +- +-PHONY += FORCE +- +-FORCE: +- +-# Read all saved command lines and dependencies for the $(targets) we +-# may be building above, using $(if_changed{,_dep}). As an +-# optimization, we don't need to read them if the target does not +-# exist, we will rebuild anyway in that case. +- +-targets := $(wildcard $(sort $(targets))) +-cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) +- +-ifneq ($(cmd_files),) +- include $(cmd_files) +-endif +- +- +-# Declare the contents of the .PHONY variable as phony. We keep that +-# information in a variable se we can use it in if_changed and friends. +- +-.PHONY: $(PHONY) +diff -Nurb linux-2.6.22-570/scripts/Makefile.modpost.orig linux-2.6.22-591/scripts/Makefile.modpost.orig +--- linux-2.6.22-570/scripts/Makefile.modpost.orig 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/scripts/Makefile.modpost.orig 1969-12-31 19:00:00.000000000 -0500 +@@ -1,132 +0,0 @@ +-# =========================================================================== +-# Module versions +-# =========================================================================== +-# +-# Stage one of module building created the following: +-# a) The individual .o files used for the module +-# b) A .o file which is the .o files above linked together +-# c) A .mod file in $(MODVERDIR)/, listing the name of the +-# the preliminary .o file, plus all .o files +- +-# Stage 2 is handled by this file and does the following +-# 1) Find all modules from the files listed in $(MODVERDIR)/ +-# 2) modpost is then used to +-# 3) create one .mod.c file pr. 
module +-# 4) create one Module.symvers file with CRC for all exported symbols +-# 5) compile all .mod.c files +-# 6) final link of the module to a file +- +-# Step 3 is used to place certain information in the module's ELF +-# section, including information such as: +-# Version magic (see include/vermagic.h for full details) +-# - Kernel release +-# - SMP is CONFIG_SMP +-# - PREEMPT is CONFIG_PREEMPT +-# - GCC Version +-# Module info +-# - Module version (MODULE_VERSION) +-# - Module alias'es (MODULE_ALIAS) +-# - Module license (MODULE_LICENSE) +-# - See include/linux/module.h for more details +- +-# Step 4 is solely used to allow module versioning in external modules, +-# where the CRC of each module is retrieved from the Module.symers file. +- +-# KBUILD_MODPOST_WARN can be set to avoid error out in case of undefined +-# symbols in the final module linking stage +-# KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules. +-# This is solely usefull to speed up test compiles +-PHONY := _modpost +-_modpost: __modpost +- +-include include/config/auto.conf +-include scripts/Kbuild.include +-include scripts/Makefile.lib +- +-kernelsymfile := $(objtree)/Module.symvers +-modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers +- +-# Step 1), find all modules listed in $(MODVERDIR)/ +-__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) +-modules := $(patsubst %.o,%.ko, $(wildcard $(__modules:.ko=.o))) +- +-# Stop after building .o files if NOFINAL is set. Makes compile tests quicker +-_modpost: $(if $(KBUILD_MODPOST_NOFINAL), $(modules:.ko:.o),$(modules)) +- +- +-# Step 2), invoke modpost +-# Includes step 3,4 +-quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules +- cmd_modpost = scripts/mod/modpost \ +- $(if $(CONFIG_MODVERSIONS),-m) \ +- $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a,) \ +- $(if $(KBUILD_EXTMOD),-i,-o) $(kernelsymfile) \ +- $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ +- $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ +- $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) +- +-PHONY += __modpost +-__modpost: $(modules:.ko=.o) FORCE +- $(call cmd,modpost) $(wildcard vmlinux) $(filter-out FORCE,$^) +- +-quiet_cmd_kernel-mod = MODPOST $@ +- cmd_kernel-mod = $(cmd_modpost) $(KBUILD_VMLINUX_OBJS) +- +-PHONY += vmlinux +-vmlinux: FORCE +- $(call cmd,kernel-mod) +- +-# Declare generated files as targets for modpost +-$(symverfile): __modpost ; +-$(modules:.ko=.mod.c): __modpost ; +- +- +-# Step 5), compile all *.mod.c files +- +-# modname is set to make c_flags define KBUILD_MODNAME +-modname = $(notdir $(@:.mod.o=)) +- +-quiet_cmd_cc_o_c = CC $@ +- cmd_cc_o_c = $(CC) $(c_flags) $(CFLAGS_MODULE) \ +- -c -o $@ $< +- +-$(modules:.ko=.mod.o): %.mod.o: %.mod.c FORCE +- $(call if_changed_dep,cc_o_c) +- +-targets += $(modules:.ko=.mod.o) +- +-# Step 6), final link of the modules +-quiet_cmd_ld_ko_o = LD [M] $@ +- cmd_ld_ko_o = $(LD) $(LDFLAGS) $(LDFLAGS_MODULE) -o $@ \ +- $(filter-out FORCE,$^) +- +-$(modules): %.ko :%.o %.mod.o FORCE +- $(call if_changed,ld_ko_o) +- +-targets += $(modules) +- +- +-# Add FORCE to the prequisites of a target to force it to be always rebuilt. +-# --------------------------------------------------------------------------- +- +-PHONY += FORCE +- +-FORCE: +- +-# Read all saved command lines and dependencies for the $(targets) we +-# may be building above, using $(if_changed{,_dep}). 
As an +-# optimization, we don't need to read them if the target does not +-# exist, we will rebuild anyway in that case. +- +-targets := $(wildcard $(sort $(targets))) +-cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) +- +-ifneq ($(cmd_files),) +- include $(cmd_files) +-endif +- +- +-# Declare the contents of the .PHONY variable as phony. We keep that +-# information in a variable se we can use it in if_changed and friends. +- +-.PHONY: $(PHONY) +diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-591/security/commoncap.c +--- linux-2.6.22-570/security/commoncap.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/security/commoncap.c 2007-12-21 15:36:13.000000000 -0500 +@@ -150,7 +150,7 @@ + + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid || + !cap_issubset (new_permitted, current->cap_permitted)) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + + if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { + if (!capable(CAP_SETUID)) { +diff -Nurb linux-2.6.22-570/security/dummy.c linux-2.6.22-591/security/dummy.c +--- linux-2.6.22-570/security/dummy.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/security/dummy.c 2007-12-21 15:36:13.000000000 -0500 +@@ -131,7 +131,7 @@ + static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) + { + if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) { +- current->mm->dumpable = suid_dumpable; ++ set_dumpable(current->mm, suid_dumpable); + + if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) { + bprm->e_uid = current->uid; +@@ -421,8 +421,12 @@ + + static int dummy_file_mmap (struct file *file, unsigned long reqprot, + unsigned long prot, +- unsigned long flags) ++ unsigned long flags, ++ unsigned long addr, ++ unsigned long addr_only) + { ++ if (addr < mmap_min_addr) ++ return -EACCES; + return 0; + } + +diff -Nurb linux-2.6.22-570/security/keys/request_key.c linux-2.6.22-591/security/keys/request_key.c +--- linux-2.6.22-570/security/keys/request_key.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/keys/request_key.c 2007-12-21 15:36:13.000000000 -0500 +@@ -108,7 +108,8 @@ + argv[i] = NULL; + + /* do it */ +- ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1); ++ ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, ++ UMH_WAIT_PROC); + + error_link: + key_put(keyring); +diff -Nurb linux-2.6.22-570/security/security.c linux-2.6.22-591/security/security.c +--- linux-2.6.22-570/security/security.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/security.c 2007-12-21 15:36:13.000000000 -0500 +@@ -24,6 +24,7 @@ + extern void security_fixup_ops(struct security_operations *ops); + + struct security_operations *security_ops; /* Initialized to NULL */ ++unsigned long mmap_min_addr; /* 0 means no protection */ + + static inline int verify(struct security_operations *ops) + { +@@ -176,4 +177,5 @@ + EXPORT_SYMBOL_GPL(unregister_security); + EXPORT_SYMBOL_GPL(mod_reg_security); + EXPORT_SYMBOL_GPL(mod_unreg_security); ++EXPORT_SYMBOL_GPL(mmap_min_addr); + EXPORT_SYMBOL(security_ops); +diff -Nurb linux-2.6.22-570/security/selinux/avc.c linux-2.6.22-591/security/selinux/avc.c +--- linux-2.6.22-570/security/selinux/avc.c 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/avc.c 2007-12-21 15:36:13.000000000 -0500 +@@ -586,7 +586,7 @@ + } + } + if (inode) +- audit_log_format(ab, " dev=%s ino=%ld", ++ audit_log_format(ab, " dev=%s ino=%lu", + 
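The file_mmap hook grows two arguments: the address being requested and an addr_only flag for callers that want nothing but the low-address floor enforced. dummy.c above is the minimal implementation; restated on its own with the signature used in this patch:

static int example_file_mmap(struct file *file, unsigned long reqprot,
                             unsigned long prot, unsigned long flags,
                             unsigned long addr, unsigned long addr_only)
{
        if (addr < mmap_min_addr)       /* refuse mappings below the floor */
                return -EACCES;
        return 0;                       /* otherwise no opinion */
}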
inode->i_sb->s_id, + inode->i_ino); + break; +@@ -832,6 +832,7 @@ + * @tsid: target security identifier + * @tclass: target security class + * @requested: requested permissions, interpreted based on @tclass ++ * @flags: AVC_STRICT or 0 + * @avd: access vector decisions + * + * Check the AVC to determine whether the @requested permissions are granted +@@ -847,6 +848,7 @@ + */ + int avc_has_perm_noaudit(u32 ssid, u32 tsid, + u16 tclass, u32 requested, ++ unsigned flags, + struct av_decision *avd) + { + struct avc_node *node; +@@ -874,7 +876,7 @@ + denied = requested & ~(p_ae->avd.allowed); + + if (!requested || denied) { +- if (selinux_enforcing) ++ if (selinux_enforcing || (flags & AVC_STRICT)) + rc = -EACCES; + else + if (node) +@@ -909,7 +911,7 @@ + struct av_decision avd; + int rc; + +- rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, &avd); ++ rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); + avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata); + return rc; + } +diff -Nurb linux-2.6.22-570/security/selinux/hooks.c linux-2.6.22-591/security/selinux/hooks.c +--- linux-2.6.22-570/security/selinux/hooks.c 2007-12-21 15:35:59.000000000 -0500 ++++ linux-2.6.22-591/security/selinux/hooks.c 2007-12-21 15:36:15.000000000 -0500 +@@ -111,6 +111,9 @@ + /* Original (dummy) security module. */ + static struct security_operations *original_ops = NULL; + ++/* Did we enable minimum mmap address checking? */ ++static int enabled_mmap_min_addr; ++ + /* Minimal support for a secondary security module, + just to allow the use of the dummy or capability modules. + The owlsm module can alternatively be used as a secondary +@@ -1593,6 +1596,7 @@ + rc = avc_has_perm_noaudit(tsec->sid, tsec->sid, + SECCLASS_CAPABILITY, + CAP_TO_MASK(CAP_SYS_ADMIN), ++ 0, + NULL); + + if (rc == 0) +@@ -2570,12 +2574,16 @@ + } + + static int selinux_file_mmap(struct file *file, unsigned long reqprot, +- unsigned long prot, unsigned long flags) ++ unsigned long prot, unsigned long flags, ++ unsigned long addr, unsigned long addr_only) + { +- int rc; ++ int rc = 0; ++ u32 sid = ((struct task_security_struct*)(current->security))->sid; + +- rc = secondary_ops->file_mmap(file, reqprot, prot, flags); +- if (rc) ++ if (addr < mmap_min_addr) ++ rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, ++ MEMPROTECT__MMAP_ZERO, NULL); ++ if (rc || addr_only) + return rc; + + if (selinux_checkreqprot) +@@ -3223,8 +3231,8 @@ + /* Range of port numbers used to automatically bind. + Need to determine whether we should perform a name_bind + permission check between the socket and the port number. 
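AVC_STRICT lets a caller insist on the real answer regardless of permissive mode: with the flag set, a cache denial returns -EACCES even when selinux_enforcing is 0, while existing callers pass 0 and keep the old behaviour. Usage under the new signature (ssid/tsid/tclass/requested stand in for a caller's values):

struct av_decision avd;
int rc;

rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested,
                          AVC_STRICT, &avd);
/* rc == -EACCES on denial even in permissive mode */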
*/ +-#define ip_local_port_range_0 sysctl_local_port_range[0] +-#define ip_local_port_range_1 sysctl_local_port_range[1] ++#define ip_local_port_range_0 (sk->sk_net->sysctl_local_port_range[0]) ++#define ip_local_port_range_1 (sk->sk_net->sysctl_local_port_range[1]) + + static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) + { +@@ -3968,6 +3976,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET); + } + +@@ -3979,6 +3991,10 @@ + const struct net_device *out, + int (*okfn)(struct sk_buff *)) + { ++ /* Only filter packets in the initial network namespace */ ++ if ((in?in:out)->nd_net != &init_net) ++ return NF_ACCEPT; ++ + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6); + } + +@@ -4628,7 +4644,7 @@ + if (p->ptrace & PT_PTRACED) { + error = avc_has_perm_noaudit(tsec->ptrace_sid, sid, + SECCLASS_PROCESS, +- PROCESS__PTRACE, &avd); ++ PROCESS__PTRACE, 0, &avd); + if (!error) + tsec->sid = sid; + task_unlock(p); +@@ -4910,6 +4926,16 @@ + sel_inode_cache = kmem_cache_create("selinux_inode_security", + sizeof(struct inode_security_struct), + 0, SLAB_PANIC, NULL, NULL); ++ ++ /* ++ * Tasks cannot mmap below this without the mmap_zero permission. ++ * If not enabled already, do so by setting it to 64KB. ++ */ ++ if (mmap_min_addr == 0) { ++ enabled_mmap_min_addr = 1; ++ mmap_min_addr = 65536; ++ } ++ + avc_init(); + + original_ops = secondary_ops = security_ops; +@@ -5060,6 +5086,10 @@ + selinux_disabled = 1; + selinux_enabled = 0; + ++ /* Disable minimum mmap address check only if we enabled it */ ++ if (enabled_mmap_min_addr) ++ mmap_min_addr = 0; ++ + /* Reset security_ops to the secondary module, dummy or capability. 
*/ + security_ops = secondary_ops; + +diff -Nurb linux-2.6.22-570/security/selinux/include/av_perm_to_string.h linux-2.6.22-591/security/selinux/include/av_perm_to_string.h +--- linux-2.6.22-570/security/selinux/include/av_perm_to_string.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/av_perm_to_string.h 2007-12-21 15:36:13.000000000 -0500 +@@ -158,3 +158,4 @@ + S_(SECCLASS_KEY, KEY__CREATE, "create") + S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind") + S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect") ++ S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero") +diff -Nurb linux-2.6.22-570/security/selinux/include/av_permissions.h linux-2.6.22-591/security/selinux/include/av_permissions.h +--- linux-2.6.22-570/security/selinux/include/av_permissions.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/av_permissions.h 2007-12-21 15:36:13.000000000 -0500 +@@ -823,3 +823,4 @@ + #define DCCP_SOCKET__NAME_BIND 0x00200000UL + #define DCCP_SOCKET__NODE_BIND 0x00400000UL + #define DCCP_SOCKET__NAME_CONNECT 0x00800000UL ++#define MEMPROTECT__MMAP_ZERO 0x00000001UL +diff -Nurb linux-2.6.22-570/security/selinux/include/avc.h linux-2.6.22-591/security/selinux/include/avc.h +--- linux-2.6.22-570/security/selinux/include/avc.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/avc.h 2007-12-21 15:36:13.000000000 -0500 +@@ -102,8 +102,10 @@ + u16 tclass, u32 requested, + struct av_decision *avd, int result, struct avc_audit_data *auditdata); + ++#define AVC_STRICT 1 /* Ignore permissive mode. */ + int avc_has_perm_noaudit(u32 ssid, u32 tsid, + u16 tclass, u32 requested, ++ unsigned flags, + struct av_decision *avd); + + int avc_has_perm(u32 ssid, u32 tsid, +diff -Nurb linux-2.6.22-570/security/selinux/include/class_to_string.h linux-2.6.22-591/security/selinux/include/class_to_string.h +--- linux-2.6.22-570/security/selinux/include/class_to_string.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/class_to_string.h 2007-12-21 15:36:13.000000000 -0500 +@@ -63,3 +63,4 @@ + S_("key") + S_(NULL) + S_("dccp_socket") ++ S_("memprotect") +diff -Nurb linux-2.6.22-570/security/selinux/include/flask.h linux-2.6.22-591/security/selinux/include/flask.h +--- linux-2.6.22-570/security/selinux/include/flask.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/flask.h 2007-12-21 15:36:13.000000000 -0500 +@@ -49,6 +49,7 @@ + #define SECCLASS_PACKET 57 + #define SECCLASS_KEY 58 + #define SECCLASS_DCCP_SOCKET 60 ++#define SECCLASS_MEMPROTECT 61 + + /* + * Security identifier indices for initial entities +diff -Nurb linux-2.6.22-570/security/selinux/include/security.h linux-2.6.22-591/security/selinux/include/security.h +--- linux-2.6.22-570/security/selinux/include/security.h 2007-07-08 19:32:17.000000000 -0400 ++++ linux-2.6.22-591/security/selinux/include/security.h 2007-12-21 15:36:13.000000000 -0500 +@@ -41,6 +41,7 @@ + + int security_load_policy(void * data, size_t len); + ++#define SEL_VEC_MAX 32 + struct av_decision { + u32 allowed; + u32 decided; +@@ -87,6 +88,9 @@ + + int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); + ++int security_get_classes(char ***classes, int *nclasses); ++int security_get_permissions(char *class, char ***perms, int *nperms); ++ + #define SECURITY_FS_USE_XATTR 1 /* use xattr */ + #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. 
+diff -Nurb linux-2.6.22-570/security/selinux/netif.c linux-2.6.22-591/security/selinux/netif.c
+--- linux-2.6.22-570/security/selinux/netif.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/netif.c	2007-12-21 15:36:15.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/notifier.h>
+ #include <linux/netdevice.h>
+ #include <linux/rcupdate.h>
++#include <net/net_namespace.h>
+
+ #include "security.h"
+ #include "objsec.h"
+@@ -234,6 +235,9 @@
+ {
+ 	struct net_device *dev = ptr;
+
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_DOWN)
+ 		sel_netif_kill(dev);
+
+diff -Nurb linux-2.6.22-570/security/selinux/netlink.c linux-2.6.22-591/security/selinux/netlink.c
+--- linux-2.6.22-570/security/selinux/netlink.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/netlink.c	2007-12-21 15:36:15.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/selinux_netlink.h>
++#include <net/net_namespace.h>
+
+ static struct sock *selnl;
+
+@@ -104,8 +105,8 @@
+
+ static int __init selnl_init(void)
+ {
+-	selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
+-				      THIS_MODULE);
++	selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX,
++				      SELNLGRP_MAX, NULL, NULL, THIS_MODULE);
+ 	if (selnl == NULL)
+ 		panic("SELinux: Cannot create netlink socket.");
+ 	netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);
+diff -Nurb linux-2.6.22-570/security/selinux/selinuxfs.c linux-2.6.22-591/security/selinux/selinuxfs.c
+--- linux-2.6.22-570/security/selinux/selinuxfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/selinuxfs.c	2007-12-21 15:36:13.000000000 -0500
+@@ -67,6 +67,10 @@
+ static int bool_num = 0;
+ static int *bool_pending_values = NULL;
+
++/* global data for classes */
++static struct dentry *class_dir = NULL;
++static unsigned long last_class_ino;
++
+ extern void selnl_notify_setenforce(int val);
+
+ /* Check whether a task is allowed to use a security operation. */
+@@ -106,6 +110,7 @@
+
+ #define SEL_INITCON_INO_OFFSET 0x01000000
+ #define SEL_BOOL_INO_OFFSET 0x02000000
++#define SEL_CLASS_INO_OFFSET 0x04000000
+ #define SEL_INO_MASK 0x00ffffff
+
+ #define TMPBUFLEN 12
+@@ -237,6 +242,11 @@
+
+ /* declaration for sel_write_load */
+ static int sel_make_bools(void);
++static int sel_make_classes(void);
++
++/* declaration for sel_make_class_dirs */
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++			unsigned long *ino);
+
+ static ssize_t sel_read_mls(struct file *filp, char __user *buf,
+ 				size_t count, loff_t *ppos)
+@@ -287,10 +297,18 @@
+ 		goto out;
+
+ 	ret = sel_make_bools();
++	if (ret) {
++		length = ret;
++		goto out1;
++	}
++
++	ret = sel_make_classes();
+ 	if (ret)
+ 		length = ret;
+ 	else
+ 		length = count;
++
++out1:
+ 	audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
+ 		"policy loaded auid=%u",
+ 		audit_get_loginuid(current->audit_context));
+@@ -940,9 +958,8 @@
+ 	.write = sel_commit_bools_write,
+ };
+
+-/* delete booleans - partial revoke() from
+- * fs/proc/generic.c proc_kill_inodes */
+-static void sel_remove_bools(struct dentry *de)
++/* partial revoke() from fs/proc/generic.c proc_kill_inodes */
++static void sel_remove_entries(struct dentry *de)
+ {
+ 	struct list_head *p, *node;
+ 	struct super_block *sb = de->d_sb;
+@@ -998,7 +1015,7 @@
+ 	kfree(bool_pending_values);
+ 	bool_pending_values = NULL;
+
+-	sel_remove_bools(dir);
++	sel_remove_entries(dir);
+
+ 	if (!(page = (char*)get_zeroed_page(GFP_KERNEL)))
+ 		return -ENOMEM;
+@@ -1048,7 +1065,7 @@
+ 	return ret;
+ err:
+ 	kfree(values);
+-	sel_remove_bools(dir);
++	sel_remove_entries(dir);
+ 	ret = -ENOMEM;
+ 	goto out;
+ }
+@@ -1294,7 +1311,227 @@
+ 	return ret;
+ }
+
+-static int sel_make_dir(struct inode *dir, struct dentry *dentry)
++static inline unsigned int sel_div(unsigned long a, unsigned long b)
++{
++	return a / b - (a % b < 0);
++}
++
++static inline unsigned long sel_class_to_ino(u16 class)
++{
++	return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u16 sel_ino_to_class(unsigned long ino)
++{
++	return sel_div(ino & SEL_INO_MASK, SEL_VEC_MAX + 1);
++}
++
++static inline unsigned long sel_perm_to_ino(u16 class, u32 perm)
++{
++	return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u32 sel_ino_to_perm(unsigned long ino)
++{
++	return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
++}
++
++static ssize_t sel_read_class(struct file * file, char __user *buf,
++				size_t count, loff_t *ppos)
++{
++	ssize_t rc, len;
++	char *page;
++	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++	page = (char *)__get_free_page(GFP_KERNEL);
++	if (!page) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino));
++	rc = simple_read_from_buffer(buf, count, ppos, page, len);
++	free_page((unsigned long)page);
++out:
++	return rc;
++}
++
++static const struct file_operations sel_class_ops = {
++	.read = sel_read_class,
++};
++
++static ssize_t sel_read_perm(struct file * file, char __user *buf,
++				size_t count, loff_t *ppos)
++{
++	ssize_t rc, len;
++	char *page;
++	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++	page = (char *)__get_free_page(GFP_KERNEL);
++	if (!page) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	len = snprintf(page, PAGE_SIZE,"%d", sel_ino_to_perm(ino));
++	rc = simple_read_from_buffer(buf, count, ppos, page, len);
++	free_page((unsigned long)page);
++out:
++	return rc;
++}
++
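The helpers just added carve the inode space above SEL_CLASS_INO_OFFSET into blocks of SEL_VEC_MAX + 1 = 33 numbers per class: slot 0 of a block backs the class's "index" file and slots 1..SEL_VEC_MAX its permission files, which is why both values are 1-indexed and a class directory holds at most SEL_VEC_MAX permissions. Note that sel_div(a, b) reduces to plain truncating division here, since a % b < 0 is always false for unsigned operands. A standalone round-trip check of the encoding (a userspace re-statement of the helpers above, not kernel code):

#include <assert.h>
#include <stdio.h>

typedef unsigned short u16;
typedef unsigned int u32;

#define SEL_CLASS_INO_OFFSET	0x04000000
#define SEL_INO_MASK		0x00ffffff
#define SEL_VEC_MAX		32

/* class block base: (class * 33) | offset */
static unsigned long sel_class_to_ino(u16 class)
{
	return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET;
}

/* recover the class from any inode in its block */
static u16 sel_ino_to_class(unsigned long ino)
{
	return (ino & SEL_INO_MASK) / (SEL_VEC_MAX + 1);
}

/* perm slot p (1..32) within the class block */
static unsigned long sel_perm_to_ino(u16 class, u32 perm)
{
	return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
}

static u32 sel_ino_to_perm(unsigned long ino)
{
	return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
}

int main(void)
{
	u16 class;
	u32 perm;

	/* Round-trip every class/perm pair in a sample range: perm 0 is the
	 * class's "index" node, perms 1..SEL_VEC_MAX are permission files. */
	for (class = 1; class <= 100; class++) {
		assert(sel_ino_to_class(sel_class_to_ino(class)) == class);
		for (perm = 1; perm <= SEL_VEC_MAX; perm++) {
			unsigned long ino = sel_perm_to_ino(class, perm);
			assert(sel_ino_to_class(ino) == class);
			assert(sel_ino_to_perm(ino) == perm);
		}
	}
	printf("class/perm <-> inode mapping round-trips\n");
	return 0;
}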
++static const struct file_operations sel_perm_ops = {
++	.read = sel_read_perm,
++};
++
++static int sel_make_perm_files(char *objclass, int classvalue,
++				struct dentry *dir)
++{
++	int i, rc = 0, nperms;
++	char **perms;
++
++	rc = security_get_permissions(objclass, &perms, &nperms);
++	if (rc)
++		goto out;
++
++	for (i = 0; i < nperms; i++) {
++		struct inode *inode;
++		struct dentry *dentry;
++
++		dentry = d_alloc_name(dir, perms[i]);
++		if (!dentry) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++
++		inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++		if (!inode) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++		inode->i_fop = &sel_perm_ops;
++		/* i+1 since perm values are 1-indexed */
++		inode->i_ino = sel_perm_to_ino(classvalue, i+1);
++		d_add(dentry, inode);
++	}
++
++out1:
++	for (i = 0; i < nperms; i++)
++		kfree(perms[i]);
++	kfree(perms);
++out:
++	return rc;
++}
++
++static int sel_make_class_dir_entries(char *classname, int index,
++				struct dentry *dir)
++{
++	struct dentry *dentry = NULL;
++	struct inode *inode = NULL;
++	int rc;
++
++	dentry = d_alloc_name(dir, "index");
++	if (!dentry) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++	if (!inode) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	inode->i_fop = &sel_class_ops;
++	inode->i_ino = sel_class_to_ino(index);
++	d_add(dentry, inode);
++
++	dentry = d_alloc_name(dir, "perms");
++	if (!dentry) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	rc = sel_make_dir(dir->d_inode, dentry, &last_class_ino);
++	if (rc)
++		goto out;
++
++	rc = sel_make_perm_files(classname, index, dentry);
++
++out:
++	return rc;
++}
++
++static void sel_remove_classes(void)
++{
++	struct list_head *class_node;
++
++	list_for_each(class_node, &class_dir->d_subdirs) {
++		struct dentry *class_subdir = list_entry(class_node,
++					struct dentry, d_u.d_child);
++		struct list_head *class_subdir_node;
++
++		list_for_each(class_subdir_node, &class_subdir->d_subdirs) {
++			struct dentry *d = list_entry(class_subdir_node,
++						struct dentry, d_u.d_child);
++
++			if (d->d_inode)
++				if (d->d_inode->i_mode & S_IFDIR)
++					sel_remove_entries(d);
++		}
++
++		sel_remove_entries(class_subdir);
++	}
++
++	sel_remove_entries(class_dir);
++}
++
++static int sel_make_classes(void)
++{
++	int rc = 0, nclasses, i;
++	char **classes;
++
++	/* delete any existing entries */
++	sel_remove_classes();
++
++	rc = security_get_classes(&classes, &nclasses);
++	if (rc < 0)
++		goto out;
++
++	/* +2 since classes are 1-indexed */
++	last_class_ino = sel_class_to_ino(nclasses+2);
++
++	for (i = 0; i < nclasses; i++) {
++		struct dentry *class_name_dir;
++
++		class_name_dir = d_alloc_name(class_dir, classes[i]);
++		if (!class_name_dir) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++
++		rc = sel_make_dir(class_dir->d_inode, class_name_dir,
++				&last_class_ino);
++		if (rc)
++			goto out1;
++
++		/* i+1 since class values are 1-indexed */
++		rc = sel_make_class_dir_entries(classes[i], i+1,
++				class_name_dir);
++		if (rc)
++			goto out1;
++	}
++
++out1:
++	for (i = 0; i < nclasses; i++)
++		kfree(classes[i]);
++	kfree(classes);
++out:
++	return rc;
++}
++
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++			unsigned long *ino)
+ {
+ 	int ret = 0;
+ 	struct inode *inode;
+@@ -1306,7 +1543,7 @@
+ 	}
+ 	inode->i_op = &simple_dir_inode_operations;
+ 	inode->i_fop = &simple_dir_operations;
+-	inode->i_ino = ++sel_last_ino;
++	inode->i_ino = ++(*ino);
+ 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+ 	inc_nlink(inode);
+ 	d_add(dentry, inode);
+@@ -1352,7 +1589,7 @@
+ 		goto err;
+ 	}
+
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+
+@@ -1385,7 +1622,7 @@
+ 		goto err;
+ 	}
+
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+
+@@ -1399,7 +1636,7 @@
+ 		goto err;
+ 	}
+
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+
+@@ -1407,6 +1644,18 @@
+ 	if (ret)
+ 		goto err;
+
++	dentry = d_alloc_name(sb->s_root, "class");
++	if (!dentry) {
++		ret = -ENOMEM;
++		goto err;
++	}
++
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
++	if (ret)
++		goto err;
++
++	class_dir = dentry;
++
+ out:
+ 	return ret;
+ err:
+diff -Nurb linux-2.6.22-570/security/selinux/ss/policydb.c linux-2.6.22-591/security/selinux/ss/policydb.c
+--- linux-2.6.22-570/security/selinux/ss/policydb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/ss/policydb.c	2007-12-21 15:36:13.000000000 -0500
+@@ -21,6 +21,7 @@
+  */
+
+ #include <linux/kernel.h>
++#include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/errno.h>
+@@ -598,6 +599,7 @@
+ 	struct range_trans *rt, *lrt = NULL;
+
+ 	for (i = 0; i < SYM_NUM; i++) {
++		cond_resched();
+ 		hashtab_map(p->symtab[i].table, destroy_f[i], NULL);
+ 		hashtab_destroy(p->symtab[i].table);
+ 	}
+@@ -612,6 +614,7 @@
+ 	avtab_destroy(&p->te_avtab);
+
+ 	for (i = 0; i < OCON_NUM; i++) {
++		cond_resched();
+ 		c = p->ocontexts[i];
+ 		while (c) {
+ 			ctmp = c;
+@@ -623,6 +626,7 @@
+
+ 	g = p->genfs;
+ 	while (g) {
++		cond_resched();
+ 		kfree(g->fstype);
+ 		c = g->head;
+ 		while (c) {
+@@ -639,18 +643,21 @@
+ 	cond_policydb_destroy(p);
+
+ 	for (tr = p->role_tr; tr; tr = tr->next) {
++		cond_resched();
+ 		kfree(ltr);
+ 		ltr = tr;
+ 	}
+ 	kfree(ltr);
+
+ 	for (ra = p->role_allow; ra; ra = ra -> next) {
++		cond_resched();
+ 		kfree(lra);
+ 		lra = ra;
+ 	}
+ 	kfree(lra);
+
+ 	for (rt = p->range_tr; rt; rt = rt -> next) {
++		cond_resched();
+ 		if (lrt) {
+ 			ebitmap_destroy(&lrt->target_range.level[0].cat);
+ 			ebitmap_destroy(&lrt->target_range.level[1].cat);
+diff -Nurb linux-2.6.22-570/security/selinux/ss/services.c linux-2.6.22-591/security/selinux/ss/services.c
+--- linux-2.6.22-570/security/selinux/ss/services.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-591/security/selinux/ss/services.c	2007-12-21 15:36:13.000000000 -0500
+@@ -1587,19 +1587,18 @@
+ 				u32 *nel)
+ {
+ 	struct context *fromcon, usercon;
+-	u32 *mysids, *mysids2, sid;
++	u32 *mysids = NULL, *mysids2, sid;
+ 	u32 mynel = 0, maxnel = SIDS_NEL;
+ 	struct user_datum *user;
+ 	struct role_datum *role;
+-	struct av_decision avd;
+ 	struct ebitmap_node *rnode, *tnode;
+ 	int rc = 0, i, j;
+
+-	if (!ss_initialized) {
+ 	*sids = NULL;
+ 	*nel = 0;
++
++	if (!ss_initialized)
+ 		goto out;
+-	}
+
+ 	POLICY_RDLOCK;
+
+@@ -1635,17 +1634,9 @@
+ 		if (mls_setup_user_range(fromcon, user, &usercon))
+ 			continue;
+
+-		rc = context_struct_compute_av(fromcon, &usercon,
+-					       SECCLASS_PROCESS,
+-					       PROCESS__TRANSITION,
+-					       &avd);
+-		if (rc || !(avd.allowed & PROCESS__TRANSITION))
+-			continue;
+ 		rc = sidtab_context_to_sid(&sidtab, &usercon, &sid);
+-		if (rc) {
+-			kfree(mysids);
++		if (rc)
+ 			goto out_unlock;
+-		}
+ 		if (mynel < maxnel) {
+ 			mysids[mynel++] = sid;
+ 		} else {
+ 			maxnel += SIDS_NEL;
+ 			mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC);
+ 			if (!mysids2) {
+ 				rc = -ENOMEM;
+-				kfree(mysids);
+ 				goto out_unlock;
+ 			}
+ 			memcpy(mysids2, mysids, mynel * sizeof(*mysids2));
+@@ -1664,11 +1654,32 @@
+ 		}
+ 	}
+
+-	*sids = mysids;
+-	*nel = mynel;
+-
+ out_unlock:
+ 	POLICY_RDUNLOCK;
++	if (rc || !mynel) {
++		kfree(mysids);
++		goto out;
++	}
++
++	mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL);
++	if (!mysids2) {
++		rc = -ENOMEM;
++		kfree(mysids);
++		goto out;
++	}
++	for (i = 0, j = 0; i < mynel; i++) {
++		rc = avc_has_perm_noaudit(fromsid, mysids[i],
++					  SECCLASS_PROCESS,
++					  PROCESS__TRANSITION, AVC_STRICT,
++					  NULL);
++		if (!rc)
++			mysids2[j++] = mysids[i];
++		cond_resched();
++	}
++	rc = 0;
++	kfree(mysids);
++	*sids = mysids2;
++	*nel = j;
+ out:
+ 	return rc;
+ }
+@@ -1996,6 +2007,101 @@
+ 	return rc;
+ }
+
++static int get_classes_callback(void *k, void *d, void *args)
++{
++	struct class_datum *datum = d;
++	char *name = k, **classes = args;
++	int value = datum->value - 1;
++
++	classes[value] = kstrdup(name, GFP_ATOMIC);
++	if (!classes[value])
++		return -ENOMEM;
++
++	return 0;
++}
++
++int security_get_classes(char ***classes, int *nclasses)
++{
++	int rc = -ENOMEM;
++
++	POLICY_RDLOCK;
++
++	*nclasses = policydb.p_classes.nprim;
++	*classes = kcalloc(*nclasses, sizeof(*classes), GFP_ATOMIC);
++	if (!*classes)
++		goto out;
++
++	rc = hashtab_map(policydb.p_classes.table, get_classes_callback,
++			*classes);
++	if (rc < 0) {
++		int i;
++		for (i = 0; i < *nclasses; i++)
++			kfree((*classes)[i]);
++		kfree(*classes);
++	}
++
++out:
++	POLICY_RDUNLOCK;
++	return rc;
++}
++
++static int get_permissions_callback(void *k, void *d, void *args)
++{
++	struct perm_datum *datum = d;
++	char *name = k, **perms = args;
++	int value = datum->value - 1;
++
++	perms[value] = kstrdup(name, GFP_ATOMIC);
++	if (!perms[value])
++		return -ENOMEM;
++
++	return 0;
++}
++
++int security_get_permissions(char *class, char ***perms, int *nperms)
++{
++	int rc = -ENOMEM, i;
++	struct class_datum *match;
++
++	POLICY_RDLOCK;
++
++	match = hashtab_search(policydb.p_classes.table, class);
++	if (!match) {
++		printk(KERN_ERR "%s: unrecognized class %s\n",
++			__FUNCTION__, class);
++		rc = -EINVAL;
++		goto out;
++	}
++
++	*nperms = match->permissions.nprim;
++	*perms = kcalloc(*nperms, sizeof(*perms), GFP_ATOMIC);
++	if (!*perms)
++		goto out;
++
++	if (match->comdatum) {
++		rc = hashtab_map(match->comdatum->permissions.table,
++				get_permissions_callback, *perms);
++		if (rc < 0)
++			goto err;
++	}
++
++	rc = hashtab_map(match->permissions.table, get_permissions_callback,
++			*perms);
++	if (rc < 0)
++		goto err;
++
++out:
++	POLICY_RDUNLOCK;
++	return rc;
++
++err:
++	POLICY_RDUNLOCK;
++	for (i = 0; i < *nperms; i++)
++		kfree((*perms)[i]);
++	kfree(*perms);
++	return rc;
++}
++
+ struct selinux_audit_rule {
+ 	u32 au_seqno;
+ 	struct context au_ctxt;
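The services.c rework above turns security_get_user_sids() into two passes: candidate SIDs are collected under POLICY_RDLOCK, and only after unlocking is each one checked for PROCESS__TRANSITION with AVC_STRICT, with cond_resched() between checks so long user lists cannot monopolize the CPU (which is also why the kfree(mysids) calls moved out of the locked region). A compilable userspace sketch of the second pass; allowed() is a hypothetical stand-in for avc_has_perm_noaudit(..., AVC_STRICT, NULL):

#include <assert.h>
#include <errno.h>
#include <stdlib.h>

typedef unsigned int u32;

/* hypothetical stand-in for the AVC_STRICT transition check */
static int allowed(u32 fromsid, u32 tosid)
{
	(void)fromsid;
	return tosid % 2 == 0 ? 0 : -EACCES;	/* pretend odd SIDs are denied */
}

/* second pass: copy only the permitted SIDs into a right-sized array */
static int filter_sids(u32 fromsid, const u32 *mysids, u32 mynel,
		       u32 **sids, u32 *nel)
{
	u32 *mysids2;
	u32 i, j;

	mysids2 = calloc(mynel, sizeof(*mysids2));
	if (!mysids2)
		return -ENOMEM;
	for (i = 0, j = 0; i < mynel; i++) {
		if (!allowed(fromsid, mysids[i]))
			mysids2[j++] = mysids[i];
		/* the kernel version calls cond_resched() here */
	}
	*sids = mysids2;
	*nel = j;
	return 0;
}

int main(void)
{
	u32 candidates[] = { 2, 3, 4, 5, 6 };
	u32 *sids, nel;

	assert(filter_sids(1, candidates, 5, &sids, &nel) == 0);
	assert(nel == 3 && sids[0] == 2 && sids[1] == 4 && sids[2] == 6);
	free(sids);
	return 0;
}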
+diff -Nurb linux-2.6.22-570/toapply linux-2.6.22-591/toapply
+--- linux-2.6.22-570/toapply	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/toapply	2007-12-21 15:36:15.000000000 -0500
+@@ -0,0 +1,51 @@
++cat ../broken-out/cpuidle-fix-the-uninitialized-variable-in-sysfs-routine.patch | patch -p1
++cat ../broken-out/cpuidle-make-cpuidle-sysfs-driver-governor-switch-off-by-default.patch | patch -p1
++cat ../broken-out/acpi-video-dont-export-sysfs-backlight-interface-if-query-_bcl-fail.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rules.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-root-sysfs_dirent-root-dentry-association.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link-fix.patch | patch -p1
++cat ../broken-out/git-scsi-misc-vs-greg-sysfs-stuff.patch | patch -p1
++cat ../broken-out/gregkh-usb-usb-cxacru-cleanup-sysfs-attribute-code.patch | patch -p1
++cat ../broken-out/gregkh-usb-usb-add-iad-support-to-usbfs-and-sysfs.patch | patch -p1
++cat ../broken-out/x86_64-mm-xen-add-the-xenbus-sysfs-and-virtual-device-hotplug-driver.patch | patch -p1
++cat ../broken-out/drivers-edac-mc-sysfs-add-missing-mem-types.patch | patch -p1
++cat ../broken-out/drivers-edac-edac_device-sysfs-cleanup.patch | patch -p1
++cat ../broken-out/drivers-edac-add-device-sysfs-attributes.patch | patch -p1
+diff -Nurb linux-2.6.22-570/trellis-mm1-1.sh linux-2.6.22-591/trellis-mm1-1.sh
+--- linux-2.6.22-570/trellis-mm1-1.sh	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-591/trellis-mm1-1.sh	2007-12-21 15:36:13.000000000 -0500
+@@ -0,0 +1,142 @@
++cat ../broken-out/origin.patch | patch -p1
++cat ../broken-out/ioatdma-fix-section-mismatches.patch | patch -p1
++cat ../broken-out/introduce-fixed-sys_sync_file_range2-syscall-implement-on.patch | patch -p1
++cat ../broken-out/git-acpi.patch | patch -p1
++cat ../broken-out/agk-dm-dm-netlink.patch | patch -p1
++cat ../broken-out/git-powerpc.patch | patch -p1
++cat ../broken-out/make-drivers-char-hvc_consoleckhvcd-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1
++cat ../broken-out/gregkh-driver-driver-core-make-devt_attr-and-uevent_attr-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1
++cat ../broken-out/gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/revert-gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1
++cat ../broken-out/git-md-accel.patch | patch -p1
++cat ../broken-out/git-mmc.patch | patch -p1
++cat ../broken-out/git-net.patch | patch -p1
++cat ../broken-out/tun-tap-allow-group-ownership-of-tun-tap-devices.patch | patch -p1
++cat ../broken-out/git-nfs.patch | patch -p1
++cat ../broken-out/git-ocfs2.patch | patch -p1
++cat ../broken-out/git-selinux.patch | patch -p1
++cat ../broken-out/revert-acpi-change-for-scsi.patch | patch -p1
++cat ../broken-out/git-scsi-misc.patch | patch -p1
++cat ../broken-out/git-unionfs.patch | patch -p1
++cat ../broken-out/x86_64-mm-unwinder.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-kstrndup.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-argv_split.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-split-usermodehelper-setup-from-execution.patch | patch -p1
++cat ../broken-out/x86_64-mm-add-common-orderly_poweroff.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-tidy-up-usermode-helper-waiting-a-bit.patch | patch -p1
++cat ../broken-out/x86_64-mm-xen-add-the-xen-virtual-network-device-driver.patch | patch -p1
++cat ../broken-out/i386-show-unhandled-signals.patch | patch -p1
++cat ../broken-out/git-kgdb.patch | patch -p1
++cat ../broken-out/hugetlb-remove-unnecessary-nid-initialization.patch | patch -p1
++cat ../broken-out/mm-alloc_large_system_hash-can-free-some-memory-for.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings-fix.patch | patch -p1
++cat ../broken-out/mm-merge-populate-and-nopage-into-fault-fixes-nonlinear.patch | patch -p1
++cat ../broken-out/add-a-bitmap-that-is-used-to-track-flags-affecting-a-block-of-pages.patch | patch -p1
++cat ../broken-out/add-__gfp_movable-for-callers-to-flag-allocations-from-high-memory-that-may-be-migrated.patch | patch -p1
++cat ../broken-out/split-the-free-lists-for-movable-and-unmovable-allocations.patch | patch -p1
++cat ../broken-out/choose-pages-from-the-per-cpu-list-based-on-migration-type.patch | patch -p1
++cat ../broken-out/add-a-configure-option-to-group-pages-by-mobility.patch | patch -p1
++cat ../broken-out/move-free-pages-between-lists-on-steal.patch | patch -p1
++cat ../broken-out/group-short-lived-and-reclaimable-kernel-allocations.patch | patch -p1
++cat ../broken-out/allow-huge-page-allocations-to-use-gfp_high_movable.patch | patch -p1
++cat ../broken-out/maps2-uninline-some-functions-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-eliminate-the-pmd_walker-struct-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-remove-vma-from-args-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-propagate-errors-from-callback-in-page-walker.patch | patch -p1
++cat ../broken-out/maps2-add-callbacks-for-each-level-to-page-walker.patch | patch -p1
++cat ../broken-out/maps2-move-the-page-walker-code-to-lib.patch | patch -p1
++cat ../broken-out/maps2-simplify-interdependence-of-proc-pid-maps-and-smaps.patch | patch -p1
++cat ../broken-out/maps2-move-clear_refs-code-to-task_mmuc.patch | patch -p1
++cat ../broken-out/maps2-regroup-task_mmu-by-interface.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-smaps-optional-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-add-proc-pid-pagemap-interface.patch | patch -p1
++cat ../broken-out/have-kswapd-keep-a-minimum-order-free-other-than-order-0.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix-2.patch | patch -p1
++cat ../broken-out/uml-use-get_free_pages-to-allocate-kernel-stacks.patch | patch -p1
++cat ../broken-out/add-generic-exit-time-stack-depth-checking-to-config_debug_stack_usage.patch | patch -p1
++cat ../broken-out/cpuset-remove-sched-domain-hooks-from-cpusets.patch | patch -p1
++cat ../broken-out/clone-flag-clone_parent_tidptr-leaves-invalid-results-in-memory.patch | patch -p1
++cat ../broken-out/use-boot-based-time-for-process-start-time-and-boot-time.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock-fix.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters-fix.patch | patch -p1
++cat ../broken-out/remove-config_uts_ns-and-config_ipc_ns.patch | patch -p1
++cat ../broken-out/user-namespace-add-the-framework.patch | patch -p1
++cat ../broken-out/user-namespace-add-unshare.patch | patch -p1
++cat ../broken-out/mm-fix-create_new_namespaces-return-value.patch | patch -p1
++cat ../broken-out/add-a-kmem_cache-for-nsproxy-objects.patch | patch -p1
++cat ../broken-out/namespace-ensure-clone_flags-are-always-stored-in-an-unsigned-long.patch | patch -p1
++cat ../broken-out/sysctlc-add-text-telling-people-to-use-ctl_unnumbered.patch | patch -p1
++cat ../broken-out/proper-prototype-for-proc_nr_files.patch | patch -p1
++cat ../broken-out/move-seccomp-from-proc-to-a-prctl.patch | patch -p1
++cat ../broken-out/uninline-check_signature.patch | patch -p1
++cat ../broken-out/revoke-core-code.patch | patch -p1
++cat ../broken-out/revoke-wire-up-i386-system-calls.patch | patch -p1
++cat ../broken-out/fallocate-implementation-on-i86-x86_64-and-powerpc.patch | patch -p1
++cat ../broken-out/coredump-masking-reimplementation-of-dumpable-using-two-flags.patch | patch -p1
++cat ../broken-out/coredump-masking-add-an-interface-for-core-dump-filter.patch | patch -p1
++cat ../broken-out/cpuset-zero-malloc-revert-the-old-cpuset-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix-for-bad-lock-balance-in-containers.patch | patch -p1
++cat ../broken-out/containersv10-example-cpu-accounting-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix-2.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-container_clone-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-make-cpusets-a-client-of-containers.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships-cpuset-zero-malloc-fix-for-new-containers.patch | patch -p1
++cat ../broken-out/containersv10-simple-debug-info-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-support-for-automatic-userspace-release-agents.patch | patch -p1
++cat ../broken-out/containers-implement-subsys-post_clone.patch | patch -p1
++cat ../broken-out/containers-implement-namespace-tracking-subsystem-v3.patch | patch -p1
++cat ../broken-out/keep-track-of-network-interface-renaming.patch | patch -p1
++cat ../broken-out/v2.6.22-rc6-mm1-netns23.patch | patch -p1